mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
performance: R&R - optimize appendLaunchKernel family
Related-To: NEO-16017 Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
f13c18be8c
commit
d68cfab86c
@@ -328,6 +328,39 @@ KernelMutableState::~KernelMutableState() {
|
||||
}
|
||||
}
|
||||
|
||||
// Initializes the state shared between a kernel and its dependent clones.
//
// module: owning module; may be nullptr, in which case nothing is initialized
//         here and the members keep their declared defaults.
//
// For a non-null module, implicitArgsVersion is first taken from the device's
// GfxCoreHelper and then overridden by the program's
// indirectAccessBufferMajorVersion when that value is greater than zero.
KernelSharedState::KernelSharedState(Module *module) {
    if (nullptr == module) {
        return;
    }

    this->module = module;
    this->implicitArgsVersion = module->getDevice()->getGfxCoreHelper().getImplicitArgsVersion();

    // Module -> ModuleImp is a downcast within one class hierarchy, so use the
    // checked named cast (static_cast) rather than reinterpret_cast.
    ModuleImp *moduleImp = static_cast<ModuleImp *>(this->module);
    const auto &programInfo = moduleImp->getTranslationUnit()->programInfo;
    if (programInfo.indirectAccessBufferMajorVersion > 0) {
        // a version recorded in the program takes precedence over the hardware default
        this->implicitArgsVersion = programInfo.indirectAccessBufferMajorVersion;
    }
}
|
||||
|
||||
// Tears down the shared kernel state: frees the private-memory allocation,
// flushes and frees the printf buffer, and finally lets the root device
// environment's assert handler run for kernels flagged with usesAssert.
// Without a module there is no device to release anything through, so the
// destructor is a no-op in that case.
KernelSharedState::~KernelSharedState() {
    if (this->module == nullptr) {
        return;
    }

    // Looked up on demand so the device chain is only walked on the paths that need it
    auto neoDevice = [this]() { return this->module->getDevice()->getNEODevice(); };

    if (this->privateMemoryGraphicsAllocation != nullptr) {
        neoDevice()->getMemoryManager()->freeGraphicsMemory(this->privateMemoryGraphicsAllocation);
    }

    const auto *immData = this->kernelImmData;
    if (this->printfBuffer != nullptr) {
        // virtual functions must not be called from a destructor, so printOutput is invoked directly
        PrintfHandler::printOutput(immData, this->printfBuffer, this->module->getDevice(), false);
        neoDevice()->getMemoryManager()->freeGraphicsMemory(this->printfBuffer);
    }

    if (immData == nullptr || !immData->getDescriptor().kernelAttributes.flags.usesAssert) {
        return;
    }
    if (neoDevice()->getRootDeviceEnvironment().assertHandler.get()) {
        neoDevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort();
    }
}
|
||||
|
||||
ze_result_t KernelImp::getBaseAddress(uint64_t *baseAddress) {
|
||||
if (baseAddress) {
|
||||
auto gmmHelper = module->getDevice()->getNEODevice()->getGmmHelper();
|
||||
@@ -337,41 +370,6 @@ ze_result_t KernelImp::getBaseAddress(uint64_t *baseAddress) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
KernelImp::KernelImp(Module *module) : module(module), sharedState{std::make_shared<KernelSharedState>()} {
|
||||
if (module) {
|
||||
this->sharedState->implicitArgsVersion = module->getDevice()->getGfxCoreHelper().getImplicitArgsVersion();
|
||||
ModuleImp *moduleImp = reinterpret_cast<ModuleImp *>(this->module);
|
||||
if (moduleImp->getTranslationUnit()->programInfo.indirectAccessBufferMajorVersion > 0) {
|
||||
this->sharedState->implicitArgsVersion = moduleImp->getTranslationUnit()->programInfo.indirectAccessBufferMajorVersion;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KernelImp::KernelImp() : KernelImp(static_cast<Module *>(nullptr)) {}
|
||||
|
||||
KernelImp::~KernelImp() {
|
||||
/* Only original instance should release resources shared with clones */
|
||||
if (nullptr != cloneOrigin) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (nullptr != sharedState->privateMemoryGraphicsAllocation) {
|
||||
module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(sharedState->privateMemoryGraphicsAllocation);
|
||||
}
|
||||
|
||||
const auto *kernelImmData = this->sharedState->kernelImmData;
|
||||
if (this->sharedState->printfBuffer != nullptr) {
|
||||
// not allowed to call virtual function on destructor, so calling printOutput directly
|
||||
PrintfHandler::printOutput(kernelImmData, this->sharedState->printfBuffer, module->getDevice(), false);
|
||||
module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(this->sharedState->printfBuffer);
|
||||
}
|
||||
|
||||
if (kernelImmData && kernelImmData->getDescriptor().kernelAttributes.flags.usesAssert && module &&
|
||||
module->getDevice()->getNEODevice()->getRootDeviceEnvironment().assertHandler.get()) {
|
||||
module->getDevice()->getNEODevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort();
|
||||
}
|
||||
}
|
||||
|
||||
ze_result_t KernelImp::getKernelProgramBinary(size_t *kernelSize, char *pKernelBinary) {
|
||||
size_t kSize = static_cast<size_t>(this->getImmutableData()->getKernelInfo()->heapInfo.kernelHeapSize);
|
||||
if (nullptr == pKernelBinary) {
|
||||
@@ -1330,21 +1328,19 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
std::unique_ptr<KernelImp> KernelImp::cloneWithStateOverride(const KernelMutableState *stateOverride) {
|
||||
std::unique_ptr<KernelImp> KernelImp::makeDependentClone() {
|
||||
DEBUG_BREAK_IF(nullptr == this->ownedSharedState.get());
|
||||
|
||||
auto *device{static_cast<DeviceImp *>(this->module->getDevice())};
|
||||
const auto productFamily = device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
|
||||
|
||||
KernelAllocatorFn allocator = kernelFactory[productFamily];
|
||||
auto clone = static_cast<KernelImp *>(allocator(module));
|
||||
auto clone = static_cast<KernelImp *>(allocator(nullptr));
|
||||
DEBUG_BREAK_IF(nullptr == clone);
|
||||
DEBUG_BREAK_IF(clone->ownedSharedState);
|
||||
|
||||
clone->cloneOrigin = this;
|
||||
clone->sharedState = this->sharedState;
|
||||
|
||||
if (stateOverride) {
|
||||
clone->privateState = *stateOverride;
|
||||
}
|
||||
|
||||
clone->module = this->module;
|
||||
clone->sharedState = this->ownedSharedState.get();
|
||||
clone->privateState = this->privateState;
|
||||
return std::unique_ptr<KernelImp>{clone};
|
||||
}
|
||||
|
||||
|
||||
@@ -35,9 +35,11 @@ struct KernelExt {
|
||||
};
|
||||
|
||||
struct KernelImp : Kernel {
|
||||
KernelImp(Module *module);
|
||||
KernelImp(Module *module) : module(module),
|
||||
ownedSharedState(module ? std::make_unique<KernelSharedState>(module) : nullptr),
|
||||
sharedState(ownedSharedState.get()) {}
|
||||
|
||||
~KernelImp() override;
|
||||
~KernelImp() override = default;
|
||||
|
||||
ze_result_t destroy() override {
|
||||
if (this->sharedState->devicePrintfKernelMutex == nullptr) {
|
||||
@@ -48,7 +50,7 @@ struct KernelImp : Kernel {
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<KernelImp> cloneWithStateOverride(const KernelMutableState *stateOverride);
|
||||
std::unique_ptr<KernelImp> makeDependentClone();
|
||||
|
||||
ze_result_t getBaseAddress(uint64_t *baseAddress) override;
|
||||
ze_result_t getKernelProgramBinary(size_t *kernelSize, char *pKernelBinary) override;
|
||||
@@ -258,7 +260,8 @@ struct KernelImp : Kernel {
|
||||
KernelMutableState &getPrivateState() { return privateState; }
|
||||
|
||||
protected:
|
||||
KernelImp();
|
||||
KernelImp() : ownedSharedState(std::make_unique<KernelSharedState>(module)),
|
||||
sharedState(ownedSharedState.get()) {}
|
||||
|
||||
void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z);
|
||||
void createPrintfBuffer();
|
||||
@@ -272,9 +275,9 @@ struct KernelImp : Kernel {
|
||||
ArrayRef<uint8_t> getDynamicStateHeapDataSpan() { return ArrayRef<uint8_t>(privateState.dynamicStateHeapData.data(), privateState.dynamicStateHeapData.size()); }
|
||||
|
||||
Module *module = nullptr;
|
||||
KernelImp *cloneOrigin = nullptr;
|
||||
|
||||
std::shared_ptr<KernelSharedState> sharedState = nullptr;
|
||||
std::unique_ptr<KernelSharedState> ownedSharedState = nullptr;
|
||||
KernelSharedState *sharedState = nullptr;
|
||||
KernelMutableState privateState{};
|
||||
};
|
||||
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace std {
|
||||
class mutex;
|
||||
}
|
||||
@@ -17,8 +19,17 @@ class GraphicsAllocation;
|
||||
|
||||
namespace L0 {
|
||||
struct KernelImmutableData;
|
||||
struct Module;
|
||||
|
||||
struct KernelSharedState {
|
||||
KernelSharedState(Module *module);
|
||||
KernelSharedState(const KernelSharedState &) = delete;
|
||||
KernelSharedState(KernelSharedState &&) noexcept = default;
|
||||
KernelSharedState &operator=(const KernelSharedState &) = delete;
|
||||
KernelSharedState &operator=(KernelSharedState &&) noexcept = default;
|
||||
~KernelSharedState();
|
||||
|
||||
Module *module = nullptr;
|
||||
const KernelImmutableData *kernelImmData = nullptr;
|
||||
|
||||
std::mutex *devicePrintfKernelMutex = nullptr;
|
||||
|
||||
@@ -422,15 +422,20 @@ TEST(GraphTestApiSubmit, GivenValidCmdListAndGraphThenGraphAppendReturnsSuccess)
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, err);
|
||||
}
|
||||
|
||||
TEST(GraphTestApiCapture, GivenCommandListInRecordStateThenCaptureCommandsInsteadOfExecutingThem) {
|
||||
using GraphTestApiCaptureWithDevice = Test<DeviceFixture>;
|
||||
|
||||
TEST_F(GraphTestApiCaptureWithDevice, GivenCommandListInRecordStateThenCaptureCommandsInsteadOfExecutingThem) {
|
||||
GraphsCleanupGuard graphCleanup;
|
||||
Mock<Context> ctx;
|
||||
Mock<Context> otherCtx;
|
||||
Mock<CommandList> cmdlist;
|
||||
auto cmdListHandle = cmdlist.toHandle();
|
||||
Mock<Event> event;
|
||||
Mock<Module> module(this->device, nullptr);
|
||||
Mock<KernelImp> kernel;
|
||||
kernel.setModule(&module);
|
||||
Mock<KernelImp> kernel2;
|
||||
kernel2.setModule(&module);
|
||||
ze_image_handle_t imgA = nullptr;
|
||||
ze_image_handle_t imgB = nullptr;
|
||||
ze_device_handle_t device = nullptr;
|
||||
@@ -774,9 +779,9 @@ TEST_F(GraphTestInstantiationTest, WhenInstantiatingGraphThenBakeCommandsIntoCom
|
||||
Mock<Event> event;
|
||||
Mock<Module> module(this->device, nullptr);
|
||||
Mock<KernelImp> kernel;
|
||||
kernel.module = &module;
|
||||
kernel.setModule(&module);
|
||||
Mock<KernelImp> kernel2;
|
||||
kernel2.module = &module;
|
||||
kernel2.setModule(&module);
|
||||
ze_image_handle_t imgA = nullptr;
|
||||
ze_image_handle_t imgB = nullptr;
|
||||
zes_device_handle_t device = nullptr;
|
||||
@@ -1279,29 +1284,6 @@ TEST(ClosureExternalStorage, GivenEventWaitListThenRecordsItProperly) {
|
||||
EXPECT_EQ(eventHandles[9], storage.getEventsList(waitList2Id)[1]);
|
||||
}
|
||||
|
||||
TEST(ClosureExternalStorage, GivenKernelMutableStateThenRecordsItProperly) {
|
||||
KernelMutableState s1;
|
||||
s1.globalOffsets[0] = 5U;
|
||||
KernelMutableState s2;
|
||||
s2.globalOffsets[0] = 7U;
|
||||
|
||||
L0::ClosureExternalStorage storage;
|
||||
|
||||
auto kernelState1Id = storage.registerKernelState(std::move(s1));
|
||||
auto kernelState2Id = storage.registerKernelState(std::move(s2));
|
||||
|
||||
EXPECT_NE(L0::ClosureExternalStorage::invalidEventsListId, kernelState1Id);
|
||||
EXPECT_NE(L0::ClosureExternalStorage::invalidEventsListId, kernelState2Id);
|
||||
|
||||
EXPECT_EQ(nullptr, storage.getKernelMutableState(L0::ClosureExternalStorage::invalidKernelStateId));
|
||||
|
||||
ASSERT_NE(nullptr, storage.getKernelMutableState(kernelState1Id));
|
||||
EXPECT_EQ(5U, storage.getKernelMutableState(kernelState1Id)->globalOffsets[0]);
|
||||
|
||||
ASSERT_NE(nullptr, storage.getKernelMutableState(kernelState2Id));
|
||||
EXPECT_EQ(7U, storage.getKernelMutableState(kernelState2Id)->globalOffsets[0]);
|
||||
}
|
||||
|
||||
TEST(ClosureExternalStorage, GivenImageRegionThenRecordsItProperly) {
|
||||
ze_image_region_t r1 = {};
|
||||
r1.width = 5;
|
||||
|
||||
@@ -926,7 +926,8 @@ class ExtractKernelParametersTestFixture : public ModuleImmutableDataFixture, pu
|
||||
mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
|
||||
mockKernelImmData->mockKernelInfo->kernelDescriptor.kernelMetadata.kernelName = kernelName;
|
||||
createModuleFromMockBinary(perHwThreadPrivateMemorySizeRequested, false, mockKernelImmData.get());
|
||||
mockKernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
|
||||
auto pModule = ModuleImmutableDataFixture::module.get();
|
||||
mockKernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(pModule);
|
||||
}
|
||||
|
||||
void tearDown() {
|
||||
|
||||
@@ -9,11 +9,15 @@
|
||||
#include "shared/test/common/mocks/mock_io_functions.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/experimental/test_graph.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
|
||||
#include "level_zero/experimental/source/graph/captured_apis/graph_captured_apis.h"
|
||||
#include "level_zero/experimental/source/graph/graph_export.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
namespace L0 {
|
||||
@@ -105,7 +109,7 @@ class GraphDotExporterFileTest : public GraphDotExporterTest {
|
||||
return {__VA_ARGS__}; \
|
||||
}
|
||||
|
||||
class ExtractParametersTestFixture {
|
||||
class ExtractParametersTestFixture : public DeviceFixture {
|
||||
protected:
|
||||
ClosureExternalStorage storage;
|
||||
|
||||
@@ -119,14 +123,20 @@ class ExtractParametersTestFixture {
|
||||
ze_external_semaphore_wait_params_ext_t dummyWaitParams = {};
|
||||
uint32_t dummyCountBuffer[1] = {1};
|
||||
ze_group_count_t dummyLaunchArgs = {1, 1, 1};
|
||||
std::unique_ptr<Mock<Module>> module = nullptr;
|
||||
Mock<KernelImp> kernel;
|
||||
ze_kernel_handle_t dummyKernels[1];
|
||||
|
||||
void setUp() {
|
||||
DeviceFixture::setUp();
|
||||
module = std::make_unique<Mock<Module>>(this->device, nullptr);
|
||||
kernel.setModule(module.get());
|
||||
dummyKernels[0] = &kernel;
|
||||
}
|
||||
|
||||
void tearDown() {}
|
||||
void tearDown() {
|
||||
DeviceFixture::tearDown();
|
||||
}
|
||||
|
||||
template <CaptureApi api, typename ApiArgsT>
|
||||
void expectAllApiArgsPresent(const ApiArgsT &args) {
|
||||
|
||||
@@ -164,7 +164,7 @@ void ModuleFixture::createKernel() {
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
kernel = std::make_unique<WhiteBox<::L0::KernelImp>>();
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->initialize(&desc);
|
||||
if (NEO::ApiSpecificConfig::getBindlessMode(*device->getNEODevice())) {
|
||||
const_cast<KernelDescriptor &>(kernel->getKernelDescriptor()).kernelAttributes.bufferAddressingMode = KernelDescriptor::Bindless;
|
||||
@@ -176,7 +176,7 @@ std::unique_ptr<WhiteBox<::L0::KernelImp>> ModuleFixture::createKernelWithName(s
|
||||
desc.pKernelName = name.c_str();
|
||||
|
||||
auto kernel = std::make_unique<WhiteBox<::L0::KernelImp>>();
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->initialize(&desc);
|
||||
return kernel;
|
||||
}
|
||||
@@ -217,7 +217,7 @@ void MultiDeviceModuleFixture::createKernel(uint32_t rootDeviceIndex) {
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
kernel = std::make_unique<WhiteBox<::L0::KernelImp>>();
|
||||
kernel->module = modules[rootDeviceIndex].get();
|
||||
kernel->setModule(modules[rootDeviceIndex].get());
|
||||
kernel->initialize(&desc);
|
||||
}
|
||||
|
||||
|
||||
@@ -40,12 +40,12 @@ template <>
|
||||
struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
|
||||
using BaseClass = ::L0::KernelImp;
|
||||
using BaseClass::BaseClass;
|
||||
using ::L0::KernelImp::cloneOrigin;
|
||||
using ::L0::KernelImp::createPrintfBuffer;
|
||||
using ::L0::KernelImp::getCrossThreadDataSpan;
|
||||
using ::L0::KernelImp::getDynamicStateHeapDataSpan;
|
||||
using ::L0::KernelImp::getSurfaceStateHeapDataSpan;
|
||||
using ::L0::KernelImp::module;
|
||||
using ::L0::KernelImp::ownedSharedState;
|
||||
using ::L0::KernelImp::patchBindlessOffsetsInCrossThreadData;
|
||||
using ::L0::KernelImp::patchBindlessSurfaceState;
|
||||
using ::L0::KernelImp::patchSamplerBindlessOffsetsInCrossThreadData;
|
||||
@@ -66,7 +66,13 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
|
||||
return const_cast<NEO::KernelDescriptor &>(this->sharedState->kernelImmData->getDescriptor());
|
||||
}
|
||||
|
||||
WhiteBox() : ::L0::KernelImp(nullptr) {}
|
||||
void setModule(Module *module) {
|
||||
this->module = module;
|
||||
DEBUG_BREAK_IF(!this->sharedState);
|
||||
this->sharedState->module = module;
|
||||
}
|
||||
|
||||
WhiteBox() : ::L0::KernelImp() {}
|
||||
};
|
||||
|
||||
template <>
|
||||
|
||||
@@ -42,7 +42,7 @@ TEST(KernelAssert, GivenKernelWithAssertWhenDestroyedThenAssertIsChecked) {
|
||||
|
||||
{
|
||||
Mock<KernelImp> kernel;
|
||||
kernel.module = &module;
|
||||
kernel.setModule(&module);
|
||||
|
||||
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
|
||||
}
|
||||
@@ -74,7 +74,7 @@ TEST(KernelAssert, GivenKernelWithAssertWhenNoAssertHandlerOnDestroyThenDestruct
|
||||
|
||||
{
|
||||
Mock<KernelImp> kernel;
|
||||
kernel.module = &module;
|
||||
kernel.setModule(&module);
|
||||
|
||||
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
|
||||
}
|
||||
@@ -113,7 +113,7 @@ TEST(KernelAssert, GivenKernelWithAssertAndImplicitArgsWhenInitializingKernelThe
|
||||
|
||||
MockModule module(&l0Device, nullptr, ModuleType::user);
|
||||
Mock<KernelImp> kernel;
|
||||
kernel.module = &module;
|
||||
kernel.setModule(&module);
|
||||
|
||||
kernel.descriptor.kernelMetadata.kernelName = "test";
|
||||
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
|
||||
|
||||
@@ -1595,7 +1595,7 @@ HWTEST_F(ImmediateCommandListTest,
|
||||
ze_result_t returnValue;
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
module->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
|
||||
@@ -236,7 +236,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -254,7 +254,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -277,7 +277,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendedToSynch
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::renderCompute, returnValue));
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -304,7 +304,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendedToAsync
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::renderCompute, returnValue));
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -339,7 +339,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendToSynchro
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::renderCompute, returnValue));
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -376,7 +376,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAppendedToCommandLi
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -419,7 +419,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAndEventAppendedToC
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -486,7 +486,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAndEventAppendedToC
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -548,7 +548,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAppendedToImmComman
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -575,7 +575,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAndEventAppendedToI
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -626,7 +626,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAndEventAppendedToI
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto kernel = new Mock<KernelImp>{};
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
|
||||
@@ -1329,7 +1329,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenImmCmdListAndKernelWithImageWriteA
|
||||
device->getNEODevice()->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper);
|
||||
|
||||
auto kernel = std::make_unique<Mock<KernelImp>>();
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->immutableData.kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg = true;
|
||||
|
||||
ze_command_queue_desc_t queueDesc = {};
|
||||
@@ -1369,7 +1369,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenRegularCommandListAndOutOfOrderExe
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
auto kernel = std::make_unique<Mock<KernelImp>>();
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->immutableData.kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg = true;
|
||||
|
||||
auto releaseHelper = std::make_unique<MockReleaseHelper>();
|
||||
@@ -1422,7 +1422,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenKernelWithImageWriteArgWhenAppendi
|
||||
device->getNEODevice()->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper);
|
||||
for (auto cmdListFlags : testedCmdListFlags) {
|
||||
auto kernel = std::make_unique<Mock<KernelImp>>();
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->immutableData.kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg = true;
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
@@ -1475,7 +1475,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenKernelWithImageWriteArgWhenAppendi
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, whenResettingRegularCommandListThenTextureCacheFlushPendingStateIsCleared, IsXeHpgCore) {
|
||||
auto kernel = std::make_unique<Mock<KernelImp>>();
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->immutableData.kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg = true;
|
||||
|
||||
auto releaseHelper = std::make_unique<MockReleaseHelper>();
|
||||
|
||||
@@ -1219,7 +1219,7 @@ using CommandListAppendLaunchKernel = Test<ModuleFixture>;
|
||||
HWTEST_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsWhenAppendLaunchCooperativeKernelIsCalledThenReturnSuccess) {
|
||||
Mock<::L0::KernelImp> kernel;
|
||||
std::unique_ptr<Module> pMockModule = std::make_unique<Mock<Module>>(device, nullptr);
|
||||
kernel.module = pMockModule.get();
|
||||
kernel.setModule(pMockModule.get());
|
||||
|
||||
kernel.setGroupSize(4, 1, 1);
|
||||
ze_group_count_t groupCount{8, 1, 1};
|
||||
@@ -1250,7 +1250,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithSlmSizeExceedingLocalMemo
|
||||
|
||||
Mock<::L0::KernelImp> kernel;
|
||||
std::unique_ptr<Module> pMockModule = std::make_unique<Mock<Module>>(device, nullptr);
|
||||
kernel.module = pMockModule.get();
|
||||
kernel.setModule(pMockModule.get());
|
||||
|
||||
kernel.setGroupSize(4, 1, 1);
|
||||
ze_group_count_t groupCount{8, 1, 1};
|
||||
@@ -1367,7 +1367,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp
|
||||
|
||||
Mock<::L0::KernelImp> kernel;
|
||||
std::unique_ptr<Module> pMockModule = std::make_unique<Mock<Module>>(device, nullptr);
|
||||
kernel.module = pMockModule.get();
|
||||
kernel.setModule(pMockModule.get());
|
||||
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
|
||||
@@ -275,7 +275,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhen
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
eventPoolDesc.count = 1;
|
||||
|
||||
kernel->module = &module;
|
||||
kernel->setModule(&module);
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
|
||||
@@ -304,7 +304,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonPrintfKernelWithPrintfBufferCrea
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
eventPoolDesc.count = 1;
|
||||
|
||||
kernel->module = &module;
|
||||
kernel->setModule(&module);
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = false;
|
||||
kernel->descriptor.kernelAttributes.flags.useStackCalls = true;
|
||||
kernel->privateState.pImplicitArgs.reset(new ImplicitArgs());
|
||||
@@ -342,7 +342,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenEmptyAllocPrintfBufferKernelWhenApp
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
eventPoolDesc.count = 1;
|
||||
|
||||
kernel->module = &module;
|
||||
kernel->setModule(&module);
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = false;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
@@ -369,7 +369,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhen
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
eventPoolDesc.count = 1;
|
||||
|
||||
kernel->module = &module;
|
||||
kernel->setModule(&module);
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
kernel->createPrintfBuffer();
|
||||
|
||||
@@ -403,7 +403,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonPrintfKernelAndPrintfBufferForSt
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
eventPoolDesc.count = 1;
|
||||
|
||||
kernel->module = &module;
|
||||
kernel->setModule(&module);
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = false;
|
||||
kernel->descriptor.kernelAttributes.flags.useStackCalls = true;
|
||||
kernel->privateState.pImplicitArgs.reset(new ImplicitArgs());
|
||||
@@ -446,7 +446,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenEmptyAllocPrintfBufferKernelWhenApp
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
eventPoolDesc.count = 1;
|
||||
|
||||
kernel->module = &module;
|
||||
kernel->setModule(&module);
|
||||
kernel->descriptor.kernelAttributes.flags.usesPrintf = false;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
|
||||
@@ -74,7 +74,13 @@ struct WhiteBoxKernelHw : public KernelHw<gfxCoreFamily> {
|
||||
return const_cast<NEO::KernelDescriptor &>(this->sharedState->kernelImmData->getDescriptor());
|
||||
}
|
||||
|
||||
WhiteBoxKernelHw() : ::L0::KernelHw<gfxCoreFamily>(nullptr) {}
|
||||
void setModule(Module *module) {
|
||||
this->module = module;
|
||||
DEBUG_BREAK_IF(!this->sharedState);
|
||||
this->sharedState->module = module;
|
||||
}
|
||||
|
||||
WhiteBoxKernelHw() : ::L0::KernelHw<gfxCoreFamily>() {}
|
||||
};
|
||||
|
||||
using KernelInitTest = Test<ModuleImmutableDataFixture>;
|
||||
@@ -267,7 +273,7 @@ using SetKernelArgCacheTest = Test<ModuleFixture>;
|
||||
|
||||
TEST_F(SetKernelArgCacheTest, givenValidBufferArgumentWhenSetMultipleTimesThenSetArgBufferWithAllocOnlyCalledIfNeeded) {
|
||||
MockKernelWithCallTracking mockKernel;
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
ze_kernel_desc_t desc = {};
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
mockKernel.initialize(&desc);
|
||||
@@ -386,7 +392,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeEnabledWhenSett
|
||||
mockKernel.descriptor.kernelAttributes.simdSize = 1;
|
||||
mockKernel.privateState.kernelRequiresGenerationOfLocalIdsByRuntime = true; // although it is enabled for SIMD 1, make sure it is enforced
|
||||
mockKernel.descriptor.kernelAttributes.numLocalIdChannels = 3;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
const auto &device = mockModule.getDevice();
|
||||
auto grfSize = device->getHwInfo().capabilityTable.grfSize;
|
||||
auto numGrf = GrfConfig::defaultGrfNumber;
|
||||
@@ -434,7 +440,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeDisabledWhenSet
|
||||
Mock<KernelImp> mockKernel;
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
mockKernel.descriptor.kernelAttributes.simdSize = 1;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
mockKernel.privateState.kernelRequiresGenerationOfLocalIdsByRuntime = false;
|
||||
|
||||
uint32_t groupSize[3] = {2, 3, 5};
|
||||
@@ -451,7 +457,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenIncorrectGroupSizeDimensionWhenSettingGro
|
||||
for (auto i = 0u; i < 3u; i++) {
|
||||
mockKernel.descriptor.kernelAttributes.requiredWorkgroupSize[i] = 2;
|
||||
}
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
uint32_t groupSize[3] = {1, 1, 1};
|
||||
mockKernel.privateState.groupSize[0] = 0;
|
||||
@@ -465,7 +471,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInva
|
||||
for (auto i = 0u; i < 3u; i++) {
|
||||
mockKernel.descriptor.kernelAttributes.requiredWorkgroupSize[i] = 2;
|
||||
}
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
uint32_t groupSize[3] = {0, 0, 0};
|
||||
auto ret = mockKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]);
|
||||
@@ -475,7 +481,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInva
|
||||
TEST_F(KernelImpSetGroupSizeTest, givenValidGroupSizeWhenSetMultipleTimesThenSetGroupSizeIsOnlyExecutedIfNeeded) {
|
||||
MockKernelWithCallTracking mockKernel;
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
// First call with {2u, 3u, 5u} group size - don't skip setGroupSize execution
|
||||
auto ret = mockKernel.setGroupSize(2u, 3u, 5u);
|
||||
@@ -2474,7 +2480,7 @@ HWTEST_F(KernelImpPatchBindlessTest, GivenBindlessKernelAndNoGlobalBindlessAlloc
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
EXPECT_FALSE(mockKernel.privateState.isBindlessOffsetSet[0]);
|
||||
@@ -2488,7 +2494,7 @@ HWTEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessTh
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
|
||||
arg.bindless = 0x40;
|
||||
@@ -2526,7 +2532,7 @@ HWTEST_F(KernelImpPatchBindlessTest, GivenMisalignedBufferAddressWhenSettingSurf
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
|
||||
arg.bindless = 0x40;
|
||||
@@ -2574,7 +2580,7 @@ HWTEST_F(KernelImpPatchBindlessTest, GivenMisalignedAndAlignedBufferAddressWhenS
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
|
||||
arg.bindless = 0x40;
|
||||
@@ -2623,7 +2629,7 @@ HWTEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindfulThe
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
|
||||
@@ -2658,7 +2664,7 @@ HWTEST_F(KernelImpL3CachingTests, GivenKernelImpWhenSetSurfaceStateWithUnaligned
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
|
||||
@@ -2699,7 +2705,7 @@ TEST_F(KernelImpPatchBindlessTest, GivenValidBindlessOffsetWhenSetArgBufferWithA
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
MyMockKernel mockKernel;
|
||||
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
|
||||
@@ -2718,7 +2724,7 @@ TEST_F(KernelImpPatchBindlessTest, GivenValidBindfulOffsetWhenSetArgBufferWithAl
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
MyMockKernel mockKernel;
|
||||
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
|
||||
@@ -2737,7 +2743,7 @@ TEST_F(KernelImpPatchBindlessTest, GivenUndefiedBidfulAndBindlesstOffsetWhenSetA
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
MyMockKernel mockKernel;
|
||||
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
|
||||
@@ -2758,7 +2764,7 @@ TEST_F(KernelBindlessUncachedMemoryTests, givenBindlessKernelAndAllocDataNoTfoun
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
MyMockKernel mockKernel;
|
||||
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
|
||||
@@ -2777,7 +2783,7 @@ TEST_F(KernelBindlessUncachedMemoryTests,
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
MyMockKernel mockKernel;
|
||||
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
|
||||
@@ -2827,7 +2833,7 @@ TEST_F(KernelBindlessUncachedMemoryTests,
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
MyMockKernel mockKernel;
|
||||
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
|
||||
@@ -2879,7 +2885,7 @@ TEST_F(KernelBindlessUncachedMemoryTests,
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
MyMockKernel mockKernel;
|
||||
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
|
||||
@@ -2930,7 +2936,7 @@ TEST_F(KernelBindlessUncachedMemoryTests,
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
MyMockKernel mockKernel;
|
||||
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
|
||||
@@ -3333,7 +3339,7 @@ HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgRedescribedImageCal
|
||||
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
@@ -3427,7 +3433,7 @@ HWTEST_F(SetKernelArg, givenBindlessKernelAndNoAvailableSpaceOnSshWhenSetArgBuff
|
||||
ze_kernel_desc_t desc = {};
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
|
||||
arg.bindless = 0x40;
|
||||
@@ -3452,7 +3458,7 @@ HWTEST_F(SetKernelArg, givenSlmPointerWhenSettingKernelArgThenPropertyIsSaved) {
|
||||
ze_kernel_desc_t desc = {};
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
|
||||
mockKernel.module = module.get();
|
||||
mockKernel.setModule(module.get());
|
||||
mockKernel.initialize(&desc);
|
||||
|
||||
{
|
||||
@@ -3767,7 +3773,7 @@ TEST_F(KernelPrintHandlerTest, whenPrintPrintfOutputIsCalledThenPrintfBufferIsUs
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
kernel = std::make_unique<WhiteBox<::L0::KernelImp>>();
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->initialize(&desc);
|
||||
|
||||
EXPECT_FALSE(kernel->sharedState->printfBuffer == nullptr);
|
||||
@@ -3816,7 +3822,7 @@ TEST_F(PrintfTest, givenKernelWithPrintfThenPrintfBufferIsCreated) {
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
EXPECT_TRUE(mockKernel.getImmutableData()->getDescriptor().kernelAttributes.flags.usesPrintf);
|
||||
|
||||
@@ -3828,7 +3834,7 @@ TEST_F(PrintfTest, GivenKernelNotUsingPrintfWhenCreatingPrintfBufferThenAllocati
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.descriptor.kernelAttributes.flags.usesPrintf = false;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.createPrintfBuffer();
|
||||
EXPECT_EQ(nullptr, mockKernel.getPrintfBufferAllocation());
|
||||
@@ -3838,7 +3844,7 @@ TEST_F(PrintfTest, WhenCreatingPrintfBufferThenAllocationAddedToResidencyContain
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.createPrintfBuffer();
|
||||
|
||||
@@ -3853,7 +3859,7 @@ TEST_F(PrintfTest, WhenCreatingPrintfBufferThenCrossThreadDataIsPatched) {
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless = 0;
|
||||
mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize = sizeof(uintptr_t);
|
||||
@@ -4224,7 +4230,7 @@ using BindlessKernelTest = Test<DeviceFixture>;
|
||||
TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
@@ -4288,7 +4294,7 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
|
||||
TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
@@ -4339,7 +4345,7 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatc
|
||||
TEST_F(BindlessKernelTest, givenNoEntryInBindlessOffsetsMapWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
@@ -4365,7 +4371,7 @@ TEST_F(BindlessKernelTest, givenNoEntryInBindlessOffsetsMapWhenPatchingCrossThre
|
||||
TEST_F(BindlessKernelTest, givenNoStatefulArgsWhenPatchingBindlessOffsetsInCrossThreadDataThenMemoryIsNotPatched) {
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
@@ -4393,7 +4399,7 @@ TEST_F(BindlessKernelTest, givenGlobalBindlessAllocatorAndBindlessKernelWithImpl
|
||||
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
@@ -4467,7 +4473,7 @@ TEST(KernelImmutableDataTest, givenBindlessKernelWhenInitializingImmDataThenSshT
|
||||
TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
@@ -4531,7 +4537,7 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossT
|
||||
TEST_F(BindlessKernelTest, givenBindlessKernelWithInlineSamplersWhenPatchingSamplerOffsetsInCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.module = &mockModule;
|
||||
mockKernel.setModule(&mockModule);
|
||||
|
||||
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
|
||||
@@ -230,7 +230,7 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
|
||||
EXPECT_EQ(state3.kernelHasIndirectAccess, state2.kernelHasIndirectAccess);
|
||||
}
|
||||
|
||||
TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssignedAndCloneOriginMarked) {
|
||||
TEST_F(KernelImpTest, GivenKernelPrivateStateWhenKernelImpClonedThenSharedStateIsSharedAndPrivateIsCopied) {
|
||||
NEO::KernelDescriptor descriptor;
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfo{};
|
||||
kernelInfo.kernelDescriptor = &descriptor;
|
||||
@@ -247,23 +247,18 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssigne
|
||||
kernel1.privateState.reservePerThreadDataForWholeThreadGroup(mockSize);
|
||||
std::memcpy(kernel1.privateState.perThreadDataForWholeThreadGroup, std::to_array<uint8_t>({81, 82, 83, 84, 85, 86, 87, 88}).data(), mockSize);
|
||||
|
||||
// This state overrides the state of kernel1's clone
|
||||
KernelMutableState privateState;
|
||||
fillKernelMutableStateWithMockData(privateState);
|
||||
|
||||
// No need to check each and every member again
|
||||
EXPECT_NE(0, std::memcmp(kernel1.privateState.crossThreadData.data(), privateState.crossThreadData.data(), mockSize));
|
||||
EXPECT_NE(0, std::memcmp(kernel1.privateState.perThreadDataForWholeThreadGroup, privateState.perThreadDataForWholeThreadGroup, mockSize));
|
||||
|
||||
auto clonedKernel = kernel1.cloneWithStateOverride(&privateState);
|
||||
EXPECT_NE(nullptr, kernel1.ownedSharedState.get());
|
||||
EXPECT_EQ(kernel1.sharedState, kernel1.ownedSharedState.get());
|
||||
auto clonedKernel = kernel1.makeDependentClone();
|
||||
auto kernel2 = static_cast<WhiteBox<KernelImp> *>(clonedKernel.get());
|
||||
EXPECT_EQ(nullptr, kernel2->ownedSharedState.get());
|
||||
EXPECT_EQ(kernel2->sharedState, kernel1.ownedSharedState.get());
|
||||
|
||||
// KernelMutableState part taken from `state`
|
||||
EXPECT_EQ(0, std::memcmp(kernel2->privateState.crossThreadData.data(), privateState.crossThreadData.data(), mockSize));
|
||||
EXPECT_EQ(0, std::memcmp(kernel2->privateState.perThreadDataForWholeThreadGroup, privateState.perThreadDataForWholeThreadGroup, mockSize));
|
||||
EXPECT_EQ(0, std::memcmp(kernel2->privateState.crossThreadData.data(), kernel1.privateState.crossThreadData.data(), mockSize));
|
||||
EXPECT_EQ(0, std::memcmp(kernel2->privateState.perThreadDataForWholeThreadGroup, kernel1.privateState.perThreadDataForWholeThreadGroup, mockSize));
|
||||
|
||||
// KernelImp part taken from `kernel1`
|
||||
EXPECT_EQ(kernel2->cloneOrigin, &kernel1);
|
||||
EXPECT_EQ(kernel2->sharedState->kernelImmData, &kernelInfo);
|
||||
EXPECT_EQ(kernel2->sharedState->devicePrintfKernelMutex, kernel1.sharedState->devicePrintfKernelMutex);
|
||||
EXPECT_EQ(kernel2->sharedState->privateMemoryGraphicsAllocation, kernel1.sharedState->privateMemoryGraphicsAllocation);
|
||||
|
||||
@@ -4321,7 +4321,7 @@ struct MultipleDevicePeerAllocationTest : public ::testing::Test {
|
||||
desc.pKernelName = kernelName.c_str();
|
||||
|
||||
kernel = std::make_unique<WhiteBox<::L0::KernelImp>>();
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
kernel->initialize(&desc);
|
||||
}
|
||||
|
||||
|
||||
@@ -3864,7 +3864,7 @@ HWTEST_F(PrintfModuleTest, GivenModuleWithPrintfWhenKernelIsCreatedThenPrintfAll
|
||||
auto kernel = std::make_unique<Mock<KernelImp>>();
|
||||
ASSERT_NE(nullptr, kernel);
|
||||
|
||||
kernel->module = module.get();
|
||||
kernel->setModule(module.get());
|
||||
ze_kernel_desc_t kernelDesc = {};
|
||||
kernelDesc.pKernelName = "test";
|
||||
kernel->initialize(&kernelDesc);
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/utilities/stackvec.h"
|
||||
|
||||
#include "level_zero/core/source/kernel/kernel_mutable_state.h"
|
||||
#include "level_zero/core/source/kernel/kernel_imp.h"
|
||||
#include "level_zero/ze_api.h"
|
||||
#include "level_zero/ze_intel_gpu.h"
|
||||
|
||||
@@ -105,12 +105,10 @@ struct ExternalCbEventInfoContainer {
|
||||
|
||||
struct ClosureExternalStorage {
|
||||
using EventsListId = uint32_t;
|
||||
using KernelStateId = uint32_t;
|
||||
using ImageRegionId = uint32_t;
|
||||
using CopyRegionId = uint32_t;
|
||||
|
||||
static constexpr EventsListId invalidEventsListId = std::numeric_limits<EventsListId>::max();
|
||||
static constexpr KernelStateId invalidKernelStateId = std::numeric_limits<KernelStateId>::max();
|
||||
static constexpr ImageRegionId invalidImageRegionId = std::numeric_limits<ImageRegionId>::max();
|
||||
static constexpr CopyRegionId invalidCopyRegionId = std::numeric_limits<CopyRegionId>::max();
|
||||
|
||||
@@ -123,12 +121,6 @@ struct ClosureExternalStorage {
|
||||
return static_cast<EventsListId>(ret);
|
||||
}
|
||||
|
||||
KernelStateId registerKernelState(KernelMutableState &&state) {
|
||||
auto ret = kernelStates.size();
|
||||
kernelStates.push_back(std::move(state));
|
||||
return static_cast<KernelStateId>(ret);
|
||||
}
|
||||
|
||||
ImageRegionId registerImageRegion(const ze_image_region_t *imageRegion) {
|
||||
if (nullptr == imageRegion) {
|
||||
return invalidImageRegionId;
|
||||
@@ -161,13 +153,6 @@ struct ClosureExternalStorage {
|
||||
return waitEvents.data() + id;
|
||||
}
|
||||
|
||||
KernelMutableState *getKernelMutableState(KernelStateId id) {
|
||||
if (invalidKernelStateId == id) {
|
||||
return nullptr;
|
||||
}
|
||||
return kernelStates.data() + id;
|
||||
}
|
||||
|
||||
ze_image_region_t *getImageRegion(ImageRegionId id) {
|
||||
if (invalidImageRegionId == id) {
|
||||
return nullptr;
|
||||
@@ -184,7 +169,6 @@ struct ClosureExternalStorage {
|
||||
|
||||
protected:
|
||||
std::vector<ze_event_handle_t> waitEvents;
|
||||
std::vector<KernelMutableState> kernelStates;
|
||||
std::vector<ze_image_region_t> imageRegions;
|
||||
std::vector<ze_copy_region_t> copyRegions;
|
||||
};
|
||||
@@ -818,7 +802,7 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchKernel> {
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
|
||||
ze_group_count_t launchKernelArgs;
|
||||
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
|
||||
std::unique_ptr<KernelImp> capturedKernel;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
@@ -842,7 +826,7 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchCooperativeKernel> {
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
|
||||
ze_group_count_t launchKernelArgs;
|
||||
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
|
||||
std::unique_ptr<KernelImp> capturedKernel;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
@@ -865,7 +849,7 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchKernelIndirect> {
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
|
||||
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
|
||||
std::unique_ptr<KernelImp> capturedKernel;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
@@ -890,7 +874,7 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchMultipleKernelsIndirect> {
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
|
||||
ClosureExternalStorage::KernelStateId firstKernelStateId = ClosureExternalStorage::invalidKernelStateId;
|
||||
std::vector<std::unique_ptr<KernelImp>> capturedKernels;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
@@ -914,10 +898,12 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchKernelWithParameters> {
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
|
||||
IndirectArgs(IndirectArgs &&) = default;
|
||||
IndirectArgs &operator=(IndirectArgs &&) = default;
|
||||
~IndirectArgs();
|
||||
ze_group_count_t groupCounts;
|
||||
void *pNext;
|
||||
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
|
||||
std::unique_ptr<KernelImp> capturedKernel;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
@@ -943,9 +929,11 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchKernelWithArguments> {
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
|
||||
IndirectArgs(IndirectArgs &&) = default;
|
||||
IndirectArgs &operator=(IndirectArgs &&) = default;
|
||||
~IndirectArgs();
|
||||
void *pNext;
|
||||
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
|
||||
std::unique_ptr<KernelImp> capturedKernel;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
@@ -272,77 +272,54 @@ ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyFromMemoryExt>::inst
|
||||
|
||||
Closure<CaptureApi::zeCommandListAppendLaunchKernel>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
this->launchKernelArgs = *apiArgs.launchKernelArgs;
|
||||
|
||||
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
|
||||
L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
|
||||
this->kernelStateId = externalStorage.registerKernelState(std::move(stateSnapshot));
|
||||
this->capturedKernel = kernel->makeDependentClone();
|
||||
}
|
||||
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernel>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
|
||||
auto *kernelOrig = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
|
||||
DEBUG_BREAK_IF(nullptr == kernelOrig);
|
||||
|
||||
auto kernelClone = kernelOrig->cloneWithStateOverride(externalStorage.getKernelMutableState(this->indirectArgs.kernelStateId));
|
||||
auto result = zeCommandListAppendLaunchKernel(&executionTarget, kernelClone.get(), &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
|
||||
auto *kernelHandle = this->indirectArgs.capturedKernel.get();
|
||||
auto result = zeCommandListAppendLaunchKernel(&executionTarget, kernelHandle, &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
|
||||
handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
|
||||
return result;
|
||||
}
|
||||
|
||||
Closure<CaptureApi::zeCommandListAppendLaunchCooperativeKernel>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
this->launchKernelArgs = *apiArgs.launchKernelArgs;
|
||||
|
||||
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
|
||||
L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
|
||||
this->kernelStateId = externalStorage.registerKernelState(std::move(stateSnapshot));
|
||||
this->capturedKernel = kernel->makeDependentClone();
|
||||
}
|
||||
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchCooperativeKernel>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
|
||||
auto *kernelOrig = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
|
||||
DEBUG_BREAK_IF(nullptr == kernelOrig);
|
||||
|
||||
auto kernelClone = kernelOrig->cloneWithStateOverride(externalStorage.getKernelMutableState(this->indirectArgs.kernelStateId));
|
||||
auto result = zeCommandListAppendLaunchCooperativeKernel(&executionTarget, kernelClone.get(), &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
|
||||
auto *kernelHandle = this->indirectArgs.capturedKernel.get();
|
||||
auto result = zeCommandListAppendLaunchCooperativeKernel(&executionTarget, kernelHandle, &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
|
||||
handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
|
||||
return result;
|
||||
}
|
||||
|
||||
Closure<CaptureApi::zeCommandListAppendLaunchKernelIndirect>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
|
||||
L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
|
||||
this->kernelStateId = externalStorage.registerKernelState(std::move(stateSnapshot));
|
||||
this->capturedKernel = kernel->makeDependentClone();
|
||||
}
|
||||
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernelIndirect>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
|
||||
auto *kernelOrig = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
|
||||
DEBUG_BREAK_IF(nullptr == kernelOrig);
|
||||
|
||||
auto kernelClone = kernelOrig->cloneWithStateOverride(externalStorage.getKernelMutableState(this->indirectArgs.kernelStateId));
|
||||
auto result = zeCommandListAppendLaunchKernelIndirect(&executionTarget, kernelClone.get(), apiArgs.launchArgsBuffer, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
|
||||
auto *kernelHandle = this->indirectArgs.capturedKernel.get();
|
||||
auto result = zeCommandListAppendLaunchKernelIndirect(&executionTarget, kernelHandle, apiArgs.launchArgsBuffer, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
|
||||
handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Captures per-kernel data for a recorded zeCommandListAppendLaunchMultipleKernelsIndirect call.
//
// For each of the apiArgs.numKernels handles in apiArgs.phKernels this:
//  - registers a snapshot of the kernel's private state in externalStorage
//    (the id returned for the first kernel is kept in firstKernelStateId),
//  - stores a dependent clone of the kernel in capturedKernels.
//
// NOTE(review): both the state-snapshot registration and the dependent clone are
// kept here - confirm whether replay still needs the registered snapshots.
Closure<CaptureApi::zeCommandListAppendLaunchMultipleKernelsIndirect>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
    // Reserve up front so the vector does not reallocate while clones are appended.
    this->capturedKernels.reserve(apiArgs.numKernels);

    for (uint32_t kernelIdx = 0U; kernelIdx < apiArgs.numKernels; ++kernelIdx) {
        auto *sourceKernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.phKernels[kernelIdx]));

        L0::KernelMutableState privateStateCopy = sourceKernel->getPrivateState();
        const auto registeredId = externalStorage.registerKernelState(std::move(privateStateCopy));
        if (0U == kernelIdx) {
            this->firstKernelStateId = registeredId;
        }

        this->capturedKernels.emplace_back(sourceKernel->makeDependentClone().release());
    }
}
|
||||
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchMultipleKernelsIndirect>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
|
||||
std::vector<decltype(std::declval<KernelImp *>()->cloneWithStateOverride(nullptr))> kernelClonesOwner(apiArgs.numKernels);
|
||||
std::vector<ze_kernel_handle_t> phKernelClones(apiArgs.numKernels);
|
||||
|
||||
for (uint32_t i{0U}; i < apiArgs.numKernels; ++i) {
|
||||
auto *kernelOrig = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.phKernels[i]));
|
||||
DEBUG_BREAK_IF(nullptr == kernelOrig);
|
||||
const auto kernelStateId = this->indirectArgs.firstKernelStateId + i;
|
||||
kernelClonesOwner[i] = kernelOrig->cloneWithStateOverride(externalStorage.getKernelMutableState(kernelStateId));
|
||||
phKernelClones[i] = kernelClonesOwner[i].get();
|
||||
phKernelClones[i] = this->indirectArgs.capturedKernels[i].get();
|
||||
}
|
||||
auto result = zeCommandListAppendLaunchMultipleKernelsIndirect(&executionTarget, apiArgs.numKernels, phKernelClones.data(), apiArgs.pCountBuffer, apiArgs.launchArgsBuffer, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
|
||||
handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
|
||||
@@ -357,8 +334,7 @@ Closure<CaptureApi::zeCommandListAppendLaunchKernelWithParameters>::IndirectArgs
|
||||
UNRECOVERABLE_IF(result != ZE_RESULT_SUCCESS);
|
||||
|
||||
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
|
||||
L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
|
||||
this->kernelStateId = externalStorage.registerKernelState(std::move(stateSnapshot));
|
||||
this->capturedKernel = kernel->makeDependentClone();
|
||||
}
|
||||
|
||||
Closure<CaptureApi::zeCommandListAppendLaunchKernelWithParameters>::IndirectArgs::~IndirectArgs() {
|
||||
@@ -366,11 +342,8 @@ Closure<CaptureApi::zeCommandListAppendLaunchKernelWithParameters>::IndirectArgs
|
||||
}
|
||||
|
||||
// Replays a captured zeCommandListAppendLaunchKernelWithParameters call onto executionTarget.
//
// The launch uses the kernel object held in indirectArgs.capturedKernel together
// with the group counts and pNext chain stored in indirectArgs, so no kernel
// clone is created per replay. Wait events are resolved through externalStorage;
// the signal event is then passed to handleExternalCbEvent regardless of the
// append result.
//
// Returns the ze_result_t produced by zeCommandListAppendLaunchKernelWithParameters.
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernelWithParameters>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
    auto *kernelHandle = this->indirectArgs.capturedKernel.get();
    auto result = zeCommandListAppendLaunchKernelWithParameters(&executionTarget, kernelHandle, &indirectArgs.groupCounts, indirectArgs.pNext, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
    handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
    return result;
}
|
||||
@@ -382,11 +355,7 @@ Closure<CaptureApi::zeCommandListAppendLaunchKernelWithArguments>::IndirectArgs:
|
||||
UNRECOVERABLE_IF(result != ZE_RESULT_SUCCESS);
|
||||
|
||||
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
|
||||
result = CommandList::setKernelState(kernel, apiArgs.groupSizes, apiArgs.pArguments);
|
||||
UNRECOVERABLE_IF(result != ZE_RESULT_SUCCESS);
|
||||
|
||||
L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
|
||||
this->kernelStateId = externalStorage.registerKernelState(std::move(stateSnapshot));
|
||||
this->capturedKernel = kernel->makeDependentClone();
|
||||
}
|
||||
|
||||
Closure<CaptureApi::zeCommandListAppendLaunchKernelWithArguments>::IndirectArgs::~IndirectArgs() {
|
||||
@@ -394,11 +363,8 @@ Closure<CaptureApi::zeCommandListAppendLaunchKernelWithArguments>::IndirectArgs:
|
||||
}
|
||||
|
||||
// Replays a captured zeCommandListAppendLaunchKernelWithArguments call onto executionTarget.
//
// The replay is issued through zeCommandListAppendLaunchKernelWithParameters using
// the kernel object held in indirectArgs.capturedKernel, the group counts from
// apiArgs and the pNext chain stored in indirectArgs; no kernel clone is created
// per replay.
// NOTE(review): kernel arguments are not passed here - presumably they are already
// part of the captured kernel's state from capture time; confirm against the
// IndirectArgs constructor.
// Wait events are resolved through externalStorage; the signal event is then
// passed to handleExternalCbEvent regardless of the append result.
//
// Returns the ze_result_t produced by zeCommandListAppendLaunchKernelWithParameters.
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernelWithArguments>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
    auto *kernelHandle = this->indirectArgs.capturedKernel.get();
    auto result = zeCommandListAppendLaunchKernelWithParameters(&executionTarget, kernelHandle, &apiArgs.groupCounts, this->indirectArgs.pNext, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
    handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
    return result;
}
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
|
||||
#include "shared/source/utilities/stackvec.h"
|
||||
|
||||
#include "level_zero/core/source/kernel/kernel_mutable_state.h"
|
||||
#include "level_zero/ze_api.h"
|
||||
|
||||
#include "graph_captured_apis.h"
|
||||
|
||||
Reference in New Issue
Block a user