performance: R&R - optimize appendLaunchKernel family

Related-To: NEO-16017
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
Maciej Bielski
2025-08-29 17:07:12 +00:00
committed by Compute-Runtime-Automation
parent f13c18be8c
commit d68cfab86c
20 changed files with 203 additions and 240 deletions

View File

@@ -328,6 +328,39 @@ KernelMutableState::~KernelMutableState() {
}
}
// Binds the shared kernel state to `module` and selects the implicit-args version.
//
// With a null module nothing is set and every member keeps its in-class default.
// Otherwise the version reported by the device's GfxCoreHelper is used, unless the
// module's program info carries a positive indirectAccessBufferMajorVersion, which
// then takes precedence.
KernelSharedState::KernelSharedState(Module *module) {
    if (nullptr == module) {
        return;
    }
    this->module = module;
    this->implicitArgsVersion = module->getDevice()->getGfxCoreHelper().getImplicitArgsVersion();

    // Module -> ModuleImp is a downcast inside one class hierarchy, so the named cast
    // for it is static_cast; reinterpret_cast would skip any base-pointer adjustment.
    auto *moduleImp = static_cast<ModuleImp *>(this->module);
    const auto programVersion = moduleImp->getTranslationUnit()->programInfo.indirectAccessBufferMajorVersion;
    if (programVersion > 0) {
        this->implicitArgsVersion = programVersion;
    }
}
// Releases the resources held by the shared kernel state.
//
// Nothing is done when no module was bound. Otherwise, in order: the private memory
// allocation is freed, pending printf output is printed and its buffer freed, and
// for kernels flagged with usesAssert the root device environment's assert handler
// (if any) is asked to print and abort.
KernelSharedState::~KernelSharedState() {
    if (this->module == nullptr) {
        return;
    }

    // Evaluated on demand so the module is only dereferenced on paths that need the device.
    auto neoDevice = [this]() { return this->module->getDevice()->getNEODevice(); };

    auto *privateAllocation = this->privateMemoryGraphicsAllocation;
    if (privateAllocation != nullptr) {
        neoDevice()->getMemoryManager()->freeGraphicsMemory(privateAllocation);
    }

    const auto *immData = this->kernelImmData;
    if (nullptr != this->printfBuffer) {
        // Virtual functions must not be called from a destructor, so printOutput is invoked directly.
        PrintfHandler::printOutput(immData, this->printfBuffer, this->module->getDevice(), false);
        neoDevice()->getMemoryManager()->freeGraphicsMemory(this->printfBuffer);
    }

    if (nullptr == immData || !immData->getDescriptor().kernelAttributes.flags.usesAssert) {
        return;
    }
    auto &assertHandler = neoDevice()->getRootDeviceEnvironment().assertHandler;
    if (assertHandler.get()) {
        assertHandler->printAssertAndAbort();
    }
}
ze_result_t KernelImp::getBaseAddress(uint64_t *baseAddress) {
if (baseAddress) {
auto gmmHelper = module->getDevice()->getNEODevice()->getGmmHelper();
@@ -337,41 +370,6 @@ ze_result_t KernelImp::getBaseAddress(uint64_t *baseAddress) {
return ZE_RESULT_SUCCESS;
}
// Creates a kernel attached to `module` with a freshly allocated shared state.
//
// When a module is given, the shared state's implicit-args version is taken from the
// device's GfxCoreHelper, unless the module's program info carries a positive
// indirectAccessBufferMajorVersion, which then takes precedence.
KernelImp::KernelImp(Module *module) : module(module), sharedState{std::make_shared<KernelSharedState>()} {
    if (nullptr == module) {
        return;
    }
    this->sharedState->implicitArgsVersion = module->getDevice()->getGfxCoreHelper().getImplicitArgsVersion();

    // Module -> ModuleImp is a downcast inside one class hierarchy, so the named cast
    // for it is static_cast; reinterpret_cast would skip any base-pointer adjustment.
    auto *moduleImp = static_cast<ModuleImp *>(this->module);
    const auto programVersion = moduleImp->getTranslationUnit()->programInfo.indirectAccessBufferMajorVersion;
    if (programVersion > 0) {
        this->sharedState->implicitArgsVersion = programVersion;
    }
}

// Default construction is the same as constructing without a module.
KernelImp::KernelImp() : KernelImp(static_cast<Module *>(nullptr)) {}
// Releases the resources reachable through the shared state.
//
// A clone (cloneOrigin set) releases nothing. Otherwise, in order: the private memory
// allocation is freed, pending printf output is printed and its buffer freed, and for
// kernels flagged with usesAssert the root device environment's assert handler (if
// any) is asked to print and abort.
KernelImp::~KernelImp() {
    // Resources shared with clones are released by the original instance only.
    if (cloneOrigin != nullptr) {
        return;
    }

    // Evaluated on demand so the module is only dereferenced on paths that need the device.
    auto neoDevice = [this]() { return module->getDevice()->getNEODevice(); };

    auto *privateAllocation = sharedState->privateMemoryGraphicsAllocation;
    if (privateAllocation != nullptr) {
        neoDevice()->getMemoryManager()->freeGraphicsMemory(privateAllocation);
    }

    const auto *immData = this->sharedState->kernelImmData;
    if (nullptr != this->sharedState->printfBuffer) {
        // Virtual functions must not be called from a destructor, so printOutput is invoked directly.
        PrintfHandler::printOutput(immData, this->sharedState->printfBuffer, module->getDevice(), false);
        neoDevice()->getMemoryManager()->freeGraphicsMemory(this->sharedState->printfBuffer);
    }

    // Same short-circuit order as before: immutable data, usesAssert flag, module, then the handler.
    if (nullptr == immData || !immData->getDescriptor().kernelAttributes.flags.usesAssert || nullptr == module) {
        return;
    }
    auto &assertHandler = neoDevice()->getRootDeviceEnvironment().assertHandler;
    if (assertHandler.get()) {
        assertHandler->printAssertAndAbort();
    }
}
ze_result_t KernelImp::getKernelProgramBinary(size_t *kernelSize, char *pKernelBinary) {
size_t kSize = static_cast<size_t>(this->getImmutableData()->getKernelInfo()->heapInfo.kernelHeapSize);
if (nullptr == pKernelBinary) {
@@ -1330,21 +1328,19 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
return ZE_RESULT_SUCCESS;
}
std::unique_ptr<KernelImp> KernelImp::cloneWithStateOverride(const KernelMutableState *stateOverride) {
std::unique_ptr<KernelImp> KernelImp::makeDependentClone() {
DEBUG_BREAK_IF(nullptr == this->ownedSharedState.get());
auto *device{static_cast<DeviceImp *>(this->module->getDevice())};
const auto productFamily = device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
KernelAllocatorFn allocator = kernelFactory[productFamily];
auto clone = static_cast<KernelImp *>(allocator(module));
auto clone = static_cast<KernelImp *>(allocator(nullptr));
DEBUG_BREAK_IF(nullptr == clone);
DEBUG_BREAK_IF(clone->ownedSharedState);
clone->cloneOrigin = this;
clone->sharedState = this->sharedState;
if (stateOverride) {
clone->privateState = *stateOverride;
}
clone->module = this->module;
clone->sharedState = this->ownedSharedState.get();
clone->privateState = this->privateState;
return std::unique_ptr<KernelImp>{clone};
}

View File

@@ -35,9 +35,11 @@ struct KernelExt {
};
struct KernelImp : Kernel {
KernelImp(Module *module);
KernelImp(Module *module) : module(module),
ownedSharedState(module ? std::make_unique<KernelSharedState>(module) : nullptr),
sharedState(ownedSharedState.get()) {}
~KernelImp() override;
~KernelImp() override = default;
ze_result_t destroy() override {
if (this->sharedState->devicePrintfKernelMutex == nullptr) {
@@ -48,7 +50,7 @@ struct KernelImp : Kernel {
}
}
std::unique_ptr<KernelImp> cloneWithStateOverride(const KernelMutableState *stateOverride);
std::unique_ptr<KernelImp> makeDependentClone();
ze_result_t getBaseAddress(uint64_t *baseAddress) override;
ze_result_t getKernelProgramBinary(size_t *kernelSize, char *pKernelBinary) override;
@@ -258,7 +260,8 @@ struct KernelImp : Kernel {
KernelMutableState &getPrivateState() { return privateState; }
protected:
KernelImp();
KernelImp() : ownedSharedState(std::make_unique<KernelSharedState>(module)),
sharedState(ownedSharedState.get()) {}
void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z);
void createPrintfBuffer();
@@ -272,9 +275,9 @@ struct KernelImp : Kernel {
ArrayRef<uint8_t> getDynamicStateHeapDataSpan() { return ArrayRef<uint8_t>(privateState.dynamicStateHeapData.data(), privateState.dynamicStateHeapData.size()); }
Module *module = nullptr;
KernelImp *cloneOrigin = nullptr;
std::shared_ptr<KernelSharedState> sharedState = nullptr;
std::unique_ptr<KernelSharedState> ownedSharedState = nullptr;
KernelSharedState *sharedState = nullptr;
KernelMutableState privateState{};
};

View File

@@ -7,6 +7,8 @@
#pragma once
#include <cstdint>
namespace std {
class mutex;
}
@@ -17,8 +19,17 @@ class GraphicsAllocation;
namespace L0 {
struct KernelImmutableData;
struct Module;
struct KernelSharedState {
KernelSharedState(Module *module);
KernelSharedState(const KernelSharedState &) = delete;
KernelSharedState(KernelSharedState &&) noexcept = default;
KernelSharedState &operator=(const KernelSharedState &) = delete;
KernelSharedState &operator=(KernelSharedState &&) noexcept = default;
~KernelSharedState();
Module *module = nullptr;
const KernelImmutableData *kernelImmData = nullptr;
std::mutex *devicePrintfKernelMutex = nullptr;

View File

@@ -422,15 +422,20 @@ TEST(GraphTestApiSubmit, GivenValidCmdListAndGraphThenGraphAppendReturnsSuccess)
EXPECT_EQ(ZE_RESULT_SUCCESS, err);
}
TEST(GraphTestApiCapture, GivenCommandListInRecordStateThenCaptureCommandsInsteadOfExecutingThem) {
using GraphTestApiCaptureWithDevice = Test<DeviceFixture>;
TEST_F(GraphTestApiCaptureWithDevice, GivenCommandListInRecordStateThenCaptureCommandsInsteadOfExecutingThem) {
GraphsCleanupGuard graphCleanup;
Mock<Context> ctx;
Mock<Context> otherCtx;
Mock<CommandList> cmdlist;
auto cmdListHandle = cmdlist.toHandle();
Mock<Event> event;
Mock<Module> module(this->device, nullptr);
Mock<KernelImp> kernel;
kernel.setModule(&module);
Mock<KernelImp> kernel2;
kernel2.setModule(&module);
ze_image_handle_t imgA = nullptr;
ze_image_handle_t imgB = nullptr;
ze_device_handle_t device = nullptr;
@@ -774,9 +779,9 @@ TEST_F(GraphTestInstantiationTest, WhenInstantiatingGraphThenBakeCommandsIntoCom
Mock<Event> event;
Mock<Module> module(this->device, nullptr);
Mock<KernelImp> kernel;
kernel.module = &module;
kernel.setModule(&module);
Mock<KernelImp> kernel2;
kernel2.module = &module;
kernel2.setModule(&module);
ze_image_handle_t imgA = nullptr;
ze_image_handle_t imgB = nullptr;
zes_device_handle_t device = nullptr;
@@ -1279,29 +1284,6 @@ TEST(ClosureExternalStorage, GivenEventWaitListThenRecordsItProperly) {
EXPECT_EQ(eventHandles[9], storage.getEventsList(waitList2Id)[1]);
}
TEST(ClosureExternalStorage, GivenKernelMutableStateThenRecordsItProperly) {
KernelMutableState s1;
s1.globalOffsets[0] = 5U;
KernelMutableState s2;
s2.globalOffsets[0] = 7U;
L0::ClosureExternalStorage storage;
auto kernelState1Id = storage.registerKernelState(std::move(s1));
auto kernelState2Id = storage.registerKernelState(std::move(s2));
EXPECT_NE(L0::ClosureExternalStorage::invalidEventsListId, kernelState1Id);
EXPECT_NE(L0::ClosureExternalStorage::invalidEventsListId, kernelState2Id);
EXPECT_EQ(nullptr, storage.getKernelMutableState(L0::ClosureExternalStorage::invalidKernelStateId));
ASSERT_NE(nullptr, storage.getKernelMutableState(kernelState1Id));
EXPECT_EQ(5U, storage.getKernelMutableState(kernelState1Id)->globalOffsets[0]);
ASSERT_NE(nullptr, storage.getKernelMutableState(kernelState2Id));
EXPECT_EQ(7U, storage.getKernelMutableState(kernelState2Id)->globalOffsets[0]);
}
TEST(ClosureExternalStorage, GivenImageRegionThenRecordsItProperly) {
ze_image_region_t r1 = {};
r1.width = 5;

View File

@@ -926,7 +926,8 @@ class ExtractKernelParametersTestFixture : public ModuleImmutableDataFixture, pu
mockKernelImmData = std::make_unique<MockImmutableData>(perHwThreadPrivateMemorySizeRequested);
mockKernelImmData->mockKernelInfo->kernelDescriptor.kernelMetadata.kernelName = kernelName;
createModuleFromMockBinary(perHwThreadPrivateMemorySizeRequested, false, mockKernelImmData.get());
mockKernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
auto pModule = ModuleImmutableDataFixture::module.get();
mockKernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(pModule);
}
void tearDown() {

View File

@@ -9,11 +9,15 @@
#include "shared/test/common/mocks/mock_io_functions.h"
#include "level_zero/core/test/unit_tests/experimental/test_graph.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
#include "level_zero/experimental/source/graph/captured_apis/graph_captured_apis.h"
#include "level_zero/experimental/source/graph/graph_export.h"
#include "gtest/gtest.h"
#include <memory>
using namespace NEO;
namespace L0 {
@@ -105,7 +109,7 @@ class GraphDotExporterFileTest : public GraphDotExporterTest {
return {__VA_ARGS__}; \
}
class ExtractParametersTestFixture {
class ExtractParametersTestFixture : public DeviceFixture {
protected:
ClosureExternalStorage storage;
@@ -119,14 +123,20 @@ class ExtractParametersTestFixture {
ze_external_semaphore_wait_params_ext_t dummyWaitParams = {};
uint32_t dummyCountBuffer[1] = {1};
ze_group_count_t dummyLaunchArgs = {1, 1, 1};
std::unique_ptr<Mock<Module>> module = nullptr;
Mock<KernelImp> kernel;
ze_kernel_handle_t dummyKernels[1];
void setUp() {
DeviceFixture::setUp();
module = std::make_unique<Mock<Module>>(this->device, nullptr);
kernel.setModule(module.get());
dummyKernels[0] = &kernel;
}
void tearDown() {}
void tearDown() {
DeviceFixture::tearDown();
}
template <CaptureApi api, typename ApiArgsT>
void expectAllApiArgsPresent(const ApiArgsT &args) {

View File

@@ -164,7 +164,7 @@ void ModuleFixture::createKernel() {
desc.pKernelName = kernelName.c_str();
kernel = std::make_unique<WhiteBox<::L0::KernelImp>>();
kernel->module = module.get();
kernel->setModule(module.get());
kernel->initialize(&desc);
if (NEO::ApiSpecificConfig::getBindlessMode(*device->getNEODevice())) {
const_cast<KernelDescriptor &>(kernel->getKernelDescriptor()).kernelAttributes.bufferAddressingMode = KernelDescriptor::Bindless;
@@ -176,7 +176,7 @@ std::unique_ptr<WhiteBox<::L0::KernelImp>> ModuleFixture::createKernelWithName(s
desc.pKernelName = name.c_str();
auto kernel = std::make_unique<WhiteBox<::L0::KernelImp>>();
kernel->module = module.get();
kernel->setModule(module.get());
kernel->initialize(&desc);
return kernel;
}
@@ -217,7 +217,7 @@ void MultiDeviceModuleFixture::createKernel(uint32_t rootDeviceIndex) {
desc.pKernelName = kernelName.c_str();
kernel = std::make_unique<WhiteBox<::L0::KernelImp>>();
kernel->module = modules[rootDeviceIndex].get();
kernel->setModule(modules[rootDeviceIndex].get());
kernel->initialize(&desc);
}

View File

@@ -40,12 +40,12 @@ template <>
struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
using BaseClass = ::L0::KernelImp;
using BaseClass::BaseClass;
using ::L0::KernelImp::cloneOrigin;
using ::L0::KernelImp::createPrintfBuffer;
using ::L0::KernelImp::getCrossThreadDataSpan;
using ::L0::KernelImp::getDynamicStateHeapDataSpan;
using ::L0::KernelImp::getSurfaceStateHeapDataSpan;
using ::L0::KernelImp::module;
using ::L0::KernelImp::ownedSharedState;
using ::L0::KernelImp::patchBindlessOffsetsInCrossThreadData;
using ::L0::KernelImp::patchBindlessSurfaceState;
using ::L0::KernelImp::patchSamplerBindlessOffsetsInCrossThreadData;
@@ -66,7 +66,13 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
return const_cast<NEO::KernelDescriptor &>(this->sharedState->kernelImmData->getDescriptor());
}
WhiteBox() : ::L0::KernelImp(nullptr) {}
// Points both the kernel and its shared state at `module`.
// The shared state is expected to exist already; a debug break fires otherwise.
void setModule(Module *module) {
    auto &state = this->sharedState;
    this->module = module;
    DEBUG_BREAK_IF(!state);
    state->module = module;
}
WhiteBox() : ::L0::KernelImp() {}
};
template <>

View File

@@ -42,7 +42,7 @@ TEST(KernelAssert, GivenKernelWithAssertWhenDestroyedThenAssertIsChecked) {
{
Mock<KernelImp> kernel;
kernel.module = &module;
kernel.setModule(&module);
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
}
@@ -74,7 +74,7 @@ TEST(KernelAssert, GivenKernelWithAssertWhenNoAssertHandlerOnDestroyThenDestruct
{
Mock<KernelImp> kernel;
kernel.module = &module;
kernel.setModule(&module);
kernel.descriptor.kernelAttributes.flags.usesAssert = true;
}
@@ -113,7 +113,7 @@ TEST(KernelAssert, GivenKernelWithAssertAndImplicitArgsWhenInitializingKernelThe
MockModule module(&l0Device, nullptr, ModuleType::user);
Mock<KernelImp> kernel;
kernel.module = &module;
kernel.setModule(&module);
kernel.descriptor.kernelMetadata.kernelName = "test";
kernel.descriptor.kernelAttributes.flags.usesAssert = true;

View File

@@ -1595,7 +1595,7 @@ HWTEST_F(ImmediateCommandListTest,
ze_result_t returnValue;
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
module->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});

View File

@@ -236,7 +236,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
ze_group_count_t groupCount{1, 1, 1};
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -254,7 +254,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
ze_group_count_t groupCount{1, 1, 1};
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -277,7 +277,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendedToSynch
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::renderCompute, returnValue));
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -304,7 +304,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendedToAsync
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::renderCompute, returnValue));
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -339,7 +339,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendToSynchro
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::renderCompute, returnValue));
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -376,7 +376,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAppendedToCommandLi
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -419,7 +419,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAndEventAppendedToC
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -486,7 +486,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAndEventAppendedToC
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -548,7 +548,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAppendedToImmComman
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -575,7 +575,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAndEventAppendedToI
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -626,7 +626,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfAndEventAppendedToI
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto kernel = new Mock<KernelImp>{};
kernel->module = module.get();
kernel->setModule(module.get());
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
static_cast<ModuleImp *>(module.get())->getPrintfKernelContainer().push_back(std::shared_ptr<Kernel>{kernel});
@@ -1329,7 +1329,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenImmCmdListAndKernelWithImageWriteA
device->getNEODevice()->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper);
auto kernel = std::make_unique<Mock<KernelImp>>();
kernel->module = module.get();
kernel->setModule(module.get());
kernel->immutableData.kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg = true;
ze_command_queue_desc_t queueDesc = {};
@@ -1369,7 +1369,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenRegularCommandListAndOutOfOrderExe
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
auto kernel = std::make_unique<Mock<KernelImp>>();
kernel->module = module.get();
kernel->setModule(module.get());
kernel->immutableData.kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg = true;
auto releaseHelper = std::make_unique<MockReleaseHelper>();
@@ -1422,7 +1422,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenKernelWithImageWriteArgWhenAppendi
device->getNEODevice()->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper);
for (auto cmdListFlags : testedCmdListFlags) {
auto kernel = std::make_unique<Mock<KernelImp>>();
kernel->module = module.get();
kernel->setModule(module.get());
kernel->immutableData.kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg = true;
ze_group_count_t groupCount{1, 1, 1};
@@ -1475,7 +1475,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenKernelWithImageWriteArgWhenAppendi
HWTEST2_F(CommandListAppendLaunchKernel, whenResettingRegularCommandListThenTextureCacheFlushPendingStateIsCleared, IsXeHpgCore) {
auto kernel = std::make_unique<Mock<KernelImp>>();
kernel->module = module.get();
kernel->setModule(module.get());
kernel->immutableData.kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg = true;
auto releaseHelper = std::make_unique<MockReleaseHelper>();

View File

@@ -1219,7 +1219,7 @@ using CommandListAppendLaunchKernel = Test<ModuleFixture>;
HWTEST_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsWhenAppendLaunchCooperativeKernelIsCalledThenReturnSuccess) {
Mock<::L0::KernelImp> kernel;
std::unique_ptr<Module> pMockModule = std::make_unique<Mock<Module>>(device, nullptr);
kernel.module = pMockModule.get();
kernel.setModule(pMockModule.get());
kernel.setGroupSize(4, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
@@ -1250,7 +1250,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithSlmSizeExceedingLocalMemo
Mock<::L0::KernelImp> kernel;
std::unique_ptr<Module> pMockModule = std::make_unique<Mock<Module>>(device, nullptr);
kernel.module = pMockModule.get();
kernel.setModule(pMockModule.get());
kernel.setGroupSize(4, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
@@ -1367,7 +1367,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp
Mock<::L0::KernelImp> kernel;
std::unique_ptr<Module> pMockModule = std::make_unique<Mock<Module>>(device, nullptr);
kernel.module = pMockModule.get();
kernel.setModule(pMockModule.get());
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
auto result = pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);

View File

@@ -275,7 +275,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhen
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = 1;
kernel->module = &module;
kernel->setModule(&module);
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
@@ -304,7 +304,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonPrintfKernelWithPrintfBufferCrea
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = 1;
kernel->module = &module;
kernel->setModule(&module);
kernel->descriptor.kernelAttributes.flags.usesPrintf = false;
kernel->descriptor.kernelAttributes.flags.useStackCalls = true;
kernel->privateState.pImplicitArgs.reset(new ImplicitArgs());
@@ -342,7 +342,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenEmptyAllocPrintfBufferKernelWhenApp
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = 1;
kernel->module = &module;
kernel->setModule(&module);
kernel->descriptor.kernelAttributes.flags.usesPrintf = false;
ze_event_desc_t eventDesc = {};
@@ -369,7 +369,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhen
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = 1;
kernel->module = &module;
kernel->setModule(&module);
kernel->descriptor.kernelAttributes.flags.usesPrintf = true;
kernel->createPrintfBuffer();
@@ -403,7 +403,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonPrintfKernelAndPrintfBufferForSt
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = 1;
kernel->module = &module;
kernel->setModule(&module);
kernel->descriptor.kernelAttributes.flags.usesPrintf = false;
kernel->descriptor.kernelAttributes.flags.useStackCalls = true;
kernel->privateState.pImplicitArgs.reset(new ImplicitArgs());
@@ -446,7 +446,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenEmptyAllocPrintfBufferKernelWhenApp
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = 1;
kernel->module = &module;
kernel->setModule(&module);
kernel->descriptor.kernelAttributes.flags.usesPrintf = false;
ze_event_desc_t eventDesc = {};

View File

@@ -74,7 +74,13 @@ struct WhiteBoxKernelHw : public KernelHw<gfxCoreFamily> {
return const_cast<NEO::KernelDescriptor &>(this->sharedState->kernelImmData->getDescriptor());
}
WhiteBoxKernelHw() : ::L0::KernelHw<gfxCoreFamily>(nullptr) {}
// Points both the kernel and its shared state at `module`.
// The shared state is expected to exist already; a debug break fires otherwise.
void setModule(Module *module) {
    auto &state = this->sharedState;
    this->module = module;
    DEBUG_BREAK_IF(!state);
    state->module = module;
}
WhiteBoxKernelHw() : ::L0::KernelHw<gfxCoreFamily>() {}
};
using KernelInitTest = Test<ModuleImmutableDataFixture>;
@@ -267,7 +273,7 @@ using SetKernelArgCacheTest = Test<ModuleFixture>;
TEST_F(SetKernelArgCacheTest, givenValidBufferArgumentWhenSetMultipleTimesThenSetArgBufferWithAllocOnlyCalledIfNeeded) {
MockKernelWithCallTracking mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();
mockKernel.initialize(&desc);
@@ -386,7 +392,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeEnabledWhenSett
mockKernel.descriptor.kernelAttributes.simdSize = 1;
mockKernel.privateState.kernelRequiresGenerationOfLocalIdsByRuntime = true; // although it is enabled for SIMD 1, make sure it is enforced
mockKernel.descriptor.kernelAttributes.numLocalIdChannels = 3;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
const auto &device = mockModule.getDevice();
auto grfSize = device->getHwInfo().capabilityTable.grfSize;
auto numGrf = GrfConfig::defaultGrfNumber;
@@ -434,7 +440,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenLocalIdGenerationByRuntimeDisabledWhenSet
Mock<KernelImp> mockKernel;
Mock<Module> mockModule(this->device, nullptr);
mockKernel.descriptor.kernelAttributes.simdSize = 1;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.privateState.kernelRequiresGenerationOfLocalIdsByRuntime = false;
uint32_t groupSize[3] = {2, 3, 5};
@@ -451,7 +457,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenIncorrectGroupSizeDimensionWhenSettingGro
for (auto i = 0u; i < 3u; i++) {
mockKernel.descriptor.kernelAttributes.requiredWorkgroupSize[i] = 2;
}
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
uint32_t groupSize[3] = {1, 1, 1};
mockKernel.privateState.groupSize[0] = 0;
@@ -465,7 +471,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInva
for (auto i = 0u; i < 3u; i++) {
mockKernel.descriptor.kernelAttributes.requiredWorkgroupSize[i] = 2;
}
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
uint32_t groupSize[3] = {0, 0, 0};
auto ret = mockKernel.setGroupSize(groupSize[0], groupSize[1], groupSize[2]);
@@ -475,7 +481,7 @@ TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInva
TEST_F(KernelImpSetGroupSizeTest, givenValidGroupSizeWhenSetMultipleTimesThenSetGroupSizeIsOnlyExecutedIfNeeded) {
MockKernelWithCallTracking mockKernel;
Mock<Module> mockModule(this->device, nullptr);
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
// First call with {2u, 3u, 5u} group size - don't skip setGroupSize execution
auto ret = mockKernel.setGroupSize(2u, 3u, 5u);
@@ -2474,7 +2480,7 @@ HWTEST_F(KernelImpPatchBindlessTest, GivenBindlessKernelAndNoGlobalBindlessAlloc
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
EXPECT_FALSE(mockKernel.privateState.isBindlessOffsetSet[0]);
@@ -2488,7 +2494,7 @@ HWTEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessTh
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
arg.bindless = 0x40;
@@ -2526,7 +2532,7 @@ HWTEST_F(KernelImpPatchBindlessTest, GivenMisalignedBufferAddressWhenSettingSurf
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
arg.bindless = 0x40;
@@ -2574,7 +2580,7 @@ HWTEST_F(KernelImpPatchBindlessTest, GivenMisalignedAndAlignedBufferAddressWhenS
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
arg.bindless = 0x40;
@@ -2623,7 +2629,7 @@ HWTEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindfulThe
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
@@ -2658,7 +2664,7 @@ HWTEST_F(KernelImpL3CachingTests, GivenKernelImpWhenSetSurfaceStateWithUnaligned
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
@@ -2699,7 +2705,7 @@ TEST_F(KernelImpPatchBindlessTest, GivenValidBindlessOffsetWhenSetArgBufferWithA
desc.pKernelName = kernelName.c_str();
MyMockKernel mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
@@ -2718,7 +2724,7 @@ TEST_F(KernelImpPatchBindlessTest, GivenValidBindfulOffsetWhenSetArgBufferWithAl
desc.pKernelName = kernelName.c_str();
MyMockKernel mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
@@ -2737,7 +2743,7 @@ TEST_F(KernelImpPatchBindlessTest, GivenUndefiedBidfulAndBindlesstOffsetWhenSetA
desc.pKernelName = kernelName.c_str();
MyMockKernel mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
@@ -2758,7 +2764,7 @@ TEST_F(KernelBindlessUncachedMemoryTests, givenBindlessKernelAndAllocDataNoTfoun
desc.pKernelName = kernelName.c_str();
MyMockKernel mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
@@ -2777,7 +2783,7 @@ TEST_F(KernelBindlessUncachedMemoryTests,
desc.pKernelName = kernelName.c_str();
MyMockKernel mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
@@ -2827,7 +2833,7 @@ TEST_F(KernelBindlessUncachedMemoryTests,
desc.pKernelName = kernelName.c_str();
MyMockKernel mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
@@ -2879,7 +2885,7 @@ TEST_F(KernelBindlessUncachedMemoryTests,
desc.pKernelName = kernelName.c_str();
MyMockKernel mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
@@ -2930,7 +2936,7 @@ TEST_F(KernelBindlessUncachedMemoryTests,
desc.pKernelName = kernelName.c_str();
MyMockKernel mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>());
@@ -3333,7 +3339,7 @@ HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgRedescribedImageCal
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
@@ -3427,7 +3433,7 @@ HWTEST_F(SetKernelArg, givenBindlessKernelAndNoAvailableSpaceOnSshWhenSetArgBuff
ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
arg.bindless = 0x40;
@@ -3452,7 +3458,7 @@ HWTEST_F(SetKernelArg, givenSlmPointerWhenSettingKernelArgThenPropertyIsSaved) {
ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<FamilyType::gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.setModule(module.get());
mockKernel.initialize(&desc);
{
@@ -3767,7 +3773,7 @@ TEST_F(KernelPrintHandlerTest, whenPrintPrintfOutputIsCalledThenPrintfBufferIsUs
desc.pKernelName = kernelName.c_str();
kernel = std::make_unique<WhiteBox<::L0::KernelImp>>();
kernel->module = module.get();
kernel->setModule(module.get());
kernel->initialize(&desc);
EXPECT_FALSE(kernel->sharedState->printfBuffer == nullptr);
@@ -3816,7 +3822,7 @@ TEST_F(PrintfTest, givenKernelWithPrintfThenPrintfBufferIsCreated) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
EXPECT_TRUE(mockKernel.getImmutableData()->getDescriptor().kernelAttributes.flags.usesPrintf);
@@ -3828,7 +3834,7 @@ TEST_F(PrintfTest, GivenKernelNotUsingPrintfWhenCreatingPrintfBufferThenAllocati
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.descriptor.kernelAttributes.flags.usesPrintf = false;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.createPrintfBuffer();
EXPECT_EQ(nullptr, mockKernel.getPrintfBufferAllocation());
@@ -3838,7 +3844,7 @@ TEST_F(PrintfTest, WhenCreatingPrintfBufferThenAllocationAddedToResidencyContain
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.createPrintfBuffer();
@@ -3853,7 +3859,7 @@ TEST_F(PrintfTest, WhenCreatingPrintfBufferThenCrossThreadDataIsPatched) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.stateless = 0;
mockKernel.descriptor.payloadMappings.implicitArgs.printfSurfaceAddress.pointerSize = sizeof(uintptr_t);
@@ -4224,7 +4230,7 @@ using BindlessKernelTest = Test<DeviceFixture>;
TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
@@ -4288,7 +4294,7 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
@@ -4339,7 +4345,7 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatc
TEST_F(BindlessKernelTest, givenNoEntryInBindlessOffsetsMapWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
@@ -4365,7 +4371,7 @@ TEST_F(BindlessKernelTest, givenNoEntryInBindlessOffsetsMapWhenPatchingCrossThre
TEST_F(BindlessKernelTest, givenNoStatefulArgsWhenPatchingBindlessOffsetsInCrossThreadDataThenMemoryIsNotPatched) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
@@ -4393,7 +4399,7 @@ TEST_F(BindlessKernelTest, givenGlobalBindlessAllocatorAndBindlessKernelWithImpl
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
@@ -4467,7 +4473,7 @@ TEST(KernelImmutableDataTest, givenBindlessKernelWhenInitializingImmDataThenSshT
TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
@@ -4531,7 +4537,7 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossT
TEST_F(BindlessKernelTest, givenBindlessKernelWithInlineSamplersWhenPatchingSamplerOffsetsInCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.setModule(&mockModule);
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

View File

@@ -230,7 +230,7 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenAssignmentOperatorUsedThenPrope
EXPECT_EQ(state3.kernelHasIndirectAccess, state2.kernelHasIndirectAccess);
}
TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssignedAndCloneOriginMarked) {
TEST_F(KernelImpTest, GivenKernelPrivateStateWhenKernelImpClonedThenSharedStateIsSharedAndPrivateIsCopied) {
NEO::KernelDescriptor descriptor;
WhiteBox<::L0::KernelImmutableData> kernelInfo{};
kernelInfo.kernelDescriptor = &descriptor;
@@ -247,23 +247,18 @@ TEST_F(KernelImpTest, GivenKernelMutableStateWhenKernelImpClonedThenStateAssigne
kernel1.privateState.reservePerThreadDataForWholeThreadGroup(mockSize);
std::memcpy(kernel1.privateState.perThreadDataForWholeThreadGroup, std::to_array<uint8_t>({81, 82, 83, 84, 85, 86, 87, 88}).data(), mockSize);
// This state overrides the state of kernel1's clone
KernelMutableState privateState;
fillKernelMutableStateWithMockData(privateState);
// No need to check each and every member again
EXPECT_NE(0, std::memcmp(kernel1.privateState.crossThreadData.data(), privateState.crossThreadData.data(), mockSize));
EXPECT_NE(0, std::memcmp(kernel1.privateState.perThreadDataForWholeThreadGroup, privateState.perThreadDataForWholeThreadGroup, mockSize));
auto clonedKernel = kernel1.cloneWithStateOverride(&privateState);
EXPECT_NE(nullptr, kernel1.ownedSharedState.get());
EXPECT_EQ(kernel1.sharedState, kernel1.ownedSharedState.get());
auto clonedKernel = kernel1.makeDependentClone();
auto kernel2 = static_cast<WhiteBox<KernelImp> *>(clonedKernel.get());
EXPECT_EQ(nullptr, kernel2->ownedSharedState.get());
EXPECT_EQ(kernel2->sharedState, kernel1.ownedSharedState.get());
// KernelMutableState part is copied from `kernel1.privateState`
EXPECT_EQ(0, std::memcmp(kernel2->privateState.crossThreadData.data(), privateState.crossThreadData.data(), mockSize));
EXPECT_EQ(0, std::memcmp(kernel2->privateState.perThreadDataForWholeThreadGroup, privateState.perThreadDataForWholeThreadGroup, mockSize));
EXPECT_EQ(0, std::memcmp(kernel2->privateState.crossThreadData.data(), kernel1.privateState.crossThreadData.data(), mockSize));
EXPECT_EQ(0, std::memcmp(kernel2->privateState.perThreadDataForWholeThreadGroup, kernel1.privateState.perThreadDataForWholeThreadGroup, mockSize));
// KernelImp part taken from `kernel1`
EXPECT_EQ(kernel2->cloneOrigin, &kernel1);
EXPECT_EQ(kernel2->sharedState->kernelImmData, &kernelInfo);
EXPECT_EQ(kernel2->sharedState->devicePrintfKernelMutex, kernel1.sharedState->devicePrintfKernelMutex);
EXPECT_EQ(kernel2->sharedState->privateMemoryGraphicsAllocation, kernel1.sharedState->privateMemoryGraphicsAllocation);

View File

@@ -4321,7 +4321,7 @@ struct MultipleDevicePeerAllocationTest : public ::testing::Test {
desc.pKernelName = kernelName.c_str();
kernel = std::make_unique<WhiteBox<::L0::KernelImp>>();
kernel->module = module.get();
kernel->setModule(module.get());
kernel->initialize(&desc);
}

View File

@@ -3864,7 +3864,7 @@ HWTEST_F(PrintfModuleTest, GivenModuleWithPrintfWhenKernelIsCreatedThenPrintfAll
auto kernel = std::make_unique<Mock<KernelImp>>();
ASSERT_NE(nullptr, kernel);
kernel->module = module.get();
kernel->setModule(module.get());
ze_kernel_desc_t kernelDesc = {};
kernelDesc.pKernelName = "test";
kernel->initialize(&kernelDesc);

View File

@@ -11,7 +11,7 @@
#include "shared/source/helpers/string.h"
#include "shared/source/utilities/stackvec.h"
#include "level_zero/core/source/kernel/kernel_mutable_state.h"
#include "level_zero/core/source/kernel/kernel_imp.h"
#include "level_zero/ze_api.h"
#include "level_zero/ze_intel_gpu.h"
@@ -105,12 +105,10 @@ struct ExternalCbEventInfoContainer {
struct ClosureExternalStorage {
using EventsListId = uint32_t;
using KernelStateId = uint32_t;
using ImageRegionId = uint32_t;
using CopyRegionId = uint32_t;
static constexpr EventsListId invalidEventsListId = std::numeric_limits<EventsListId>::max();
static constexpr KernelStateId invalidKernelStateId = std::numeric_limits<KernelStateId>::max();
static constexpr ImageRegionId invalidImageRegionId = std::numeric_limits<ImageRegionId>::max();
static constexpr CopyRegionId invalidCopyRegionId = std::numeric_limits<CopyRegionId>::max();
@@ -123,12 +121,6 @@ struct ClosureExternalStorage {
return static_cast<EventsListId>(ret);
}
// Appends `state` to kernelStates and returns the id of the new entry.
KernelStateId registerKernelState(KernelMutableState &&state) {
    // The id is the index the entry will occupy, i.e. the container size before insertion.
    const auto newId = static_cast<KernelStateId>(kernelStates.size());
    kernelStates.push_back(std::move(state));
    return newId;
}
ImageRegionId registerImageRegion(const ze_image_region_t *imageRegion) {
if (nullptr == imageRegion) {
return invalidImageRegionId;
@@ -161,13 +153,6 @@ struct ClosureExternalStorage {
return waitEvents.data() + id;
}
// Returns the stored state for `id`, or nullptr when `id` is the invalid sentinel.
// No range check is done on other ids; `id` is used directly as an offset into kernelStates.
KernelMutableState *getKernelMutableState(KernelStateId id) {
    return (invalidKernelStateId != id) ? (kernelStates.data() + id) : nullptr;
}
ze_image_region_t *getImageRegion(ImageRegionId id) {
if (invalidImageRegionId == id) {
return nullptr;
@@ -184,7 +169,6 @@ struct ClosureExternalStorage {
protected:
std::vector<ze_event_handle_t> waitEvents;
std::vector<KernelMutableState> kernelStates;
std::vector<ze_image_region_t> imageRegions;
std::vector<ze_copy_region_t> copyRegions;
};
@@ -818,7 +802,7 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchKernel> {
// Side data owned by a captured zeCommandListAppendLaunchKernel closure.
struct IndirectArgs : IndirectArgsWithWaitEvents {
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
// Group counts held by value.
ze_group_count_t launchKernelArgs;
// Id of a kernel state kept in ClosureExternalStorage; starts as the invalid sentinel.
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
// Kernel object owned by this closure.
std::unique_ptr<KernelImp> capturedKernel;
} indirectArgs;
// Stores apiArgs and constructs indirectArgs from the same arguments.
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
@@ -842,7 +826,7 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchCooperativeKernel> {
// Side data owned by a captured zeCommandListAppendLaunchCooperativeKernel closure.
struct IndirectArgs : IndirectArgsWithWaitEvents {
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
// Group counts held by value.
ze_group_count_t launchKernelArgs;
// Id of a kernel state kept in ClosureExternalStorage; starts as the invalid sentinel.
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
// Kernel object owned by this closure.
std::unique_ptr<KernelImp> capturedKernel;
} indirectArgs;
// Stores apiArgs and constructs indirectArgs from the same arguments.
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
@@ -865,7 +849,7 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchKernelIndirect> {
// Side data owned by a captured zeCommandListAppendLaunchKernelIndirect closure.
struct IndirectArgs : IndirectArgsWithWaitEvents {
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
// Id of a kernel state kept in ClosureExternalStorage; starts as the invalid sentinel.
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
// Kernel object owned by this closure.
std::unique_ptr<KernelImp> capturedKernel;
} indirectArgs;
// Stores apiArgs and constructs indirectArgs from the same arguments.
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
@@ -890,7 +874,7 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchMultipleKernelsIndirect> {
// Side data owned by a captured zeCommandListAppendLaunchMultipleKernelsIndirect closure.
struct IndirectArgs : IndirectArgsWithWaitEvents {
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
// Id of the first kernel state registered for this call; starts as the invalid sentinel.
ClosureExternalStorage::KernelStateId firstKernelStateId = ClosureExternalStorage::invalidKernelStateId;
// One owned kernel object per captured kernel handle, in call order.
std::vector<std::unique_ptr<KernelImp>> capturedKernels;
} indirectArgs;
// Stores apiArgs and constructs indirectArgs from the same arguments.
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
@@ -914,10 +898,12 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchKernelWithParameters> {
// Side data owned by a captured zeCommandListAppendLaunchKernelWithParameters closure.
struct IndirectArgs : IndirectArgsWithWaitEvents {
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
// The user-declared destructor below suppresses the implicit move operations, so they are
// requested explicitly; the unique_ptr member already makes the struct non-copyable.
IndirectArgs(IndirectArgs &&) = default;
IndirectArgs &operator=(IndirectArgs &&) = default;
~IndirectArgs();
// Group counts held by value.
ze_group_count_t groupCounts;
// NOTE(review): presumably a copy of the extension chain that ~IndirectArgs releases; confirm against the constructor and destructor.
void *pNext;
// Id of a kernel state kept in ClosureExternalStorage; starts as the invalid sentinel.
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
// Kernel object owned by this closure.
std::unique_ptr<KernelImp> capturedKernel;
} indirectArgs;
// Stores apiArgs and constructs indirectArgs from the same arguments.
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
@@ -943,9 +929,11 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchKernelWithArguments> {
// Side data owned by a captured zeCommandListAppendLaunchKernelWithArguments closure.
struct IndirectArgs : IndirectArgsWithWaitEvents {
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
// The user-declared destructor below suppresses the implicit move operations, so they are
// requested explicitly; the unique_ptr member already makes the struct non-copyable.
IndirectArgs(IndirectArgs &&) = default;
IndirectArgs &operator=(IndirectArgs &&) = default;
~IndirectArgs();
// NOTE(review): presumably a copy of the extension chain that ~IndirectArgs releases; confirm against the constructor and destructor.
void *pNext;
// Id of a kernel state kept in ClosureExternalStorage; starts as the invalid sentinel.
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
// Kernel object owned by this closure.
std::unique_ptr<KernelImp> capturedKernel;
} indirectArgs;
// Stores apiArgs and constructs indirectArgs from the same arguments.
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}

View File

@@ -272,77 +272,54 @@ ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyFromMemoryExt>::inst
// Builds the indirect arguments for a captured zeCommandListAppendLaunchKernel call:
// copies the group counts by value, registers a snapshot of the kernel's private state in
// externalStorage, and stores a dependent clone of the kernel in capturedKernel.
// NOTE(review): the snapshot (kernelStateId) and capturedKernel both capture the kernel; the
// snapshot looks like the older mechanism that capturedKernel replaces. Confirm it is still needed.
Closure<CaptureApi::zeCommandListAppendLaunchKernel>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
this->launchKernelArgs = *apiArgs.launchKernelArgs;
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
this->kernelStateId = externalStorage.registerKernelState(std::move(stateSnapshot));
this->capturedKernel = kernel->makeDependentClone();
}
// Replays the captured zeCommandListAppendLaunchKernel call on executionTarget.
// The launch uses the kernel owned by indirectArgs.capturedKernel and the by-value group counts;
// the wait-event list is looked up in externalStorage. Returns the result of the append call.
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernel>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
    // Append exactly once, using the kernel captured at record time.
    auto *recordedKernel = this->indirectArgs.capturedKernel.get();
    const auto appendResult = zeCommandListAppendLaunchKernel(&executionTarget, recordedKernel, &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
    handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
    return appendResult;
}
// Builds the indirect arguments for a captured zeCommandListAppendLaunchCooperativeKernel call:
// copies the group counts by value, registers a snapshot of the kernel's private state in
// externalStorage, and stores a dependent clone of the kernel in capturedKernel.
// NOTE(review): the snapshot (kernelStateId) and capturedKernel both capture the kernel; the
// snapshot looks like the older mechanism that capturedKernel replaces. Confirm it is still needed.
Closure<CaptureApi::zeCommandListAppendLaunchCooperativeKernel>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
this->launchKernelArgs = *apiArgs.launchKernelArgs;
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
this->kernelStateId = externalStorage.registerKernelState(std::move(stateSnapshot));
this->capturedKernel = kernel->makeDependentClone();
}
// Replays the captured zeCommandListAppendLaunchCooperativeKernel call on executionTarget.
// The launch uses the kernel owned by indirectArgs.capturedKernel and the by-value group counts;
// the wait-event list is looked up in externalStorage. Returns the result of the append call.
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchCooperativeKernel>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
    // Append exactly once, using the kernel captured at record time.
    auto *recordedKernel = this->indirectArgs.capturedKernel.get();
    const auto appendResult = zeCommandListAppendLaunchCooperativeKernel(&executionTarget, recordedKernel, &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
    handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
    return appendResult;
}
// Builds the indirect arguments for a captured zeCommandListAppendLaunchKernelIndirect call:
// registers a snapshot of the kernel's private state in externalStorage and stores a dependent
// clone of the kernel in capturedKernel.
// NOTE(review): the snapshot (kernelStateId) and capturedKernel both capture the kernel; the
// snapshot looks like the older mechanism that capturedKernel replaces. Confirm it is still needed.
Closure<CaptureApi::zeCommandListAppendLaunchKernelIndirect>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
this->kernelStateId = externalStorage.registerKernelState(std::move(stateSnapshot));
this->capturedKernel = kernel->makeDependentClone();
}
// Replays the captured zeCommandListAppendLaunchKernelIndirect call on executionTarget.
// The launch uses the kernel owned by indirectArgs.capturedKernel and the launch-arguments
// buffer from apiArgs; the wait-event list is looked up in externalStorage.
// Returns the result of the append call.
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernelIndirect>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
    // Append exactly once, using the kernel captured at record time.
    auto *recordedKernel = this->indirectArgs.capturedKernel.get();
    const auto appendResult = zeCommandListAppendLaunchKernelIndirect(&executionTarget, recordedKernel, apiArgs.launchArgsBuffer, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
    handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
    return appendResult;
}
// Builds the indirect arguments for a captured zeCommandListAppendLaunchMultipleKernelsIndirect call.
// For each of the apiArgs.numKernels handles in apiArgs.phKernels this registers a snapshot of the
// kernel's private state in externalStorage (remembering the id of the first one) and stores a
// dependent clone of the kernel in capturedKernels, in call order.
// NOTE(review): the snapshots (firstKernelStateId) and capturedKernels both capture the kernels; the
// snapshots look like the older mechanism that capturedKernels replaces. Confirm they are still needed.
Closure<CaptureApi::zeCommandListAppendLaunchMultipleKernelsIndirect>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
    this->capturedKernels.reserve(apiArgs.numKernels);
    for (uint32_t i{0U}; i < apiArgs.numKernels; ++i) {
        auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.phKernels[i]));
        L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
        const auto id = externalStorage.registerKernelState(std::move(stateSnapshot));
        if (i == 0U) {
            this->firstKernelStateId = id;
        }
        // Hand the owning pointer straight to the vector; no release() and re-wrap of a raw pointer.
        this->capturedKernels.emplace_back(kernel->makeDependentClone());
    }
}
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchMultipleKernelsIndirect>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
std::vector<decltype(std::declval<KernelImp *>()->cloneWithStateOverride(nullptr))> kernelClonesOwner(apiArgs.numKernels);
std::vector<ze_kernel_handle_t> phKernelClones(apiArgs.numKernels);
for (uint32_t i{0U}; i < apiArgs.numKernels; ++i) {
auto *kernelOrig = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.phKernels[i]));
DEBUG_BREAK_IF(nullptr == kernelOrig);
const auto kernelStateId = this->indirectArgs.firstKernelStateId + i;
kernelClonesOwner[i] = kernelOrig->cloneWithStateOverride(externalStorage.getKernelMutableState(kernelStateId));
phKernelClones[i] = kernelClonesOwner[i].get();
phKernelClones[i] = this->indirectArgs.capturedKernels[i].get();
}
auto result = zeCommandListAppendLaunchMultipleKernelsIndirect(&executionTarget, apiArgs.numKernels, phKernelClones.data(), apiArgs.pCountBuffer, apiArgs.launchArgsBuffer, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
@@ -357,8 +334,7 @@ Closure<CaptureApi::zeCommandListAppendLaunchKernelWithParameters>::IndirectArgs
UNRECOVERABLE_IF(result != ZE_RESULT_SUCCESS);
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
this->kernelStateId = externalStorage.registerKernelState(std::move(stateSnapshot));
this->capturedKernel = kernel->makeDependentClone();
}
Closure<CaptureApi::zeCommandListAppendLaunchKernelWithParameters>::IndirectArgs::~IndirectArgs() {
@@ -366,11 +342,8 @@ Closure<CaptureApi::zeCommandListAppendLaunchKernelWithParameters>::IndirectArgs
}
// Replays the captured zeCommandListAppendLaunchKernelWithParameters call on executionTarget.
// The launch uses the kernel owned by indirectArgs.capturedKernel together with the group counts
// and pNext held in indirectArgs; the wait-event list is looked up in externalStorage.
// Returns the result of the append call.
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernelWithParameters>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
    // Append exactly once, using the kernel captured at record time.
    auto *recordedKernel = this->indirectArgs.capturedKernel.get();
    const auto appendResult = zeCommandListAppendLaunchKernelWithParameters(&executionTarget, recordedKernel, &indirectArgs.groupCounts, indirectArgs.pNext, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
    handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
    return appendResult;
}
@@ -382,11 +355,7 @@ Closure<CaptureApi::zeCommandListAppendLaunchKernelWithArguments>::IndirectArgs:
UNRECOVERABLE_IF(result != ZE_RESULT_SUCCESS);
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
result = CommandList::setKernelState(kernel, apiArgs.groupSizes, apiArgs.pArguments);
UNRECOVERABLE_IF(result != ZE_RESULT_SUCCESS);
L0::KernelMutableState stateSnapshot = kernel->getPrivateState();
this->kernelStateId = externalStorage.registerKernelState(std::move(stateSnapshot));
this->capturedKernel = kernel->makeDependentClone();
}
Closure<CaptureApi::zeCommandListAppendLaunchKernelWithArguments>::IndirectArgs::~IndirectArgs() {
@@ -394,11 +363,8 @@ Closure<CaptureApi::zeCommandListAppendLaunchKernelWithArguments>::IndirectArgs:
}
// Replays the captured zeCommandListAppendLaunchKernelWithArguments call on executionTarget.
// The replay goes through zeCommandListAppendLaunchKernelWithParameters, using the kernel owned by
// indirectArgs.capturedKernel, the group counts from apiArgs and the pNext held in indirectArgs;
// the wait-event list is looked up in externalStorage. Returns the result of the append call.
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernelWithArguments>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage, ExternalCbEventInfoContainer &externalCbEventStorage) const {
    // Append exactly once, using the kernel captured at record time.
    auto *recordedKernel = this->indirectArgs.capturedKernel.get();
    const auto appendResult = zeCommandListAppendLaunchKernelWithParameters(&executionTarget, recordedKernel, &apiArgs.groupCounts, this->indirectArgs.pNext, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
    handleExternalCbEvent(L0::Event::fromHandle(apiArgs.hSignalEvent), externalCbEventStorage);
    return appendResult;
}

View File

@@ -9,7 +9,6 @@
#include "shared/source/utilities/stackvec.h"
#include "level_zero/core/source/kernel/kernel_mutable_state.h"
#include "level_zero/ze_api.h"
#include "graph_captured_apis.h"