feature: R&R support for further appendLaunchKernel variants 1/2

Support:
`zeCommandListAppendLaunchCooperativeKernel()`
`zeCommandListAppendLaunchKernelIndirect()`

Related-To: NEO-15374
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
This commit is contained in:
Maciej Bielski
2025-08-06 16:16:45 +00:00
committed by Compute-Runtime-Automation
parent f7ed43c454
commit 73ea13fb4c
4 changed files with 223 additions and 78 deletions

View File

@@ -238,37 +238,13 @@ void testMultiGraph(ze_driver_handle_t driver, ze_context_handle_t &context, ze_
SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPool));
}
void testAppendLaunchKernel(ze_driver_handle_t driver, ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet) {
auto graphApi = loadGraphApi(driver);
if (false == graphApi.valid()) {
std::cerr << "Graph API not available" << std::endl;
validRet = false;
return;
}
// Buffers
constexpr size_t allocSize = 4096;
void *srcBuffer = nullptr;
void *interimBuffer = nullptr;
void *dstBuffer = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {
.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
.pNext = nullptr,
.flags = 0,
.ordinal = 0,
};
SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &srcBuffer));
SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &interimBuffer));
SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &dstBuffer));
inline void createModuleFromSpirV(ze_context_handle_t context, ze_device_handle_t device, const char *kernelSrc, ze_module_handle_t &module) {
// SpirV for a kernel
std::string buildLog;
auto moduleBinary = LevelZeroBlackBoxTests::compileToSpirV(LevelZeroBlackBoxTests::memcpyBytesTestKernelSrc, "", buildLog);
auto moduleBinary = LevelZeroBlackBoxTests::compileToSpirV(kernelSrc, "", buildLog);
LevelZeroBlackBoxTests::printBuildLog(buildLog);
SUCCESS_OR_TERMINATE((0 == moduleBinary.size()));
// Module
ze_module_handle_t module;
ze_module_desc_t moduleDesc = {
.stype = ZE_STRUCTURE_TYPE_MODULE_DESC,
.pNext = nullptr,
@@ -277,17 +253,124 @@ void testAppendLaunchKernel(ze_driver_handle_t driver, ze_context_handle_t &cont
.pInputModule = reinterpret_cast<const uint8_t *>(moduleBinary.data()),
};
SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &moduleDesc, &module, nullptr));
}
// Kernel
ze_kernel_handle_t kernel;
// Creates `kernel` from `module`, selecting the entry point by `kernelName`.
//
// module     - module that contains the requested kernel
// kernelName - name of the kernel function to look up (passed through unchanged)
// kernel     - receives the handle written by zeKernelCreate
//
// The result of zeKernelCreate is checked through SUCCESS_OR_TERMINATE.
inline void createKernelWithName(ze_module_handle_t module, const char *kernelName, ze_kernel_handle_t &kernel) {
    ze_kernel_desc_t kernelDesc = {
        .stype = ZE_STRUCTURE_TYPE_KERNEL_DESC,
        .pNext = nullptr,
        .flags = 0,
        // Exactly one designator for pKernelName: the caller-supplied name.
        // A second, hard-coded initializer would be ill-formed and would
        // defeat the purpose of the parameter.
        .pKernelName = kernelName,
    };
    SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel));
}
// Builds a host-visible event pool holding a single event, scoped to `device`,
// and stores the resulting handle in `eventPool`.
inline void createEventPool(ze_context_handle_t context, ze_device_handle_t device, ze_event_pool_handle_t &eventPool) {
    // Zero-initialise first, then fill in each field explicitly.
    ze_event_pool_desc_t poolDesc{};
    poolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
    poolDesc.pNext = nullptr;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    poolDesc.count = 1;

    // One device handle is passed, hence the device count of 1.
    SUCCESS_OR_TERMINATE(zeEventPoolCreate(context, &poolDesc, 1, &device, &eventPool));
}
// Creates the event at index 0 of `eventPool` with host scope for both the
// signal and the wait side, returning its handle through `newEventHandle`.
inline void createEventHostCoherent(ze_event_pool_handle_t eventPool, ze_event_handle_t &newEventHandle) {
    ze_event_desc_t hostEventDesc{};
    hostEventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC;
    hostEventDesc.pNext = nullptr;
    hostEventDesc.index = 0;
    hostEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    hostEventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;

    SUCCESS_OR_TERMINATE(zeEventCreate(eventPool, &hostEventDesc, &newEventHandle));
}
// Creates an immediate command list on `device` using the requested queue
// `mode`; the new handle is written to `cmdList`.
inline void createImmediateCmdlistWithMode(ze_context_handle_t context,
                                           ze_device_handle_t device,
                                           ze_command_queue_mode_t mode,
                                           ze_command_list_handle_t &cmdList) {
    ze_command_queue_desc_t queueDesc{};
    queueDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
    queueDesc.pNext = nullptr;
    // Ordinal is looked up through the black-box test helper for this device.
    queueDesc.ordinal = LevelZeroBlackBoxTests::getCommandQueueOrdinal(device, false);
    queueDesc.index = 0;
    queueDesc.flags = 0;
    queueDesc.mode = mode;
    queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;

    SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &queueDesc, &cmdList));
}
// Allocates a single ze_group_count_t and returns it wrapped in a
// std::unique_ptr whose deleter matches the allocation path.
//
// context  - context used for the host allocation and for freeing it
// indirect - true:  allocate through zeMemAllocHost (released with zeMemFree)
//            false: allocate with operator new (released with delete)
//
// The returned object is zero-initialised on both paths, so callers never
// observe indeterminate group counts before they assign them.
inline auto allocateDispatchTraits(ze_context_handle_t context, bool indirect) {
    // The deleter captures `indirect` so that release always mirrors the
    // allocation performed below.
    auto dispatchTraitsDeleter = [context, indirect](ze_group_count_t *ptr) noexcept {
        if (indirect) {
            // Return value is not checked here.
            zeMemFree(context, ptr);
        } else {
            delete ptr;
        }
    };
    using RetUniquePtr = std::unique_ptr<ze_group_count_t, decltype(dispatchTraitsDeleter)>;

    ze_group_count_t *rawPtr = nullptr;
    if (indirect) {
        ze_host_mem_alloc_desc_t hostAllocDesc{
            .stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC,
            .pNext = nullptr,
            .flags = 0U,
        };
        SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostAllocDesc, sizeof(ze_group_count_t), 4096, reinterpret_cast<void **>(&rawPtr)));
        // The allocation is not written by the call above; clear it explicitly.
        if (rawPtr != nullptr) {
            *rawPtr = ze_group_count_t{};
        }
    } else {
        // Value-initialisation (`{}`) zeroes the members; plain `new T` would not.
        rawPtr = new ze_group_count_t{};
    }

    return RetUniquePtr(rawPtr, dispatchTraitsDeleter);
}
void testAppendLaunchKernel(ze_driver_handle_t driver,
ze_context_handle_t &context,
ze_device_handle_t &device,
bool areDispatchTraitsIndirect,
bool &validRet) {
auto graphApi = loadGraphApi(driver);
if (false == graphApi.valid()) {
std::cerr << "Graph API not available" << std::endl;
validRet = false;
return;
}
ze_module_handle_t module;
createModuleFromSpirV(context, device, LevelZeroBlackBoxTests::memcpyBytesTestKernelSrc, module);
ze_kernel_handle_t kernel;
createKernelWithName(module, "memcpy_bytes", kernel);
ze_event_pool_handle_t eventPool = nullptr;
createEventPool(context, device, eventPool);
ze_event_handle_t eventCopied = nullptr;
createEventHostCoherent(eventPool, eventCopied);
ze_command_list_handle_t cmdList;
createImmediateCmdlistWithMode(context, device, ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, cmdList);
// Buffers
constexpr size_t allocSize = 4096;
void *srcBuffer = nullptr;
void *interimBuffer = nullptr;
void *dstBuffer = nullptr;
ze_device_mem_alloc_desc_t devAllocDesc = {
.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
.pNext = nullptr,
.flags = 0,
.ordinal = 0,
};
SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &devAllocDesc, allocSize, allocSize, device, &srcBuffer));
SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &devAllocDesc, allocSize, allocSize, device, &interimBuffer));
SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &devAllocDesc, allocSize, allocSize, device, &dstBuffer));
// Kernel groups size
constexpr size_t bytesPerThread = sizeof(std::byte);
constexpr size_t numThreads = allocSize / bytesPerThread;
uint32_t groupSizeX = 32u;
@@ -300,40 +383,6 @@ void testAppendLaunchKernel(ze_driver_handle_t driver, ze_context_handle_t &cont
}
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
// Events
ze_event_pool_handle_t eventPool = nullptr;
ze_event_pool_desc_t eventPoolDesc{
.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
.pNext = nullptr,
.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE,
.count = 1,
};
SUCCESS_OR_TERMINATE(zeEventPoolCreate(context, &eventPoolDesc, 1, &device, &eventPool));
ze_event_handle_t eventCopied = nullptr;
ze_event_desc_t eventDesc{
.stype = ZE_STRUCTURE_TYPE_EVENT_DESC,
.pNext = nullptr,
.index = 0,
.signal = ZE_EVENT_SCOPE_FLAG_HOST,
.wait = ZE_EVENT_SCOPE_FLAG_HOST,
};
SUCCESS_OR_TERMINATE(zeEventCreate(eventPool, &eventDesc, &eventCopied));
// Create cmdList
ze_command_list_handle_t cmdList;
ze_command_queue_desc_t cmdQueueDesc{
.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
.pNext = nullptr,
.ordinal = LevelZeroBlackBoxTests::getCommandQueueOrdinal(device, false),
.index = 0,
.flags = 0,
.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
};
SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &cmdQueueDesc, &cmdList));
// Start capturing commands
ze_graph_handle_t virtualGraph = nullptr;
SUCCESS_OR_TERMINATE(graphApi.graphCreate(context, &virtualGraph, nullptr));
@@ -348,23 +397,30 @@ void testAppendLaunchKernel(ze_driver_handle_t driver, ze_context_handle_t &cont
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, dstInitData.get(), allocSize, nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr));
ze_group_count_t dispatchTraits{
.groupCountX = static_cast<uint32_t>(numThreads) / groupSizeX,
.groupCountY = 1u,
.groupCountZ = 1u,
};
LevelZeroBlackBoxTests::printGroupCount(dispatchTraits);
SUCCESS_OR_TERMINATE_BOOL(dispatchTraits.groupCountX * groupSizeX == allocSize);
auto dispatchTraits = allocateDispatchTraits(context, areDispatchTraitsIndirect);
dispatchTraits->groupCountX = static_cast<uint32_t>(numThreads) / groupSizeX,
dispatchTraits->groupCountY = 1u,
dispatchTraits->groupCountZ = 1u,
LevelZeroBlackBoxTests::printGroupCount(*dispatchTraits);
SUCCESS_OR_TERMINATE_BOOL(dispatchTraits->groupCountX * groupSizeX == allocSize);
// Launch first copy
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(interimBuffer), &interimBuffer));
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(srcBuffer), &srcBuffer));
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, eventCopied, 0, nullptr));
if (areDispatchTraitsIndirect) {
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernelIndirect(cmdList, kernel, dispatchTraits.get(), eventCopied, 0, nullptr));
} else {
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, dispatchTraits.get(), eventCopied, 0, nullptr));
}
// Launch second copy
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(dstBuffer), &dstBuffer));
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(interimBuffer), &interimBuffer));
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, nullptr, 1, &eventCopied));
if (areDispatchTraitsIndirect) {
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernelIndirect(cmdList, kernel, dispatchTraits.get(), nullptr, 1, &eventCopied));
} else {
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, dispatchTraits.get(), nullptr, 1, &eventCopied));
}
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr));
// Encode reading data back
@@ -376,7 +432,7 @@ void testAppendLaunchKernel(ze_driver_handle_t driver, ze_context_handle_t &cont
ze_executable_graph_handle_t physicalGraph = nullptr;
SUCCESS_OR_TERMINATE(graphApi.commandListInstantiateGraph(virtualGraph, &physicalGraph, nullptr));
// // Dispatch and wait
// Dispatch and wait
SUCCESS_OR_TERMINATE(graphApi.commandListAppendGraph(cmdList, physicalGraph, nullptr, nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListHostSynchronize(cmdList, -1));
@@ -431,7 +487,11 @@ int main(int argc, char *argv[]) {
LevelZeroBlackBoxTests::printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest);
currentTest = "AppendLaunchKernel";
testAppendLaunchKernel(driverHandle, context, device0, outputValidationSuccessful);
testAppendLaunchKernel(driverHandle, context, device0, false, outputValidationSuccessful);
LevelZeroBlackBoxTests::printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest);
currentTest = "AppendLaunchKernelIndirect";
testAppendLaunchKernel(driverHandle, context, device0, true, outputValidationSuccessful);
LevelZeroBlackBoxTests::printResult(aubMode, outputValidationSuccessful, blackBoxName, currentTest);
SUCCESS_OR_TERMINATE(zeContextDestroy(context));

View File

@@ -445,16 +445,16 @@ TEST(GraphTestApiCapture, GivenCommandListInRecordStateThenCaptureCommandsInstea
EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeCommandListAppendImageCopyToMemoryExt(&cmdlist, memA, imgA, &imgRegion, 16, 16, nullptr, 0, nullptr));
EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeCommandListAppendImageCopyFromMemoryExt(&cmdlist, imgA, memA, &imgRegion, 16, 16, nullptr, 0, nullptr));
EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeCommandListAppendLaunchKernel(&cmdlist, kernelHandle, &groupCount, nullptr, 0, nullptr));
EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeCommandListAppendLaunchCooperativeKernel(&cmdlist, kernelHandle, &groupCount, nullptr, 0, nullptr));
EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeCommandListAppendLaunchKernelIndirect(&cmdlist, kernelHandle, &groupCount, nullptr, 0, nullptr));
// temporarily unsupported
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, L0::zeCommandListAppendLaunchCooperativeKernel(&cmdlist, kernelHandle, &groupCount, nullptr, 0, nullptr));
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, L0::zeCommandListAppendLaunchKernelIndirect(&cmdlist, kernelHandle, &groupCount, nullptr, 0, nullptr));
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, L0::zeCommandListAppendLaunchMultipleKernelsIndirect(&cmdlist, 1, &kernelHandle, &kernelCount, &groupCount, nullptr, 0, nullptr));
ze_graph_handle_t hgraph = &graph;
EXPECT_EQ(ZE_RESULT_SUCCESS, ::zeCommandListEndGraphCaptureExp(&cmdlist, &hgraph, nullptr));
ASSERT_EQ(22U, graph.getCapturedCommands().size());
ASSERT_EQ(24U, graph.getCapturedCommands().size());
uint32_t i = 0;
EXPECT_EQ(CaptureApi::zeCommandListAppendBarrier, static_cast<CaptureApi>(graph.getCapturedCommands()[i++].index()));
EXPECT_EQ(CaptureApi::zeCommandListAppendMemoryCopy, static_cast<CaptureApi>(graph.getCapturedCommands()[i++].index()));
@@ -478,6 +478,8 @@ TEST(GraphTestApiCapture, GivenCommandListInRecordStateThenCaptureCommandsInstea
EXPECT_EQ(CaptureApi::zeCommandListAppendImageCopyToMemoryExt, static_cast<CaptureApi>(graph.getCapturedCommands()[i++].index()));
EXPECT_EQ(CaptureApi::zeCommandListAppendImageCopyFromMemoryExt, static_cast<CaptureApi>(graph.getCapturedCommands()[i++].index()));
EXPECT_EQ(CaptureApi::zeCommandListAppendLaunchKernel, static_cast<CaptureApi>(graph.getCapturedCommands()[i++].index()));
EXPECT_EQ(CaptureApi::zeCommandListAppendLaunchCooperativeKernel, static_cast<CaptureApi>(graph.getCapturedCommands()[i++].index()));
EXPECT_EQ(CaptureApi::zeCommandListAppendLaunchKernelIndirect, static_cast<CaptureApi>(graph.getCapturedCommands()[i++].index()));
}
TEST(GraphForks, GivenUnknownChildCommandlistThenJoinDoesNothing) {
@@ -769,6 +771,8 @@ TEST_F(GraphTestInstantiationFixture, WhenInstantiatingGraphThenBakeCommandsInto
EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeCommandListAppendImageCopyToMemoryExt(&cmdlist, memA, imgA, &imgRegion, 16, 16, nullptr, 0, nullptr));
EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeCommandListAppendImageCopyFromMemoryExt(&cmdlist, imgA, memA, &imgRegion, 16, 16, nullptr, 0, nullptr));
EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeCommandListAppendLaunchKernel(&cmdlist, kernelHandle, &groupCount, nullptr, 0, nullptr));
EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeCommandListAppendLaunchCooperativeKernel(&cmdlist, kernelHandle, &groupCount, nullptr, 0, nullptr));
EXPECT_EQ(ZE_RESULT_SUCCESS, L0::zeCommandListAppendLaunchKernelIndirect(&cmdlist, kernelHandle, &groupCount, nullptr, 0, nullptr));
ze_graph_handle_t hgraph = &srcGraph;
EXPECT_EQ(ZE_RESULT_SUCCESS, ::zeCommandListEndGraphCaptureExp(&cmdlist, &hgraph, nullptr));
@@ -799,6 +803,7 @@ TEST_F(GraphTestInstantiationFixture, WhenInstantiatingGraphThenBakeCommandsInto
EXPECT_EQ(0U, graphHwCommands->appendImageCopyToMemoryExtCalled);
EXPECT_EQ(0U, graphHwCommands->appendImageCopyFromMemoryExtCalled);
EXPECT_EQ(0U, graphHwCommands->appendLaunchKernelCalled);
EXPECT_EQ(0U, graphHwCommands->appendLaunchKernelIndirectCalled);
execGraph.instantiateFrom(srcGraph);
EXPECT_EQ(1U, graphHwCommands->appendBarrierCalled);
EXPECT_EQ(1U, graphHwCommands->appendMemoryCopyCalled);
@@ -821,7 +826,8 @@ TEST_F(GraphTestInstantiationFixture, WhenInstantiatingGraphThenBakeCommandsInto
EXPECT_EQ(1U, graphHwCommands->appendWaitExternalSemaphoresCalled);
EXPECT_EQ(1U, graphHwCommands->appendImageCopyToMemoryExtCalled);
EXPECT_EQ(1U, graphHwCommands->appendImageCopyFromMemoryExtCalled);
EXPECT_EQ(1U, graphHwCommands->appendLaunchKernelCalled);
EXPECT_EQ(2U, graphHwCommands->appendLaunchKernelCalled); // +1 for zeCommandListAppendLaunchCooperativeKernel
EXPECT_EQ(1U, graphHwCommands->appendLaunchKernelIndirectCalled);
}
TEST(GraphExecution, GivenEmptyExecutableGraphWhenSubmittingItToCommandListThenTakeCareOnlyOfEvents) {

View File

@@ -772,4 +772,51 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchKernel> {
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
};
// Closure specialization for zeCommandListAppendLaunchCooperativeKernel.
// Holds the arguments of one captured call together with the data snapshotted
// at capture time; instantiateTo() re-issues the call on another command list.
template <>
struct Closure<CaptureApi::zeCommandListAppendLaunchCooperativeKernel> {
// Flags this API as supported (the consumers of this flag are outside this view).
inline static constexpr bool isSupported = true;
// Arguments in the same order as the API call. Handles and pointers are
// stored as given; nothing is deep-copied here.
struct ApiArgs {
ze_command_list_handle_t hCommandList;
ze_kernel_handle_t kernelHandle;
const ze_group_count_t *launchKernelArgs;
ze_event_handle_t hSignalEvent;
uint32_t numWaitEvents;
ze_event_handle_t *phWaitEvents;
} apiArgs;
// Capture-time snapshot. Wait-event bookkeeping comes from the
// IndirectArgsWithWaitEvents base (defined elsewhere).
struct IndirectArgs : IndirectArgsWithWaitEvents {
// Defined out of line; receives the API arguments and the external storage.
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
// By-value group count, as opposed to the pointer kept in ApiArgs.
ze_group_count_t launchKernelArgs;
// Id of a kernel state held by ClosureExternalStorage; invalid until assigned.
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
} indirectArgs;
// Copies the API arguments and builds the snapshot from the same arguments.
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
// Replays the captured call onto `executionTarget`; defined out of line.
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
};
// Closure specialization for zeCommandListAppendLaunchKernelIndirect.
// Holds the arguments of one captured call together with the data snapshotted
// at capture time; instantiateTo() re-issues the call on another command list.
template <>
struct Closure<CaptureApi::zeCommandListAppendLaunchKernelIndirect> {
// Flags this API as supported (the consumers of this flag are outside this view).
inline static constexpr bool isSupported = true;
// Arguments in the same order as the API call. Handles and pointers are
// stored as given; nothing is deep-copied here.
struct ApiArgs {
ze_command_list_handle_t hCommandList;
ze_kernel_handle_t kernelHandle;
// Pointer to the launch-arguments buffer; kept as a pointer, not copied by value.
const ze_group_count_t *launchArgsBuffer;
ze_event_handle_t hSignalEvent;
uint32_t numWaitEvents;
ze_event_handle_t *phWaitEvents;
} apiArgs;
// Capture-time snapshot. Unlike ApiArgs it has no group-count member: only
// the kernel state id is stored here. Wait-event bookkeeping comes from the
// IndirectArgsWithWaitEvents base (defined elsewhere).
struct IndirectArgs : IndirectArgsWithWaitEvents {
// Defined out of line; receives the API arguments and the external storage.
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
// Id of a kernel state held by ClosureExternalStorage; invalid until assigned.
ClosureExternalStorage::KernelStateId kernelStateId = ClosureExternalStorage::invalidKernelStateId;
} indirectArgs;
// Copies the API arguments and builds the snapshot from the same arguments.
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
// Replays the captured call onto `executionTarget`; defined out of line.
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
};
} // namespace L0

View File

@@ -237,6 +237,38 @@ ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernel>::instantiateTo(
return zeCommandListAppendLaunchKernel(&executionTarget, kernelClone.get(), &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsList(indirectArgs.waitEvents));
}
// Snapshots what the cooperative-launch closure needs at capture time: the
// group count (by value) and the kernel's mutable state (registered in
// `externalStorage`, referenced afterwards by id).
Closure<CaptureApi::zeCommandListAppendLaunchCooperativeKernel>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage)
    : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
    // Keep a private copy of the group count instead of the caller's pointer.
    launchKernelArgs = *apiArgs.launchKernelArgs;

    // Copy the kernel's current mutable state and move that copy into storage.
    auto *capturedKernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
    L0::KernelMutableState capturedState = capturedKernel->getMutableState();
    kernelStateId = externalStorage.registerKernelState(std::move(capturedState));
}
// Re-issues the captured cooperative launch on `executionTarget`, using a
// clone of the original kernel that carries the state recorded at capture time.
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchCooperativeKernel>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
    auto *sourceKernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
    DEBUG_BREAK_IF(nullptr == sourceKernel);

    // Clone with the stored state rather than whatever state the kernel has now.
    auto replayKernel = sourceKernel->cloneWithStateOverride(externalStorage.getKernelMutableState(indirectArgs.kernelStateId));
    auto waitEvents = externalStorage.getEventsList(indirectArgs.waitEvents);

    return zeCommandListAppendLaunchCooperativeKernel(&executionTarget,
                                                      replayKernel.get(),
                                                      &indirectArgs.launchKernelArgs,
                                                      apiArgs.hSignalEvent,
                                                      apiArgs.numWaitEvents,
                                                      waitEvents);
}
// Snapshots the kernel's mutable state at capture time and registers it in
// `externalStorage`; only the returned id is kept. The launch-arguments buffer
// is not copied here.
Closure<CaptureApi::zeCommandListAppendLaunchKernelIndirect>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage)
    : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
    auto *capturedKernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
    L0::KernelMutableState capturedState = capturedKernel->getMutableState();
    kernelStateId = externalStorage.registerKernelState(std::move(capturedState));
}
// Re-issues the captured indirect launch on `executionTarget`. The kernel is
// cloned with the state recorded at capture time; the launch-arguments buffer
// is forwarded as the original pointer from the API call.
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernelIndirect>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
    auto *sourceKernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
    DEBUG_BREAK_IF(nullptr == sourceKernel);

    auto replayKernel = sourceKernel->cloneWithStateOverride(externalStorage.getKernelMutableState(indirectArgs.kernelStateId));
    auto waitEvents = externalStorage.getEventsList(indirectArgs.waitEvents);

    return zeCommandListAppendLaunchKernelIndirect(&executionTarget,
                                                   replayKernel.get(),
                                                   apiArgs.launchArgsBuffer,
                                                   apiArgs.hSignalEvent,
                                                   apiArgs.numWaitEvents,
                                                   waitEvents);
}
ExecutableGraph::~ExecutableGraph() = default;
L0::CommandList *ExecutableGraph::allocateAndAddCommandListSubmissionNode() {