refactor: simplify interface of append launch kernel

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2025-05-14 20:42:02 +00:00
committed by Compute-Runtime-Automation
parent ffe3198b72
commit f2601d8ccf
39 changed files with 631 additions and 624 deletions

View File

@@ -112,7 +112,7 @@ struct CommandList : _ze_command_list_handle_t {
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle, const ze_group_count_t &threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) = 0;
CmdListKernelLaunchParams &launchParams) = 0;
virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t &pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents,

View File

@@ -121,7 +121,7 @@ struct CommandListCoreFamily : public CommandListImp {
const ze_group_count_t &threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) override;
CmdListKernelLaunchParams &launchParams) override;
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t &pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents,

View File

@@ -392,7 +392,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) {
CmdListKernelLaunchParams &launchParams) {
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
@@ -413,7 +413,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
NEO::EncodeMemoryPrefetch<GfxFamily>::programMemoryPrefetch(cmdStream, *kernel->getIsaAllocation(), static_cast<uint32_t>(kernel->getImmutableData()->getIsaSize()), kernel->getIsaOffsetInParentAllocation(), device->getNEODevice()->getRootDeviceEnvironment());
}
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, launchParams.outListCommands, relaxedOrderingDispatch, true, true, launchParams.omitAddingWaitEventsResidency, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, launchParams.outListCommands, launchParams.relaxedOrderingDispatch, true, true, launchParams.omitAddingWaitEventsResidency, false);
if (ret) {
return ret;
}
@@ -493,6 +493,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
appendEventForProfiling(event, nullptr, true, false, false, false);
launchParams.isIndirect = true;
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
nullptr, launchParams);
addToMappedEventList(event);
@@ -524,6 +525,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
CmdListKernelLaunchParams launchParams = {};
launchParams.isIndirect = true;
launchParams.isPredicate = true;
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
Event *event = nullptr;
if (hEvent) {
@@ -614,7 +616,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithArgument
L0::CmdListKernelLaunchParams launchParams = {};
launchParams.skipInOrderNonWalkerSignaling = this->skipInOrderNonWalkerSignalingAllowed(hSignalEvent);
return this->appendLaunchKernel(hKernel, groupCounts, hSignalEvent, numWaitEvents, phWaitEvents, launchParams, false);
return this->appendLaunchKernel(hKernel, groupCounts, hSignalEvent, numWaitEvents, phWaitEvents, launchParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -925,10 +927,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
auto status = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), kernelArgs,
event, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
launchParams);
addToMappedEventList(Event::fromHandle(hEvent));
return status;
@@ -1143,9 +1146,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
if constexpr (checkIfAllocationImportedRequired()) {
launchParams.isDestinationAllocationImported = this->isAllocationImported(allocationStruct.alloc, device->getDriverHandle()->getSvmAllocsManager());
}
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), kernelArgs,
event, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch);
event, numWaitEvents, phWaitEvents, launchParams);
addToMappedEventList(event);
addFlushRequiredCommand(allocationStruct.needsFlush, event, isCopyOnly(false), !this->l3FlushAfterPostSyncRequired);
@@ -1288,9 +1291,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
auto status = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), kernelArgs,
event, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
launchParams);
addToMappedEventList(event);
return status;
@@ -2110,8 +2114,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
if constexpr (checkIfAllocationImportedRequired()) {
launchParams.isDestinationAllocationImported = this->isAllocationImported(dstAlignedAllocation->alloc, device->getDriverHandle()->getSvmAllocsManager());
}
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), dispatchKernelArgs, signalEvent, numWaitEvents,
phWaitEvents, launchParams, relaxedOrderingDispatch);
phWaitEvents, launchParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -2181,11 +2186,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
if constexpr (CommandListCoreFamily<gfxCoreFamily>::checkIfAllocationImportedRequired()) {
launchParams.isDestinationAllocationImported = this->isAllocationImported(dstAlignedAllocation->alloc, device->getDriverHandle()->getSvmAllocsManager());
}
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(),
dispatchKernelArgs, signalEvent,
numWaitEvents,
phWaitEvents,
launchParams, relaxedOrderingDispatch);
launchParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -3487,7 +3493,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
launchParams.isDestinationAllocationImported = this->isAllocationImported(dstPtrAllocationStruct.alloc, device->getDriverHandle()->getSvmAllocsManager());
}
auto appendResult = appendLaunchKernel(builtinKernel->toHandle(), dispatchKernelArgs, hSignalEvent, numWaitEvents,
phWaitEvents, launchParams, false);
phWaitEvents, launchParams);
if (appendResult != ZE_RESULT_SUCCESS) {
return appendResult;
}

View File

@@ -66,7 +66,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
const ze_group_count_t &threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) override;
CmdListKernelLaunchParams &launchParams) override;
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t &pDispatchArgumentsBuffer,

View File

@@ -557,16 +557,16 @@ template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
ze_kernel_handle_t kernelHandle, const ze_group_count_t &threadGroupDimensions,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) {
CmdListKernelLaunchParams &launchParams) {
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
bool relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
bool stallingCmdsForRelaxedOrdering = hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch);
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false);
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernelHandle, threadGroupDimensions,
hSignalEvent, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
launchParams);
if (launchParams.skipInOrderNonWalkerSignaling) {
auto event = Event::fromHandle(hSignalEvent);

View File

@@ -80,6 +80,7 @@ struct CmdListKernelLaunchParams {
bool omitAddingEventResidency = false;
bool omitAddingWaitEventsResidency = false;
bool makeKernelCommandView = false;
bool relaxedOrderingDispatch = false;
};
struct CmdListMemoryCopyParams {