feature: Add new API to append kernel with support for additional parameters

Related-To: NEO-15565,GSD-10660
Signed-off-by: Aleksandra Nizio <aleksandra.nizio@intel.com>
This commit is contained in:
Aleksandra Nizio
2025-08-19 15:09:02 +00:00
committed by Compute-Runtime-Automation
parent d6bd0262e9
commit 9a0a52d46d
14 changed files with 163 additions and 10 deletions

View File

@@ -173,6 +173,25 @@ ze_result_t zeCommandListAppendLaunchKernelWithArguments(
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernelWithArguments(hKernel, groupCounts, groupSizes, pArguments, pNext, hSignalEvent, numWaitEvents, phWaitEvents);
}
// Driver-side implementation behind the exported zeCommandListAppendLaunchKernelWithParameters.
//
// A null hCommandList is rejected with ZE_RESULT_ERROR_INVALID_NULL_HANDLE.
// Otherwise the call is first offered to the command list's capture hook:
//  - any capture result other than ZE_RESULT_ERROR_NOT_AVAILABLE is returned unchanged,
//  - ZE_RESULT_ERROR_NOT_AVAILABLE falls through to CommandList::appendLaunchKernelWithParameters.
// All remaining arguments are forwarded untouched.
ze_result_t zeCommandListAppendLaunchKernelWithParameters(
    ze_command_list_handle_t hCommandList,
    ze_kernel_handle_t hKernel,
    const ze_group_count_t *pGroupCounts,
    const void *pNext,
    ze_event_handle_t hSignalEvent,
    uint32_t numWaitEvents,
    ze_event_handle_t *phWaitEvents) {
    if (hCommandList == nullptr) {
        return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
    }

    auto commandList = L0::CommandList::fromHandle(hCommandList);

    const auto captureResult = commandList->capture<CaptureApi::zeCommandListAppendLaunchKernelWithParameters>(hCommandList, hKernel, pGroupCounts, pNext, hSignalEvent, numWaitEvents, phWaitEvents);
    if (captureResult == ZE_RESULT_ERROR_NOT_AVAILABLE) {
        // NOTE(review): NOT_AVAILABLE presumably means the capture layer did not take the call - confirm against capture().
        return commandList->appendLaunchKernelWithParameters(hKernel, pGroupCounts, pNext, hSignalEvent, numWaitEvents, phWaitEvents);
    }
    return captureResult;
}
} // namespace L0
extern "C" {
@@ -322,4 +341,16 @@ ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelWithArguments(
ze_event_handle_t *phWaitEvents) {
return L0::zeCommandListAppendLaunchKernelWithArguments(hCommandList, hKernel, groupCounts, groupSizes, pArguments, pNext, hSignalEvent, numWaitEvents, phWaitEvents);
}
} // extern "C"
// Exported API symbol: forwards every argument, unchanged and in order, to
// L0::zeCommandListAppendLaunchKernelWithParameters and returns its result.
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelWithParameters(
    ze_command_list_handle_t hCommandList,
    ze_kernel_handle_t hKernel,
    const ze_group_count_t *pGroupCounts,
    const void *pNext,
    ze_event_handle_t hSignalEvent,
    uint32_t numWaitEvents,
    ze_event_handle_t *phWaitEvents) {
    const ze_result_t status = L0::zeCommandListAppendLaunchKernelWithParameters(
        hCommandList,
        hKernel,
        pGroupCounts,
        pNext,
        hSignalEvent,
        numWaitEvents,
        phWaitEvents);
    return status;
}
} // extern "C"

View File

@@ -139,6 +139,13 @@ struct CommandList : _ze_command_list_handle_t {
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
// Appends a kernel launch whose additional launch parameters are passed through a pNext chain.
// Pure virtual: every concrete command list has to provide the implementation.
//   hKernel       - handle of the kernel to launch
//   pGroupCounts  - thread group counts for the launch
//   pNext         - optional chain of extension descriptors carrying the extra parameters
//   hSignalEvent  - optional event to signal on completion
//   numWaitEvents - number of entries in phWaitEvents
//   phWaitEvents  - optional events to wait on before launching
virtual ze_result_t appendLaunchKernelWithParameters(ze_kernel_handle_t hKernel,
const ze_group_count_t *pGroupCounts,
const void *pNext,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size,
ze_memory_advice_t advice) = 0;
virtual ze_result_t executeMemAdvise(ze_device_handle_t hDevice,

View File

@@ -8,10 +8,10 @@
#include "shared/source/command_container/command_encoder.h"
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/cmdlist/cmdlist_launch_params.h"
#include "level_zero/ze_intel_gpu.h"
namespace L0 {
struct CmdListKernelLaunchParams;
// Hook for copying extra settings from launchParams into dispatchKernelArgs.
// In this implementation the body is empty, so dispatchKernelArgs is left untouched
// and launchParams is not read.
void CommandList::setAdditionalDispatchKernelArgsFromLaunchParams(NEO::EncodeDispatchKernelArgs &dispatchKernelArgs, const CmdListKernelLaunchParams &launchParams) const {
}
@@ -23,6 +23,15 @@ ze_result_t CommandList::validateLaunchParams(const Kernel &kernel, const CmdLis
}
// Walks the descriptor chain that starts at `desc` (linked through pNext) and
// transfers the recognized settings into `launchParams`.
//
// The only recognized descriptor is the cooperative-launch one
// (ZE_STRUCTURE_TYPE_COMMAND_LIST_APPEND_PARAM_COOPERATIVE_DESC), whose
// isCooperative flag is copied to launchParams.isCooperative.
// The first descriptor with any other stype stops the walk with
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; values already written to launchParams
// by earlier descriptors are not rolled back.
// A null `desc` (empty chain) yields ZE_RESULT_SUCCESS. `kernelHandle` is not used here.
ze_result_t CommandList::obtainLaunchParamsFromExtensions(const ze_base_desc_t *desc, CmdListKernelLaunchParams &launchParams, ze_kernel_handle_t kernelHandle) const {
    for (auto current = desc; current != nullptr; current = reinterpret_cast<const ze_base_desc_t *>(current->pNext)) {
        if (current->stype != ZE_STRUCTURE_TYPE_COMMAND_LIST_APPEND_PARAM_COOPERATIVE_DESC) {
            return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
        }

        const auto &cooperative = *reinterpret_cast<const ze_command_list_append_launch_kernel_param_cooperative_desc_t *>(current);
        launchParams.isCooperative = cooperative.isCooperative;
    }
    return ZE_RESULT_SUCCESS;
}

View File

@@ -139,6 +139,14 @@ struct CommandListCoreFamily : public CommandListImp {
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
// Core-family override of the kernel launch that takes additional launch
// parameters through a pNext descriptor chain (see the out-of-class definition
// for validation and dispatch details).
ze_result_t appendLaunchKernelWithParameters(ze_kernel_handle_t hKernel,
const ze_group_count_t *pGroupCounts,
const void *pNext,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_result_t appendMemAdvise(ze_device_handle_t hDevice,
const void *ptr, size_t size,
ze_memory_advice_t advice) override;

View File

@@ -635,17 +635,43 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithArgument
return result;
}
}
return this->appendLaunchKernelWithParameters(hKernel, &groupCounts, pNext, hSignalEvent, numWaitEvents, phWaitEvents);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParameters(
ze_kernel_handle_t hKernel,
const ze_group_count_t *pGroupCounts,
const void *pNext,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
if (hKernel == nullptr) {
return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
}
auto kernel = L0::Kernel::fromHandle(hKernel);
if (kernel == nullptr) {
return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
}
if (pGroupCounts == nullptr) {
return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
}
if ((phWaitEvents == nullptr) && (numWaitEvents > 0)) {
return ZE_RESULT_ERROR_INVALID_SIZE;
}
L0::CmdListKernelLaunchParams launchParams = {};
launchParams.skipInOrderNonWalkerSignaling = this->skipInOrderNonWalkerSignalingAllowed(hSignalEvent);
result = this->obtainLaunchParamsFromExtensions(reinterpret_cast<const ze_base_desc_t *>(pNext), launchParams, hKernel);
auto result = this->obtainLaunchParamsFromExtensions(reinterpret_cast<const ze_base_desc_t *>(pNext), launchParams, hKernel);
if (result != ZE_RESULT_SUCCESS) {
return result;
}
return this->appendLaunchKernel(hKernel, groupCounts, hSignalEvent, numWaitEvents, phWaitEvents, launchParams);
return this->appendLaunchKernel(hKernel, *pGroupCounts, hSignalEvent, numWaitEvents, phWaitEvents, launchParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -40,6 +40,7 @@ void *ExtensionFunctionAddressHelper::getExtensionFunctionAddress(const std::str
RETURN_FUNC_PTR_IF_EXIST(zeDeviceGetPriorityLevels);
RETURN_FUNC_PTR_IF_EXIST(zeCommandListAppendLaunchKernelWithArguments);
RETURN_FUNC_PTR_IF_EXIST(zeCommandListAppendLaunchKernelWithParameters);
RETURN_FUNC_PTR_IF_EXIST(zexKernelGetBaseAddress);
RETURN_FUNC_PTR_IF_EXIST(zexKernelGetArgumentSize);

View File

@@ -22,10 +22,10 @@ namespace LevelZeroBlackBoxTests {
// Pointers to driver entry points that are looked up by name at runtime.
// Each pointer has the exact type of the like-named API function (via decltype)
// and starts out as nullptr until it is filled in through
// zeDriverGetExtensionFunctionAddress.
decltype(&zerGetDefaultContext) zerGetDefaultContextFunc = nullptr;
decltype(&zeDeviceSynchronize) zeDeviceSynchronizeFunc = nullptr;
decltype(&zeCommandListAppendLaunchKernelWithArguments) zeCommandListAppendLaunchKernelWithArgumentsFunc = nullptr;
decltype(&zeCommandListAppendLaunchKernelWithParameters) zeCommandListAppendLaunchKernelWithParametersFunc = nullptr;
decltype(&zerTranslateIdentifierToDeviceHandle) zerTranslateIdentifierToDeviceHandleFunc = nullptr;
decltype(&zerTranslateDeviceHandleToIdentifier) zerTranslateDeviceHandleToIdentifierFunc = nullptr;
decltype(&zerGetLastErrorDescription) zerGetLastErrorDescriptionFunc = nullptr;
struct LoadedDriverExtensions {
std::vector<ze_driver_extension_properties_t> extensions;
bool loaded = false;
@@ -461,6 +461,8 @@ std::vector<ze_device_handle_t> zelloInitContextAndGetDevices(ze_context_handle_
SUCCESS_OR_TERMINATE(zeDriverGetExtensionFunctionAddress(driverHandle, "zerGetDefaultContext", reinterpret_cast<void **>(&zerGetDefaultContextFunc)));
SUCCESS_OR_TERMINATE(zeDriverGetExtensionFunctionAddress(driverHandle, "zeDeviceSynchronize", reinterpret_cast<void **>(&zeDeviceSynchronizeFunc)));
SUCCESS_OR_TERMINATE(zeDriverGetExtensionFunctionAddress(driverHandle, "zeCommandListAppendLaunchKernelWithArguments", reinterpret_cast<void **>(&zeCommandListAppendLaunchKernelWithArgumentsFunc)));
SUCCESS_OR_TERMINATE(zeDriverGetExtensionFunctionAddress(driverHandle, "zeCommandListAppendLaunchKernelWithParameters", reinterpret_cast<void **>(&zeCommandListAppendLaunchKernelWithParametersFunc)));
SUCCESS_OR_TERMINATE(zeDriverGetExtensionFunctionAddress(driverHandle, "zerTranslateIdentifierToDeviceHandle", reinterpret_cast<void **>(&zerTranslateIdentifierToDeviceHandleFunc)));
SUCCESS_OR_TERMINATE(zeDriverGetExtensionFunctionAddress(driverHandle, "zerTranslateDeviceHandleToIdentifier", reinterpret_cast<void **>(&zerTranslateDeviceHandleToIdentifierFunc)));
SUCCESS_OR_TERMINATE(zeDriverGetExtensionFunctionAddress(driverHandle, "zerGetLastErrorDescription", reinterpret_cast<void **>(&zerGetLastErrorDescriptionFunc)));

View File

@@ -25,10 +25,10 @@ inline void validate(ResulT result, const char *message);
// Declarations of the runtime-resolved driver entry-point pointers.
// Each one is typed after the like-named API function (via decltype); the
// definitions live in a source file, hence `extern`.
extern decltype(&zerGetDefaultContext) zerGetDefaultContextFunc;
extern decltype(&zeDeviceSynchronize) zeDeviceSynchronizeFunc;
extern decltype(&zeCommandListAppendLaunchKernelWithArguments) zeCommandListAppendLaunchKernelWithArgumentsFunc;
extern decltype(&zeCommandListAppendLaunchKernelWithParameters) zeCommandListAppendLaunchKernelWithParametersFunc;
extern decltype(&zerTranslateIdentifierToDeviceHandle) zerTranslateIdentifierToDeviceHandleFunc;
extern decltype(&zerTranslateDeviceHandleToIdentifier) zerTranslateDeviceHandleToIdentifierFunc;
extern decltype(&zerGetLastErrorDescription) zerGetLastErrorDescriptionFunc;
} // namespace LevelZeroBlackBoxTests
#define SUCCESS_OR_TERMINATE(CALL) LevelZeroBlackBoxTests::validate<true>(CALL, #CALL)

View File

@@ -395,6 +395,12 @@ struct Mock<CommandList> : public CommandList {
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
// Mock entry for CommandList::appendLaunchKernelWithParameters with the same parameter list
// as the pure virtual it stands in for.
// NOTE(review): ZE_RESULT_SUCCESS is presumably the mock's default return value - confirm
// against the ADDMETHOD_NOBASE macro definition.
ADDMETHOD_NOBASE(appendLaunchKernelWithParameters, ze_result_t, ZE_RESULT_SUCCESS,
(ze_kernel_handle_t hKernel,
const ze_group_count_t *pGroupCounts,
const void *pNext, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ADDMETHOD_NOBASE(appendSoftwareTag, ze_result_t, ZE_RESULT_SUCCESS,
(const char *data));

View File

@@ -445,6 +445,23 @@ HWTEST_F(CommandListTest, givenComputeCommandListAnd2dRegionWhenMemoryCopyRegion
EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory);
}
// A chain holding a single cooperative descriptor with isCooperative == true must be
// accepted (ZE_RESULT_SUCCESS) and must set CmdListKernelLaunchParams::isCooperative.
HWTEST_F(CommandListTest, givenCooperativeDescriptorWithTrueValueWhenObtainLaunchParamsFromExtensionsIsCalledThenIsCooperativeIsSet) {
    // Arrange: command list under test.
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);

    // Arrange: one-element descriptor chain requesting a cooperative launch.
    ze_command_list_append_launch_kernel_param_cooperative_desc_t coopDesc = {};
    coopDesc.stype = static_cast<ze_structure_type_t>(ZE_STRUCTURE_TYPE_COMMAND_LIST_APPEND_PARAM_COOPERATIVE_DESC);
    coopDesc.pNext = nullptr;
    coopDesc.isCooperative = true;
    ze_base_desc_t *chainHead = reinterpret_cast<ze_base_desc_t *>(&coopDesc);

    // Act: parse the chain into freshly value-initialized launch params; no kernel handle is supplied.
    L0::CmdListKernelLaunchParams parsedParams = {};
    ze_result_t status = commandList->obtainLaunchParamsFromExtensions(chainHead, parsedParams, nullptr);

    // Assert.
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_TRUE(parsedParams.isCooperative);
}
HWTEST_F(CommandListTest, givenComputeCommandListAnd2dRegionWhenMemoryCopyRegionInUsmHostAllocationCalledThenBuiltinFlagAndDestinationAllocSystemIsSet) {
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);

View File

@@ -1277,6 +1277,7 @@ TEST_F(DriverExperimentalApiTest, whenRetrievingApiFunctionThenExpectProperPoint
decltype(&zeDeviceSynchronize) expectedZeDeviceSynchronize = zeDeviceSynchronize;
decltype(&zeCommandListAppendLaunchKernelWithArguments) expectedZeCommandListAppendLaunchKernelWithArguments = zeCommandListAppendLaunchKernelWithArguments;
decltype(&zeCommandListAppendLaunchKernelWithParameters) expectedZeCommandListAppendLaunchKernelWithParameters = zeCommandListAppendLaunchKernelWithParameters;
decltype(&zexKernelGetBaseAddress) expectedKernelGetBaseAddress = L0::zexKernelGetBaseAddress;
decltype(&zeIntelGetDriverVersionString) expectedIntelGetDriverVersionString = zeIntelGetDriverVersionString;
@@ -1322,6 +1323,9 @@ TEST_F(DriverExperimentalApiTest, whenRetrievingApiFunctionThenExpectProperPoint
EXPECT_EQ(ZE_RESULT_SUCCESS, zeDriverGetExtensionFunctionAddress(driverHandle, "zeCommandListAppendLaunchKernelWithArguments", &funPtr));
EXPECT_EQ(expectedZeCommandListAppendLaunchKernelWithArguments, reinterpret_cast<decltype(&zeCommandListAppendLaunchKernelWithArguments)>(funPtr));
EXPECT_EQ(ZE_RESULT_SUCCESS, zeDriverGetExtensionFunctionAddress(driverHandle, "zeCommandListAppendLaunchKernelWithParameters", &funPtr));
EXPECT_EQ(expectedZeCommandListAppendLaunchKernelWithParameters, reinterpret_cast<decltype(&zeCommandListAppendLaunchKernelWithParameters)>(funPtr));
EXPECT_EQ(ZE_RESULT_SUCCESS, zeDriverGetExtensionFunctionAddress(driverHandle, "zexKernelGetBaseAddress", &funPtr));
EXPECT_EQ(expectedKernelGetBaseAddress, reinterpret_cast<decltype(&zexKernelGetBaseAddress)>(funPtr));

View File

@@ -41,6 +41,7 @@ struct Event;
RR_CAPTURED_API(zeCommandListAppendLaunchKernel) \
RR_CAPTURED_API(zeCommandListAppendLaunchCooperativeKernel) \
RR_CAPTURED_API(zeCommandListAppendLaunchKernelIndirect) \
RR_CAPTURED_API(zeCommandListAppendLaunchKernelWithParameters) \
RR_CAPTURED_API(zeCommandListAppendLaunchMultipleKernelsIndirect) \
RR_CAPTURED_API(zeCommandListAppendSignalExternalSemaphoreExt) \
RR_CAPTURED_API(zeCommandListAppendWaitExternalSemaphoreExt) \

View File

@@ -608,6 +608,46 @@ ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelWithArguments(
uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching
ze_event_handle_t *phWaitEvents); ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait on before launching
///////////////////////////////////////////////////////////////////////////////
/// @brief Extension descriptor requesting a cooperative kernel launch via the pNext chain.
///
/// @details
///     - This structure can be passed through pNext to ::zeCommandListAppendLaunchKernelWithParameters.
///     - The structure is identified by its stype value,
///       ZE_STRUCTURE_TYPE_COMMAND_LIST_APPEND_PARAM_COOPERATIVE_DESC.
typedef struct _ze_command_list_append_launch_kernel_param_cooperative_desc_t {
ze_structure_type_ext_t stype; ///< [in] type of this structure (ZE_STRUCTURE_TYPE_COMMAND_LIST_APPEND_PARAM_COOPERATIVE_DESC)
const void *pNext; ///< [in][optional] pointer to the next extension-specific structure, or nullptr to end the chain
ze_bool_t isCooperative; ///< [in] indicates whether the kernel should be launched as cooperative
} ze_command_list_append_launch_kernel_param_cooperative_desc_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Appends a kernel launch to a command list, with additional launch
///        parameters supplied through a pNext chain.
///
/// @details
///     - The application may call this function from simultaneous threads.
///     - The implementation of this function should be lock-free.
///     - Appends kernel to command list with additional parameters via pNext chain.
///     - Allows passing core and extension descriptors (e.g. cooperative kernel,
///       see ::ze_command_list_append_launch_kernel_param_cooperative_desc_t).
///
/// @returns
///     - ::ZE_RESULT_SUCCESS
///     - ::ZE_RESULT_ERROR_DEVICE_LOST
///     - ::ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
///     - ::ZE_RESULT_ERROR_UNSUPPORTED_FEATURE
///         + the pNext chain contains a structure with an unsupported `stype`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `nullptr == hCommandList`
///         + `nullptr == hKernel`
///     - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
///         + `nullptr == pGroupCounts`
///     - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT
///     - ::ZE_RESULT_ERROR_INVALID_SIZE
///         + `(nullptr == phWaitEvents) && (0 < numWaitEvents)`
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelWithParameters(
ze_command_list_handle_t hCommandList, ///< [in] handle of the command list
ze_kernel_handle_t hKernel, ///< [in] handle of the kernel object
const ze_group_count_t *pGroupCounts, ///< [in] thread group launch arguments
const void *pNext, ///< [in][optional] additional parameters (pNext chain)
ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion
uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before launching
ze_event_handle_t *phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait on before launching
);
///////////////////////////////////////////////////////////////////////////////
/// @brief Retrieves a string describing the last error code returned by the
/// default driver in the current thread.
@@ -665,7 +705,5 @@ const ze_command_queue_desc_t defaultCommandQueueDesc = {
ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, // mode
ZE_COMMAND_QUEUE_PRIORITY_NORMAL // priority
};
#endif // ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13)
#endif

View File

@@ -21,6 +21,9 @@ using zes_structure_type_ext_t = uint32_t;
#define ZE_STRUCTURE_TYPE_INTEL_MEDIA_COMMUNICATION_DESC static_cast<ze_structure_type_ext_t>(0x00020021)
#define ZE_STRUCTURE_TYPE_INTEL_MEDIA_DOORBELL_HANDLE_DESC static_cast<ze_structure_type_ext_t>(0x00020022)
#define ZE_STRUCTURE_TYPE_INTEL_DEVICE_MEDIA_EXP_PROPERTIES static_cast<ze_structure_type_ext_t>(0x00020023)
// stype value identifying ze_command_list_append_launch_kernel_param_cooperative_desc_t
// in a pNext chain. Only defined here when the Level Zero headers in use are at API
// version 1.13 or older.
// NOTE(review): presumably newer headers provide this value themselves - confirm.
#if ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13)
#define ZE_STRUCTURE_TYPE_COMMAND_LIST_APPEND_PARAM_COOPERATIVE_DESC static_cast<ze_structure_type_ext_t>(0x00020036)
#endif // ZE_API_VERSION_CURRENT_M <= ZE_MAKE_VERSION(1, 13)
#define ZEX_STRUCTURE_TYPE_LABEL_DESCRIPTOR static_cast<ze_structure_type_ext_t>(0x00030002)
#define ZEX_STRUCTURE_TYPE_OPERAND_DESCRIPTOR static_cast<ze_structure_type_ext_t>(0x00030003)
#define ZEX_STRUCTURE_TYPE_VARIABLE_DESCRIPTOR static_cast<ze_structure_type_ext_t>(0x00030004)