Replace function term with kernel class specific names

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2022-09-09 22:29:39 +00:00 committed by Compute-Runtime-Automation
parent af7eaa0161
commit 072686f283
35 changed files with 399 additions and 399 deletions

View File

@ -65,8 +65,8 @@ ze_result_t zeModuleGetKernelNames(
ze_result_t zeKernelCreate( ze_result_t zeKernelCreate(
ze_module_handle_t hModule, ze_module_handle_t hModule,
const ze_kernel_desc_t *desc, const ze_kernel_desc_t *desc,
ze_kernel_handle_t *phFunction) { ze_kernel_handle_t *kernelHandle) {
return L0::Module::fromHandle(hModule)->createKernel(desc, phFunction); return L0::Module::fromHandle(hModule)->createKernel(desc, kernelHandle);
} }
ze_result_t zeKernelDestroy( ze_result_t zeKernelDestroy(
@ -141,45 +141,45 @@ ze_result_t zeKernelGetProperties(
ze_result_t zeCommandListAppendLaunchKernel( ze_result_t zeCommandListAppendLaunchKernel(
ze_command_list_handle_t hCommandList, ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel, ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchFuncArgs, const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) { ze_event_handle_t *phWaitEvents) {
L0::CmdListKernelLaunchParams launchParams = {}; L0::CmdListKernelLaunchParams launchParams = {};
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernel(hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents, launchParams); return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, phWaitEvents, launchParams);
} }
ze_result_t zeCommandListAppendLaunchCooperativeKernel( ze_result_t zeCommandListAppendLaunchCooperativeKernel(
ze_command_list_handle_t hCommandList, ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel, ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchFuncArgs, const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) { ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendLaunchCooperativeKernel(hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents); return L0::CommandList::fromHandle(hCommandList)->appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, phWaitEvents);
} }
ze_result_t zeCommandListAppendLaunchKernelIndirect( ze_result_t zeCommandListAppendLaunchKernelIndirect(
ze_command_list_handle_t hCommandList, ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel, ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchArgumentsBuffer, const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) { ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernelIndirect(hKernel, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents); return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernelIndirect(kernelHandle, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
} }
ze_result_t zeCommandListAppendLaunchMultipleKernelsIndirect( ze_result_t zeCommandListAppendLaunchMultipleKernelsIndirect(
ze_command_list_handle_t hCommandList, ze_command_list_handle_t hCommandList,
uint32_t numKernels, uint32_t numKernels,
ze_kernel_handle_t *phKernels, ze_kernel_handle_t *kernelHandles,
const uint32_t *pCountBuffer, const uint32_t *pCountBuffer,
const ze_group_count_t *pLaunchArgumentsBuffer, const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) { ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendLaunchMultipleKernelsIndirect(numKernels, phKernels, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents); return L0::CommandList::fromHandle(hCommandList)->appendLaunchMultipleKernelsIndirect(numKernels, kernelHandles, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
} }
ze_result_t zeKernelGetName( ze_result_t zeKernelGetName(
@ -433,15 +433,15 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetName(
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel( ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel(
ze_command_list_handle_t hCommandList, ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel, ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchFuncArgs, const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) { ze_event_handle_t *phWaitEvents) {
return L0::zeCommandListAppendLaunchKernel( return L0::zeCommandListAppendLaunchKernel(
hCommandList, hCommandList,
hKernel, kernelHandle,
pLaunchFuncArgs, launchKernelArgs,
hSignalEvent, hSignalEvent,
numWaitEvents, numWaitEvents,
phWaitEvents); phWaitEvents);
@ -449,15 +449,15 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel(
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel( ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel(
ze_command_list_handle_t hCommandList, ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel, ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchFuncArgs, const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) { ze_event_handle_t *phWaitEvents) {
return L0::zeCommandListAppendLaunchCooperativeKernel( return L0::zeCommandListAppendLaunchCooperativeKernel(
hCommandList, hCommandList,
hKernel, kernelHandle,
pLaunchFuncArgs, launchKernelArgs,
hSignalEvent, hSignalEvent,
numWaitEvents, numWaitEvents,
phWaitEvents); phWaitEvents);
@ -465,14 +465,14 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel(
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect( ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect(
ze_command_list_handle_t hCommandList, ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel, ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchArgumentsBuffer, const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) { ze_event_handle_t *phWaitEvents) {
return L0::zeCommandListAppendLaunchKernelIndirect( return L0::zeCommandListAppendLaunchKernelIndirect(
hCommandList, hCommandList,
hKernel, kernelHandle,
pLaunchArgumentsBuffer, pLaunchArgumentsBuffer,
hSignalEvent, hSignalEvent,
numWaitEvents, numWaitEvents,
@ -482,7 +482,7 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect(
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndirect( ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndirect(
ze_command_list_handle_t hCommandList, ze_command_list_handle_t hCommandList,
uint32_t numKernels, uint32_t numKernels,
ze_kernel_handle_t *phKernels, ze_kernel_handle_t *kernelHandles,
const uint32_t *pCountBuffer, const uint32_t *pCountBuffer,
const ze_group_count_t *pLaunchArgumentsBuffer, const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
@ -491,7 +491,7 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndi
return L0::zeCommandListAppendLaunchMultipleKernelsIndirect( return L0::zeCommandListAppendLaunchMultipleKernelsIndirect(
hCommandList, hCommandList,
numKernels, numKernels,
phKernels, kernelHandles,
pCountBuffer, pCountBuffer,
pLaunchArgumentsBuffer, pLaunchArgumentsBuffer,
hSignalEvent, hSignalEvent,

View File

@ -30,15 +30,15 @@ CommandList::~CommandList() {
if (this->cmdListType == CommandListType::TYPE_REGULAR || !this->isFlushTaskSubmissionEnabled) { if (this->cmdListType == CommandListType::TYPE_REGULAR || !this->isFlushTaskSubmissionEnabled) {
removeHostPtrAllocations(); removeHostPtrAllocations();
} }
printfFunctionContainer.clear(); printfKernelContainer.clear();
} }
void CommandList::storePrintfFunction(Kernel *kernel) { void CommandList::storePrintfKernel(Kernel *kernel) {
auto it = std::find(this->printfFunctionContainer.begin(), this->printfFunctionContainer.end(), auto it = std::find(this->printfKernelContainer.begin(), this->printfKernelContainer.end(),
kernel); kernel);
if (it == this->printfFunctionContainer.end()) { if (it == this->printfKernelContainer.end()) {
this->printfFunctionContainer.push_back(kernel); this->printfKernelContainer.push_back(kernel);
} }
} }
@ -131,7 +131,7 @@ void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocati
} }
} }
NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) { NEO::PreemptionMode CommandList::obtainKernelPreemptionMode(Kernel *kernel) {
NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor()); NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor());
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags); return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);
} }

View File

@ -93,19 +93,19 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0; ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *threadGroupDimensions, virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) = 0; const CmdListKernelLaunchParams &launchParams) = 0;
virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel, virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchFuncArgs, const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0; ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel, virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0; ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *phKernels, virtual ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments, const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent, const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
@ -219,13 +219,13 @@ struct CommandList : _ze_command_list_handle_t {
void handleIndirectAllocationResidency(); void handleIndirectAllocationResidency();
NEO::PreemptionMode obtainFunctionPreemptionMode(Kernel *kernel); NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
std::vector<Kernel *> &getPrintfFunctionContainer() { std::vector<Kernel *> &getPrintfKernelContainer() {
return this->printfFunctionContainer; return this->printfKernelContainer;
} }
void storePrintfFunction(Kernel *kernel); void storePrintfKernel(Kernel *kernel);
void removeDeallocationContainerData(); void removeDeallocationContainerData();
void removeHostPtrAllocations(); void removeHostPtrAllocations();
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation); void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
@ -279,7 +279,7 @@ struct CommandList : _ze_command_list_handle_t {
void migrateSharedAllocations(); void migrateSharedAllocations();
ze_context_handle_t hContext = nullptr; ze_context_handle_t hContext = nullptr;
std::vector<Kernel *> printfFunctionContainer; std::vector<Kernel *> printfKernelContainer;
CommandQueue *cmdQImmediate = nullptr; CommandQueue *cmdQImmediate = nullptr;
NEO::CommandStreamReceiver *csr = nullptr; NEO::CommandStreamReceiver *csr = nullptr;
Device *device = nullptr; Device *device = nullptr;

View File

@ -80,22 +80,22 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override; ze_event_handle_t *phWaitEvents) override;
ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) override; const CmdListKernelLaunchParams &launchParams) override;
ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel, ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchFuncArgs, const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override; ze_event_handle_t *phWaitEvents) override;
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel, ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override; ze_event_handle_t *phWaitEvents) override;
ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels,
const ze_kernel_handle_t *phKernels, const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments, const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer, const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent, ze_event_handle_t hEvent,

View File

@ -81,7 +81,7 @@ CommandListCoreFamily<gfxCoreFamily>::~CommandListCoreFamily() {
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() { ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
printfFunctionContainer.clear(); printfKernelContainer.clear();
removeDeallocationContainerData(); removeDeallocationContainerData();
removeHostPtrAllocations(); removeHostPtrAllocations();
commandContainer.reset(); commandContainer.reset();
@ -202,7 +202,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::programL3(bool isSLMused) {} void CommandListCoreFamily<gfxCoreFamily>::programL3(bool isSLMused) {}
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_handle_t hKernel, ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, ze_event_handle_t hEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
@ -230,7 +230,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
event = Event::fromHandle(hEvent); event = Event::fromHandle(hEvent);
} }
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(hKernel), threadGroupDimensions, auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
event, launchParams); event, launchParams);
if (NEO::DebugManager.flags.EnableSWTags.get()) { if (NEO::DebugManager.flags.EnableSWTags.get()) {
@ -245,8 +245,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel, ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchFuncArgs, const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) { ze_event_handle_t *phWaitEvents) {
@ -263,12 +263,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
launchParams.isCooperative = true; launchParams.isCooperative = true;
return appendLaunchKernelWithParams(Kernel::fromHandle(hKernel), pLaunchFuncArgs, return appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
event, launchParams); event, launchParams);
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_kernel_handle_t hKernel, ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, ze_event_handle_t hEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
@ -287,7 +287,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
appendEventForProfiling(event, true, false); appendEventForProfiling(event, true, false);
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
launchParams.isIndirect = true; launchParams.isIndirect = true;
ret = appendLaunchKernelWithParams(Kernel::fromHandle(hKernel), pDispatchArgumentsBuffer, ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
nullptr, launchParams); nullptr, launchParams);
appendSignalEventPostWalker(event, false); appendSignalEventPostWalker(event, false);
@ -296,7 +296,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsIndirect(uint32_t numKernels, ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsIndirect(uint32_t numKernels,
const ze_kernel_handle_t *phKernels, const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments, const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer, const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent, ze_event_handle_t hEvent,
@ -325,7 +325,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
launchParams.isIndirect = true; launchParams.isIndirect = true;
launchParams.isPredicate = true; launchParams.isPredicate = true;
ret = appendLaunchKernelWithParams(Kernel::fromHandle(phKernels[i]), ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
haveLaunchArguments ? &pLaunchArgumentsBuffer[i] : nullptr, haveLaunchArguments ? &pLaunchArgumentsBuffer[i] : nullptr,
nullptr, launchParams); nullptr, launchParams);
if (ret) { if (ret) {
@ -563,12 +563,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
ze_group_count_t functionArgs{pDstRegion->width / groupSizeX, pDstRegion->height / groupSizeY, ze_group_count_t kernelArgs{pDstRegion->width / groupSizeX, pDstRegion->height / groupSizeY,
pDstRegion->depth / groupSizeZ}; pDstRegion->depth / groupSizeZ};
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true; launchParams.isBuiltInKernel = true;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs, return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents, event, numWaitEvents, phWaitEvents,
launchParams); launchParams);
} }
@ -689,8 +689,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
ze_group_count_t functionArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY, ze_group_count_t kernelArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY,
pSrcRegion->depth / groupSizeZ}; pSrcRegion->depth / groupSizeZ};
auto dstAllocationType = allocationStruct.alloc->getAllocationType(); auto dstAllocationType = allocationStruct.alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
@ -698,7 +698,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
launchParams.isDestinationAllocationInSystemMemory = launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) || (dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR); (dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs, auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents, launchParams); event, numWaitEvents, phWaitEvents, launchParams);
addFlushRequiredCommand(allocationStruct.needsFlush, event); addFlushRequiredCommand(allocationStruct.needsFlush, event);
@ -803,8 +803,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
ze_group_count_t functionArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY, ze_group_count_t kernelArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY,
srcRegion.depth / groupSizeZ}; srcRegion.depth / groupSizeZ};
kernel->setArgRedescribedImage(0, hSrcImage); kernel->setArgRedescribedImage(0, hSrcImage);
kernel->setArgRedescribedImage(1, hDstImage); kernel->setArgRedescribedImage(1, hDstImage);
@ -813,7 +813,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true; launchParams.isBuiltInKernel = true;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &functionArgs, return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents, event, numWaitEvents, phWaitEvents,
launchParams); launchParams);
} }
@ -907,31 +907,31 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
Kernel *builtinFunction = nullptr; Kernel *builtinKernel = nullptr;
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin); builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
uint32_t groupSizeX = builtinFunction->getImmutableData() uint32_t groupSizeX = builtinKernel->getImmutableData()
->getDescriptor() ->getDescriptor()
.kernelAttributes.simdSize; .kernelAttributes.simdSize;
uint32_t groupSizeY = 1u; uint32_t groupSizeY = 1u;
uint32_t groupSizeZ = 1u; uint32_t groupSizeZ = 1u;
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) { if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) {
DEBUG_BREAK_IF(true); DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
builtinFunction->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc); builtinKernel->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc);
builtinFunction->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc); builtinKernel->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc);
uint64_t elems = size / elementSize; uint64_t elems = size / elementSize;
builtinFunction->setArgumentValue(2, sizeof(elems), &elems); builtinKernel->setArgumentValue(2, sizeof(elems), &elems);
builtinFunction->setArgumentValue(3, sizeof(dstOffset), &dstOffset); builtinKernel->setArgumentValue(3, sizeof(dstOffset), &dstOffset);
builtinFunction->setArgumentValue(4, sizeof(srcOffset), &srcOffset); builtinKernel->setArgumentValue(4, sizeof(srcOffset), &srcOffset);
uint32_t groups = static_cast<uint32_t>((size + ((static_cast<uint64_t>(groupSizeX) * elementSize) - 1)) / (static_cast<uint64_t>(groupSizeX) * elementSize)); uint32_t groups = static_cast<uint32_t>((size + ((static_cast<uint64_t>(groupSizeX) * elementSize) - 1)) / (static_cast<uint64_t>(groupSizeX) * elementSize));
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; ze_group_count_t dispatchKernelArgs{groups, 1u, 1u};
auto dstAllocationType = dstPtrAlloc->getAllocationType(); auto dstAllocationType = dstPtrAlloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
@ -942,7 +942,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
(dstAllocationType == NEO::AllocationType::SVM_CPU) || (dstAllocationType == NEO::AllocationType::SVM_CPU) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR); (dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams); return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(builtinKernel, &dispatchKernelArgs, signalEvent, launchParams);
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
@ -1164,9 +1164,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
appendEventForProfilingAllWalkers(signalEvent, true); appendEventForProfilingAllWalkers(signalEvent, true);
if (ret == ZE_RESULT_SUCCESS && leftSize) { if (ret == ZE_RESULT_SUCCESS && leftSize) {
Builtin func = Builtin::CopyBufferToBufferSide; Builtin copyKernel = Builtin::CopyBufferToBufferSide;
if (isStateless) { if (isStateless) {
func = Builtin::CopyBufferToBufferSideStateless; copyKernel = Builtin::CopyBufferToBufferSideStateless;
} }
if (isCopyOnly()) { if (isCopyOnly()) {
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
@ -1179,16 +1179,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr), reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, srcAllocationStruct.offset, srcAllocationStruct.alloc, srcAllocationStruct.offset,
leftSize, 1UL, leftSize, 1UL,
func, copyKernel,
signalEvent, signalEvent,
isStateless); isStateless);
} }
} }
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) { if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
Builtin func = Builtin::CopyBufferToBufferMiddle; Builtin copyKernel = Builtin::CopyBufferToBufferMiddle;
if (isStateless) { if (isStateless) {
func = Builtin::CopyBufferToBufferMiddleStateless; copyKernel = Builtin::CopyBufferToBufferMiddleStateless;
} }
if (isCopyOnly()) { if (isCopyOnly()) {
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
@ -1202,16 +1202,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
middleSizeBytes, middleSizeBytes,
middleElSize, middleElSize,
func, copyKernel,
signalEvent, signalEvent,
isStateless); isStateless);
} }
} }
if (ret == ZE_RESULT_SUCCESS && rightSize) { if (ret == ZE_RESULT_SUCCESS && rightSize) {
Builtin func = Builtin::CopyBufferToBufferSide; Builtin copyKernel = Builtin::CopyBufferToBufferSide;
if (isStateless) { if (isStateless) {
func = Builtin::CopyBufferToBufferSideStateless; copyKernel = Builtin::CopyBufferToBufferSideStateless;
} }
if (isCopyOnly()) { if (isCopyOnly()) {
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
@ -1224,7 +1224,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr), reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
rightSize, 1UL, rightSize, 1UL,
func, copyKernel,
signalEvent, signalEvent,
isStateless); isStateless);
} }
@ -1351,19 +1351,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin); auto builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
uint32_t groupSizeX = srcRegion->width; uint32_t groupSizeX = srcRegion->width;
uint32_t groupSizeY = srcRegion->height; uint32_t groupSizeY = srcRegion->height;
uint32_t groupSizeZ = srcRegion->depth; uint32_t groupSizeZ = srcRegion->depth;
if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ,
&groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true); DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true); DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
@ -1373,20 +1373,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, ze_group_count_t dispatchKernelArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY,
srcRegion->depth / groupSizeZ}; srcRegion->depth / groupSizeZ};
uint32_t srcOrigin[3] = {(srcRegion->originX + static_cast<uint32_t>(srcOffset)), (srcRegion->originY), (srcRegion->originZ)}; uint32_t srcOrigin[3] = {(srcRegion->originX + static_cast<uint32_t>(srcOffset)), (srcRegion->originY), (srcRegion->originZ)};
uint32_t dstOrigin[3] = {(dstRegion->originX + static_cast<uint32_t>(dstOffset)), (dstRegion->originY), (dstRegion->originZ)}; uint32_t dstOrigin[3] = {(dstRegion->originX + static_cast<uint32_t>(dstOffset)), (dstRegion->originY), (dstRegion->originZ)};
uint32_t srcPitches[2] = {(srcPitch), (srcSlicePitch)}; uint32_t srcPitches[2] = {(srcPitch), (srcSlicePitch)};
uint32_t dstPitches[2] = {(dstPitch), (dstSlicePitch)}; uint32_t dstPitches[2] = {(dstPitch), (dstSlicePitch)};
builtinFunction->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc); builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
builtinFunction->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc); builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
builtinFunction->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin); builtinKernel->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
builtinFunction->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin); builtinKernel->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
builtinFunction->setArgumentValue(4, sizeof(srcPitches), &srcPitches); builtinKernel->setArgumentValue(4, sizeof(srcPitches), &srcPitches);
builtinFunction->setArgumentValue(5, sizeof(dstPitches), &dstPitches); builtinKernel->setArgumentValue(5, sizeof(dstPitches), &dstPitches);
auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType(); auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
@ -1394,7 +1394,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
launchParams.isDestinationAllocationInSystemMemory = launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) || (dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR); (dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, signalEvent, numWaitEvents, return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &dispatchKernelArgs, signalEvent, numWaitEvents,
phWaitEvents, launchParams); phWaitEvents, launchParams);
} }
@ -1414,19 +1414,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin); auto builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
uint32_t groupSizeX = srcRegion->width; uint32_t groupSizeX = srcRegion->width;
uint32_t groupSizeY = srcRegion->height; uint32_t groupSizeY = srcRegion->height;
uint32_t groupSizeZ = 1u; uint32_t groupSizeZ = 1u;
if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX,
&groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true); DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true); DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
@ -1436,17 +1436,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, 1u}; ze_group_count_t dispatchKernelArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, 1u};
uint32_t srcOrigin[2] = {(srcRegion->originX + static_cast<uint32_t>(srcOffset)), (srcRegion->originY)}; uint32_t srcOrigin[2] = {(srcRegion->originX + static_cast<uint32_t>(srcOffset)), (srcRegion->originY)};
uint32_t dstOrigin[2] = {(dstRegion->originX + static_cast<uint32_t>(dstOffset)), (dstRegion->originY)}; uint32_t dstOrigin[2] = {(dstRegion->originX + static_cast<uint32_t>(dstOffset)), (dstRegion->originY)};
builtinFunction->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc); builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
builtinFunction->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc); builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
builtinFunction->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin); builtinKernel->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
builtinFunction->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin); builtinKernel->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
builtinFunction->setArgumentValue(4, sizeof(srcPitch), &srcPitch); builtinKernel->setArgumentValue(4, sizeof(srcPitch), &srcPitch);
builtinFunction->setArgumentValue(5, sizeof(dstPitch), &dstPitch); builtinKernel->setArgumentValue(5, sizeof(dstPitch), &dstPitch);
auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType(); auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
@ -1454,8 +1454,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
launchParams.isDestinationAllocationInSystemMemory = launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) || (dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR); (dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(), return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(),
&dispatchFuncArgs, signalEvent, &dispatchKernelArgs, signalEvent,
numWaitEvents, numWaitEvents,
phWaitEvents, phWaitEvents,
launchParams); launchParams);
@ -1473,23 +1473,23 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryPrefetch(const voi
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendUnalignedFillKernel(bool isStateless, uint32_t unalignedSize, const AlignedAllocationData &dstAllocation, const void *pattern, Event *signalEvent, const CmdListKernelLaunchParams &launchParams) { ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendUnalignedFillKernel(bool isStateless, uint32_t unalignedSize, const AlignedAllocationData &dstAllocation, const void *pattern, Event *signalEvent, const CmdListKernelLaunchParams &launchParams) {
Kernel *builtinFunction = nullptr; Kernel *builtinKernel = nullptr;
if (isStateless) { if (isStateless) {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOverStateless); builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOverStateless);
} else { } else {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOver); builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOver);
} }
uint32_t groupSizeY = 1, groupSizeZ = 1; uint32_t groupSizeY = 1, groupSizeZ = 1;
uint32_t groupSizeX = static_cast<uint32_t>(unalignedSize); uint32_t groupSizeX = static_cast<uint32_t>(unalignedSize);
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ); builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
ze_group_count_t dispatchFuncRemainderArgs{static_cast<uint32_t>(unalignedSize / groupSizeX), 1u, 1u}; ze_group_count_t dispatchKernelRemainderArgs{static_cast<uint32_t>(unalignedSize / groupSizeX), 1u, 1u};
uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern)); uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern));
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinFunction->setArgumentValue(2, sizeof(value), &value); builtinKernel->setArgumentValue(2, sizeof(value), &value);
auto res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams); auto res = appendLaunchKernelSplit(builtinKernel, &dispatchKernelRemainderArgs, signalEvent, launchParams);
if (res) { if (res) {
return res; return res;
} }
@ -1569,12 +1569,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
middleSize -= leftRemainder; middleSize -= leftRemainder;
dstAllocation.offset += leftRemainder; dstAllocation.offset += leftRemainder;
} }
Kernel *builtinFunction = nullptr; Kernel *builtinKernel = nullptr;
if (isStateless) { if (isStateless) {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateStateless); builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateStateless);
} else { } else {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate); builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
} }
const auto dataTypeSize = sizeof(uint32_t) * 4; const auto dataTypeSize = sizeof(uint32_t) * 4;
size_t adjustedSize = middleSize / dataTypeSize; size_t adjustedSize = middleSize / dataTypeSize;
@ -1582,7 +1582,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
if (groupSizeX > adjustedSize && adjustedSize > 0) { if (groupSizeX > adjustedSize && adjustedSize > 0) {
groupSizeX = adjustedSize; groupSizeX = adjustedSize;
} }
if (builtinFunction->setGroupSize(static_cast<uint32_t>(groupSizeX), 1u, 1u)) { if (builtinKernel->setGroupSize(static_cast<uint32_t>(groupSizeX), 1u, 1u)) {
DEBUG_BREAK_IF(true); DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
@ -1590,17 +1590,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
size_t groups = adjustedSize / groupSizeX; size_t groups = adjustedSize / groupSizeX;
uint32_t remainingBytes = static_cast<uint32_t>((adjustedSize % groupSizeX) * dataTypeSize + uint32_t remainingBytes = static_cast<uint32_t>((adjustedSize % groupSizeX) * dataTypeSize +
middleSize % dataTypeSize); middleSize % dataTypeSize);
ze_group_count_t dispatchFuncArgs{static_cast<uint32_t>(groups), 1u, 1u}; ze_group_count_t dispatchKernelArgs{static_cast<uint32_t>(groups), 1u, 1u};
uint32_t value = 0; uint32_t value = 0;
memset(&value, *reinterpret_cast<const unsigned char *>(pattern), 4); memset(&value, *reinterpret_cast<const unsigned char *>(pattern), 4);
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinFunction->setArgumentValue(2, sizeof(value), &value); builtinKernel->setArgumentValue(2, sizeof(value), &value);
appendEventForProfilingAllWalkers(signalEvent, true); appendEventForProfilingAllWalkers(signalEvent, true);
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams); res = appendLaunchKernelSplit(builtinKernel, &dispatchKernelArgs, signalEvent, launchParams);
if (res) { if (res) {
return res; return res;
} }
@ -1614,18 +1614,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
} }
} else { } else {
Kernel *builtinFunction = nullptr; Kernel *builtinKernel = nullptr;
if (isStateless) { if (isStateless) {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddleStateless); builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddleStateless);
} else { } else {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle); builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle);
} }
size_t middleElSize = sizeof(uint32_t); size_t middleElSize = sizeof(uint32_t);
size_t adjustedSize = size / middleElSize; size_t adjustedSize = size / middleElSize;
uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize); uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize);
uint32_t groupSizeY = 1, groupSizeZ = 1; uint32_t groupSizeY = 1, groupSizeZ = 1;
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ); builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
uint32_t groups = static_cast<uint32_t>(adjustedSize) / groupSizeX; uint32_t groups = static_cast<uint32_t>(adjustedSize) / groupSizeX;
uint32_t remainingBytes = static_cast<uint32_t>((adjustedSize % groupSizeX) * middleElSize + uint32_t remainingBytes = static_cast<uint32_t>((adjustedSize % groupSizeX) * middleElSize +
@ -1656,15 +1656,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
patternAllocOffset += patternSizeToCopy; patternAllocOffset += patternSizeToCopy;
} while (patternAllocOffset < patternAllocationSize); } while (patternAllocOffset < patternAllocationSize);
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinFunction->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc); builtinKernel->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc);
builtinFunction->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls); builtinKernel->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls);
appendEventForProfilingAllWalkers(signalEvent, true); appendEventForProfilingAllWalkers(signalEvent, true);
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; ze_group_count_t dispatchKernelArgs{groups, 1u, 1u};
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams); res = appendLaunchKernelSplit(builtinKernel, &dispatchKernelArgs, signalEvent, launchParams);
if (res) { if (res) {
return res; return res;
} }
@ -1673,28 +1673,28 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
uint32_t dstOffsetRemainder = groups * groupSizeX * static_cast<uint32_t>(middleElSize); uint32_t dstOffsetRemainder = groups * groupSizeX * static_cast<uint32_t>(middleElSize);
uint64_t patternOffsetRemainder = (groupSizeX * groups & (patternSizeInEls - 1)) * middleElSize; uint64_t patternOffsetRemainder = (groupSizeX * groups & (patternSizeInEls - 1)) * middleElSize;
Kernel *builtinFunctionRemainder; Kernel *builtinKernelRemainder;
if (isStateless) { if (isStateless) {
builtinFunctionRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftoverStateless); builtinKernelRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftoverStateless);
} else { } else {
builtinFunctionRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftover); builtinKernelRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftover);
} }
builtinFunctionRemainder->setGroupSize(remainingBytes, 1u, 1u); builtinKernelRemainder->setGroupSize(remainingBytes, 1u, 1u);
ze_group_count_t dispatchFuncArgs{1u, 1u, 1u}; ze_group_count_t dispatchKernelArgs{1u, 1u, 1u};
builtinFunctionRemainder->setArgBufferWithAlloc(0, builtinKernelRemainder->setArgBufferWithAlloc(0,
dstAllocation.alignedAllocationPtr, dstAllocation.alignedAllocationPtr,
dstAllocation.alloc); dstAllocation.alloc);
builtinFunctionRemainder->setArgumentValue(1, builtinKernelRemainder->setArgumentValue(1,
sizeof(dstOffsetRemainder), sizeof(dstOffsetRemainder),
&dstOffsetRemainder); &dstOffsetRemainder);
builtinFunctionRemainder->setArgBufferWithAlloc(2, builtinKernelRemainder->setArgBufferWithAlloc(2,
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder, reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
patternGfxAlloc); patternGfxAlloc);
builtinFunctionRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize); builtinKernelRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
res = appendLaunchKernelSplit(builtinFunctionRemainder, &dispatchFuncArgs, signalEvent, launchParams); res = appendLaunchKernelSplit(builtinKernelRemainder, &dispatchKernelArgs, signalEvent, launchParams);
if (res) { if (res) {
return res; return res;
} }
@ -2220,20 +2220,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
UNRECOVERABLE_IF(!result); UNRECOVERABLE_IF(!result);
Kernel *builtinFunction = nullptr; Kernel *builtinKernel = nullptr;
auto useOnlyGlobalTimestamps = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u; auto useOnlyGlobalTimestamps = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u;
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership(); auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
if (pOffsets == nullptr) { if (pOffsets == nullptr) {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps); builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps);
builtinFunction->setArgumentValue(2u, sizeof(uint32_t), &useOnlyGlobalTimestamps); builtinKernel->setArgumentValue(2u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
} else { } else {
auto pOffsetAllocationStruct = getAlignedAllocation(this->device, pOffsets, sizeof(size_t) * numEvents, false); auto pOffsetAllocationStruct = getAlignedAllocation(this->device, pOffsets, sizeof(size_t) * numEvents, false);
auto offsetValPtr = static_cast<uintptr_t>(pOffsetAllocationStruct.alloc->getGpuAddress()); auto offsetValPtr = static_cast<uintptr_t>(pOffsetAllocationStruct.alloc->getGpuAddress());
commandContainer.addToResidencyContainer(pOffsetAllocationStruct.alloc); commandContainer.addToResidencyContainer(pOffsetAllocationStruct.alloc);
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets); builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets);
builtinFunction->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc); builtinKernel->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc);
builtinFunction->setArgumentValue(3u, sizeof(uint32_t), &useOnlyGlobalTimestamps); builtinKernel->setArgumentValue(3u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
offsetValPtr += sizeof(size_t); offsetValPtr += sizeof(size_t);
} }
@ -2241,23 +2241,23 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
uint32_t groupSizeY = 1u; uint32_t groupSizeY = 1u;
uint32_t groupSizeZ = 1u; uint32_t groupSizeZ = 1u;
if (builtinFunction->suggestGroupSize(numEvents, 1u, 1u, if (builtinKernel->suggestGroupSize(numEvents, 1u, 1u,
&groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true); DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true); DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN; return ZE_RESULT_ERROR_UNKNOWN;
} }
ze_group_count_t dispatchFuncArgs{numEvents / groupSizeX, 1u, 1u}; ze_group_count_t dispatchKernelArgs{numEvents / groupSizeX, 1u, 1u};
auto dstValPtr = static_cast<uintptr_t>(dstPtrAllocationStruct.alloc->getGpuAddress()); auto dstValPtr = static_cast<uintptr_t>(dstPtrAllocationStruct.alloc->getGpuAddress());
builtinFunction->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData); builtinKernel->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
builtinFunction->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc); builtinKernel->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc);
auto dstAllocationType = dstPtrAllocationStruct.alloc->getAllocationType(); auto dstAllocationType = dstPtrAllocationStruct.alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
@ -2265,7 +2265,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
launchParams.isDestinationAllocationInSystemMemory = launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) || (dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR); (dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
auto appendResult = appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents, auto appendResult = appendLaunchKernel(builtinKernel->toHandle(), &dispatchKernelArgs, hSignalEvent, numWaitEvents,
phWaitEvents, launchParams); phWaitEvents, launchParams);
if (appendResult != ZE_RESULT_SUCCESS) { if (appendResult != ZE_RESULT_SUCCESS) {
return appendResult; return appendResult;

View File

@ -44,7 +44,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
return ZE_RESULT_ERROR_INVALID_ARGUMENT; return ZE_RESULT_ERROR_INVALID_ARGUMENT;
} }
appendEventForProfiling(event, true, false); appendEventForProfiling(event, true, false);
const auto functionImmutableData = kernel->getImmutableData(); const auto kernelImmutableData = kernel->getImmutableData();
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(), auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]); kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
this->setCommandListPerThreadScratchSize(perThreadScratchSize); this->setCommandListPerThreadScratchSize(perThreadScratchSize);
@ -52,7 +52,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0); auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0);
this->setCommandListSLMEnable(slmEnable); this->setCommandListSLMEnable(slmEnable);
auto kernelPreemptionMode = obtainFunctionPreemptionMode(kernel); auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);
commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode); commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode);
kernel->patchGlobalOffset(); kernel->patchGlobalOffset();
@ -169,14 +169,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
appendSignalEventPostWalker(event, false); appendSignalEventPostWalker(event, false);
commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation()); commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
auto &residencyContainer = kernel->getResidencyContainer(); auto &residencyContainer = kernel->getResidencyContainer();
for (auto resource : residencyContainer) { for (auto resource : residencyContainer) {
commandContainer.addToResidencyContainer(resource); commandContainer.addToResidencyContainer(resource);
} }
if (functionImmutableData->getDescriptor().kernelAttributes.flags.usesPrintf) { if (kernelImmutableData->getDescriptor().kernelAttributes.flags.usesPrintf) {
storePrintfFunction(kernel); storePrintfKernel(kernel);
} }
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {

View File

@ -22,13 +22,13 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
using BaseClass::BaseClass; using BaseClass::BaseClass;
ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) override; const CmdListKernelLaunchParams &launchParams) override;
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel, ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override; ze_event_handle_t *phWaitEvents) override;

View File

@ -163,7 +163,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel( ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
ze_kernel_handle_t hKernel, const ze_group_count_t *threadGroupDimensions, ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) { const CmdListKernelLaunchParams &launchParams) {
@ -171,7 +171,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
checkAvailableSpace(); checkAvailableSpace();
} }
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(hKernel, threadGroupDimensions, auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernelHandle, threadGroupDimensions,
hSignalEvent, numWaitEvents, phWaitEvents, hSignalEvent, numWaitEvents, phWaitEvents,
launchParams); launchParams);
return flushImmediate(ret, true); return flushImmediate(ret, true);
@ -179,13 +179,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
// Immediate command list override of appendLaunchKernelIndirect.
// Each line below shows the old text followed by the new text of the diff; the only
// change in this function is the rename of the kernel handle parameter
// (hKernel -> kernelHandle).
//
// Parameters are forwarded unchanged to the base implementation:
//   kernel handle            - kernel to launch
//   pDispatchArgumentsBuffer - pointer to the ze_group_count_t dispatch arguments
//   hSignalEvent             - event handle passed through to the base call
//   numWaitEvents            - count paired with phWaitEvents
//   phWaitEvents             - wait event handles passed through to the base call
// Returns the ze_result_t produced by flushImmediate().
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelIndirect( ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelIndirect(
ze_kernel_handle_t hKernel, const ze_group_count_t *pDispatchArgumentsBuffer, ze_kernel_handle_t kernelHandle, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
// checkAvailableSpace() is called only when flush-task submission is enabled.
if (this->isFlushTaskSubmissionEnabled) { if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace(); checkAvailableSpace();
} }
// Delegate the actual append to the base CommandListCoreFamily implementation.
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(hKernel, pDispatchArgumentsBuffer, auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
hSignalEvent, numWaitEvents, phWaitEvents); hSignalEvent, numWaitEvents, phWaitEvents);
// The append result is handed to flushImmediate(), whose return value is what the caller sees.
// NOTE(review): the meaning of the second argument (true) is not visible here - confirm against flushImmediate().
return flushImmediate(ret, true); return flushImmediate(ret, true);
} }

View File

@ -134,7 +134,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::Device *neoDevice = device->getNEODevice(); NEO::Device *neoDevice = device->getNEODevice();
UNRECOVERABLE_IF(kernel == nullptr); UNRECOVERABLE_IF(kernel == nullptr);
const auto functionImmutableData = kernel->getImmutableData(); const auto kernelImmutableData = kernel->getImmutableData();
auto &kernelDescriptor = kernel->getKernelDescriptor(); auto &kernelDescriptor = kernel->getKernelDescriptor();
if (kernelDescriptor.kernelAttributes.flags.isInvalid) { if (kernelDescriptor.kernelAttributes.flags.isInvalid) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT; return ZE_RESULT_ERROR_INVALID_ARGUMENT;
@ -142,8 +142,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
auto functionPreemptionMode = obtainFunctionPreemptionMode(kernel); auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);
commandListPreemptionMode = std::min(commandListPreemptionMode, functionPreemptionMode); commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode);
kernel->patchGlobalOffset(); kernel->patchGlobalOffset();
if (launchParams.isIndirect && threadGroupDimensions) { if (launchParams.isIndirect && threadGroupDimensions) {
@ -287,9 +287,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args); NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState; *reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
} }
// Attach Function residency to our CommandList residency // Attach kernel residency to our CommandList residency
{ {
commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation()); commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
auto &residencyContainer = kernel->getResidencyContainer(); auto &residencyContainer = kernel->getResidencyContainer();
for (auto resource : residencyContainer) { for (auto resource : residencyContainer) {
commandContainer.addToResidencyContainer(resource); commandContainer.addToResidencyContainer(resource);
@ -299,7 +299,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
// Store PrintfBuffer from a kernel // Store PrintfBuffer from a kernel
{ {
if (kernelDescriptor.kernelAttributes.flags.usesPrintf) { if (kernelDescriptor.kernelAttributes.flags.usesPrintf) {
storePrintfFunction(kernel); storePrintfKernel(kernel);
} }
} }

View File

@ -152,16 +152,16 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeout)
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
void CommandQueueImp::printFunctionsPrintfOutput() { void CommandQueueImp::printKernelsPrintfOutput() {
size_t size = this->printfFunctionContainer.size(); size_t size = this->printfKernelContainer.size();
for (size_t i = 0; i < size; i++) { for (size_t i = 0; i < size; i++) {
this->printfFunctionContainer[i]->printPrintfOutput(); this->printfKernelContainer[i]->printPrintfOutput();
} }
this->printfFunctionContainer.clear(); this->printfKernelContainer.clear();
} }
void CommandQueueImp::postSyncOperations() { void CommandQueueImp::postSyncOperations() {
printFunctionsPrintfOutput(); printKernelsPrintfOutput();
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) { if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) {
device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId()); device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId());

View File

@ -909,9 +909,9 @@ void CommandQueueHw<gfxCoreFamily>::collectPrintfContentsFromAllCommandsLists(
for (auto i = 0u; i < numCommandLists; ++i) { for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]); auto commandList = CommandList::fromHandle(phCommandLists[i]);
this->printfFunctionContainer.insert(this->printfFunctionContainer.end(), this->printfKernelContainer.insert(this->printfKernelContainer.end(),
commandList->getPrintfFunctionContainer().begin(), commandList->getPrintfKernelContainer().begin(),
commandList->getPrintfFunctionContainer().end()); commandList->getPrintfKernelContainer().end());
} }
} }

View File

@ -87,14 +87,14 @@ struct CommandQueueImp : public CommandQueue {
ze_result_t synchronizeByPollingForTaskCount(uint64_t timeout); ze_result_t synchronizeByPollingForTaskCount(uint64_t timeout);
void printFunctionsPrintfOutput(); void printKernelsPrintfOutput();
void postSyncOperations(); void postSyncOperations();
CommandBufferManager buffers; CommandBufferManager buffers;
NEO::HeapContainer heapContainer; NEO::HeapContainer heapContainer;
ze_command_queue_desc_t desc; ze_command_queue_desc_t desc;
std::vector<Kernel *> printfFunctionContainer; std::vector<Kernel *> printfKernelContainer;
Device *device = nullptr; Device *device = nullptr;
NEO::CommandStreamReceiver *csr = nullptr; NEO::CommandStreamReceiver *csr = nullptr;

View File

@ -33,7 +33,7 @@ struct Module : _ze_module_handle_t {
virtual Device *getDevice() const = 0; virtual Device *getDevice() const = 0;
virtual ze_result_t createKernel(const ze_kernel_desc_t *desc, virtual ze_result_t createKernel(const ze_kernel_desc_t *desc,
ze_kernel_handle_t *phFunction) = 0; ze_kernel_handle_t *kernelHandle) = 0;
virtual ze_result_t destroy() = 0; virtual ze_result_t destroy() = 0;
virtual ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) = 0; virtual ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) = 0;
virtual ze_result_t getFunctionPointer(const char *pKernelName, void **pfnFunction) = 0; virtual ze_result_t getFunctionPointer(const char *pKernelName, void **pfnFunction) = 0;
@ -45,7 +45,7 @@ struct Module : _ze_module_handle_t {
ze_module_handle_t *phModules, ze_module_handle_t *phModules,
ze_module_build_log_handle_t *phLinkLog) = 0; ze_module_build_log_handle_t *phLinkLog) = 0;
virtual const KernelImmutableData *getKernelImmutableData(const char *functionName) const = 0; virtual const KernelImmutableData *getKernelImmutableData(const char *kernelName) const = 0;
virtual const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const = 0; virtual const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const = 0;
virtual uint32_t getMaxGroupSize() const = 0; virtual uint32_t getMaxGroupSize() const = 0;
virtual bool isDebugEnabled() const = 0; virtual bool isDebugEnabled() const = 0;

View File

@ -664,9 +664,9 @@ void ModuleImp::passDebugData() {
} }
} }
const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *functionName) const { const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *kernelName) const {
for (auto &kernelImmData : kernelImmDatas) { for (auto &kernelImmData : kernelImmDatas) {
if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(functionName) == 0) { if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(kernelName) == 0) {
return kernelImmData.get(); return kernelImmData.get();
} }
} }
@ -742,7 +742,7 @@ void ModuleImp::updateBuildLog(NEO::Device *neoDevice) {
} }
ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc, ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc,
ze_kernel_handle_t *phFunction) { ze_kernel_handle_t *kernelHandle) {
ze_result_t res; ze_result_t res;
if (!isFullyLinked) { if (!isFullyLinked) {
return ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED; return ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED;
@ -750,7 +750,7 @@ ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc,
auto kernel = Kernel::create(productFamily, this, desc, &res); auto kernel = Kernel::create(productFamily, this, desc, &res);
if (res == ZE_RESULT_SUCCESS) { if (res == ZE_RESULT_SUCCESS) {
*phFunction = kernel->toHandle(); *kernelHandle = kernel->toHandle();
} }
return res; return res;

View File

@ -89,7 +89,7 @@ struct ModuleImp : public Module {
ze_result_t destroy() override; ze_result_t destroy() override;
ze_result_t createKernel(const ze_kernel_desc_t *desc, ze_result_t createKernel(const ze_kernel_desc_t *desc,
ze_kernel_handle_t *phFunction) override; ze_kernel_handle_t *kernelHandle) override;
ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) override; ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) override;
@ -107,7 +107,7 @@ struct ModuleImp : public Module {
ze_result_t getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) override; ze_result_t getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) override;
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override; const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override;
const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; } const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; }

View File

@ -110,7 +110,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
~MockModule() override { ~MockModule() override {
} }
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override { const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override {
return mockKernelImmData; return mockKernelImmData;
} }

View File

@ -12,7 +12,7 @@
namespace L0 { namespace L0 {
namespace ult { namespace ult {
void MultiTileCommandListAppendLaunchFunctionFixture::setUp() { void MultiTileCommandListAppendLaunchKernelFixture::setUp() {
DebugManager.flags.EnableImplicitScaling.set(1); DebugManager.flags.EnableImplicitScaling.set(1);
MultiDeviceFixture::numRootDevices = 1u; MultiDeviceFixture::numRootDevices = 1u;
@ -35,14 +35,14 @@ void MultiTileCommandListAppendLaunchFunctionFixture::setUp() {
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
} }
void MultiTileCommandListAppendLaunchFunctionFixture::tearDown() { void MultiTileCommandListAppendLaunchKernelFixture::tearDown() {
commandList->destroy(); commandList->destroy();
contextImp->destroy(); contextImp->destroy();
MultiDeviceModuleFixture::tearDown(); MultiDeviceModuleFixture::tearDown();
} }
void MultiTileImmediateCommandListAppendLaunchFunctionFixture::setUp() { void MultiTileImmediateCommandListAppendLaunchKernelFixture::setUp() {
DebugManager.flags.EnableImplicitScaling.set(1); DebugManager.flags.EnableImplicitScaling.set(1);
MultiDeviceFixture::numRootDevices = 1u; MultiDeviceFixture::numRootDevices = 1u;
@ -61,7 +61,7 @@ void MultiTileImmediateCommandListAppendLaunchFunctionFixture::setUp() {
contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext)); contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext));
} }
void MultiTileImmediateCommandListAppendLaunchFunctionFixture::tearDown() { void MultiTileImmediateCommandListAppendLaunchKernelFixture::tearDown() {
contextImp->destroy(); contextImp->destroy();
MultiDeviceModuleFixture::tearDown(); MultiDeviceModuleFixture::tearDown();

View File

@ -18,7 +18,7 @@ struct Device;
namespace ult { namespace ult {
struct MultiTileCommandListAppendLaunchFunctionFixture : public MultiDeviceModuleFixture { struct MultiTileCommandListAppendLaunchKernelFixture : public MultiDeviceModuleFixture {
void setUp(); void setUp();
void tearDown(); void tearDown();
@ -28,7 +28,7 @@ struct MultiTileCommandListAppendLaunchFunctionFixture : public MultiDeviceModul
VariableBackup<bool> backup{&NEO::ImplicitScaling::apiSupport, true}; VariableBackup<bool> backup{&NEO::ImplicitScaling::apiSupport, true};
}; };
struct MultiTileImmediateCommandListAppendLaunchFunctionFixture : public MultiDeviceModuleFixture { struct MultiTileImmediateCommandListAppendLaunchKernelFixture : public MultiDeviceModuleFixture {
void setUp(); void setUp();
void tearDown(); void tearDown();

View File

@ -27,9 +27,9 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
void SetUp() override { void SetUp() override {
DeviceFixture::setUp(); DeviceFixture::setUp();
dispatchFunctionArguments.groupCountX = 1u; dispatchKernelArguments.groupCountX = 1u;
dispatchFunctionArguments.groupCountY = 2u; dispatchKernelArguments.groupCountY = 2u;
dispatchFunctionArguments.groupCountZ = 3u; dispatchKernelArguments.groupCountZ = 3u;
} }
void TearDown() override { void TearDown() override {
@ -44,18 +44,18 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
} }
std::vector<void *> isaBuffers; std::vector<void *> isaBuffers;
ze_group_count_t dispatchFunctionArguments; ze_group_count_t dispatchKernelArguments;
void *buffer = nullptr; void *buffer = nullptr;
void initializeFunction(WhiteBox<::L0::Kernel> &function, void initializeKernel(WhiteBox<::L0::Kernel> &kernel,
WhiteBox<::L0::KernelImmutableData> &functionData, WhiteBox<::L0::KernelImmutableData> &kernelData,
L0::Device *device) { L0::Device *device) {
uint32_t isaSize = 4096; uint32_t isaSize = 4096;
void *isaBuffer = malloc(isaSize); void *isaBuffer = malloc(isaSize);
isaBuffers.push_back(isaBuffer); isaBuffers.push_back(isaBuffer);
functionData.device = device; kernelData.device = device;
if (!buffer) { if (!buffer) {
buffer = alignedMalloc(isaSize, 64); buffer = alignedMalloc(isaSize, 64);
} }
@ -70,22 +70,22 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
if (isaBuffer != nullptr) { if (isaBuffer != nullptr) {
memcpy_s(allocation->getUnderlyingBuffer(), allocation->getUnderlyingBufferSize(), isaBuffer, isaSize); memcpy_s(allocation->getUnderlyingBuffer(), allocation->getUnderlyingBufferSize(), isaBuffer, isaSize);
} }
functionData.isaGraphicsAllocation.reset(allocation); kernelData.isaGraphicsAllocation.reset(allocation);
uint32_t crossThreadDataSize = 128; uint32_t crossThreadDataSize = 128;
function.crossThreadData.reset(new uint8_t[crossThreadDataSize]); kernel.crossThreadData.reset(new uint8_t[crossThreadDataSize]);
function.crossThreadDataSize = crossThreadDataSize; kernel.crossThreadDataSize = crossThreadDataSize;
uint32_t perThreadDataSize = 128; uint32_t perThreadDataSize = 128;
function.perThreadDataForWholeThreadGroup = static_cast<uint8_t *>(alignedMalloc(perThreadDataSize, 32)); kernel.perThreadDataForWholeThreadGroup = static_cast<uint8_t *>(alignedMalloc(perThreadDataSize, 32));
function.perThreadDataSize = perThreadDataSize; kernel.perThreadDataSize = perThreadDataSize;
function.kernelImmData = &functionData; kernel.kernelImmData = &kernelData;
} }
void cleanupFunction(WhiteBox<::L0::KernelImmutableData> &functionData) { void cleanupKernel(WhiteBox<::L0::KernelImmutableData> &kernelData) {
functionData.isaGraphicsAllocation.reset(nullptr); kernelData.isaGraphicsAllocation.reset(nullptr);
} }
}; };
@ -102,99 +102,99 @@ GEN9TEST_F(CommandListCreateGen9, WhenGettingCommandListPreemptionModeThenMatche
} }
GEN9TEST_F(CommandListCreateGen9, GivenDisabledMidThreadPreemptionWhenLaunchingKernelThenThreadGroupModeSet) { GEN9TEST_F(CommandListCreateGen9, GivenDisabledMidThreadPreemptionWhenLaunchingKernelThenThreadGroupModeSet) {
WhiteBox<::L0::KernelImmutableData> funcInfoThreadGroupData = {}; WhiteBox<::L0::KernelImmutableData> kernelInfoThreadGroupData = {};
NEO::KernelDescriptor kernelDescriptor; NEO::KernelDescriptor kernelDescriptor;
funcInfoThreadGroupData.kernelDescriptor = &kernelDescriptor; kernelInfoThreadGroupData.kernelDescriptor = &kernelDescriptor;
WhiteBox<::L0::Kernel> functionThreadGroup; WhiteBox<::L0::Kernel> kernelThreadGroup;
funcInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1; kernelInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
initializeFunction(functionThreadGroup, funcInfoThreadGroupData, device); initializeKernel(kernelThreadGroup, kernelInfoThreadGroupData, device);
ze_result_t returnValue; ze_result_t returnValue;
auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode()); EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode());
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(functionThreadGroup.toHandle(), commandList->appendLaunchKernel(kernelThreadGroup.toHandle(),
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams); &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode()); EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
auto result = commandList->close(); auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode()); EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
cleanupFunction(funcInfoThreadGroupData); cleanupKernel(kernelInfoThreadGroupData);
delete commandList; delete commandList;
} }
GEN9TEST_F(CommandListCreateGen9, GivenUsesFencesForReadWriteImagesWhenLaunchingKernelThenMidBatchModeSet) { GEN9TEST_F(CommandListCreateGen9, GivenUsesFencesForReadWriteImagesWhenLaunchingKernelThenMidBatchModeSet) {
WhiteBox<::L0::KernelImmutableData> funcInfoMidBatchData = {}; WhiteBox<::L0::KernelImmutableData> kernelInfoMidBatchData = {};
NEO::KernelDescriptor kernelDescriptor; NEO::KernelDescriptor kernelDescriptor;
funcInfoMidBatchData.kernelDescriptor = &kernelDescriptor; kernelInfoMidBatchData.kernelDescriptor = &kernelDescriptor;
WhiteBox<::L0::Kernel> functionMidBatch; WhiteBox<::L0::Kernel> kernelMidBatch;
funcInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1; kernelInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
funcInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.usesFencesForReadWriteImages = 1; kernelInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.usesFencesForReadWriteImages = 1;
device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable.flags.waDisableLSQCROPERFforOCL = true; device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable.flags.waDisableLSQCROPERFforOCL = true;
initializeFunction(functionMidBatch, funcInfoMidBatchData, device); initializeKernel(kernelMidBatch, kernelInfoMidBatchData, device);
ze_result_t returnValue; ze_result_t returnValue;
auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode()); EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode());
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(functionMidBatch.toHandle(), commandList->appendLaunchKernel(kernelMidBatch.toHandle(),
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams); &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(NEO::PreemptionMode::MidBatch, commandList->getCommandListPreemptionMode()); EXPECT_EQ(NEO::PreemptionMode::MidBatch, commandList->getCommandListPreemptionMode());
auto result = commandList->close(); auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(NEO::PreemptionMode::MidBatch, commandList->getCommandListPreemptionMode()); EXPECT_EQ(NEO::PreemptionMode::MidBatch, commandList->getCommandListPreemptionMode());
cleanupFunction(funcInfoMidBatchData); cleanupKernel(kernelInfoMidBatchData);
delete commandList; delete commandList;
} }
GEN9TEST_F(CommandListCreateGen9, WhenCommandListHasLowerPreemptionLevelThenDoNotIncreaseAgain) { GEN9TEST_F(CommandListCreateGen9, WhenCommandListHasLowerPreemptionLevelThenDoNotIncreaseAgain) {
WhiteBox<::L0::KernelImmutableData> funcInfoThreadGroupData = {}; WhiteBox<::L0::KernelImmutableData> kernelInfoThreadGroupData = {};
NEO::KernelDescriptor kernelDescriptor; NEO::KernelDescriptor kernelDescriptor;
funcInfoThreadGroupData.kernelDescriptor = &kernelDescriptor; kernelInfoThreadGroupData.kernelDescriptor = &kernelDescriptor;
WhiteBox<::L0::Kernel> functionThreadGroup; WhiteBox<::L0::Kernel> kernelThreadGroup;
funcInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1; kernelInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
initializeFunction(functionThreadGroup, funcInfoThreadGroupData, device); initializeKernel(kernelThreadGroup, kernelInfoThreadGroupData, device);
WhiteBox<::L0::KernelImmutableData> funcInfoMidThreadData = {}; WhiteBox<::L0::KernelImmutableData> kernelInfoMidThreadData = {};
NEO::KernelDescriptor kernelDescriptor2; NEO::KernelDescriptor kernelDescriptor2;
funcInfoMidThreadData.kernelDescriptor = &kernelDescriptor2; kernelInfoMidThreadData.kernelDescriptor = &kernelDescriptor2;
WhiteBox<::L0::Kernel> functionMidThread; WhiteBox<::L0::Kernel> kernelMidThread;
initializeFunction(functionMidThread, funcInfoMidThreadData, device); initializeKernel(kernelMidThread, kernelInfoMidThreadData, device);
ze_result_t returnValue; ze_result_t returnValue;
auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode()); EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode());
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(functionThreadGroup.toHandle(), commandList->appendLaunchKernel(kernelThreadGroup.toHandle(),
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams); &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode()); EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
commandList->appendLaunchKernel(functionMidThread.toHandle(), commandList->appendLaunchKernel(kernelMidThread.toHandle(),
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams); &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode()); EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
auto result = commandList->close(); auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode()); EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
cleanupFunction(funcInfoThreadGroupData); cleanupKernel(kernelInfoThreadGroupData);
cleanupFunction(funcInfoMidThreadData); cleanupKernel(kernelInfoMidThreadData);
delete commandList; delete commandList;
} }
} // namespace ult } // namespace ult

View File

@ -81,23 +81,23 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
} }
ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels,
const ze_kernel_handle_t *phKernels, const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments, const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer, const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent, ze_event_handle_t hEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override { ze_event_handle_t *phWaitEvents) override {
appendEventMultipleKernelIndirectEventHandleValue = hEvent; appendEventMultipleKernelIndirectEventHandleValue = hEvent;
return BaseClass::appendLaunchMultipleKernelsIndirect(numKernels, phKernels, pNumLaunchArguments, pLaunchArgumentsBuffer, return BaseClass::appendLaunchMultipleKernelsIndirect(numKernels, kernelHandles, pNumLaunchArguments, pLaunchArgumentsBuffer,
hEvent, numWaitEvents, phWaitEvents); hEvent, numWaitEvents, phWaitEvents);
} }
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel, ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override { ze_event_handle_t *phWaitEvents) override {
appendEventKernelIndirectEventHandleValue = hEvent; appendEventKernelIndirectEventHandleValue = hEvent;
return BaseClass::appendLaunchKernelIndirect(hKernel, pDispatchArgumentsBuffer, return BaseClass::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
hEvent, numWaitEvents, phWaitEvents); hEvent, numWaitEvents, phWaitEvents);
} }
@ -163,29 +163,29 @@ struct MockCommandList : public CommandList {
ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ());
ADDMETHOD_NOBASE(appendLaunchKernel, ze_result_t, ZE_RESULT_SUCCESS, ADDMETHOD_NOBASE(appendLaunchKernel, ze_result_t, ZE_RESULT_SUCCESS,
(ze_kernel_handle_t hFunction, (ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams)); const CmdListKernelLaunchParams &launchParams));
ADDMETHOD_NOBASE(appendLaunchCooperativeKernel, ze_result_t, ZE_RESULT_SUCCESS, ADDMETHOD_NOBASE(appendLaunchCooperativeKernel, ze_result_t, ZE_RESULT_SUCCESS,
(ze_kernel_handle_t hKernel, (ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchFuncArgs, const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents)); ze_event_handle_t *phWaitEvents));
ADDMETHOD_NOBASE(appendLaunchKernelIndirect, ze_result_t, ZE_RESULT_SUCCESS, ADDMETHOD_NOBASE(appendLaunchKernelIndirect, ze_result_t, ZE_RESULT_SUCCESS,
(ze_kernel_handle_t hFunction, (ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, ze_event_handle_t hEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents)); ze_event_handle_t *phWaitEvents));
ADDMETHOD_NOBASE(appendLaunchMultipleKernelsIndirect, ze_result_t, ZE_RESULT_SUCCESS, ADDMETHOD_NOBASE(appendLaunchMultipleKernelsIndirect, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t numFunctions, (uint32_t numKernels,
const ze_kernel_handle_t *phFunctions, const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments, const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer, const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent, ze_event_handle_t hEvent,
@ -505,14 +505,14 @@ class MockCommandListForAppendLaunchKernel : public WhiteBox<::L0::CommandListCo
public: public:
CmdListHelper cmdListHelper; CmdListHelper cmdListHelper;
ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, ze_event_handle_t hEvent,
uint32_t numWaitEvents, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) override { const CmdListKernelLaunchParams &launchParams) override {
const auto kernel = Kernel::fromHandle(hKernel); const auto kernel = Kernel::fromHandle(kernelHandle);
cmdListHelper.isaAllocation = kernel->getIsaAllocation(); cmdListHelper.isaAllocation = kernel->getIsaAllocation();
cmdListHelper.residencyContainer = kernel->getResidencyContainer(); cmdListHelper.residencyContainer = kernel->getResidencyContainer();
cmdListHelper.groupSize = kernel->getGroupSize(); cmdListHelper.groupSize = kernel->getGroupSize();

View File

@ -28,7 +28,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using BaseClass::csr; using BaseClass::csr;
using BaseClass::device; using BaseClass::device;
using BaseClass::preemptionCmdSyncProgramming; using BaseClass::preemptionCmdSyncProgramming;
using BaseClass::printfFunctionContainer; using BaseClass::printfKernelContainer;
using BaseClass::submitBatchBuffer; using BaseClass::submitBatchBuffer;
using BaseClass::synchronizeByPollingForTaskCount; using BaseClass::synchronizeByPollingForTaskCount;
using BaseClass::taskCount; using BaseClass::taskCount;
@ -62,7 +62,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>; using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>;
using BaseClass::commandStream; using BaseClass::commandStream;
using BaseClass::prepareAndSubmitBatchBuffer; using BaseClass::prepareAndSubmitBatchBuffer;
using BaseClass::printfFunctionContainer; using BaseClass::printfKernelContainer;
using L0::CommandQueue::activeSubDevices; using L0::CommandQueue::activeSubDevices;
using L0::CommandQueue::internalUsage; using L0::CommandQueue::internalUsage;
using L0::CommandQueue::multiReturnPointCommandList; using L0::CommandQueue::multiReturnPointCommandList;

View File

@ -20,9 +20,9 @@ struct MockDeviceForRebuildBuilins : public Mock<DeviceImp> {
MockModuleForRebuildBuiltins(Device *device, ModuleType type) : ModuleImp(device, nullptr, type) {} MockModuleForRebuildBuiltins(Device *device, ModuleType type) : ModuleImp(device, nullptr, type) {}
ze_result_t createKernel(const ze_kernel_desc_t *desc, ze_result_t createKernel(const ze_kernel_desc_t *desc,
ze_kernel_handle_t *phFunction) override { ze_kernel_handle_t *kernelHandle) override {
*phFunction = new Mock<Kernel>(); *kernelHandle = new Mock<Kernel>();
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
}; };

View File

@ -43,11 +43,11 @@ struct Mock<Module> : public Module {
Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type) : WhiteBox(device, moduleBuildLog, type) {} Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type) : WhiteBox(device, moduleBuildLog, type) {}
Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog) : Mock(device, moduleBuildLog, ModuleType::User){}; Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog) : Mock(device, moduleBuildLog, ModuleType::User){};
ADDMETHOD_NOBASE(createKernel, ze_result_t, ZE_RESULT_SUCCESS, (const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction)); ADDMETHOD_NOBASE(createKernel, ze_result_t, ZE_RESULT_SUCCESS, (const ze_kernel_desc_t *desc, ze_kernel_handle_t *kernelHandle));
ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ());
ADDMETHOD_NOBASE(getFunctionPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pKernelName, void **pfnFunction)); ADDMETHOD_NOBASE(getFunctionPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pKernelName, void **pfnFunction));
ADDMETHOD_NOBASE(getNativeBinary, ze_result_t, ZE_RESULT_SUCCESS, (size_t * pSize, uint8_t *pModuleNativeBinary)); ADDMETHOD_NOBASE(getNativeBinary, ze_result_t, ZE_RESULT_SUCCESS, (size_t * pSize, uint8_t *pModuleNativeBinary));
ADDMETHOD_CONST_NOBASE(getKernelImmutableData, const L0::KernelImmutableData *, nullptr, (const char *functionName)); ADDMETHOD_CONST_NOBASE(getKernelImmutableData, const L0::KernelImmutableData *, nullptr, (const char *kernelName));
ADDMETHOD_CONST_NOBASE(getMaxGroupSize, uint32_t, 256, ()); ADDMETHOD_CONST_NOBASE(getMaxGroupSize, uint32_t, 256, ());
ADDMETHOD_NOBASE(getKernelNames, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, const char **pNames)); ADDMETHOD_NOBASE(getKernelNames, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, const char **pNames));
ADDMETHOD_NOBASE(performDynamicLink, ze_result_t, ZE_RESULT_SUCCESS, ADDMETHOD_NOBASE(performDynamicLink, ze_result_t, ZE_RESULT_SUCCESS,
@ -91,7 +91,7 @@ struct MockModule : public L0::ModuleImp {
~MockModule() override = default; ~MockModule() override = default;
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override { const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override {
return kernelImmData; return kernelImmData;
} }

View File

@ -200,8 +200,8 @@ HWTEST_F(TestBuiltinFunctionsLibImplDefault, givenRebuildPrecompiledKernelsDebug
MockModuleForRebuildBuiltins(Device *device) : ModuleImp(device, nullptr, ModuleType::Builtin) {} MockModuleForRebuildBuiltins(Device *device) : ModuleImp(device, nullptr, ModuleType::Builtin) {}
ze_result_t createKernel(const ze_kernel_desc_t *desc, ze_result_t createKernel(const ze_kernel_desc_t *desc,
ze_kernel_handle_t *phFunction) override { ze_kernel_handle_t *kernelHandle) override {
*phFunction = nullptr; *kernelHandle = nullptr;
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
}; };

View File

@ -262,14 +262,14 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenSignalEventWhenAppendLaunchMultipl
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>(); auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
const ze_kernel_handle_t launchFn = kernel->toHandle(); const ze_kernel_handle_t launchKernels = kernel->toHandle();
uint32_t *numLaunchArgs; uint32_t *numLaunchArgs;
ze_device_mem_alloc_desc_t deviceDesc = {}; ze_device_mem_alloc_desc_t deviceDesc = {};
returnValue = context->allocDeviceMem( returnValue = context->allocDeviceMem(
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs)); device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList->appendLaunchMultipleKernelsIndirect(1, &launchFn, numLaunchArgs, nullptr, event->toHandle(), 0, nullptr); returnValue = commandList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, numLaunchArgs, nullptr, event->toHandle(), 0, nullptr);
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_EQ(event->toHandle(), commandList->appendEventMultipleKernelIndirectEventHandleValue); EXPECT_EQ(event->toHandle(), commandList->appendEventMultipleKernelIndirectEventHandleValue);

View File

@ -72,11 +72,11 @@ TEST(zeCommandListAppendWriteGlobalTimestamp, whenCalledThenRedirectedToObject)
TEST(zeCommandListAppendLaunchKernel, whenCalledThenRedirectedToObject) { TEST(zeCommandListAppendLaunchKernel, whenCalledThenRedirectedToObject) {
MockCommandList commandList; MockCommandList commandList;
Mock<::L0::Kernel> kernel; Mock<::L0::Kernel> kernel;
ze_group_count_t dispatchFunctionArguments; ze_group_count_t dispatchKernelArguments;
auto result = auto result =
zeCommandListAppendLaunchKernel(commandList.toHandle(), kernel.toHandle(), zeCommandListAppendLaunchKernel(commandList.toHandle(), kernel.toHandle(),
&dispatchFunctionArguments, nullptr, 0, nullptr); &dispatchKernelArguments, nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result);
} }
TEST(zeCommandListAppendEventReset, whenCalledThenRedirectedToObject) { TEST(zeCommandListAppendEventReset, whenCalledThenRedirectedToObject) {

View File

@ -193,8 +193,8 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size()); EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
EXPECT_EQ(kernel.get(), commandList->getPrintfFunctionContainer()[0]); EXPECT_EQ(kernel.get(), commandList->getPrintfKernelContainer()[0]);
} }
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListMultipleTimesThenKernelIsStoredOnce) { HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListMultipleTimesThenKernelIsStoredOnce) {
@ -208,12 +208,12 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size()); EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
EXPECT_EQ(kernel.get(), commandList->getPrintfFunctionContainer()[0]); EXPECT_EQ(kernel.get(), commandList->getPrintfKernelContainer()[0]);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size()); EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
} }
HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) { HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) {
@ -617,8 +617,8 @@ HWTEST_F(CommandListAppendLaunchKernel, givenCommandListWhenResetCalledThenState
commandList->commandContainer.getResidencyContainer().size()); commandList->commandContainer.getResidencyContainer().size());
ASSERT_EQ(commandListControl->commandContainer.getDeallocationContainer().size(), ASSERT_EQ(commandListControl->commandContainer.getDeallocationContainer().size(),
commandList->commandContainer.getDeallocationContainer().size()); commandList->commandContainer.getDeallocationContainer().size());
ASSERT_EQ(commandListControl->getPrintfFunctionContainer().size(), ASSERT_EQ(commandListControl->getPrintfKernelContainer().size(),
commandList->getPrintfFunctionContainer().size()); commandList->getPrintfKernelContainer().size());
ASSERT_EQ(commandListControl->commandContainer.getCommandStream()->getUsed(), commandList->commandContainer.getCommandStream()->getUsed()); ASSERT_EQ(commandListControl->commandContainer.getCommandStream()->getUsed(), commandList->commandContainer.getCommandStream()->getUsed());
ASSERT_EQ(commandListControl->commandContainer.slmSize, commandList->commandContainer.slmSize); ASSERT_EQ(commandListControl->commandContainer.slmSize, commandList->commandContainer.slmSize);

View File

@ -1276,9 +1276,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value); EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
} }
using MultiTileCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileCommandListAppendLaunchFunctionFixture>; using MultiTileCommandListAppendLaunchKernelXeHpCoreTest = Test<MultiTileCommandListAppendLaunchKernelFixture>;
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingEnabledWhenAppendingKernelWithEventThenAllEventPacketsAreUsed) { HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchKernelXeHpCoreTest, givenImplicitScalingEnabledWhenAppendingKernelWithEventThenAllEventPacketsAreUsed) {
ze_event_pool_desc_t eventPoolDesc = {}; ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
@ -1307,7 +1307,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTes
EXPECT_EQ(4u, commandList->partitionCount); EXPECT_EQ(4u, commandList->partitionCount);
} }
HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperativeKernelWhenAppendingKernelsThenDoNotUseImplicitScaling, IsAtLeastXeHpCore) { HWTEST2_F(MultiTileCommandListAppendLaunchKernelXeHpCoreTest, givenCooperativeKernelWhenAppendingKernelsThenDoNotUseImplicitScaling, IsAtLeastXeHpCore) {
ze_group_count_t groupCount{1, 1, 1}; ze_group_count_t groupCount{1, 1, 1};
auto commandListWithNonCooperativeKernel = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>(); auto commandListWithNonCooperativeKernel = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
@ -1342,7 +1342,7 @@ HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperative
EXPECT_TRUE(cmd->getWorkloadPartitionEnable()); EXPECT_TRUE(cmd->getWorkloadPartitionEnable());
} }
HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, HWTEST2_F(MultiTileCommandListAppendLaunchKernelXeHpCoreTest,
givenRegularCommandListWhenSynchronizationRequiredThenExpectJumpingBbStartCommandToSecondary, IsAtLeastXeHpCore) { givenRegularCommandListWhenSynchronizationRequiredThenExpectJumpingBbStartCommandToSecondary, IsAtLeastXeHpCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

View File

@ -128,12 +128,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMult
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
ze_result_t returnValue; ze_result_t returnValue;
auto commandList = std::unique_ptr<L0::CommandList>(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto commandList = std::unique_ptr<L0::CommandList>(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
const ze_kernel_handle_t launchFn = kernel->toHandle(); const ze_kernel_handle_t launchKernels = kernel->toHandle();
uint32_t *numLaunchArgs; uint32_t *numLaunchArgs;
ze_device_mem_alloc_desc_t deviceDesc = {}; ze_device_mem_alloc_desc_t deviceDesc = {};
auto result = context->allocDeviceMem( auto result = context->allocDeviceMem(
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs)); device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
result = commandList->appendLaunchMultipleKernelsIndirect(1, &launchFn, numLaunchArgs, nullptr, nullptr, 0, nullptr); result = commandList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, numLaunchArgs, nullptr, nullptr, 0, nullptr);
ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_EQ(ZE_RESULT_SUCCESS, result);
*numLaunchArgs = 0; *numLaunchArgs = 0;
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@ -156,13 +156,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMult
using MI_MATH = typename FamilyType::MI_MATH; using MI_MATH = typename FamilyType::MI_MATH;
ze_result_t returnValue; ze_result_t returnValue;
auto commandList = std::unique_ptr<L0::CommandList>(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); auto commandList = std::unique_ptr<L0::CommandList>(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
const ze_kernel_handle_t launchFn[3] = {kernel->toHandle(), kernel->toHandle(), kernel->toHandle()}; const ze_kernel_handle_t launchKernels[3] = {kernel->toHandle(), kernel->toHandle(), kernel->toHandle()};
uint32_t *numLaunchArgs; uint32_t *numLaunchArgs;
const uint32_t numKernels = 3; const uint32_t numKernels = 3;
ze_device_mem_alloc_desc_t deviceDesc = {}; ze_device_mem_alloc_desc_t deviceDesc = {};
auto result = context->allocDeviceMem( auto result = context->allocDeviceMem(
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs)); device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchFn, numLaunchArgs, nullptr, nullptr, 0, nullptr); result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchKernels, numLaunchArgs, nullptr, nullptr, 0, nullptr);
ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_EQ(ZE_RESULT_SUCCESS, result);
*numLaunchArgs = 2; *numLaunchArgs = 2;
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@ -710,9 +710,9 @@ HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWit
context->freeMem(alloc); context->freeMem(alloc);
} }
using MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileImmediateCommandListAppendLaunchFunctionFixture>; using MultiTileImmediateCommandListAppendLaunchKernelXeHpCoreTest = Test<MultiTileImmediateCommandListAppendLaunchKernelFixture>;
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListThenDoNotAddSelfCleanup, IsAtLeastXeHpCore) { HWTEST2_F(MultiTileImmediateCommandListAppendLaunchKernelXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListThenDoNotAddSelfCleanup, IsAtLeastXeHpCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
@ -775,7 +775,7 @@ HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenIm
EXPECT_EQ(cmdList.end(), itorSemaphoreWait); EXPECT_EQ(cmdList.end(), itorSemaphoreWait);
} }
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListWithoutFlushTaskThenUseSecondaryBuffer, IsAtLeastXeHpCore) { HWTEST2_F(MultiTileImmediateCommandListAppendLaunchKernelXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListWithoutFlushTaskThenUseSecondaryBuffer, IsAtLeastXeHpCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

View File

@ -63,7 +63,7 @@ TEST_F(CommandQueueCreate, whenCreatingCommandQueueThenItIsInitialized) {
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
} }
TEST_F(CommandQueueCreate, whenSynchronizeByPollingTaskCountThenCallsPrintOutputOnPrintfFunctionsStoredAndClearsFunctionContainer) { TEST_F(CommandQueueCreate, whenSynchronizeByPollingTaskCountThenCallsPrintOutputOnPrintfKernelsStoredAndClearsKernelContainer) {
const ze_command_queue_desc_t desc{}; const ze_command_queue_desc_t desc{};
ze_result_t returnValue; ze_result_t returnValue;
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
@ -76,12 +76,12 @@ TEST_F(CommandQueueCreate, whenSynchronizeByPollingTaskCountThenCallsPrintOutput
Mock<Kernel> kernel1, kernel2; Mock<Kernel> kernel1, kernel2;
commandQueue->printfFunctionContainer.push_back(&kernel1); commandQueue->printfKernelContainer.push_back(&kernel1);
commandQueue->printfFunctionContainer.push_back(&kernel2); commandQueue->printfKernelContainer.push_back(&kernel2);
commandQueue->synchronizeByPollingForTaskCount(0u); commandQueue->synchronizeByPollingForTaskCount(0u);
EXPECT_EQ(0u, commandQueue->printfFunctionContainer.size()); EXPECT_EQ(0u, commandQueue->printfKernelContainer.size());
EXPECT_EQ(1u, kernel1.printPrintfOutputCalledTimes); EXPECT_EQ(1u, kernel1.printPrintfOutputCalledTimes);
EXPECT_EQ(1u, kernel2.printPrintfOutputCalledTimes); EXPECT_EQ(1u, kernel2.printPrintfOutputCalledTimes);
@ -255,9 +255,9 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList);
ze_group_count_t dispatchFunctionArguments{1, 1, 1}; ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams); commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
const size_t numHandles = 100; const size_t numHandles = 100;
ze_command_list_handle_t cmdListHandles[numHandles]; ze_command_list_handle_t cmdListHandles[numHandles];
@ -305,9 +305,9 @@ HWTEST_F(CommandQueueCreate, givenLogicalStateHelperWhenExecutingThenMergeStates
auto commandList = std::unique_ptr<L0::ult::CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); auto commandList = std::unique_ptr<L0::ult::CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
commandList->nonImmediateLogicalStateHelper.reset(mockCmdListLogicalStateHelper); commandList->nonImmediateLogicalStateHelper.reset(mockCmdListLogicalStateHelper);
ze_group_count_t dispatchFunctionArguments{1, 1, 1}; ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams); commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
ze_command_list_handle_t cmdListHandles = commandList->toHandle(); ze_command_list_handle_t cmdListHandles = commandList->toHandle();
@ -341,9 +341,9 @@ HWTEST_F(CommandQueueCreate, givenLogicalStateHelperAndImmediateCmdListWhenExecu
auto commandList = std::unique_ptr<L0::ult::CommandList>(whiteboxCast(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue))); auto commandList = std::unique_ptr<L0::ult::CommandList>(whiteboxCast(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)));
ze_group_count_t dispatchFunctionArguments{1, 1, 1}; ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams); commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
ze_command_list_handle_t cmdListHandles = commandList->toHandle(); ze_command_list_handle_t cmdListHandles = commandList->toHandle();
@ -367,9 +367,9 @@ HWTEST2_F(CommandQueueCreate, givenGpuHangInReservingLinearStreamWhenExecutingCo
auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList);
ze_group_count_t dispatchFunctionArguments{1, 1, 1}; ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams); commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
ze_command_list_handle_t cmdListHandles[1] = {commandList->toHandle()}; ze_command_list_handle_t cmdListHandles[1] = {commandList->toHandle()};

View File

@ -745,9 +745,9 @@ HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAnd
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
kernel.residencyContainer.push_back(gpuAlloc); kernel.residencyContainer.push_back(gpuAlloc);
ze_group_count_t dispatchFunctionArguments{1, 1, 1}; ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {}; CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams); commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
auto deviceImp = static_cast<DeviceImp *>(device); auto deviceImp = static_cast<DeviceImp *>(device);
auto pageFaultCmdQueue = whiteboxCast(deviceImp->pageFaultCommandList->cmdQImmediate); auto pageFaultCmdQueue = whiteboxCast(deviceImp->pageFaultCommandList->cmdQImmediate);

View File

@ -6,7 +6,7 @@
target_sources(${TARGET_NAME} PRIVATE target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_function.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_kernel_2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_sampler_patch_value.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sampler_patch_value.cpp
) )

View File

@ -20,26 +20,26 @@ TEST_F(KernelImp, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSizeAn
uint32_t *crossThreadData = uint32_t *crossThreadData =
reinterpret_cast<uint32_t *>(alignedMalloc(sizeof(uint32_t[6]), 32)); reinterpret_cast<uint32_t *>(alignedMalloc(sizeof(uint32_t[6]), 32));
WhiteBox<::L0::KernelImmutableData> funcInfo = {}; WhiteBox<::L0::KernelImmutableData> kernelInfo = {};
NEO::KernelDescriptor descriptor; NEO::KernelDescriptor descriptor;
funcInfo.kernelDescriptor = &descriptor; kernelInfo.kernelDescriptor = &descriptor;
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[0] = 0 * sizeof(uint32_t); kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[0] = 0 * sizeof(uint32_t);
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[1] = 1 * sizeof(uint32_t); kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[1] = 1 * sizeof(uint32_t);
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[2] = 2 * sizeof(uint32_t); kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[2] = 2 * sizeof(uint32_t);
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[0] = 3 * sizeof(uint32_t); kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[0] = 3 * sizeof(uint32_t);
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[1] = 4 * sizeof(uint32_t); kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[1] = 4 * sizeof(uint32_t);
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[2] = 5 * sizeof(uint32_t); kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[2] = 5 * sizeof(uint32_t);
Mock<Kernel> function; Mock<Kernel> kernel;
function.kernelImmData = &funcInfo; kernel.kernelImmData = &kernelInfo;
function.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData)); kernel.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData));
function.crossThreadDataSize = sizeof(uint32_t[6]); kernel.crossThreadDataSize = sizeof(uint32_t[6]);
function.groupSize[0] = 2; kernel.groupSize[0] = 2;
function.groupSize[1] = 3; kernel.groupSize[1] = 3;
function.groupSize[2] = 5; kernel.groupSize[2] = 5;
function.KernelImp::setGroupCount(7, 11, 13); kernel.KernelImp::setGroupCount(7, 11, 13);
auto crossThread = function.KernelImp::getCrossThreadData(); auto crossThread = kernel.KernelImp::getCrossThreadData();
ASSERT_NE(nullptr, crossThread); ASSERT_NE(nullptr, crossThread);
const uint32_t *globalWorkSizes = reinterpret_cast<const uint32_t *>(crossThread); const uint32_t *globalWorkSizes = reinterpret_cast<const uint32_t *>(crossThread);
EXPECT_EQ(2U * 7U, globalWorkSizes[0]); EXPECT_EQ(2U * 7U, globalWorkSizes[0]);
@ -51,30 +51,30 @@ TEST_F(KernelImp, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSizeAn
EXPECT_EQ(11U, numGroups[1]); EXPECT_EQ(11U, numGroups[1]);
EXPECT_EQ(13U, numGroups[2]); EXPECT_EQ(13U, numGroups[2]);
function.crossThreadData.release(); kernel.crossThreadData.release();
alignedFree(crossThreadData); alignedFree(crossThreadData);
} }
TEST_F(KernelImp, givenExecutionMaskWithoutReminderWhenProgrammingItsValueThenSetValidNumberOfBits) { TEST_F(KernelImp, givenExecutionMaskWithoutReminderWhenProgrammingItsValueThenSetValidNumberOfBits) {
NEO::KernelDescriptor descriptor = {}; NEO::KernelDescriptor descriptor = {};
WhiteBox<KernelImmutableData> funcInfo = {}; WhiteBox<KernelImmutableData> kernelInfo = {};
funcInfo.kernelDescriptor = &descriptor; kernelInfo.kernelDescriptor = &descriptor;
Mock<Module> module(device, nullptr); Mock<Module> module(device, nullptr);
Mock<Kernel> function; Mock<Kernel> kernel;
function.kernelImmData = &funcInfo; kernel.kernelImmData = &kernelInfo;
function.module = &module; kernel.module = &module;
const std::array<uint32_t, 4> testedSimd = {{1, 8, 16, 32}}; const std::array<uint32_t, 4> testedSimd = {{1, 8, 16, 32}};
for (auto simd : testedSimd) { for (auto simd : testedSimd) {
descriptor.kernelAttributes.simdSize = simd; descriptor.kernelAttributes.simdSize = simd;
function.KernelImp::setGroupSize(simd, 1, 1); kernel.KernelImp::setGroupSize(simd, 1, 1);
if (simd == 1) { if (simd == 1) {
EXPECT_EQ(maxNBitValue(32), function.KernelImp::getThreadExecutionMask()); EXPECT_EQ(maxNBitValue(32), kernel.KernelImp::getThreadExecutionMask());
} else { } else {
EXPECT_EQ(maxNBitValue(simd), function.KernelImp::getThreadExecutionMask()); EXPECT_EQ(maxNBitValue(simd), kernel.KernelImp::getThreadExecutionMask());
} }
} }
} }
@ -82,20 +82,20 @@ TEST_F(KernelImp, givenExecutionMaskWithoutReminderWhenProgrammingItsValueThenSe
TEST_F(KernelImp, WhenSuggestingGroupSizeThenClampToMaxGroupSize) { TEST_F(KernelImp, WhenSuggestingGroupSizeThenClampToMaxGroupSize) {
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
WhiteBox<KernelImmutableData> funcInfo = {}; WhiteBox<KernelImmutableData> kernelInfo = {};
NEO::KernelDescriptor descriptor; NEO::KernelDescriptor descriptor;
funcInfo.kernelDescriptor = &descriptor; kernelInfo.kernelDescriptor = &descriptor;
NEO::DebugManager.flags.EnableComputeWorkSizeND.set(false); NEO::DebugManager.flags.EnableComputeWorkSizeND.set(false);
Mock<Module> module(device, nullptr); Mock<Module> module(device, nullptr);
module.getMaxGroupSizeResult = 8; module.getMaxGroupSizeResult = 8;
Mock<Kernel> function; Mock<Kernel> kernel;
function.kernelImmData = &funcInfo; kernel.kernelImmData = &kernelInfo;
function.module = &module; kernel.module = &module;
uint32_t groupSize[3]; uint32_t groupSize[3];
function.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2); kernel.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(8U, groupSize[0]); EXPECT_EQ(8U, groupSize[0]);
EXPECT_EQ(1U, groupSize[1]); EXPECT_EQ(1U, groupSize[1]);
EXPECT_EQ(1U, groupSize[2]); EXPECT_EQ(1U, groupSize[2]);
@ -118,9 +118,9 @@ INSTANTIATE_TEST_CASE_P(, KernelImpSuggestGroupSize,
TEST_P(KernelImpSuggestGroupSize, WhenSuggestingGroupThenProperGroupSizeChosen) { TEST_P(KernelImpSuggestGroupSize, WhenSuggestingGroupThenProperGroupSizeChosen) {
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
WhiteBox<KernelImmutableData> funcInfo = {}; WhiteBox<KernelImmutableData> kernelInfo = {};
NEO::KernelDescriptor descriptor; NEO::KernelDescriptor descriptor;
funcInfo.kernelDescriptor = &descriptor; kernelInfo.kernelDescriptor = &descriptor;
NEO::DebugManager.flags.EnableComputeWorkSizeND.set(false); NEO::DebugManager.flags.EnableComputeWorkSizeND.set(false);
@ -128,105 +128,105 @@ TEST_P(KernelImpSuggestGroupSize, WhenSuggestingGroupThenProperGroupSizeChosen)
uint32_t size = GetParam(); uint32_t size = GetParam();
Mock<Kernel> function; Mock<Kernel> kernel;
function.kernelImmData = &funcInfo; kernel.kernelImmData = &kernelInfo;
function.module = &module; kernel.module = &module;
uint32_t groupSize[3]; uint32_t groupSize[3];
function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2); kernel.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, size % groupSize[0]); EXPECT_EQ(0U, size % groupSize[0]);
EXPECT_EQ(0U, 1U % groupSize[1]); EXPECT_EQ(0U, 1U % groupSize[1]);
EXPECT_EQ(0U, 1U % groupSize[2]); EXPECT_EQ(0U, 1U % groupSize[2]);
function.KernelImp::suggestGroupSize(size, size, 1, groupSize, groupSize + 1, groupSize + 2); kernel.KernelImp::suggestGroupSize(size, size, 1, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, size % groupSize[0]); EXPECT_EQ(0U, size % groupSize[0]);
EXPECT_EQ(0U, size % groupSize[1]); EXPECT_EQ(0U, size % groupSize[1]);
EXPECT_EQ(0U, 1U % groupSize[2]); EXPECT_EQ(0U, 1U % groupSize[2]);
function.KernelImp::suggestGroupSize(size, size, size, groupSize, groupSize + 1, kernel.KernelImp::suggestGroupSize(size, size, size, groupSize, groupSize + 1,
groupSize + 2); groupSize + 2);
EXPECT_EQ(0U, size % groupSize[0]); EXPECT_EQ(0U, size % groupSize[0]);
EXPECT_EQ(0U, size % groupSize[1]); EXPECT_EQ(0U, size % groupSize[1]);
EXPECT_EQ(0U, size % groupSize[2]); EXPECT_EQ(0U, size % groupSize[2]);
function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2); kernel.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, size % groupSize[0]); EXPECT_EQ(0U, size % groupSize[0]);
EXPECT_EQ(0U, 1U % groupSize[1]); EXPECT_EQ(0U, 1U % groupSize[1]);
EXPECT_EQ(0U, 1U % groupSize[2]); EXPECT_EQ(0U, 1U % groupSize[2]);
function.KernelImp::suggestGroupSize(1, size, 1, groupSize, groupSize + 1, groupSize + 2); kernel.KernelImp::suggestGroupSize(1, size, 1, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, 1U % groupSize[0]); EXPECT_EQ(0U, 1U % groupSize[0]);
EXPECT_EQ(0U, size % groupSize[1]); EXPECT_EQ(0U, size % groupSize[1]);
EXPECT_EQ(0U, 1U % groupSize[2]); EXPECT_EQ(0U, 1U % groupSize[2]);
function.KernelImp::suggestGroupSize(1, 1, size, groupSize, groupSize + 1, groupSize + 2); kernel.KernelImp::suggestGroupSize(1, 1, size, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, 1U % groupSize[0]); EXPECT_EQ(0U, 1U % groupSize[0]);
EXPECT_EQ(0U, 1U % groupSize[1]); EXPECT_EQ(0U, 1U % groupSize[1]);
EXPECT_EQ(0U, size % groupSize[2]); EXPECT_EQ(0U, size % groupSize[2]);
function.KernelImp::suggestGroupSize(1, size, size, groupSize, groupSize + 1, groupSize + 2); kernel.KernelImp::suggestGroupSize(1, size, size, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, 1U % groupSize[0]); EXPECT_EQ(0U, 1U % groupSize[0]);
EXPECT_EQ(0U, size % groupSize[1]); EXPECT_EQ(0U, size % groupSize[1]);
EXPECT_EQ(0U, size % groupSize[2]); EXPECT_EQ(0U, size % groupSize[2]);
function.KernelImp::suggestGroupSize(size, 1, size, groupSize, groupSize + 1, groupSize + 2); kernel.KernelImp::suggestGroupSize(size, 1, size, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, size % groupSize[0]); EXPECT_EQ(0U, size % groupSize[0]);
EXPECT_EQ(0U, 1U % groupSize[1]); EXPECT_EQ(0U, 1U % groupSize[1]);
EXPECT_EQ(0U, size % groupSize[2]); EXPECT_EQ(0U, size % groupSize[2]);
} }
TEST_F(KernelImp, GivenInvalidValuesWhenSettingGroupSizeThenInvalidArgumentErrorIsReturned) { TEST_F(KernelImp, GivenInvalidValuesWhenSettingGroupSizeThenInvalidArgumentErrorIsReturned) {
Mock<Kernel> function; Mock<Kernel> kernel;
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, function.KernelImp::setGroupSize(0U, 1U, 1U)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, kernel.KernelImp::setGroupSize(0U, 1U, 1U));
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, function.KernelImp::setGroupSize(1U, 0U, 1U)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, kernel.KernelImp::setGroupSize(1U, 0U, 1U));
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, function.KernelImp::setGroupSize(1U, 1U, 0U)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, kernel.KernelImp::setGroupSize(1U, 1U, 0U));
} }
TEST_F(KernelImp, givenSetGroupSizeWithGreaterGroupSizeThanAllowedThenCorrectErrorCodeIsReturned) { TEST_F(KernelImp, givenSetGroupSizeWithGreaterGroupSizeThanAllowedThenCorrectErrorCodeIsReturned) {
WhiteBox<KernelImmutableData> funcInfo = {}; WhiteBox<KernelImmutableData> kernelInfo = {};
NEO::KernelDescriptor descriptor; NEO::KernelDescriptor descriptor;
funcInfo.kernelDescriptor = &descriptor; kernelInfo.kernelDescriptor = &descriptor;
Mock<Module> module(device, nullptr); Mock<Module> module(device, nullptr);
Mock<Kernel> function; Mock<Kernel> kernel;
function.kernelImmData = &funcInfo; kernel.kernelImmData = &kernelInfo;
function.module = &module; kernel.module = &module;
uint32_t maxGroupSizeX = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[0]); uint32_t maxGroupSizeX = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[0]);
uint32_t maxGroupSizeY = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[1]); uint32_t maxGroupSizeY = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[1]);
uint32_t maxGroupSizeZ = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[2]); uint32_t maxGroupSizeZ = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[2]);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, function.KernelImp::setGroupSize(maxGroupSizeX, maxGroupSizeY, maxGroupSizeZ)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(maxGroupSizeX, maxGroupSizeY, maxGroupSizeZ));
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, function.KernelImp::setGroupSize(maxGroupSizeX + 1U, 1U, 1U)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(maxGroupSizeX + 1U, 1U, 1U));
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, function.KernelImp::setGroupSize(1U, maxGroupSizeY + 1U, 1U)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(1U, maxGroupSizeY + 1U, 1U));
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, function.KernelImp::setGroupSize(1U, 1U, maxGroupSizeZ + 1U)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(1U, 1U, maxGroupSizeZ + 1U));
} }
TEST_F(KernelImp, GivenNumChannelsZeroWhenSettingGroupSizeThenLocalIdsNotGenerated) { TEST_F(KernelImp, GivenNumChannelsZeroWhenSettingGroupSizeThenLocalIdsNotGenerated) {
WhiteBox<KernelImmutableData> funcInfo = {}; WhiteBox<KernelImmutableData> kernelInfo = {};
NEO::KernelDescriptor descriptor; NEO::KernelDescriptor descriptor;
funcInfo.kernelDescriptor = &descriptor; kernelInfo.kernelDescriptor = &descriptor;
Mock<Module> module(device, nullptr); Mock<Module> module(device, nullptr);
Mock<Kernel> function; Mock<Kernel> kernel;
function.kernelImmData = &funcInfo; kernel.kernelImmData = &kernelInfo;
function.module = &module; kernel.module = &module;
function.KernelImp::setGroupSize(16U, 16U, 1U); kernel.KernelImp::setGroupSize(16U, 16U, 1U);
std::vector<char> memBefore; std::vector<char> memBefore;
{ {
auto perThreadData = auto perThreadData =
reinterpret_cast<const char *>(function.KernelImp::getPerThreadData()); reinterpret_cast<const char *>(kernel.KernelImp::getPerThreadData());
memBefore.assign(perThreadData, memBefore.assign(perThreadData,
perThreadData + function.KernelImp::getPerThreadDataSize()); perThreadData + kernel.KernelImp::getPerThreadDataSize());
} }
function.KernelImp::setGroupSize(8U, 32U, 1U); kernel.KernelImp::setGroupSize(8U, 32U, 1U);
std::vector<char> memAfter; std::vector<char> memAfter;
{ {
auto perThreadData = auto perThreadData =
reinterpret_cast<const char *>(function.KernelImp::getPerThreadData()); reinterpret_cast<const char *>(kernel.KernelImp::getPerThreadData());
memAfter.assign(perThreadData, memAfter.assign(perThreadData,
perThreadData + function.KernelImp::getPerThreadDataSize()); perThreadData + kernel.KernelImp::getPerThreadDataSize());
} }
EXPECT_EQ(memAfter, memBefore); EXPECT_EQ(memAfter, memBefore);
@ -253,12 +253,12 @@ class KernelImpSuggestMaxCooperativeGroupCountTests : public KernelImp {
uint32_t dssCount; uint32_t dssCount;
uint32_t availableSlm; uint32_t availableSlm;
uint32_t maxBarrierCount; uint32_t maxBarrierCount;
WhiteBox<::L0::KernelImmutableData> funcInfo; WhiteBox<::L0::KernelImmutableData> kernelInfo;
NEO::KernelDescriptor kernelDescriptor; NEO::KernelDescriptor kernelDescriptor;
void SetUp() override { void SetUp() override {
KernelImp::SetUp(); KernelImp::SetUp();
funcInfo.kernelDescriptor = &kernelDescriptor; kernelInfo.kernelDescriptor = &kernelDescriptor;
auto &hardwareInfo = device->getHwInfo(); auto &hardwareInfo = device->getHwInfo();
auto &hwHelper = device->getHwHelper(); auto &hwHelper = device->getHwHelper();
availableThreadCount = hwHelper.calculateAvailableThreadCount(hardwareInfo, numGrf); availableThreadCount = hwHelper.calculateAvailableThreadCount(hardwareInfo, numGrf);
@ -270,16 +270,16 @@ class KernelImpSuggestMaxCooperativeGroupCountTests : public KernelImp {
availableSlm = dssCount * KB * hardwareInfo.capabilityTable.slmSize; availableSlm = dssCount * KB * hardwareInfo.capabilityTable.slmSize;
maxBarrierCount = static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()); maxBarrierCount = static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice());
funcInfo.kernelDescriptor->kernelAttributes.simdSize = simd; kernelInfo.kernelDescriptor->kernelAttributes.simdSize = simd;
funcInfo.kernelDescriptor->kernelAttributes.numGrfRequired = numGrf; kernelInfo.kernelDescriptor->kernelAttributes.numGrfRequired = numGrf;
} }
uint32_t getMaxWorkGroupCount() { uint32_t getMaxWorkGroupCount() {
funcInfo.kernelDescriptor->kernelAttributes.slmInlineSize = usedSlm; kernelInfo.kernelDescriptor->kernelAttributes.slmInlineSize = usedSlm;
funcInfo.kernelDescriptor->kernelAttributes.barrierCount = usesBarriers; kernelInfo.kernelDescriptor->kernelAttributes.barrierCount = usesBarriers;
Mock<Kernel> kernel; Mock<Kernel> kernel;
kernel.kernelImmData = &funcInfo; kernel.kernelImmData = &kernelInfo;
auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User); auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
kernel.module = module.get(); kernel.module = module.get();

View File

@ -529,7 +529,7 @@ HWTEST_F(ModuleTest, GivenIncorrectNameWhenCreatingKernelThenResultErrorInvalidA
ze_kernel_handle_t kernelHandle; ze_kernel_handle_t kernelHandle;
ze_kernel_desc_t kernelDesc = {}; ze_kernel_desc_t kernelDesc = {};
kernelDesc.pKernelName = "nonexistent_function"; kernelDesc.pKernelName = "nonexistent_kernel";
ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);

View File

@ -10,7 +10,7 @@
namespace L0 { namespace L0 {
namespace ult { namespace ult {
void onEnterCommandListAppendLaunchFunction( void onEnterCommandListAppendLaunchKernel(
ze_command_list_append_launch_kernel_params_t *params, ze_command_list_append_launch_kernel_params_t *params,
ze_result_t result, ze_result_t result,
void *pTracerUserData, void *pTracerUserData,
@ -18,7 +18,7 @@ void onEnterCommandListAppendLaunchFunction(
int a = 0; int a = 0;
a++; a++;
} }
void onExitCommandListAppendLaunchFunction( void onExitCommandListAppendLaunchKernel(
ze_command_list_append_launch_kernel_params_t *params, ze_command_list_append_launch_kernel_params_t *params,
ze_result_t result, ze_result_t result,
void *pTracerUserData, void *pTracerUserData,
@ -168,8 +168,8 @@ TEST_F(ZeApiTracingCoreTests, WhenCreateTracerAndsetCallbacksAndEnableTracingAnd
zet_core_callbacks_t prologCbs = {}; zet_core_callbacks_t prologCbs = {};
zet_core_callbacks_t epilogCbs = {}; zet_core_callbacks_t epilogCbs = {};
prologCbs.CommandList.pfnAppendLaunchKernelCb = onEnterCommandListAppendLaunchFunction; prologCbs.CommandList.pfnAppendLaunchKernelCb = onEnterCommandListAppendLaunchKernel;
epilogCbs.CommandList.pfnAppendLaunchKernelCb = onExitCommandListAppendLaunchFunction; epilogCbs.CommandList.pfnAppendLaunchKernelCb = onExitCommandListAppendLaunchKernel;
result = zetTracerExpSetPrologues(apiTracerHandle, &prologCbs); result = zetTracerExpSetPrologues(apiTracerHandle, &prologCbs);
EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(ZE_RESULT_SUCCESS, result);