Replace function term with kernel class specific names
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
af7eaa0161
commit
072686f283
|
@ -65,8 +65,8 @@ ze_result_t zeModuleGetKernelNames(
|
|||
ze_result_t zeKernelCreate(
|
||||
ze_module_handle_t hModule,
|
||||
const ze_kernel_desc_t *desc,
|
||||
ze_kernel_handle_t *phFunction) {
|
||||
return L0::Module::fromHandle(hModule)->createKernel(desc, phFunction);
|
||||
ze_kernel_handle_t *kernelHandle) {
|
||||
return L0::Module::fromHandle(hModule)->createKernel(desc, kernelHandle);
|
||||
}
|
||||
|
||||
ze_result_t zeKernelDestroy(
|
||||
|
@ -141,45 +141,45 @@ ze_result_t zeKernelGetProperties(
|
|||
|
||||
ze_result_t zeCommandListAppendLaunchKernel(
|
||||
ze_command_list_handle_t hCommandList,
|
||||
ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pLaunchFuncArgs,
|
||||
ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *launchKernelArgs,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
L0::CmdListKernelLaunchParams launchParams = {};
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernel(hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents, launchParams);
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, phWaitEvents, launchParams);
|
||||
}
|
||||
|
||||
ze_result_t zeCommandListAppendLaunchCooperativeKernel(
|
||||
ze_command_list_handle_t hCommandList,
|
||||
ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pLaunchFuncArgs,
|
||||
ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *launchKernelArgs,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendLaunchCooperativeKernel(hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
ze_result_t zeCommandListAppendLaunchKernelIndirect(
|
||||
ze_command_list_handle_t hCommandList,
|
||||
ze_kernel_handle_t hKernel,
|
||||
ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernelIndirect(hKernel, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernelIndirect(kernelHandle, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
ze_result_t zeCommandListAppendLaunchMultipleKernelsIndirect(
|
||||
ze_command_list_handle_t hCommandList,
|
||||
uint32_t numKernels,
|
||||
ze_kernel_handle_t *phKernels,
|
||||
ze_kernel_handle_t *kernelHandles,
|
||||
const uint32_t *pCountBuffer,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendLaunchMultipleKernelsIndirect(numKernels, phKernels, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
return L0::CommandList::fromHandle(hCommandList)->appendLaunchMultipleKernelsIndirect(numKernels, kernelHandles, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
ze_result_t zeKernelGetName(
|
||||
|
@ -433,15 +433,15 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetName(
|
|||
|
||||
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel(
|
||||
ze_command_list_handle_t hCommandList,
|
||||
ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pLaunchFuncArgs,
|
||||
ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *launchKernelArgs,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
return L0::zeCommandListAppendLaunchKernel(
|
||||
hCommandList,
|
||||
hKernel,
|
||||
pLaunchFuncArgs,
|
||||
kernelHandle,
|
||||
launchKernelArgs,
|
||||
hSignalEvent,
|
||||
numWaitEvents,
|
||||
phWaitEvents);
|
||||
|
@ -449,15 +449,15 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel(
|
|||
|
||||
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel(
|
||||
ze_command_list_handle_t hCommandList,
|
||||
ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pLaunchFuncArgs,
|
||||
ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *launchKernelArgs,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
return L0::zeCommandListAppendLaunchCooperativeKernel(
|
||||
hCommandList,
|
||||
hKernel,
|
||||
pLaunchFuncArgs,
|
||||
kernelHandle,
|
||||
launchKernelArgs,
|
||||
hSignalEvent,
|
||||
numWaitEvents,
|
||||
phWaitEvents);
|
||||
|
@ -465,14 +465,14 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel(
|
|||
|
||||
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect(
|
||||
ze_command_list_handle_t hCommandList,
|
||||
ze_kernel_handle_t hKernel,
|
||||
ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
return L0::zeCommandListAppendLaunchKernelIndirect(
|
||||
hCommandList,
|
||||
hKernel,
|
||||
kernelHandle,
|
||||
pLaunchArgumentsBuffer,
|
||||
hSignalEvent,
|
||||
numWaitEvents,
|
||||
|
@ -482,7 +482,7 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect(
|
|||
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndirect(
|
||||
ze_command_list_handle_t hCommandList,
|
||||
uint32_t numKernels,
|
||||
ze_kernel_handle_t *phKernels,
|
||||
ze_kernel_handle_t *kernelHandles,
|
||||
const uint32_t *pCountBuffer,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
|
@ -491,7 +491,7 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndi
|
|||
return L0::zeCommandListAppendLaunchMultipleKernelsIndirect(
|
||||
hCommandList,
|
||||
numKernels,
|
||||
phKernels,
|
||||
kernelHandles,
|
||||
pCountBuffer,
|
||||
pLaunchArgumentsBuffer,
|
||||
hSignalEvent,
|
||||
|
|
|
@ -30,15 +30,15 @@ CommandList::~CommandList() {
|
|||
if (this->cmdListType == CommandListType::TYPE_REGULAR || !this->isFlushTaskSubmissionEnabled) {
|
||||
removeHostPtrAllocations();
|
||||
}
|
||||
printfFunctionContainer.clear();
|
||||
printfKernelContainer.clear();
|
||||
}
|
||||
|
||||
void CommandList::storePrintfFunction(Kernel *kernel) {
|
||||
auto it = std::find(this->printfFunctionContainer.begin(), this->printfFunctionContainer.end(),
|
||||
void CommandList::storePrintfKernel(Kernel *kernel) {
|
||||
auto it = std::find(this->printfKernelContainer.begin(), this->printfKernelContainer.end(),
|
||||
kernel);
|
||||
|
||||
if (it == this->printfFunctionContainer.end()) {
|
||||
this->printfFunctionContainer.push_back(kernel);
|
||||
if (it == this->printfKernelContainer.end()) {
|
||||
this->printfKernelContainer.push_back(kernel);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -131,7 +131,7 @@ void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocati
|
|||
}
|
||||
}
|
||||
|
||||
NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {
|
||||
NEO::PreemptionMode CommandList::obtainKernelPreemptionMode(Kernel *kernel) {
|
||||
NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor());
|
||||
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);
|
||||
}
|
||||
|
|
|
@ -93,19 +93,19 @@ struct CommandList : _ze_command_list_handle_t {
|
|||
virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *threadGroupDimensions,
|
||||
virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
|
||||
const CmdListKernelLaunchParams &launchParams) = 0;
|
||||
virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pLaunchFuncArgs,
|
||||
virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *launchKernelArgs,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel,
|
||||
virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) = 0;
|
||||
virtual ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *phKernels,
|
||||
virtual ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *kernelHandles,
|
||||
const uint32_t *pNumLaunchArguments,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
|
||||
|
@ -219,13 +219,13 @@ struct CommandList : _ze_command_list_handle_t {
|
|||
|
||||
void handleIndirectAllocationResidency();
|
||||
|
||||
NEO::PreemptionMode obtainFunctionPreemptionMode(Kernel *kernel);
|
||||
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
|
||||
|
||||
std::vector<Kernel *> &getPrintfFunctionContainer() {
|
||||
return this->printfFunctionContainer;
|
||||
std::vector<Kernel *> &getPrintfKernelContainer() {
|
||||
return this->printfKernelContainer;
|
||||
}
|
||||
|
||||
void storePrintfFunction(Kernel *kernel);
|
||||
void storePrintfKernel(Kernel *kernel);
|
||||
void removeDeallocationContainerData();
|
||||
void removeHostPtrAllocations();
|
||||
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
|
||||
|
@ -279,7 +279,7 @@ struct CommandList : _ze_command_list_handle_t {
|
|||
void migrateSharedAllocations();
|
||||
|
||||
ze_context_handle_t hContext = nullptr;
|
||||
std::vector<Kernel *> printfFunctionContainer;
|
||||
std::vector<Kernel *> printfKernelContainer;
|
||||
CommandQueue *cmdQImmediate = nullptr;
|
||||
NEO::CommandStreamReceiver *csr = nullptr;
|
||||
Device *device = nullptr;
|
||||
|
|
|
@ -80,22 +80,22 @@ struct CommandListCoreFamily : CommandListImp {
|
|||
ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel,
|
||||
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *threadGroupDimensions,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents,
|
||||
const CmdListKernelLaunchParams &launchParams) override;
|
||||
ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pLaunchFuncArgs,
|
||||
ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *launchKernelArgs,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel,
|
||||
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels,
|
||||
const ze_kernel_handle_t *phKernels,
|
||||
const ze_kernel_handle_t *kernelHandles,
|
||||
const uint32_t *pNumLaunchArguments,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent,
|
||||
|
|
|
@ -81,7 +81,7 @@ CommandListCoreFamily<gfxCoreFamily>::~CommandListCoreFamily() {
|
|||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
||||
printfFunctionContainer.clear();
|
||||
printfKernelContainer.clear();
|
||||
removeDeallocationContainerData();
|
||||
removeHostPtrAllocations();
|
||||
commandContainer.reset();
|
||||
|
@ -202,7 +202,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
|||
void CommandListCoreFamily<gfxCoreFamily>::programL3(bool isSLMused) {}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_handle_t hKernel,
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *threadGroupDimensions,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
|
@ -230,7 +230,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
|
|||
event = Event::fromHandle(hEvent);
|
||||
}
|
||||
|
||||
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(hKernel), threadGroupDimensions,
|
||||
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
|
||||
event, launchParams);
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
|
@ -245,8 +245,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
|
|||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pLaunchFuncArgs,
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *launchKernelArgs,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
|
@ -263,12 +263,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
|
|||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
launchParams.isCooperative = true;
|
||||
return appendLaunchKernelWithParams(Kernel::fromHandle(hKernel), pLaunchFuncArgs,
|
||||
return appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
|
||||
event, launchParams);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_kernel_handle_t hKernel,
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
|
@ -287,7 +287,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
|||
appendEventForProfiling(event, true, false);
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
launchParams.isIndirect = true;
|
||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(hKernel), pDispatchArgumentsBuffer,
|
||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
|
||||
nullptr, launchParams);
|
||||
appendSignalEventPostWalker(event, false);
|
||||
|
||||
|
@ -296,7 +296,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
|||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsIndirect(uint32_t numKernels,
|
||||
const ze_kernel_handle_t *phKernels,
|
||||
const ze_kernel_handle_t *kernelHandles,
|
||||
const uint32_t *pNumLaunchArguments,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent,
|
||||
|
@ -325,7 +325,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
|||
CmdListKernelLaunchParams launchParams = {};
|
||||
launchParams.isIndirect = true;
|
||||
launchParams.isPredicate = true;
|
||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(phKernels[i]),
|
||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
|
||||
haveLaunchArguments ? &pLaunchArgumentsBuffer[i] : nullptr,
|
||||
nullptr, launchParams);
|
||||
if (ret) {
|
||||
|
@ -563,12 +563,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
|
|||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
ze_group_count_t functionArgs{pDstRegion->width / groupSizeX, pDstRegion->height / groupSizeY,
|
||||
pDstRegion->depth / groupSizeZ};
|
||||
ze_group_count_t kernelArgs{pDstRegion->width / groupSizeX, pDstRegion->height / groupSizeY,
|
||||
pDstRegion->depth / groupSizeZ};
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
launchParams.isBuiltInKernel = true;
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs,
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
|
||||
event, numWaitEvents, phWaitEvents,
|
||||
launchParams);
|
||||
}
|
||||
|
@ -689,8 +689,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
|
|||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
ze_group_count_t functionArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY,
|
||||
pSrcRegion->depth / groupSizeZ};
|
||||
ze_group_count_t kernelArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY,
|
||||
pSrcRegion->depth / groupSizeZ};
|
||||
|
||||
auto dstAllocationType = allocationStruct.alloc->getAllocationType();
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
@ -698,7 +698,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
|
|||
launchParams.isDestinationAllocationInSystemMemory =
|
||||
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
|
||||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs,
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
|
||||
event, numWaitEvents, phWaitEvents, launchParams);
|
||||
|
||||
addFlushRequiredCommand(allocationStruct.needsFlush, event);
|
||||
|
@ -803,8 +803,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
|
|||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
ze_group_count_t functionArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY,
|
||||
srcRegion.depth / groupSizeZ};
|
||||
ze_group_count_t kernelArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY,
|
||||
srcRegion.depth / groupSizeZ};
|
||||
|
||||
kernel->setArgRedescribedImage(0, hSrcImage);
|
||||
kernel->setArgRedescribedImage(1, hDstImage);
|
||||
|
@ -813,7 +813,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
|
|||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
launchParams.isBuiltInKernel = true;
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &functionArgs,
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &kernelArgs,
|
||||
event, numWaitEvents, phWaitEvents,
|
||||
launchParams);
|
||||
}
|
||||
|
@ -907,31 +907,31 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
|
|||
|
||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||
|
||||
Kernel *builtinFunction = nullptr;
|
||||
Kernel *builtinKernel = nullptr;
|
||||
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin);
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
|
||||
|
||||
uint32_t groupSizeX = builtinFunction->getImmutableData()
|
||||
uint32_t groupSizeX = builtinKernel->getImmutableData()
|
||||
->getDescriptor()
|
||||
.kernelAttributes.simdSize;
|
||||
uint32_t groupSizeY = 1u;
|
||||
uint32_t groupSizeZ = 1u;
|
||||
|
||||
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) {
|
||||
if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
builtinFunction->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc);
|
||||
builtinFunction->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc);
|
||||
builtinKernel->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc);
|
||||
builtinKernel->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc);
|
||||
|
||||
uint64_t elems = size / elementSize;
|
||||
builtinFunction->setArgumentValue(2, sizeof(elems), &elems);
|
||||
builtinFunction->setArgumentValue(3, sizeof(dstOffset), &dstOffset);
|
||||
builtinFunction->setArgumentValue(4, sizeof(srcOffset), &srcOffset);
|
||||
builtinKernel->setArgumentValue(2, sizeof(elems), &elems);
|
||||
builtinKernel->setArgumentValue(3, sizeof(dstOffset), &dstOffset);
|
||||
builtinKernel->setArgumentValue(4, sizeof(srcOffset), &srcOffset);
|
||||
|
||||
uint32_t groups = static_cast<uint32_t>((size + ((static_cast<uint64_t>(groupSizeX) * elementSize) - 1)) / (static_cast<uint64_t>(groupSizeX) * elementSize));
|
||||
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
|
||||
ze_group_count_t dispatchKernelArgs{groups, 1u, 1u};
|
||||
|
||||
auto dstAllocationType = dstPtrAlloc->getAllocationType();
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
@ -942,7 +942,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
|
|||
(dstAllocationType == NEO::AllocationType::SVM_CPU) ||
|
||||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
|
||||
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams);
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(builtinKernel, &dispatchKernelArgs, signalEvent, launchParams);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
@ -1164,9 +1164,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||
appendEventForProfilingAllWalkers(signalEvent, true);
|
||||
|
||||
if (ret == ZE_RESULT_SUCCESS && leftSize) {
|
||||
Builtin func = Builtin::CopyBufferToBufferSide;
|
||||
Builtin copyKernel = Builtin::CopyBufferToBufferSide;
|
||||
if (isStateless) {
|
||||
func = Builtin::CopyBufferToBufferSideStateless;
|
||||
copyKernel = Builtin::CopyBufferToBufferSideStateless;
|
||||
}
|
||||
if (isCopyOnly()) {
|
||||
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||
|
@ -1179,16 +1179,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||
srcAllocationStruct.alloc, srcAllocationStruct.offset,
|
||||
leftSize, 1UL,
|
||||
func,
|
||||
copyKernel,
|
||||
signalEvent,
|
||||
isStateless);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
|
||||
Builtin func = Builtin::CopyBufferToBufferMiddle;
|
||||
Builtin copyKernel = Builtin::CopyBufferToBufferMiddle;
|
||||
if (isStateless) {
|
||||
func = Builtin::CopyBufferToBufferMiddleStateless;
|
||||
copyKernel = Builtin::CopyBufferToBufferMiddleStateless;
|
||||
}
|
||||
if (isCopyOnly()) {
|
||||
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||
|
@ -1202,16 +1202,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
|
||||
middleSizeBytes,
|
||||
middleElSize,
|
||||
func,
|
||||
copyKernel,
|
||||
signalEvent,
|
||||
isStateless);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
||||
Builtin func = Builtin::CopyBufferToBufferSide;
|
||||
Builtin copyKernel = Builtin::CopyBufferToBufferSide;
|
||||
if (isStateless) {
|
||||
func = Builtin::CopyBufferToBufferSideStateless;
|
||||
copyKernel = Builtin::CopyBufferToBufferSideStateless;
|
||||
}
|
||||
if (isCopyOnly()) {
|
||||
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||
|
@ -1224,7 +1224,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
|
||||
rightSize, 1UL,
|
||||
func,
|
||||
copyKernel,
|
||||
signalEvent,
|
||||
isStateless);
|
||||
}
|
||||
|
@ -1351,19 +1351,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
|
|||
|
||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||
|
||||
auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin);
|
||||
auto builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
|
||||
|
||||
uint32_t groupSizeX = srcRegion->width;
|
||||
uint32_t groupSizeY = srcRegion->height;
|
||||
uint32_t groupSizeZ = srcRegion->depth;
|
||||
|
||||
if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ,
|
||||
&groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ,
|
||||
&groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
@ -1373,20 +1373,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
|
|||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY,
|
||||
srcRegion->depth / groupSizeZ};
|
||||
ze_group_count_t dispatchKernelArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY,
|
||||
srcRegion->depth / groupSizeZ};
|
||||
|
||||
uint32_t srcOrigin[3] = {(srcRegion->originX + static_cast<uint32_t>(srcOffset)), (srcRegion->originY), (srcRegion->originZ)};
|
||||
uint32_t dstOrigin[3] = {(dstRegion->originX + static_cast<uint32_t>(dstOffset)), (dstRegion->originY), (dstRegion->originZ)};
|
||||
uint32_t srcPitches[2] = {(srcPitch), (srcSlicePitch)};
|
||||
uint32_t dstPitches[2] = {(dstPitch), (dstSlicePitch)};
|
||||
|
||||
builtinFunction->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
|
||||
builtinFunction->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
|
||||
builtinFunction->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
|
||||
builtinFunction->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
|
||||
builtinFunction->setArgumentValue(4, sizeof(srcPitches), &srcPitches);
|
||||
builtinFunction->setArgumentValue(5, sizeof(dstPitches), &dstPitches);
|
||||
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
|
||||
builtinKernel->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
|
||||
builtinKernel->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
|
||||
builtinKernel->setArgumentValue(4, sizeof(srcPitches), &srcPitches);
|
||||
builtinKernel->setArgumentValue(5, sizeof(dstPitches), &dstPitches);
|
||||
|
||||
auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType();
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
@ -1394,7 +1394,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
|
|||
launchParams.isDestinationAllocationInSystemMemory =
|
||||
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
|
||||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, signalEvent, numWaitEvents,
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &dispatchKernelArgs, signalEvent, numWaitEvents,
|
||||
phWaitEvents, launchParams);
|
||||
}
|
||||
|
||||
|
@ -1414,19 +1414,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
|
|||
|
||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||
|
||||
auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin);
|
||||
auto builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
|
||||
|
||||
uint32_t groupSizeX = srcRegion->width;
|
||||
uint32_t groupSizeY = srcRegion->height;
|
||||
uint32_t groupSizeZ = 1u;
|
||||
|
||||
if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX,
|
||||
&groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX,
|
||||
&groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
@ -1436,17 +1436,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
|
|||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, 1u};
|
||||
ze_group_count_t dispatchKernelArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, 1u};
|
||||
|
||||
uint32_t srcOrigin[2] = {(srcRegion->originX + static_cast<uint32_t>(srcOffset)), (srcRegion->originY)};
|
||||
uint32_t dstOrigin[2] = {(dstRegion->originX + static_cast<uint32_t>(dstOffset)), (dstRegion->originY)};
|
||||
|
||||
builtinFunction->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
|
||||
builtinFunction->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
|
||||
builtinFunction->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
|
||||
builtinFunction->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
|
||||
builtinFunction->setArgumentValue(4, sizeof(srcPitch), &srcPitch);
|
||||
builtinFunction->setArgumentValue(5, sizeof(dstPitch), &dstPitch);
|
||||
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
|
||||
builtinKernel->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
|
||||
builtinKernel->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
|
||||
builtinKernel->setArgumentValue(4, sizeof(srcPitch), &srcPitch);
|
||||
builtinKernel->setArgumentValue(5, sizeof(dstPitch), &dstPitch);
|
||||
|
||||
auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType();
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
@ -1454,8 +1454,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
|
|||
launchParams.isDestinationAllocationInSystemMemory =
|
||||
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
|
||||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(),
|
||||
&dispatchFuncArgs, signalEvent,
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(),
|
||||
&dispatchKernelArgs, signalEvent,
|
||||
numWaitEvents,
|
||||
phWaitEvents,
|
||||
launchParams);
|
||||
|
@ -1473,23 +1473,23 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryPrefetch(const voi
|
|||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendUnalignedFillKernel(bool isStateless, uint32_t unalignedSize, const AlignedAllocationData &dstAllocation, const void *pattern, Event *signalEvent, const CmdListKernelLaunchParams &launchParams) {
|
||||
Kernel *builtinFunction = nullptr;
|
||||
Kernel *builtinKernel = nullptr;
|
||||
if (isStateless) {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOverStateless);
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOverStateless);
|
||||
} else {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOver);
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOver);
|
||||
}
|
||||
uint32_t groupSizeY = 1, groupSizeZ = 1;
|
||||
uint32_t groupSizeX = static_cast<uint32_t>(unalignedSize);
|
||||
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
|
||||
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
||||
ze_group_count_t dispatchFuncRemainderArgs{static_cast<uint32_t>(unalignedSize / groupSizeX), 1u, 1u};
|
||||
builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
|
||||
builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
||||
ze_group_count_t dispatchKernelRemainderArgs{static_cast<uint32_t>(unalignedSize / groupSizeX), 1u, 1u};
|
||||
uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern));
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
||||
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinKernel->setArgumentValue(2, sizeof(value), &value);
|
||||
|
||||
auto res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams);
|
||||
auto res = appendLaunchKernelSplit(builtinKernel, &dispatchKernelRemainderArgs, signalEvent, launchParams);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
|
@ -1569,12 +1569,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||
middleSize -= leftRemainder;
|
||||
dstAllocation.offset += leftRemainder;
|
||||
}
|
||||
Kernel *builtinFunction = nullptr;
|
||||
Kernel *builtinKernel = nullptr;
|
||||
|
||||
if (isStateless) {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateStateless);
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateStateless);
|
||||
} else {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
|
||||
}
|
||||
const auto dataTypeSize = sizeof(uint32_t) * 4;
|
||||
size_t adjustedSize = middleSize / dataTypeSize;
|
||||
|
@ -1582,7 +1582,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||
if (groupSizeX > adjustedSize && adjustedSize > 0) {
|
||||
groupSizeX = adjustedSize;
|
||||
}
|
||||
if (builtinFunction->setGroupSize(static_cast<uint32_t>(groupSizeX), 1u, 1u)) {
|
||||
if (builtinKernel->setGroupSize(static_cast<uint32_t>(groupSizeX), 1u, 1u)) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
@ -1590,17 +1590,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||
size_t groups = adjustedSize / groupSizeX;
|
||||
uint32_t remainingBytes = static_cast<uint32_t>((adjustedSize % groupSizeX) * dataTypeSize +
|
||||
middleSize % dataTypeSize);
|
||||
ze_group_count_t dispatchFuncArgs{static_cast<uint32_t>(groups), 1u, 1u};
|
||||
ze_group_count_t dispatchKernelArgs{static_cast<uint32_t>(groups), 1u, 1u};
|
||||
|
||||
uint32_t value = 0;
|
||||
memset(&value, *reinterpret_cast<const unsigned char *>(pattern), 4);
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
||||
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinKernel->setArgumentValue(2, sizeof(value), &value);
|
||||
|
||||
appendEventForProfilingAllWalkers(signalEvent, true);
|
||||
|
||||
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams);
|
||||
res = appendLaunchKernelSplit(builtinKernel, &dispatchKernelArgs, signalEvent, launchParams);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
|
@ -1614,18 +1614,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||
}
|
||||
} else {
|
||||
|
||||
Kernel *builtinFunction = nullptr;
|
||||
Kernel *builtinKernel = nullptr;
|
||||
if (isStateless) {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddleStateless);
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddleStateless);
|
||||
} else {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle);
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle);
|
||||
}
|
||||
size_t middleElSize = sizeof(uint32_t);
|
||||
size_t adjustedSize = size / middleElSize;
|
||||
uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize);
|
||||
uint32_t groupSizeY = 1, groupSizeZ = 1;
|
||||
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
|
||||
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
||||
builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
|
||||
builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
||||
|
||||
uint32_t groups = static_cast<uint32_t>(adjustedSize) / groupSizeX;
|
||||
uint32_t remainingBytes = static_cast<uint32_t>((adjustedSize % groupSizeX) * middleElSize +
|
||||
|
@ -1656,15 +1656,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||
|
||||
patternAllocOffset += patternSizeToCopy;
|
||||
} while (patternAllocOffset < patternAllocationSize);
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinFunction->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc);
|
||||
builtinFunction->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls);
|
||||
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinKernel->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc);
|
||||
builtinKernel->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls);
|
||||
|
||||
appendEventForProfilingAllWalkers(signalEvent, true);
|
||||
|
||||
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
|
||||
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams);
|
||||
ze_group_count_t dispatchKernelArgs{groups, 1u, 1u};
|
||||
res = appendLaunchKernelSplit(builtinKernel, &dispatchKernelArgs, signalEvent, launchParams);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
|
@ -1673,28 +1673,28 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||
uint32_t dstOffsetRemainder = groups * groupSizeX * static_cast<uint32_t>(middleElSize);
|
||||
uint64_t patternOffsetRemainder = (groupSizeX * groups & (patternSizeInEls - 1)) * middleElSize;
|
||||
|
||||
Kernel *builtinFunctionRemainder;
|
||||
Kernel *builtinKernelRemainder;
|
||||
if (isStateless) {
|
||||
builtinFunctionRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftoverStateless);
|
||||
builtinKernelRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftoverStateless);
|
||||
} else {
|
||||
builtinFunctionRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftover);
|
||||
builtinKernelRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftover);
|
||||
}
|
||||
|
||||
builtinFunctionRemainder->setGroupSize(remainingBytes, 1u, 1u);
|
||||
ze_group_count_t dispatchFuncArgs{1u, 1u, 1u};
|
||||
builtinKernelRemainder->setGroupSize(remainingBytes, 1u, 1u);
|
||||
ze_group_count_t dispatchKernelArgs{1u, 1u, 1u};
|
||||
|
||||
builtinFunctionRemainder->setArgBufferWithAlloc(0,
|
||||
dstAllocation.alignedAllocationPtr,
|
||||
dstAllocation.alloc);
|
||||
builtinFunctionRemainder->setArgumentValue(1,
|
||||
sizeof(dstOffsetRemainder),
|
||||
&dstOffsetRemainder);
|
||||
builtinFunctionRemainder->setArgBufferWithAlloc(2,
|
||||
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
|
||||
patternGfxAlloc);
|
||||
builtinFunctionRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
|
||||
builtinKernelRemainder->setArgBufferWithAlloc(0,
|
||||
dstAllocation.alignedAllocationPtr,
|
||||
dstAllocation.alloc);
|
||||
builtinKernelRemainder->setArgumentValue(1,
|
||||
sizeof(dstOffsetRemainder),
|
||||
&dstOffsetRemainder);
|
||||
builtinKernelRemainder->setArgBufferWithAlloc(2,
|
||||
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
|
||||
patternGfxAlloc);
|
||||
builtinKernelRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
|
||||
|
||||
res = appendLaunchKernelSplit(builtinFunctionRemainder, &dispatchFuncArgs, signalEvent, launchParams);
|
||||
res = appendLaunchKernelSplit(builtinKernelRemainder, &dispatchKernelArgs, signalEvent, launchParams);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
|
@ -2220,20 +2220,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
|
|||
|
||||
UNRECOVERABLE_IF(!result);
|
||||
|
||||
Kernel *builtinFunction = nullptr;
|
||||
Kernel *builtinKernel = nullptr;
|
||||
auto useOnlyGlobalTimestamps = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u;
|
||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||
|
||||
if (pOffsets == nullptr) {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps);
|
||||
builtinFunction->setArgumentValue(2u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps);
|
||||
builtinKernel->setArgumentValue(2u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
|
||||
} else {
|
||||
auto pOffsetAllocationStruct = getAlignedAllocation(this->device, pOffsets, sizeof(size_t) * numEvents, false);
|
||||
auto offsetValPtr = static_cast<uintptr_t>(pOffsetAllocationStruct.alloc->getGpuAddress());
|
||||
commandContainer.addToResidencyContainer(pOffsetAllocationStruct.alloc);
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets);
|
||||
builtinFunction->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc);
|
||||
builtinFunction->setArgumentValue(3u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets);
|
||||
builtinKernel->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc);
|
||||
builtinKernel->setArgumentValue(3u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
|
||||
offsetValPtr += sizeof(size_t);
|
||||
}
|
||||
|
||||
|
@ -2241,23 +2241,23 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
|
|||
uint32_t groupSizeY = 1u;
|
||||
uint32_t groupSizeZ = 1u;
|
||||
|
||||
if (builtinFunction->suggestGroupSize(numEvents, 1u, 1u,
|
||||
&groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
if (builtinKernel->suggestGroupSize(numEvents, 1u, 1u,
|
||||
&groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
ze_group_count_t dispatchFuncArgs{numEvents / groupSizeX, 1u, 1u};
|
||||
ze_group_count_t dispatchKernelArgs{numEvents / groupSizeX, 1u, 1u};
|
||||
|
||||
auto dstValPtr = static_cast<uintptr_t>(dstPtrAllocationStruct.alloc->getGpuAddress());
|
||||
|
||||
builtinFunction->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
|
||||
builtinFunction->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
|
||||
builtinKernel->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc);
|
||||
|
||||
auto dstAllocationType = dstPtrAllocationStruct.alloc->getAllocationType();
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
@ -2265,7 +2265,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
|
|||
launchParams.isDestinationAllocationInSystemMemory =
|
||||
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
|
||||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
|
||||
auto appendResult = appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents,
|
||||
auto appendResult = appendLaunchKernel(builtinKernel->toHandle(), &dispatchKernelArgs, hSignalEvent, numWaitEvents,
|
||||
phWaitEvents, launchParams);
|
||||
if (appendResult != ZE_RESULT_SUCCESS) {
|
||||
return appendResult;
|
||||
|
|
|
@ -44,7 +44,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
appendEventForProfiling(event, true, false);
|
||||
const auto functionImmutableData = kernel->getImmutableData();
|
||||
const auto kernelImmutableData = kernel->getImmutableData();
|
||||
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),
|
||||
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
|
||||
this->setCommandListPerThreadScratchSize(perThreadScratchSize);
|
||||
|
@ -52,7 +52,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||
auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0);
|
||||
this->setCommandListSLMEnable(slmEnable);
|
||||
|
||||
auto kernelPreemptionMode = obtainFunctionPreemptionMode(kernel);
|
||||
auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);
|
||||
commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode);
|
||||
|
||||
kernel->patchGlobalOffset();
|
||||
|
@ -169,14 +169,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||
|
||||
appendSignalEventPostWalker(event, false);
|
||||
|
||||
commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation());
|
||||
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
|
||||
auto &residencyContainer = kernel->getResidencyContainer();
|
||||
for (auto resource : residencyContainer) {
|
||||
commandContainer.addToResidencyContainer(resource);
|
||||
}
|
||||
|
||||
if (functionImmutableData->getDescriptor().kernelAttributes.flags.usesPrintf) {
|
||||
storePrintfFunction(kernel);
|
||||
if (kernelImmutableData->getDescriptor().kernelAttributes.flags.usesPrintf) {
|
||||
storePrintfKernel(kernel);
|
||||
}
|
||||
|
||||
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
|
||||
|
|
|
@ -22,13 +22,13 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
|||
|
||||
using BaseClass::BaseClass;
|
||||
|
||||
ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel,
|
||||
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *threadGroupDimensions,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents,
|
||||
const CmdListKernelLaunchParams &launchParams) override;
|
||||
|
||||
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel,
|
||||
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override;
|
||||
|
|
|
@ -163,7 +163,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
|
|||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
|
||||
ze_kernel_handle_t hKernel, const ze_group_count_t *threadGroupDimensions,
|
||||
ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
|
||||
const CmdListKernelLaunchParams &launchParams) {
|
||||
|
||||
|
@ -171,7 +171,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
|
|||
checkAvailableSpace();
|
||||
}
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(hKernel, threadGroupDimensions,
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernelHandle, threadGroupDimensions,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents,
|
||||
launchParams);
|
||||
return flushImmediate(ret, true);
|
||||
|
@ -179,13 +179,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
|
|||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelIndirect(
|
||||
ze_kernel_handle_t hKernel, const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_kernel_handle_t kernelHandle, const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace();
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(hKernel, pDispatchArgumentsBuffer,
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
return flushImmediate(ret, true);
|
||||
}
|
||||
|
|
|
@ -134,7 +134,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
|
||||
UNRECOVERABLE_IF(kernel == nullptr);
|
||||
const auto functionImmutableData = kernel->getImmutableData();
|
||||
const auto kernelImmutableData = kernel->getImmutableData();
|
||||
auto &kernelDescriptor = kernel->getKernelDescriptor();
|
||||
if (kernelDescriptor.kernelAttributes.flags.isInvalid) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
|
@ -142,8 +142,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||
commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
|
||||
commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
|
||||
|
||||
auto functionPreemptionMode = obtainFunctionPreemptionMode(kernel);
|
||||
commandListPreemptionMode = std::min(commandListPreemptionMode, functionPreemptionMode);
|
||||
auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);
|
||||
commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode);
|
||||
|
||||
kernel->patchGlobalOffset();
|
||||
if (launchParams.isIndirect && threadGroupDimensions) {
|
||||
|
@ -287,9 +287,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
|
||||
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
|
||||
}
|
||||
// Attach Function residency to our CommandList residency
|
||||
// Attach kernel residency to our CommandList residency
|
||||
{
|
||||
commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation());
|
||||
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
|
||||
auto &residencyContainer = kernel->getResidencyContainer();
|
||||
for (auto resource : residencyContainer) {
|
||||
commandContainer.addToResidencyContainer(resource);
|
||||
|
@ -299,7 +299,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||
// Store PrintfBuffer from a kernel
|
||||
{
|
||||
if (kernelDescriptor.kernelAttributes.flags.usesPrintf) {
|
||||
storePrintfFunction(kernel);
|
||||
storePrintfKernel(kernel);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -152,16 +152,16 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeout)
|
|||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
void CommandQueueImp::printFunctionsPrintfOutput() {
|
||||
size_t size = this->printfFunctionContainer.size();
|
||||
void CommandQueueImp::printKernelsPrintfOutput() {
|
||||
size_t size = this->printfKernelContainer.size();
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
this->printfFunctionContainer[i]->printPrintfOutput();
|
||||
this->printfKernelContainer[i]->printPrintfOutput();
|
||||
}
|
||||
this->printfFunctionContainer.clear();
|
||||
this->printfKernelContainer.clear();
|
||||
}
|
||||
|
||||
void CommandQueueImp::postSyncOperations() {
|
||||
printFunctionsPrintfOutput();
|
||||
printKernelsPrintfOutput();
|
||||
|
||||
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) {
|
||||
device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId());
|
||||
|
|
|
@ -909,9 +909,9 @@ void CommandQueueHw<gfxCoreFamily>::collectPrintfContentsFromAllCommandsLists(
|
|||
|
||||
for (auto i = 0u; i < numCommandLists; ++i) {
|
||||
auto commandList = CommandList::fromHandle(phCommandLists[i]);
|
||||
this->printfFunctionContainer.insert(this->printfFunctionContainer.end(),
|
||||
commandList->getPrintfFunctionContainer().begin(),
|
||||
commandList->getPrintfFunctionContainer().end());
|
||||
this->printfKernelContainer.insert(this->printfKernelContainer.end(),
|
||||
commandList->getPrintfKernelContainer().begin(),
|
||||
commandList->getPrintfKernelContainer().end());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -87,14 +87,14 @@ struct CommandQueueImp : public CommandQueue {
|
|||
|
||||
ze_result_t synchronizeByPollingForTaskCount(uint64_t timeout);
|
||||
|
||||
void printFunctionsPrintfOutput();
|
||||
void printKernelsPrintfOutput();
|
||||
|
||||
void postSyncOperations();
|
||||
|
||||
CommandBufferManager buffers;
|
||||
NEO::HeapContainer heapContainer;
|
||||
ze_command_queue_desc_t desc;
|
||||
std::vector<Kernel *> printfFunctionContainer;
|
||||
std::vector<Kernel *> printfKernelContainer;
|
||||
|
||||
Device *device = nullptr;
|
||||
NEO::CommandStreamReceiver *csr = nullptr;
|
||||
|
|
|
@ -33,7 +33,7 @@ struct Module : _ze_module_handle_t {
|
|||
virtual Device *getDevice() const = 0;
|
||||
|
||||
virtual ze_result_t createKernel(const ze_kernel_desc_t *desc,
|
||||
ze_kernel_handle_t *phFunction) = 0;
|
||||
ze_kernel_handle_t *kernelHandle) = 0;
|
||||
virtual ze_result_t destroy() = 0;
|
||||
virtual ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) = 0;
|
||||
virtual ze_result_t getFunctionPointer(const char *pKernelName, void **pfnFunction) = 0;
|
||||
|
@ -45,7 +45,7 @@ struct Module : _ze_module_handle_t {
|
|||
ze_module_handle_t *phModules,
|
||||
ze_module_build_log_handle_t *phLinkLog) = 0;
|
||||
|
||||
virtual const KernelImmutableData *getKernelImmutableData(const char *functionName) const = 0;
|
||||
virtual const KernelImmutableData *getKernelImmutableData(const char *kernelName) const = 0;
|
||||
virtual const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const = 0;
|
||||
virtual uint32_t getMaxGroupSize() const = 0;
|
||||
virtual bool isDebugEnabled() const = 0;
|
||||
|
|
|
@ -664,9 +664,9 @@ void ModuleImp::passDebugData() {
|
|||
}
|
||||
}
|
||||
|
||||
const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *functionName) const {
|
||||
const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *kernelName) const {
|
||||
for (auto &kernelImmData : kernelImmDatas) {
|
||||
if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(functionName) == 0) {
|
||||
if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(kernelName) == 0) {
|
||||
return kernelImmData.get();
|
||||
}
|
||||
}
|
||||
|
@ -742,7 +742,7 @@ void ModuleImp::updateBuildLog(NEO::Device *neoDevice) {
|
|||
}
|
||||
|
||||
ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc,
|
||||
ze_kernel_handle_t *phFunction) {
|
||||
ze_kernel_handle_t *kernelHandle) {
|
||||
ze_result_t res;
|
||||
if (!isFullyLinked) {
|
||||
return ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED;
|
||||
|
@ -750,7 +750,7 @@ ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc,
|
|||
auto kernel = Kernel::create(productFamily, this, desc, &res);
|
||||
|
||||
if (res == ZE_RESULT_SUCCESS) {
|
||||
*phFunction = kernel->toHandle();
|
||||
*kernelHandle = kernel->toHandle();
|
||||
}
|
||||
|
||||
return res;
|
||||
|
|
|
@ -89,7 +89,7 @@ struct ModuleImp : public Module {
|
|||
ze_result_t destroy() override;
|
||||
|
||||
ze_result_t createKernel(const ze_kernel_desc_t *desc,
|
||||
ze_kernel_handle_t *phFunction) override;
|
||||
ze_kernel_handle_t *kernelHandle) override;
|
||||
|
||||
ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) override;
|
||||
|
||||
|
@ -107,7 +107,7 @@ struct ModuleImp : public Module {
|
|||
|
||||
ze_result_t getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) override;
|
||||
|
||||
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override;
|
||||
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override;
|
||||
|
||||
const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; }
|
||||
|
||||
|
|
|
@ -110,7 +110,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
|||
~MockModule() override {
|
||||
}
|
||||
|
||||
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override {
|
||||
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override {
|
||||
return mockKernelImmData;
|
||||
}
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
void MultiTileCommandListAppendLaunchFunctionFixture::setUp() {
|
||||
void MultiTileCommandListAppendLaunchKernelFixture::setUp() {
|
||||
DebugManager.flags.EnableImplicitScaling.set(1);
|
||||
|
||||
MultiDeviceFixture::numRootDevices = 1u;
|
||||
|
@ -35,14 +35,14 @@ void MultiTileCommandListAppendLaunchFunctionFixture::setUp() {
|
|||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
}
|
||||
|
||||
void MultiTileCommandListAppendLaunchFunctionFixture::tearDown() {
|
||||
void MultiTileCommandListAppendLaunchKernelFixture::tearDown() {
|
||||
commandList->destroy();
|
||||
contextImp->destroy();
|
||||
|
||||
MultiDeviceModuleFixture::tearDown();
|
||||
}
|
||||
|
||||
void MultiTileImmediateCommandListAppendLaunchFunctionFixture::setUp() {
|
||||
void MultiTileImmediateCommandListAppendLaunchKernelFixture::setUp() {
|
||||
DebugManager.flags.EnableImplicitScaling.set(1);
|
||||
|
||||
MultiDeviceFixture::numRootDevices = 1u;
|
||||
|
@ -61,7 +61,7 @@ void MultiTileImmediateCommandListAppendLaunchFunctionFixture::setUp() {
|
|||
contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext));
|
||||
}
|
||||
|
||||
void MultiTileImmediateCommandListAppendLaunchFunctionFixture::tearDown() {
|
||||
void MultiTileImmediateCommandListAppendLaunchKernelFixture::tearDown() {
|
||||
contextImp->destroy();
|
||||
|
||||
MultiDeviceModuleFixture::tearDown();
|
||||
|
|
|
@ -18,7 +18,7 @@ struct Device;
|
|||
|
||||
namespace ult {
|
||||
|
||||
struct MultiTileCommandListAppendLaunchFunctionFixture : public MultiDeviceModuleFixture {
|
||||
struct MultiTileCommandListAppendLaunchKernelFixture : public MultiDeviceModuleFixture {
|
||||
void setUp();
|
||||
void tearDown();
|
||||
|
||||
|
@ -28,7 +28,7 @@ struct MultiTileCommandListAppendLaunchFunctionFixture : public MultiDeviceModul
|
|||
VariableBackup<bool> backup{&NEO::ImplicitScaling::apiSupport, true};
|
||||
};
|
||||
|
||||
struct MultiTileImmediateCommandListAppendLaunchFunctionFixture : public MultiDeviceModuleFixture {
|
||||
struct MultiTileImmediateCommandListAppendLaunchKernelFixture : public MultiDeviceModuleFixture {
|
||||
void setUp();
|
||||
void tearDown();
|
||||
|
||||
|
|
|
@ -27,9 +27,9 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
|
|||
void SetUp() override {
|
||||
DeviceFixture::setUp();
|
||||
|
||||
dispatchFunctionArguments.groupCountX = 1u;
|
||||
dispatchFunctionArguments.groupCountY = 2u;
|
||||
dispatchFunctionArguments.groupCountZ = 3u;
|
||||
dispatchKernelArguments.groupCountX = 1u;
|
||||
dispatchKernelArguments.groupCountY = 2u;
|
||||
dispatchKernelArguments.groupCountZ = 3u;
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
|
@ -44,18 +44,18 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
|
|||
}
|
||||
|
||||
std::vector<void *> isaBuffers;
|
||||
ze_group_count_t dispatchFunctionArguments;
|
||||
ze_group_count_t dispatchKernelArguments;
|
||||
void *buffer = nullptr;
|
||||
|
||||
void initializeFunction(WhiteBox<::L0::Kernel> &function,
|
||||
WhiteBox<::L0::KernelImmutableData> &functionData,
|
||||
L0::Device *device) {
|
||||
void initializeKernel(WhiteBox<::L0::Kernel> &kernel,
|
||||
WhiteBox<::L0::KernelImmutableData> &kernelData,
|
||||
L0::Device *device) {
|
||||
|
||||
uint32_t isaSize = 4096;
|
||||
void *isaBuffer = malloc(isaSize);
|
||||
isaBuffers.push_back(isaBuffer);
|
||||
|
||||
functionData.device = device;
|
||||
kernelData.device = device;
|
||||
if (!buffer) {
|
||||
buffer = alignedMalloc(isaSize, 64);
|
||||
}
|
||||
|
@ -70,22 +70,22 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
|
|||
if (isaBuffer != nullptr) {
|
||||
memcpy_s(allocation->getUnderlyingBuffer(), allocation->getUnderlyingBufferSize(), isaBuffer, isaSize);
|
||||
}
|
||||
functionData.isaGraphicsAllocation.reset(allocation);
|
||||
kernelData.isaGraphicsAllocation.reset(allocation);
|
||||
|
||||
uint32_t crossThreadDataSize = 128;
|
||||
|
||||
function.crossThreadData.reset(new uint8_t[crossThreadDataSize]);
|
||||
function.crossThreadDataSize = crossThreadDataSize;
|
||||
kernel.crossThreadData.reset(new uint8_t[crossThreadDataSize]);
|
||||
kernel.crossThreadDataSize = crossThreadDataSize;
|
||||
|
||||
uint32_t perThreadDataSize = 128;
|
||||
|
||||
function.perThreadDataForWholeThreadGroup = static_cast<uint8_t *>(alignedMalloc(perThreadDataSize, 32));
|
||||
function.perThreadDataSize = perThreadDataSize;
|
||||
kernel.perThreadDataForWholeThreadGroup = static_cast<uint8_t *>(alignedMalloc(perThreadDataSize, 32));
|
||||
kernel.perThreadDataSize = perThreadDataSize;
|
||||
|
||||
function.kernelImmData = &functionData;
|
||||
kernel.kernelImmData = &kernelData;
|
||||
}
|
||||
void cleanupFunction(WhiteBox<::L0::KernelImmutableData> &functionData) {
|
||||
functionData.isaGraphicsAllocation.reset(nullptr);
|
||||
void cleanupKernel(WhiteBox<::L0::KernelImmutableData> &kernelData) {
|
||||
kernelData.isaGraphicsAllocation.reset(nullptr);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -102,99 +102,99 @@ GEN9TEST_F(CommandListCreateGen9, WhenGettingCommandListPreemptionModeThenMatche
|
|||
}
|
||||
|
||||
GEN9TEST_F(CommandListCreateGen9, GivenDisabledMidThreadPreemptionWhenLaunchingKernelThenThreadGroupModeSet) {
|
||||
WhiteBox<::L0::KernelImmutableData> funcInfoThreadGroupData = {};
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfoThreadGroupData = {};
|
||||
NEO::KernelDescriptor kernelDescriptor;
|
||||
funcInfoThreadGroupData.kernelDescriptor = &kernelDescriptor;
|
||||
WhiteBox<::L0::Kernel> functionThreadGroup;
|
||||
kernelInfoThreadGroupData.kernelDescriptor = &kernelDescriptor;
|
||||
WhiteBox<::L0::Kernel> kernelThreadGroup;
|
||||
|
||||
funcInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
|
||||
kernelInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
|
||||
|
||||
initializeFunction(functionThreadGroup, funcInfoThreadGroupData, device);
|
||||
initializeKernel(kernelThreadGroup, kernelInfoThreadGroupData, device);
|
||||
|
||||
ze_result_t returnValue;
|
||||
auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
|
||||
EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode());
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
commandList->appendLaunchKernel(functionThreadGroup.toHandle(),
|
||||
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
|
||||
commandList->appendLaunchKernel(kernelThreadGroup.toHandle(),
|
||||
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
|
||||
|
||||
auto result = commandList->close();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
|
||||
cleanupFunction(funcInfoThreadGroupData);
|
||||
cleanupKernel(kernelInfoThreadGroupData);
|
||||
delete commandList;
|
||||
}
|
||||
|
||||
GEN9TEST_F(CommandListCreateGen9, GivenUsesFencesForReadWriteImagesWhenLaunchingKernelThenMidBatchModeSet) {
|
||||
WhiteBox<::L0::KernelImmutableData> funcInfoMidBatchData = {};
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfoMidBatchData = {};
|
||||
NEO::KernelDescriptor kernelDescriptor;
|
||||
funcInfoMidBatchData.kernelDescriptor = &kernelDescriptor;
|
||||
WhiteBox<::L0::Kernel> functionMidBatch;
|
||||
kernelInfoMidBatchData.kernelDescriptor = &kernelDescriptor;
|
||||
WhiteBox<::L0::Kernel> kernelMidBatch;
|
||||
|
||||
funcInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
|
||||
funcInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.usesFencesForReadWriteImages = 1;
|
||||
kernelInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
|
||||
kernelInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.usesFencesForReadWriteImages = 1;
|
||||
|
||||
device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable.flags.waDisableLSQCROPERFforOCL = true;
|
||||
|
||||
initializeFunction(functionMidBatch, funcInfoMidBatchData, device);
|
||||
initializeKernel(kernelMidBatch, kernelInfoMidBatchData, device);
|
||||
|
||||
ze_result_t returnValue;
|
||||
auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
|
||||
EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode());
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
commandList->appendLaunchKernel(functionMidBatch.toHandle(),
|
||||
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
|
||||
commandList->appendLaunchKernel(kernelMidBatch.toHandle(),
|
||||
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(NEO::PreemptionMode::MidBatch, commandList->getCommandListPreemptionMode());
|
||||
|
||||
auto result = commandList->close();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(NEO::PreemptionMode::MidBatch, commandList->getCommandListPreemptionMode());
|
||||
cleanupFunction(funcInfoMidBatchData);
|
||||
cleanupKernel(kernelInfoMidBatchData);
|
||||
delete commandList;
|
||||
}
|
||||
|
||||
GEN9TEST_F(CommandListCreateGen9, WhenCommandListHasLowerPreemptionLevelThenDoNotIncreaseAgain) {
|
||||
WhiteBox<::L0::KernelImmutableData> funcInfoThreadGroupData = {};
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfoThreadGroupData = {};
|
||||
NEO::KernelDescriptor kernelDescriptor;
|
||||
funcInfoThreadGroupData.kernelDescriptor = &kernelDescriptor;
|
||||
WhiteBox<::L0::Kernel> functionThreadGroup;
|
||||
kernelInfoThreadGroupData.kernelDescriptor = &kernelDescriptor;
|
||||
WhiteBox<::L0::Kernel> kernelThreadGroup;
|
||||
|
||||
funcInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
|
||||
kernelInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
|
||||
|
||||
initializeFunction(functionThreadGroup, funcInfoThreadGroupData, device);
|
||||
initializeKernel(kernelThreadGroup, kernelInfoThreadGroupData, device);
|
||||
|
||||
WhiteBox<::L0::KernelImmutableData> funcInfoMidThreadData = {};
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfoMidThreadData = {};
|
||||
NEO::KernelDescriptor kernelDescriptor2;
|
||||
funcInfoMidThreadData.kernelDescriptor = &kernelDescriptor2;
|
||||
kernelInfoMidThreadData.kernelDescriptor = &kernelDescriptor2;
|
||||
|
||||
WhiteBox<::L0::Kernel> functionMidThread;
|
||||
WhiteBox<::L0::Kernel> kernelMidThread;
|
||||
|
||||
initializeFunction(functionMidThread, funcInfoMidThreadData, device);
|
||||
initializeKernel(kernelMidThread, kernelInfoMidThreadData, device);
|
||||
|
||||
ze_result_t returnValue;
|
||||
auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
|
||||
EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode());
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
commandList->appendLaunchKernel(functionThreadGroup.toHandle(),
|
||||
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
|
||||
commandList->appendLaunchKernel(kernelThreadGroup.toHandle(),
|
||||
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
|
||||
|
||||
commandList->appendLaunchKernel(functionMidThread.toHandle(),
|
||||
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
|
||||
commandList->appendLaunchKernel(kernelMidThread.toHandle(),
|
||||
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
|
||||
|
||||
auto result = commandList->close();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
|
||||
cleanupFunction(funcInfoThreadGroupData);
|
||||
cleanupFunction(funcInfoMidThreadData);
|
||||
cleanupKernel(kernelInfoThreadGroupData);
|
||||
cleanupKernel(kernelInfoMidThreadData);
|
||||
delete commandList;
|
||||
}
|
||||
} // namespace ult
|
||||
|
|
|
@ -81,23 +81,23 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
|||
}
|
||||
|
||||
ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels,
|
||||
const ze_kernel_handle_t *phKernels,
|
||||
const ze_kernel_handle_t *kernelHandles,
|
||||
const uint32_t *pNumLaunchArguments,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override {
|
||||
appendEventMultipleKernelIndirectEventHandleValue = hEvent;
|
||||
return BaseClass::appendLaunchMultipleKernelsIndirect(numKernels, phKernels, pNumLaunchArguments, pLaunchArgumentsBuffer,
|
||||
return BaseClass::appendLaunchMultipleKernelsIndirect(numKernels, kernelHandles, pNumLaunchArguments, pLaunchArgumentsBuffer,
|
||||
hEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel,
|
||||
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) override {
|
||||
appendEventKernelIndirectEventHandleValue = hEvent;
|
||||
return BaseClass::appendLaunchKernelIndirect(hKernel, pDispatchArgumentsBuffer,
|
||||
return BaseClass::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
|
||||
hEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
|
@ -163,29 +163,29 @@ struct MockCommandList : public CommandList {
|
|||
ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ());
|
||||
|
||||
ADDMETHOD_NOBASE(appendLaunchKernel, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(ze_kernel_handle_t hFunction,
|
||||
(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *threadGroupDimensions,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents,
|
||||
const CmdListKernelLaunchParams &launchParams));
|
||||
|
||||
ADDMETHOD_NOBASE(appendLaunchCooperativeKernel, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(ze_kernel_handle_t hKernel,
|
||||
const ze_group_count_t *pLaunchFuncArgs,
|
||||
(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *launchKernelArgs,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents));
|
||||
|
||||
ADDMETHOD_NOBASE(appendLaunchKernelIndirect, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(ze_kernel_handle_t hFunction,
|
||||
(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *pDispatchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents));
|
||||
|
||||
ADDMETHOD_NOBASE(appendLaunchMultipleKernelsIndirect, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
(uint32_t numFunctions,
|
||||
const ze_kernel_handle_t *phFunctions,
|
||||
(uint32_t numKernels,
|
||||
const ze_kernel_handle_t *kernelHandles,
|
||||
const uint32_t *pNumLaunchArguments,
|
||||
const ze_group_count_t *pLaunchArgumentsBuffer,
|
||||
ze_event_handle_t hEvent,
|
||||
|
@ -505,14 +505,14 @@ class MockCommandListForAppendLaunchKernel : public WhiteBox<::L0::CommandListCo
|
|||
|
||||
public:
|
||||
CmdListHelper cmdListHelper;
|
||||
ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel,
|
||||
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *threadGroupDimensions,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents,
|
||||
const CmdListKernelLaunchParams &launchParams) override {
|
||||
|
||||
const auto kernel = Kernel::fromHandle(hKernel);
|
||||
const auto kernel = Kernel::fromHandle(kernelHandle);
|
||||
cmdListHelper.isaAllocation = kernel->getIsaAllocation();
|
||||
cmdListHelper.residencyContainer = kernel->getResidencyContainer();
|
||||
cmdListHelper.groupSize = kernel->getGroupSize();
|
||||
|
|
|
@ -28,7 +28,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
|
|||
using BaseClass::csr;
|
||||
using BaseClass::device;
|
||||
using BaseClass::preemptionCmdSyncProgramming;
|
||||
using BaseClass::printfFunctionContainer;
|
||||
using BaseClass::printfKernelContainer;
|
||||
using BaseClass::submitBatchBuffer;
|
||||
using BaseClass::synchronizeByPollingForTaskCount;
|
||||
using BaseClass::taskCount;
|
||||
|
@ -62,7 +62,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
|
|||
using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>;
|
||||
using BaseClass::commandStream;
|
||||
using BaseClass::prepareAndSubmitBatchBuffer;
|
||||
using BaseClass::printfFunctionContainer;
|
||||
using BaseClass::printfKernelContainer;
|
||||
using L0::CommandQueue::activeSubDevices;
|
||||
using L0::CommandQueue::internalUsage;
|
||||
using L0::CommandQueue::multiReturnPointCommandList;
|
||||
|
|
|
@ -20,9 +20,9 @@ struct MockDeviceForRebuildBuilins : public Mock<DeviceImp> {
|
|||
MockModuleForRebuildBuiltins(Device *device, ModuleType type) : ModuleImp(device, nullptr, type) {}
|
||||
|
||||
ze_result_t createKernel(const ze_kernel_desc_t *desc,
|
||||
ze_kernel_handle_t *phFunction) override {
|
||||
ze_kernel_handle_t *kernelHandle) override {
|
||||
|
||||
*phFunction = new Mock<Kernel>();
|
||||
*kernelHandle = new Mock<Kernel>();
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -43,11 +43,11 @@ struct Mock<Module> : public Module {
|
|||
Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type) : WhiteBox(device, moduleBuildLog, type) {}
|
||||
Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog) : Mock(device, moduleBuildLog, ModuleType::User){};
|
||||
|
||||
ADDMETHOD_NOBASE(createKernel, ze_result_t, ZE_RESULT_SUCCESS, (const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction));
|
||||
ADDMETHOD_NOBASE(createKernel, ze_result_t, ZE_RESULT_SUCCESS, (const ze_kernel_desc_t *desc, ze_kernel_handle_t *kernelHandle));
|
||||
ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ());
|
||||
ADDMETHOD_NOBASE(getFunctionPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pKernelName, void **pfnFunction));
|
||||
ADDMETHOD_NOBASE(getNativeBinary, ze_result_t, ZE_RESULT_SUCCESS, (size_t * pSize, uint8_t *pModuleNativeBinary));
|
||||
ADDMETHOD_CONST_NOBASE(getKernelImmutableData, const L0::KernelImmutableData *, nullptr, (const char *functionName));
|
||||
ADDMETHOD_CONST_NOBASE(getKernelImmutableData, const L0::KernelImmutableData *, nullptr, (const char *kernelName));
|
||||
ADDMETHOD_CONST_NOBASE(getMaxGroupSize, uint32_t, 256, ());
|
||||
ADDMETHOD_NOBASE(getKernelNames, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, const char **pNames));
|
||||
ADDMETHOD_NOBASE(performDynamicLink, ze_result_t, ZE_RESULT_SUCCESS,
|
||||
|
@ -91,7 +91,7 @@ struct MockModule : public L0::ModuleImp {
|
|||
|
||||
~MockModule() override = default;
|
||||
|
||||
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override {
|
||||
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override {
|
||||
return kernelImmData;
|
||||
}
|
||||
|
||||
|
|
|
@ -200,8 +200,8 @@ HWTEST_F(TestBuiltinFunctionsLibImplDefault, givenRebuildPrecompiledKernelsDebug
|
|||
MockModuleForRebuildBuiltins(Device *device) : ModuleImp(device, nullptr, ModuleType::Builtin) {}
|
||||
|
||||
ze_result_t createKernel(const ze_kernel_desc_t *desc,
|
||||
ze_kernel_handle_t *phFunction) override {
|
||||
*phFunction = nullptr;
|
||||
ze_kernel_handle_t *kernelHandle) override {
|
||||
*kernelHandle = nullptr;
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -262,14 +262,14 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenSignalEventWhenAppendLaunchMultipl
|
|||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
|
||||
const ze_kernel_handle_t launchFn = kernel->toHandle();
|
||||
const ze_kernel_handle_t launchKernels = kernel->toHandle();
|
||||
uint32_t *numLaunchArgs;
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
returnValue = context->allocDeviceMem(
|
||||
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
returnValue = commandList->appendLaunchMultipleKernelsIndirect(1, &launchFn, numLaunchArgs, nullptr, event->toHandle(), 0, nullptr);
|
||||
returnValue = commandList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, numLaunchArgs, nullptr, event->toHandle(), 0, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
EXPECT_EQ(event->toHandle(), commandList->appendEventMultipleKernelIndirectEventHandleValue);
|
||||
|
||||
|
|
|
@ -72,11 +72,11 @@ TEST(zeCommandListAppendWriteGlobalTimestamp, whenCalledThenRedirectedToObject)
|
|||
TEST(zeCommandListAppendLaunchKernel, whenCalledThenRedirectedToObject) {
|
||||
MockCommandList commandList;
|
||||
Mock<::L0::Kernel> kernel;
|
||||
ze_group_count_t dispatchFunctionArguments;
|
||||
ze_group_count_t dispatchKernelArguments;
|
||||
|
||||
auto result =
|
||||
zeCommandListAppendLaunchKernel(commandList.toHandle(), kernel.toHandle(),
|
||||
&dispatchFunctionArguments, nullptr, 0, nullptr);
|
||||
&dispatchKernelArguments, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
TEST(zeCommandListAppendEventReset, whenCalledThenRedirectedToObject) {
|
||||
|
|
|
@ -193,8 +193,8 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
|
|||
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size());
|
||||
EXPECT_EQ(kernel.get(), commandList->getPrintfFunctionContainer()[0]);
|
||||
EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
|
||||
EXPECT_EQ(kernel.get(), commandList->getPrintfKernelContainer()[0]);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListMultipleTimesThenKernelIsStoredOnce) {
|
||||
|
@ -208,12 +208,12 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
|
|||
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size());
|
||||
EXPECT_EQ(kernel.get(), commandList->getPrintfFunctionContainer()[0]);
|
||||
EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
|
||||
EXPECT_EQ(kernel.get(), commandList->getPrintfKernelContainer()[0]);
|
||||
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size());
|
||||
EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) {
|
||||
|
@ -617,8 +617,8 @@ HWTEST_F(CommandListAppendLaunchKernel, givenCommandListWhenResetCalledThenState
|
|||
commandList->commandContainer.getResidencyContainer().size());
|
||||
ASSERT_EQ(commandListControl->commandContainer.getDeallocationContainer().size(),
|
||||
commandList->commandContainer.getDeallocationContainer().size());
|
||||
ASSERT_EQ(commandListControl->getPrintfFunctionContainer().size(),
|
||||
commandList->getPrintfFunctionContainer().size());
|
||||
ASSERT_EQ(commandListControl->getPrintfKernelContainer().size(),
|
||||
commandList->getPrintfKernelContainer().size());
|
||||
ASSERT_EQ(commandListControl->commandContainer.getCommandStream()->getUsed(), commandList->commandContainer.getCommandStream()->getUsed());
|
||||
ASSERT_EQ(commandListControl->commandContainer.slmSize, commandList->commandContainer.slmSize);
|
||||
|
||||
|
|
|
@ -1276,9 +1276,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp
|
|||
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
|
||||
}
|
||||
|
||||
using MultiTileCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileCommandListAppendLaunchFunctionFixture>;
|
||||
using MultiTileCommandListAppendLaunchKernelXeHpCoreTest = Test<MultiTileCommandListAppendLaunchKernelFixture>;
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingEnabledWhenAppendingKernelWithEventThenAllEventPacketsAreUsed) {
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchKernelXeHpCoreTest, givenImplicitScalingEnabledWhenAppendingKernelWithEventThenAllEventPacketsAreUsed) {
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
@ -1307,7 +1307,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTes
|
|||
EXPECT_EQ(4u, commandList->partitionCount);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperativeKernelWhenAppendingKernelsThenDoNotUseImplicitScaling, IsAtLeastXeHpCore) {
|
||||
HWTEST2_F(MultiTileCommandListAppendLaunchKernelXeHpCoreTest, givenCooperativeKernelWhenAppendingKernelsThenDoNotUseImplicitScaling, IsAtLeastXeHpCore) {
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
|
||||
auto commandListWithNonCooperativeKernel = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
|
@ -1342,7 +1342,7 @@ HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperative
|
|||
EXPECT_TRUE(cmd->getWorkloadPartitionEnable());
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest,
|
||||
HWTEST2_F(MultiTileCommandListAppendLaunchKernelXeHpCoreTest,
|
||||
givenRegularCommandListWhenSynchronizationRequiredThenExpectJumpingBbStartCommandToSecondary, IsAtLeastXeHpCore) {
|
||||
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
|
|
|
@ -128,12 +128,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMult
|
|||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
ze_result_t returnValue;
|
||||
auto commandList = std::unique_ptr<L0::CommandList>(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
|
||||
const ze_kernel_handle_t launchFn = kernel->toHandle();
|
||||
const ze_kernel_handle_t launchKernels = kernel->toHandle();
|
||||
uint32_t *numLaunchArgs;
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
auto result = context->allocDeviceMem(
|
||||
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
|
||||
result = commandList->appendLaunchMultipleKernelsIndirect(1, &launchFn, numLaunchArgs, nullptr, nullptr, 0, nullptr);
|
||||
result = commandList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, numLaunchArgs, nullptr, nullptr, 0, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
*numLaunchArgs = 0;
|
||||
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
|
@ -156,13 +156,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMult
|
|||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
ze_result_t returnValue;
|
||||
auto commandList = std::unique_ptr<L0::CommandList>(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
|
||||
const ze_kernel_handle_t launchFn[3] = {kernel->toHandle(), kernel->toHandle(), kernel->toHandle()};
|
||||
const ze_kernel_handle_t launchKernels[3] = {kernel->toHandle(), kernel->toHandle(), kernel->toHandle()};
|
||||
uint32_t *numLaunchArgs;
|
||||
const uint32_t numKernels = 3;
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
auto result = context->allocDeviceMem(
|
||||
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
|
||||
result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchFn, numLaunchArgs, nullptr, nullptr, 0, nullptr);
|
||||
result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchKernels, numLaunchArgs, nullptr, nullptr, 0, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
*numLaunchArgs = 2;
|
||||
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
|
||||
|
@ -710,9 +710,9 @@ HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWit
|
|||
context->freeMem(alloc);
|
||||
}
|
||||
|
||||
using MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileImmediateCommandListAppendLaunchFunctionFixture>;
|
||||
using MultiTileImmediateCommandListAppendLaunchKernelXeHpCoreTest = Test<MultiTileImmediateCommandListAppendLaunchKernelFixture>;
|
||||
|
||||
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListThenDoNotAddSelfCleanup, IsAtLeastXeHpCore) {
|
||||
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchKernelXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListThenDoNotAddSelfCleanup, IsAtLeastXeHpCore) {
|
||||
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
@ -775,7 +775,7 @@ HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenIm
|
|||
EXPECT_EQ(cmdList.end(), itorSemaphoreWait);
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListWithoutFlushTaskThenUseSecondaryBuffer, IsAtLeastXeHpCore) {
|
||||
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchKernelXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListWithoutFlushTaskThenUseSecondaryBuffer, IsAtLeastXeHpCore) {
|
||||
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
|
|
@ -63,7 +63,7 @@ TEST_F(CommandQueueCreate, whenCreatingCommandQueueThenItIsInitialized) {
|
|||
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
|
||||
}
|
||||
|
||||
TEST_F(CommandQueueCreate, whenSynchronizeByPollingTaskCountThenCallsPrintOutputOnPrintfFunctionsStoredAndClearsFunctionContainer) {
|
||||
TEST_F(CommandQueueCreate, whenSynchronizeByPollingTaskCountThenCallsPrintOutputOnPrintfKernelsStoredAndClearsKernelContainer) {
|
||||
const ze_command_queue_desc_t desc{};
|
||||
ze_result_t returnValue;
|
||||
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
|
||||
|
@ -76,12 +76,12 @@ TEST_F(CommandQueueCreate, whenSynchronizeByPollingTaskCountThenCallsPrintOutput
|
|||
|
||||
Mock<Kernel> kernel1, kernel2;
|
||||
|
||||
commandQueue->printfFunctionContainer.push_back(&kernel1);
|
||||
commandQueue->printfFunctionContainer.push_back(&kernel2);
|
||||
commandQueue->printfKernelContainer.push_back(&kernel1);
|
||||
commandQueue->printfKernelContainer.push_back(&kernel2);
|
||||
|
||||
commandQueue->synchronizeByPollingForTaskCount(0u);
|
||||
|
||||
EXPECT_EQ(0u, commandQueue->printfFunctionContainer.size());
|
||||
EXPECT_EQ(0u, commandQueue->printfKernelContainer.size());
|
||||
EXPECT_EQ(1u, kernel1.printPrintfOutputCalledTimes);
|
||||
EXPECT_EQ(1u, kernel2.printPrintfOutputCalledTimes);
|
||||
|
||||
|
@ -255,9 +255,9 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
|
|||
auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
||||
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
|
||||
ze_group_count_t dispatchKernelArguments{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
|
||||
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
|
||||
|
||||
const size_t numHandles = 100;
|
||||
ze_command_list_handle_t cmdListHandles[numHandles];
|
||||
|
@ -305,9 +305,9 @@ HWTEST_F(CommandQueueCreate, givenLogicalStateHelperWhenExecutingThenMergeStates
|
|||
auto commandList = std::unique_ptr<L0::ult::CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
|
||||
commandList->nonImmediateLogicalStateHelper.reset(mockCmdListLogicalStateHelper);
|
||||
|
||||
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
|
||||
ze_group_count_t dispatchKernelArguments{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
|
||||
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
|
||||
|
||||
ze_command_list_handle_t cmdListHandles = commandList->toHandle();
|
||||
|
||||
|
@ -341,9 +341,9 @@ HWTEST_F(CommandQueueCreate, givenLogicalStateHelperAndImmediateCmdListWhenExecu
|
|||
|
||||
auto commandList = std::unique_ptr<L0::ult::CommandList>(whiteboxCast(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)));
|
||||
|
||||
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
|
||||
ze_group_count_t dispatchKernelArguments{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
|
||||
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
|
||||
|
||||
ze_command_list_handle_t cmdListHandles = commandList->toHandle();
|
||||
|
||||
|
@ -367,9 +367,9 @@ HWTEST2_F(CommandQueueCreate, givenGpuHangInReservingLinearStreamWhenExecutingCo
|
|||
auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
||||
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
|
||||
ze_group_count_t dispatchKernelArguments{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
|
||||
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
|
||||
|
||||
ze_command_list_handle_t cmdListHandles[1] = {commandList->toHandle()};
|
||||
|
||||
|
|
|
@ -745,9 +745,9 @@ HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAnd
|
|||
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
kernel.residencyContainer.push_back(gpuAlloc);
|
||||
|
||||
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
|
||||
ze_group_count_t dispatchKernelArguments{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
|
||||
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
|
||||
auto deviceImp = static_cast<DeviceImp *>(device);
|
||||
auto pageFaultCmdQueue = whiteboxCast(deviceImp->pageFaultCommandList->cmdQImmediate);
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
target_sources(${TARGET_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_function.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_kernel.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_kernel_2.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_sampler_patch_value.cpp
|
||||
)
|
||||
|
|
|
@ -20,26 +20,26 @@ TEST_F(KernelImp, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSizeAn
|
|||
uint32_t *crossThreadData =
|
||||
reinterpret_cast<uint32_t *>(alignedMalloc(sizeof(uint32_t[6]), 32));
|
||||
|
||||
WhiteBox<::L0::KernelImmutableData> funcInfo = {};
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfo = {};
|
||||
NEO::KernelDescriptor descriptor;
|
||||
funcInfo.kernelDescriptor = &descriptor;
|
||||
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[0] = 0 * sizeof(uint32_t);
|
||||
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[1] = 1 * sizeof(uint32_t);
|
||||
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[2] = 2 * sizeof(uint32_t);
|
||||
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[0] = 3 * sizeof(uint32_t);
|
||||
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[1] = 4 * sizeof(uint32_t);
|
||||
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[2] = 5 * sizeof(uint32_t);
|
||||
kernelInfo.kernelDescriptor = &descriptor;
|
||||
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[0] = 0 * sizeof(uint32_t);
|
||||
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[1] = 1 * sizeof(uint32_t);
|
||||
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[2] = 2 * sizeof(uint32_t);
|
||||
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[0] = 3 * sizeof(uint32_t);
|
||||
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[1] = 4 * sizeof(uint32_t);
|
||||
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[2] = 5 * sizeof(uint32_t);
|
||||
|
||||
Mock<Kernel> function;
|
||||
function.kernelImmData = &funcInfo;
|
||||
function.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData));
|
||||
function.crossThreadDataSize = sizeof(uint32_t[6]);
|
||||
function.groupSize[0] = 2;
|
||||
function.groupSize[1] = 3;
|
||||
function.groupSize[2] = 5;
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
kernel.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData));
|
||||
kernel.crossThreadDataSize = sizeof(uint32_t[6]);
|
||||
kernel.groupSize[0] = 2;
|
||||
kernel.groupSize[1] = 3;
|
||||
kernel.groupSize[2] = 5;
|
||||
|
||||
function.KernelImp::setGroupCount(7, 11, 13);
|
||||
auto crossThread = function.KernelImp::getCrossThreadData();
|
||||
kernel.KernelImp::setGroupCount(7, 11, 13);
|
||||
auto crossThread = kernel.KernelImp::getCrossThreadData();
|
||||
ASSERT_NE(nullptr, crossThread);
|
||||
const uint32_t *globalWorkSizes = reinterpret_cast<const uint32_t *>(crossThread);
|
||||
EXPECT_EQ(2U * 7U, globalWorkSizes[0]);
|
||||
|
@ -51,30 +51,30 @@ TEST_F(KernelImp, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSizeAn
|
|||
EXPECT_EQ(11U, numGroups[1]);
|
||||
EXPECT_EQ(13U, numGroups[2]);
|
||||
|
||||
function.crossThreadData.release();
|
||||
kernel.crossThreadData.release();
|
||||
alignedFree(crossThreadData);
|
||||
}
|
||||
|
||||
TEST_F(KernelImp, givenExecutionMaskWithoutReminderWhenProgrammingItsValueThenSetValidNumberOfBits) {
|
||||
NEO::KernelDescriptor descriptor = {};
|
||||
WhiteBox<KernelImmutableData> funcInfo = {};
|
||||
funcInfo.kernelDescriptor = &descriptor;
|
||||
WhiteBox<KernelImmutableData> kernelInfo = {};
|
||||
kernelInfo.kernelDescriptor = &descriptor;
|
||||
|
||||
Mock<Module> module(device, nullptr);
|
||||
Mock<Kernel> function;
|
||||
function.kernelImmData = &funcInfo;
|
||||
function.module = &module;
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
kernel.module = &module;
|
||||
|
||||
const std::array<uint32_t, 4> testedSimd = {{1, 8, 16, 32}};
|
||||
|
||||
for (auto simd : testedSimd) {
|
||||
descriptor.kernelAttributes.simdSize = simd;
|
||||
function.KernelImp::setGroupSize(simd, 1, 1);
|
||||
kernel.KernelImp::setGroupSize(simd, 1, 1);
|
||||
|
||||
if (simd == 1) {
|
||||
EXPECT_EQ(maxNBitValue(32), function.KernelImp::getThreadExecutionMask());
|
||||
EXPECT_EQ(maxNBitValue(32), kernel.KernelImp::getThreadExecutionMask());
|
||||
} else {
|
||||
EXPECT_EQ(maxNBitValue(simd), function.KernelImp::getThreadExecutionMask());
|
||||
EXPECT_EQ(maxNBitValue(simd), kernel.KernelImp::getThreadExecutionMask());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -82,20 +82,20 @@ TEST_F(KernelImp, givenExecutionMaskWithoutReminderWhenProgrammingItsValueThenSe
|
|||
TEST_F(KernelImp, WhenSuggestingGroupSizeThenClampToMaxGroupSize) {
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
WhiteBox<KernelImmutableData> funcInfo = {};
|
||||
WhiteBox<KernelImmutableData> kernelInfo = {};
|
||||
NEO::KernelDescriptor descriptor;
|
||||
funcInfo.kernelDescriptor = &descriptor;
|
||||
kernelInfo.kernelDescriptor = &descriptor;
|
||||
|
||||
NEO::DebugManager.flags.EnableComputeWorkSizeND.set(false);
|
||||
|
||||
Mock<Module> module(device, nullptr);
|
||||
module.getMaxGroupSizeResult = 8;
|
||||
|
||||
Mock<Kernel> function;
|
||||
function.kernelImmData = &funcInfo;
|
||||
function.module = &module;
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
kernel.module = &module;
|
||||
uint32_t groupSize[3];
|
||||
function.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||
kernel.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||
EXPECT_EQ(8U, groupSize[0]);
|
||||
EXPECT_EQ(1U, groupSize[1]);
|
||||
EXPECT_EQ(1U, groupSize[2]);
|
||||
|
@ -118,9 +118,9 @@ INSTANTIATE_TEST_CASE_P(, KernelImpSuggestGroupSize,
|
|||
TEST_P(KernelImpSuggestGroupSize, WhenSuggestingGroupThenProperGroupSizeChosen) {
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
WhiteBox<KernelImmutableData> funcInfo = {};
|
||||
WhiteBox<KernelImmutableData> kernelInfo = {};
|
||||
NEO::KernelDescriptor descriptor;
|
||||
funcInfo.kernelDescriptor = &descriptor;
|
||||
kernelInfo.kernelDescriptor = &descriptor;
|
||||
|
||||
NEO::DebugManager.flags.EnableComputeWorkSizeND.set(false);
|
||||
|
||||
|
@ -128,105 +128,105 @@ TEST_P(KernelImpSuggestGroupSize, WhenSuggestingGroupThenProperGroupSizeChosen)
|
|||
|
||||
uint32_t size = GetParam();
|
||||
|
||||
Mock<Kernel> function;
|
||||
function.kernelImmData = &funcInfo;
|
||||
function.module = &module;
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
kernel.module = &module;
|
||||
uint32_t groupSize[3];
|
||||
function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||
kernel.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||
EXPECT_EQ(0U, size % groupSize[0]);
|
||||
EXPECT_EQ(0U, 1U % groupSize[1]);
|
||||
EXPECT_EQ(0U, 1U % groupSize[2]);
|
||||
|
||||
function.KernelImp::suggestGroupSize(size, size, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||
kernel.KernelImp::suggestGroupSize(size, size, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||
EXPECT_EQ(0U, size % groupSize[0]);
|
||||
EXPECT_EQ(0U, size % groupSize[1]);
|
||||
EXPECT_EQ(0U, 1U % groupSize[2]);
|
||||
|
||||
function.KernelImp::suggestGroupSize(size, size, size, groupSize, groupSize + 1,
|
||||
groupSize + 2);
|
||||
kernel.KernelImp::suggestGroupSize(size, size, size, groupSize, groupSize + 1,
|
||||
groupSize + 2);
|
||||
EXPECT_EQ(0U, size % groupSize[0]);
|
||||
EXPECT_EQ(0U, size % groupSize[1]);
|
||||
EXPECT_EQ(0U, size % groupSize[2]);
|
||||
|
||||
function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||
kernel.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||
EXPECT_EQ(0U, size % groupSize[0]);
|
||||
EXPECT_EQ(0U, 1U % groupSize[1]);
|
||||
EXPECT_EQ(0U, 1U % groupSize[2]);
|
||||
|
||||
function.KernelImp::suggestGroupSize(1, size, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||
kernel.KernelImp::suggestGroupSize(1, size, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||
EXPECT_EQ(0U, 1U % groupSize[0]);
|
||||
EXPECT_EQ(0U, size % groupSize[1]);
|
||||
EXPECT_EQ(0U, 1U % groupSize[2]);
|
||||
|
||||
function.KernelImp::suggestGroupSize(1, 1, size, groupSize, groupSize + 1, groupSize + 2);
|
||||
kernel.KernelImp::suggestGroupSize(1, 1, size, groupSize, groupSize + 1, groupSize + 2);
|
||||
EXPECT_EQ(0U, 1U % groupSize[0]);
|
||||
EXPECT_EQ(0U, 1U % groupSize[1]);
|
||||
EXPECT_EQ(0U, size % groupSize[2]);
|
||||
|
||||
function.KernelImp::suggestGroupSize(1, size, size, groupSize, groupSize + 1, groupSize + 2);
|
||||
kernel.KernelImp::suggestGroupSize(1, size, size, groupSize, groupSize + 1, groupSize + 2);
|
||||
EXPECT_EQ(0U, 1U % groupSize[0]);
|
||||
EXPECT_EQ(0U, size % groupSize[1]);
|
||||
EXPECT_EQ(0U, size % groupSize[2]);
|
||||
|
||||
function.KernelImp::suggestGroupSize(size, 1, size, groupSize, groupSize + 1, groupSize + 2);
|
||||
kernel.KernelImp::suggestGroupSize(size, 1, size, groupSize, groupSize + 1, groupSize + 2);
|
||||
EXPECT_EQ(0U, size % groupSize[0]);
|
||||
EXPECT_EQ(0U, 1U % groupSize[1]);
|
||||
EXPECT_EQ(0U, size % groupSize[2]);
|
||||
}
|
||||
|
||||
TEST_F(KernelImp, GivenInvalidValuesWhenSettingGroupSizeThenInvalidArgumentErrorIsReturned) {
|
||||
Mock<Kernel> function;
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, function.KernelImp::setGroupSize(0U, 1U, 1U));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, function.KernelImp::setGroupSize(1U, 0U, 1U));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, function.KernelImp::setGroupSize(1U, 1U, 0U));
|
||||
Mock<Kernel> kernel;
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, kernel.KernelImp::setGroupSize(0U, 1U, 1U));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, kernel.KernelImp::setGroupSize(1U, 0U, 1U));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, kernel.KernelImp::setGroupSize(1U, 1U, 0U));
|
||||
}
|
||||
|
||||
TEST_F(KernelImp, givenSetGroupSizeWithGreaterGroupSizeThanAllowedThenCorrectErrorCodeIsReturned) {
|
||||
WhiteBox<KernelImmutableData> funcInfo = {};
|
||||
WhiteBox<KernelImmutableData> kernelInfo = {};
|
||||
NEO::KernelDescriptor descriptor;
|
||||
funcInfo.kernelDescriptor = &descriptor;
|
||||
kernelInfo.kernelDescriptor = &descriptor;
|
||||
|
||||
Mock<Module> module(device, nullptr);
|
||||
Mock<Kernel> function;
|
||||
function.kernelImmData = &funcInfo;
|
||||
function.module = &module;
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
kernel.module = &module;
|
||||
|
||||
uint32_t maxGroupSizeX = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[0]);
|
||||
uint32_t maxGroupSizeY = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[1]);
|
||||
uint32_t maxGroupSizeZ = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[2]);
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, function.KernelImp::setGroupSize(maxGroupSizeX, maxGroupSizeY, maxGroupSizeZ));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, function.KernelImp::setGroupSize(maxGroupSizeX + 1U, 1U, 1U));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, function.KernelImp::setGroupSize(1U, maxGroupSizeY + 1U, 1U));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, function.KernelImp::setGroupSize(1U, 1U, maxGroupSizeZ + 1U));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(maxGroupSizeX, maxGroupSizeY, maxGroupSizeZ));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(maxGroupSizeX + 1U, 1U, 1U));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(1U, maxGroupSizeY + 1U, 1U));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(1U, 1U, maxGroupSizeZ + 1U));
|
||||
}
|
||||
|
||||
TEST_F(KernelImp, GivenNumChannelsZeroWhenSettingGroupSizeThenLocalIdsNotGenerated) {
|
||||
WhiteBox<KernelImmutableData> funcInfo = {};
|
||||
WhiteBox<KernelImmutableData> kernelInfo = {};
|
||||
NEO::KernelDescriptor descriptor;
|
||||
funcInfo.kernelDescriptor = &descriptor;
|
||||
kernelInfo.kernelDescriptor = &descriptor;
|
||||
|
||||
Mock<Module> module(device, nullptr);
|
||||
Mock<Kernel> function;
|
||||
function.kernelImmData = &funcInfo;
|
||||
function.module = &module;
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
kernel.module = &module;
|
||||
|
||||
function.KernelImp::setGroupSize(16U, 16U, 1U);
|
||||
kernel.KernelImp::setGroupSize(16U, 16U, 1U);
|
||||
std::vector<char> memBefore;
|
||||
{
|
||||
auto perThreadData =
|
||||
reinterpret_cast<const char *>(function.KernelImp::getPerThreadData());
|
||||
reinterpret_cast<const char *>(kernel.KernelImp::getPerThreadData());
|
||||
memBefore.assign(perThreadData,
|
||||
perThreadData + function.KernelImp::getPerThreadDataSize());
|
||||
perThreadData + kernel.KernelImp::getPerThreadDataSize());
|
||||
}
|
||||
|
||||
function.KernelImp::setGroupSize(8U, 32U, 1U);
|
||||
kernel.KernelImp::setGroupSize(8U, 32U, 1U);
|
||||
std::vector<char> memAfter;
|
||||
{
|
||||
auto perThreadData =
|
||||
reinterpret_cast<const char *>(function.KernelImp::getPerThreadData());
|
||||
reinterpret_cast<const char *>(kernel.KernelImp::getPerThreadData());
|
||||
memAfter.assign(perThreadData,
|
||||
perThreadData + function.KernelImp::getPerThreadDataSize());
|
||||
perThreadData + kernel.KernelImp::getPerThreadDataSize());
|
||||
}
|
||||
|
||||
EXPECT_EQ(memAfter, memBefore);
|
||||
|
@ -253,12 +253,12 @@ class KernelImpSuggestMaxCooperativeGroupCountTests : public KernelImp {
|
|||
uint32_t dssCount;
|
||||
uint32_t availableSlm;
|
||||
uint32_t maxBarrierCount;
|
||||
WhiteBox<::L0::KernelImmutableData> funcInfo;
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfo;
|
||||
NEO::KernelDescriptor kernelDescriptor;
|
||||
|
||||
void SetUp() override {
|
||||
KernelImp::SetUp();
|
||||
funcInfo.kernelDescriptor = &kernelDescriptor;
|
||||
kernelInfo.kernelDescriptor = &kernelDescriptor;
|
||||
auto &hardwareInfo = device->getHwInfo();
|
||||
auto &hwHelper = device->getHwHelper();
|
||||
availableThreadCount = hwHelper.calculateAvailableThreadCount(hardwareInfo, numGrf);
|
||||
|
@ -270,16 +270,16 @@ class KernelImpSuggestMaxCooperativeGroupCountTests : public KernelImp {
|
|||
availableSlm = dssCount * KB * hardwareInfo.capabilityTable.slmSize;
|
||||
maxBarrierCount = static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice());
|
||||
|
||||
funcInfo.kernelDescriptor->kernelAttributes.simdSize = simd;
|
||||
funcInfo.kernelDescriptor->kernelAttributes.numGrfRequired = numGrf;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.simdSize = simd;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.numGrfRequired = numGrf;
|
||||
}
|
||||
|
||||
uint32_t getMaxWorkGroupCount() {
|
||||
funcInfo.kernelDescriptor->kernelAttributes.slmInlineSize = usedSlm;
|
||||
funcInfo.kernelDescriptor->kernelAttributes.barrierCount = usesBarriers;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.slmInlineSize = usedSlm;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.barrierCount = usesBarriers;
|
||||
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &funcInfo;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
|
||||
kernel.module = module.get();
|
||||
|
|
@ -529,7 +529,7 @@ HWTEST_F(ModuleTest, GivenIncorrectNameWhenCreatingKernelThenResultErrorInvalidA
|
|||
ze_kernel_handle_t kernelHandle;
|
||||
|
||||
ze_kernel_desc_t kernelDesc = {};
|
||||
kernelDesc.pKernelName = "nonexistent_function";
|
||||
kernelDesc.pKernelName = "nonexistent_kernel";
|
||||
|
||||
ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
void onEnterCommandListAppendLaunchFunction(
|
||||
void onEnterCommandListAppendLaunchKernel(
|
||||
ze_command_list_append_launch_kernel_params_t *params,
|
||||
ze_result_t result,
|
||||
void *pTracerUserData,
|
||||
|
@ -18,7 +18,7 @@ void onEnterCommandListAppendLaunchFunction(
|
|||
int a = 0;
|
||||
a++;
|
||||
}
|
||||
void onExitCommandListAppendLaunchFunction(
|
||||
void onExitCommandListAppendLaunchKernel(
|
||||
ze_command_list_append_launch_kernel_params_t *params,
|
||||
ze_result_t result,
|
||||
void *pTracerUserData,
|
||||
|
@ -168,8 +168,8 @@ TEST_F(ZeApiTracingCoreTests, WhenCreateTracerAndsetCallbacksAndEnableTracingAnd
|
|||
zet_core_callbacks_t prologCbs = {};
|
||||
zet_core_callbacks_t epilogCbs = {};
|
||||
|
||||
prologCbs.CommandList.pfnAppendLaunchKernelCb = onEnterCommandListAppendLaunchFunction;
|
||||
epilogCbs.CommandList.pfnAppendLaunchKernelCb = onExitCommandListAppendLaunchFunction;
|
||||
prologCbs.CommandList.pfnAppendLaunchKernelCb = onEnterCommandListAppendLaunchKernel;
|
||||
epilogCbs.CommandList.pfnAppendLaunchKernelCb = onExitCommandListAppendLaunchKernel;
|
||||
|
||||
result = zetTracerExpSetPrologues(apiTracerHandle, &prologCbs);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
|
Loading…
Reference in New Issue