Replace the term "function" with Kernel-class-specific names

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2022-09-09 22:29:39 +00:00 committed by Compute-Runtime-Automation
parent af7eaa0161
commit 072686f283
35 changed files with 399 additions and 399 deletions

View File

@ -65,8 +65,8 @@ ze_result_t zeModuleGetKernelNames(
ze_result_t zeKernelCreate(
ze_module_handle_t hModule,
const ze_kernel_desc_t *desc,
ze_kernel_handle_t *phFunction) {
return L0::Module::fromHandle(hModule)->createKernel(desc, phFunction);
ze_kernel_handle_t *kernelHandle) {
return L0::Module::fromHandle(hModule)->createKernel(desc, kernelHandle);
}
ze_result_t zeKernelDestroy(
@ -141,45 +141,45 @@ ze_result_t zeKernelGetProperties(
ze_result_t zeCommandListAppendLaunchKernel(
ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel,
const ze_group_count_t *pLaunchFuncArgs,
ze_kernel_handle_t kernelHandle,
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
L0::CmdListKernelLaunchParams launchParams = {};
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernel(hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents, launchParams);
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, phWaitEvents, launchParams);
}
ze_result_t zeCommandListAppendLaunchCooperativeKernel(
ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel,
const ze_group_count_t *pLaunchFuncArgs,
ze_kernel_handle_t kernelHandle,
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendLaunchCooperativeKernel(hKernel, pLaunchFuncArgs, hSignalEvent, numWaitEvents, phWaitEvents);
return L0::CommandList::fromHandle(hCommandList)->appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, phWaitEvents);
}
ze_result_t zeCommandListAppendLaunchKernelIndirect(
ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel,
ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernelIndirect(hKernel, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
return L0::CommandList::fromHandle(hCommandList)->appendLaunchKernelIndirect(kernelHandle, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
}
ze_result_t zeCommandListAppendLaunchMultipleKernelsIndirect(
ze_command_list_handle_t hCommandList,
uint32_t numKernels,
ze_kernel_handle_t *phKernels,
ze_kernel_handle_t *kernelHandles,
const uint32_t *pCountBuffer,
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendLaunchMultipleKernelsIndirect(numKernels, phKernels, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
return L0::CommandList::fromHandle(hCommandList)->appendLaunchMultipleKernelsIndirect(numKernels, kernelHandles, pCountBuffer, pLaunchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents);
}
ze_result_t zeKernelGetName(
@ -433,15 +433,15 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeKernelGetName(
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel(
ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel,
const ze_group_count_t *pLaunchFuncArgs,
ze_kernel_handle_t kernelHandle,
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::zeCommandListAppendLaunchKernel(
hCommandList,
hKernel,
pLaunchFuncArgs,
kernelHandle,
launchKernelArgs,
hSignalEvent,
numWaitEvents,
phWaitEvents);
@ -449,15 +449,15 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernel(
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel(
ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel,
const ze_group_count_t *pLaunchFuncArgs,
ze_kernel_handle_t kernelHandle,
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::zeCommandListAppendLaunchCooperativeKernel(
hCommandList,
hKernel,
pLaunchFuncArgs,
kernelHandle,
launchKernelArgs,
hSignalEvent,
numWaitEvents,
phWaitEvents);
@ -465,14 +465,14 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchCooperativeKernel(
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect(
ze_command_list_handle_t hCommandList,
ze_kernel_handle_t hKernel,
ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::zeCommandListAppendLaunchKernelIndirect(
hCommandList,
hKernel,
kernelHandle,
pLaunchArgumentsBuffer,
hSignalEvent,
numWaitEvents,
@ -482,7 +482,7 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchKernelIndirect(
ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndirect(
ze_command_list_handle_t hCommandList,
uint32_t numKernels,
ze_kernel_handle_t *phKernels,
ze_kernel_handle_t *kernelHandles,
const uint32_t *pCountBuffer,
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hSignalEvent,
@ -491,7 +491,7 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeCommandListAppendLaunchMultipleKernelsIndi
return L0::zeCommandListAppendLaunchMultipleKernelsIndirect(
hCommandList,
numKernels,
phKernels,
kernelHandles,
pCountBuffer,
pLaunchArgumentsBuffer,
hSignalEvent,

View File

@ -30,15 +30,15 @@ CommandList::~CommandList() {
if (this->cmdListType == CommandListType::TYPE_REGULAR || !this->isFlushTaskSubmissionEnabled) {
removeHostPtrAllocations();
}
printfFunctionContainer.clear();
printfKernelContainer.clear();
}
void CommandList::storePrintfFunction(Kernel *kernel) {
auto it = std::find(this->printfFunctionContainer.begin(), this->printfFunctionContainer.end(),
void CommandList::storePrintfKernel(Kernel *kernel) {
auto it = std::find(this->printfKernelContainer.begin(), this->printfKernelContainer.end(),
kernel);
if (it == this->printfFunctionContainer.end()) {
this->printfFunctionContainer.push_back(kernel);
if (it == this->printfKernelContainer.end()) {
this->printfKernelContainer.push_back(kernel);
}
}
@ -131,7 +131,7 @@ void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocati
}
}
NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {
NEO::PreemptionMode CommandList::obtainKernelPreemptionMode(Kernel *kernel) {
NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor());
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);
}

View File

@ -93,19 +93,19 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel, const ze_group_count_t *threadGroupDimensions,
virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) = 0;
virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel,
const ze_group_count_t *pLaunchFuncArgs,
virtual ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel,
virtual ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *phKernels,
virtual ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels, const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer, ze_event_handle_t hEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
@ -219,13 +219,13 @@ struct CommandList : _ze_command_list_handle_t {
void handleIndirectAllocationResidency();
NEO::PreemptionMode obtainFunctionPreemptionMode(Kernel *kernel);
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
std::vector<Kernel *> &getPrintfFunctionContainer() {
return this->printfFunctionContainer;
std::vector<Kernel *> &getPrintfKernelContainer() {
return this->printfKernelContainer;
}
void storePrintfFunction(Kernel *kernel);
void storePrintfKernel(Kernel *kernel);
void removeDeallocationContainerData();
void removeHostPtrAllocations();
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
@ -279,7 +279,7 @@ struct CommandList : _ze_command_list_handle_t {
void migrateSharedAllocations();
ze_context_handle_t hContext = nullptr;
std::vector<Kernel *> printfFunctionContainer;
std::vector<Kernel *> printfKernelContainer;
CommandQueue *cmdQImmediate = nullptr;
NEO::CommandStreamReceiver *csr = nullptr;
Device *device = nullptr;

View File

@ -80,22 +80,22 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel,
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) override;
ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel,
const ze_group_count_t *pLaunchFuncArgs,
ze_result_t appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel,
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels,
const ze_kernel_handle_t *phKernels,
const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent,

View File

@ -81,7 +81,7 @@ CommandListCoreFamily<gfxCoreFamily>::~CommandListCoreFamily() {
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
printfFunctionContainer.clear();
printfKernelContainer.clear();
removeDeallocationContainerData();
removeHostPtrAllocations();
commandContainer.reset();
@ -202,7 +202,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::programL3(bool isSLMused) {}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_handle_t hKernel,
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
@ -230,7 +230,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
event = Event::fromHandle(hEvent);
}
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(hKernel), threadGroupDimensions,
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
event, launchParams);
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@ -245,8 +245,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(ze_kernel_handle_t hKernel,
const ze_group_count_t *pLaunchFuncArgs,
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *launchKernelArgs,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
@ -263,12 +263,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
CmdListKernelLaunchParams launchParams = {};
launchParams.isCooperative = true;
return appendLaunchKernelWithParams(Kernel::fromHandle(hKernel), pLaunchFuncArgs,
return appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
event, launchParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_kernel_handle_t hKernel,
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
@ -287,7 +287,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
appendEventForProfiling(event, true, false);
CmdListKernelLaunchParams launchParams = {};
launchParams.isIndirect = true;
ret = appendLaunchKernelWithParams(Kernel::fromHandle(hKernel), pDispatchArgumentsBuffer,
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
nullptr, launchParams);
appendSignalEventPostWalker(event, false);
@ -296,7 +296,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsIndirect(uint32_t numKernels,
const ze_kernel_handle_t *phKernels,
const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent,
@ -325,7 +325,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
CmdListKernelLaunchParams launchParams = {};
launchParams.isIndirect = true;
launchParams.isPredicate = true;
ret = appendLaunchKernelWithParams(Kernel::fromHandle(phKernels[i]),
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
haveLaunchArguments ? &pLaunchArgumentsBuffer[i] : nullptr,
nullptr, launchParams);
if (ret) {
@ -563,12 +563,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
return ZE_RESULT_ERROR_UNKNOWN;
}
ze_group_count_t functionArgs{pDstRegion->width / groupSizeX, pDstRegion->height / groupSizeY,
pDstRegion->depth / groupSizeZ};
ze_group_count_t kernelArgs{pDstRegion->width / groupSizeX, pDstRegion->height / groupSizeY,
pDstRegion->depth / groupSizeZ};
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs,
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents,
launchParams);
}
@ -689,8 +689,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
return ZE_RESULT_ERROR_UNKNOWN;
}
ze_group_count_t functionArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY,
pSrcRegion->depth / groupSizeZ};
ze_group_count_t kernelArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY,
pSrcRegion->depth / groupSizeZ};
auto dstAllocationType = allocationStruct.alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};
@ -698,7 +698,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &functionArgs,
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents, launchParams);
addFlushRequiredCommand(allocationStruct.needsFlush, event);
@ -803,8 +803,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
return ZE_RESULT_ERROR_UNKNOWN;
}
ze_group_count_t functionArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY,
srcRegion.depth / groupSizeZ};
ze_group_count_t kernelArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY,
srcRegion.depth / groupSizeZ};
kernel->setArgRedescribedImage(0, hSrcImage);
kernel->setArgRedescribedImage(1, hDstImage);
@ -813,7 +813,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &functionArgs,
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), &kernelArgs,
event, numWaitEvents, phWaitEvents,
launchParams);
}
@ -907,31 +907,31 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
Kernel *builtinFunction = nullptr;
Kernel *builtinKernel = nullptr;
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin);
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
uint32_t groupSizeX = builtinFunction->getImmutableData()
uint32_t groupSizeX = builtinKernel->getImmutableData()
->getDescriptor()
.kernelAttributes.simdSize;
uint32_t groupSizeY = 1u;
uint32_t groupSizeZ = 1u;
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) {
if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
builtinFunction->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc);
builtinFunction->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc);
builtinKernel->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc);
builtinKernel->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc);
uint64_t elems = size / elementSize;
builtinFunction->setArgumentValue(2, sizeof(elems), &elems);
builtinFunction->setArgumentValue(3, sizeof(dstOffset), &dstOffset);
builtinFunction->setArgumentValue(4, sizeof(srcOffset), &srcOffset);
builtinKernel->setArgumentValue(2, sizeof(elems), &elems);
builtinKernel->setArgumentValue(3, sizeof(dstOffset), &dstOffset);
builtinKernel->setArgumentValue(4, sizeof(srcOffset), &srcOffset);
uint32_t groups = static_cast<uint32_t>((size + ((static_cast<uint64_t>(groupSizeX) * elementSize) - 1)) / (static_cast<uint64_t>(groupSizeX) * elementSize));
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
ze_group_count_t dispatchKernelArgs{groups, 1u, 1u};
auto dstAllocationType = dstPtrAlloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};
@ -942,7 +942,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
(dstAllocationType == NEO::AllocationType::SVM_CPU) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(builtinKernel, &dispatchKernelArgs, signalEvent, launchParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@ -1164,9 +1164,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
appendEventForProfilingAllWalkers(signalEvent, true);
if (ret == ZE_RESULT_SUCCESS && leftSize) {
Builtin func = Builtin::CopyBufferToBufferSide;
Builtin copyKernel = Builtin::CopyBufferToBufferSide;
if (isStateless) {
func = Builtin::CopyBufferToBufferSideStateless;
copyKernel = Builtin::CopyBufferToBufferSideStateless;
}
if (isCopyOnly()) {
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
@ -1179,16 +1179,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, srcAllocationStruct.offset,
leftSize, 1UL,
func,
copyKernel,
signalEvent,
isStateless);
}
}
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
Builtin func = Builtin::CopyBufferToBufferMiddle;
Builtin copyKernel = Builtin::CopyBufferToBufferMiddle;
if (isStateless) {
func = Builtin::CopyBufferToBufferMiddleStateless;
copyKernel = Builtin::CopyBufferToBufferMiddleStateless;
}
if (isCopyOnly()) {
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
@ -1202,16 +1202,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
middleSizeBytes,
middleElSize,
func,
copyKernel,
signalEvent,
isStateless);
}
}
if (ret == ZE_RESULT_SUCCESS && rightSize) {
Builtin func = Builtin::CopyBufferToBufferSide;
Builtin copyKernel = Builtin::CopyBufferToBufferSide;
if (isStateless) {
func = Builtin::CopyBufferToBufferSideStateless;
copyKernel = Builtin::CopyBufferToBufferSideStateless;
}
if (isCopyOnly()) {
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
@ -1224,7 +1224,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
rightSize, 1UL,
func,
copyKernel,
signalEvent,
isStateless);
}
@ -1351,19 +1351,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin);
auto builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
uint32_t groupSizeX = srcRegion->width;
uint32_t groupSizeY = srcRegion->height;
uint32_t groupSizeZ = srcRegion->depth;
if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ,
&groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ,
&groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
@ -1373,20 +1373,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
return ZE_RESULT_ERROR_UNKNOWN;
}
ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY,
srcRegion->depth / groupSizeZ};
ze_group_count_t dispatchKernelArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY,
srcRegion->depth / groupSizeZ};
uint32_t srcOrigin[3] = {(srcRegion->originX + static_cast<uint32_t>(srcOffset)), (srcRegion->originY), (srcRegion->originZ)};
uint32_t dstOrigin[3] = {(dstRegion->originX + static_cast<uint32_t>(dstOffset)), (dstRegion->originY), (dstRegion->originZ)};
uint32_t srcPitches[2] = {(srcPitch), (srcSlicePitch)};
uint32_t dstPitches[2] = {(dstPitch), (dstSlicePitch)};
builtinFunction->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
builtinFunction->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
builtinFunction->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
builtinFunction->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
builtinFunction->setArgumentValue(4, sizeof(srcPitches), &srcPitches);
builtinFunction->setArgumentValue(5, sizeof(dstPitches), &dstPitches);
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
builtinKernel->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
builtinKernel->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
builtinKernel->setArgumentValue(4, sizeof(srcPitches), &srcPitches);
builtinKernel->setArgumentValue(5, sizeof(dstPitches), &dstPitches);
auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};
@ -1394,7 +1394,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, signalEvent, numWaitEvents,
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), &dispatchKernelArgs, signalEvent, numWaitEvents,
phWaitEvents, launchParams);
}
@ -1414,19 +1414,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin);
auto builtinKernel = device->getBuiltinFunctionsLib()->getFunction(builtin);
uint32_t groupSizeX = srcRegion->width;
uint32_t groupSizeY = srcRegion->height;
uint32_t groupSizeZ = 1u;
if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX,
&groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX,
&groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
@ -1436,17 +1436,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
return ZE_RESULT_ERROR_UNKNOWN;
}
ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, 1u};
ze_group_count_t dispatchKernelArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, 1u};
uint32_t srcOrigin[2] = {(srcRegion->originX + static_cast<uint32_t>(srcOffset)), (srcRegion->originY)};
uint32_t dstOrigin[2] = {(dstRegion->originX + static_cast<uint32_t>(dstOffset)), (dstRegion->originY)};
builtinFunction->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
builtinFunction->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
builtinFunction->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
builtinFunction->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
builtinFunction->setArgumentValue(4, sizeof(srcPitch), &srcPitch);
builtinFunction->setArgumentValue(5, sizeof(dstPitch), &dstPitch);
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
builtinKernel->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
builtinKernel->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
builtinKernel->setArgumentValue(4, sizeof(srcPitch), &srcPitch);
builtinKernel->setArgumentValue(5, sizeof(dstPitch), &dstPitch);
auto dstAllocationType = dstAlignedAllocation->alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};
@ -1454,8 +1454,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinFunction->toHandle(),
&dispatchFuncArgs, signalEvent,
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(),
&dispatchKernelArgs, signalEvent,
numWaitEvents,
phWaitEvents,
launchParams);
@ -1473,23 +1473,23 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryPrefetch(const voi
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendUnalignedFillKernel(bool isStateless, uint32_t unalignedSize, const AlignedAllocationData &dstAllocation, const void *pattern, Event *signalEvent, const CmdListKernelLaunchParams &launchParams) {
Kernel *builtinFunction = nullptr;
Kernel *builtinKernel = nullptr;
if (isStateless) {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOverStateless);
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOverStateless);
} else {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOver);
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOver);
}
uint32_t groupSizeY = 1, groupSizeZ = 1;
uint32_t groupSizeX = static_cast<uint32_t>(unalignedSize);
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
ze_group_count_t dispatchFuncRemainderArgs{static_cast<uint32_t>(unalignedSize / groupSizeX), 1u, 1u};
builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
ze_group_count_t dispatchKernelRemainderArgs{static_cast<uint32_t>(unalignedSize / groupSizeX), 1u, 1u};
uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern));
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinFunction->setArgumentValue(2, sizeof(value), &value);
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinKernel->setArgumentValue(2, sizeof(value), &value);
auto res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams);
auto res = appendLaunchKernelSplit(builtinKernel, &dispatchKernelRemainderArgs, signalEvent, launchParams);
if (res) {
return res;
}
@ -1569,12 +1569,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
middleSize -= leftRemainder;
dstAllocation.offset += leftRemainder;
}
Kernel *builtinFunction = nullptr;
Kernel *builtinKernel = nullptr;
if (isStateless) {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateStateless);
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateStateless);
} else {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
}
const auto dataTypeSize = sizeof(uint32_t) * 4;
size_t adjustedSize = middleSize / dataTypeSize;
@ -1582,7 +1582,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
if (groupSizeX > adjustedSize && adjustedSize > 0) {
groupSizeX = adjustedSize;
}
if (builtinFunction->setGroupSize(static_cast<uint32_t>(groupSizeX), 1u, 1u)) {
if (builtinKernel->setGroupSize(static_cast<uint32_t>(groupSizeX), 1u, 1u)) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
@ -1590,17 +1590,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
size_t groups = adjustedSize / groupSizeX;
uint32_t remainingBytes = static_cast<uint32_t>((adjustedSize % groupSizeX) * dataTypeSize +
middleSize % dataTypeSize);
ze_group_count_t dispatchFuncArgs{static_cast<uint32_t>(groups), 1u, 1u};
ze_group_count_t dispatchKernelArgs{static_cast<uint32_t>(groups), 1u, 1u};
uint32_t value = 0;
memset(&value, *reinterpret_cast<const unsigned char *>(pattern), 4);
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinFunction->setArgumentValue(2, sizeof(value), &value);
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinKernel->setArgumentValue(2, sizeof(value), &value);
appendEventForProfilingAllWalkers(signalEvent, true);
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams);
res = appendLaunchKernelSplit(builtinKernel, &dispatchKernelArgs, signalEvent, launchParams);
if (res) {
return res;
}
@ -1614,18 +1614,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
} else {
Kernel *builtinFunction = nullptr;
Kernel *builtinKernel = nullptr;
if (isStateless) {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddleStateless);
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddleStateless);
} else {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle);
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle);
}
size_t middleElSize = sizeof(uint32_t);
size_t adjustedSize = size / middleElSize;
uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize);
uint32_t groupSizeY = 1, groupSizeZ = 1;
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
uint32_t groups = static_cast<uint32_t>(adjustedSize) / groupSizeX;
uint32_t remainingBytes = static_cast<uint32_t>((adjustedSize % groupSizeX) * middleElSize +
@ -1656,15 +1656,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
patternAllocOffset += patternSizeToCopy;
} while (patternAllocOffset < patternAllocationSize);
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinFunction->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc);
builtinFunction->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls);
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinKernel->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc);
builtinKernel->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls);
appendEventForProfilingAllWalkers(signalEvent, true);
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams);
ze_group_count_t dispatchKernelArgs{groups, 1u, 1u};
res = appendLaunchKernelSplit(builtinKernel, &dispatchKernelArgs, signalEvent, launchParams);
if (res) {
return res;
}
@ -1673,28 +1673,28 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
uint32_t dstOffsetRemainder = groups * groupSizeX * static_cast<uint32_t>(middleElSize);
uint64_t patternOffsetRemainder = (groupSizeX * groups & (patternSizeInEls - 1)) * middleElSize;
Kernel *builtinFunctionRemainder;
Kernel *builtinKernelRemainder;
if (isStateless) {
builtinFunctionRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftoverStateless);
builtinKernelRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftoverStateless);
} else {
builtinFunctionRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftover);
builtinKernelRemainder = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferRightLeftover);
}
builtinFunctionRemainder->setGroupSize(remainingBytes, 1u, 1u);
ze_group_count_t dispatchFuncArgs{1u, 1u, 1u};
builtinKernelRemainder->setGroupSize(remainingBytes, 1u, 1u);
ze_group_count_t dispatchKernelArgs{1u, 1u, 1u};
builtinFunctionRemainder->setArgBufferWithAlloc(0,
dstAllocation.alignedAllocationPtr,
dstAllocation.alloc);
builtinFunctionRemainder->setArgumentValue(1,
sizeof(dstOffsetRemainder),
&dstOffsetRemainder);
builtinFunctionRemainder->setArgBufferWithAlloc(2,
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
patternGfxAlloc);
builtinFunctionRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
builtinKernelRemainder->setArgBufferWithAlloc(0,
dstAllocation.alignedAllocationPtr,
dstAllocation.alloc);
builtinKernelRemainder->setArgumentValue(1,
sizeof(dstOffsetRemainder),
&dstOffsetRemainder);
builtinKernelRemainder->setArgBufferWithAlloc(2,
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
patternGfxAlloc);
builtinKernelRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
res = appendLaunchKernelSplit(builtinFunctionRemainder, &dispatchFuncArgs, signalEvent, launchParams);
res = appendLaunchKernelSplit(builtinKernelRemainder, &dispatchKernelArgs, signalEvent, launchParams);
if (res) {
return res;
}
@ -2220,20 +2220,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
UNRECOVERABLE_IF(!result);
Kernel *builtinFunction = nullptr;
Kernel *builtinKernel = nullptr;
auto useOnlyGlobalTimestamps = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u;
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
if (pOffsets == nullptr) {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps);
builtinFunction->setArgumentValue(2u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps);
builtinKernel->setArgumentValue(2u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
} else {
auto pOffsetAllocationStruct = getAlignedAllocation(this->device, pOffsets, sizeof(size_t) * numEvents, false);
auto offsetValPtr = static_cast<uintptr_t>(pOffsetAllocationStruct.alloc->getGpuAddress());
commandContainer.addToResidencyContainer(pOffsetAllocationStruct.alloc);
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets);
builtinFunction->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc);
builtinFunction->setArgumentValue(3u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets);
builtinKernel->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc);
builtinKernel->setArgumentValue(3u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
offsetValPtr += sizeof(size_t);
}
@ -2241,23 +2241,23 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
uint32_t groupSizeY = 1u;
uint32_t groupSizeZ = 1u;
if (builtinFunction->suggestGroupSize(numEvents, 1u, 1u,
&groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
if (builtinKernel->suggestGroupSize(numEvents, 1u, 1u,
&groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
ze_group_count_t dispatchFuncArgs{numEvents / groupSizeX, 1u, 1u};
ze_group_count_t dispatchKernelArgs{numEvents / groupSizeX, 1u, 1u};
auto dstValPtr = static_cast<uintptr_t>(dstPtrAllocationStruct.alloc->getGpuAddress());
builtinFunction->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
builtinFunction->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc);
builtinKernel->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
builtinKernel->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc);
auto dstAllocationType = dstPtrAllocationStruct.alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};
@ -2265,7 +2265,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
launchParams.isDestinationAllocationInSystemMemory =
(dstAllocationType == NEO::AllocationType::BUFFER_HOST_MEMORY) ||
(dstAllocationType == NEO::AllocationType::EXTERNAL_HOST_PTR);
auto appendResult = appendLaunchKernel(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents,
auto appendResult = appendLaunchKernel(builtinKernel->toHandle(), &dispatchKernelArgs, hSignalEvent, numWaitEvents,
phWaitEvents, launchParams);
if (appendResult != ZE_RESULT_SUCCESS) {
return appendResult;

View File

@ -44,7 +44,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(event, true, false);
const auto functionImmutableData = kernel->getImmutableData();
const auto kernelImmutableData = kernel->getImmutableData();
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
this->setCommandListPerThreadScratchSize(perThreadScratchSize);
@ -52,7 +52,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0);
this->setCommandListSLMEnable(slmEnable);
auto kernelPreemptionMode = obtainFunctionPreemptionMode(kernel);
auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);
commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode);
kernel->patchGlobalOffset();
@ -169,14 +169,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
appendSignalEventPostWalker(event, false);
commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation());
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
auto &residencyContainer = kernel->getResidencyContainer();
for (auto resource : residencyContainer) {
commandContainer.addToResidencyContainer(resource);
}
if (functionImmutableData->getDescriptor().kernelAttributes.flags.usesPrintf) {
storePrintfFunction(kernel);
if (kernelImmutableData->getDescriptor().kernelAttributes.flags.usesPrintf) {
storePrintfKernel(kernel);
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {

View File

@ -22,13 +22,13 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
using BaseClass::BaseClass;
ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel,
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) override;
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t hKernel,
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;

View File

@ -163,7 +163,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
ze_kernel_handle_t hKernel, const ze_group_count_t *threadGroupDimensions,
ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) {
@ -171,7 +171,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
checkAvailableSpace();
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(hKernel, threadGroupDimensions,
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernelHandle, threadGroupDimensions,
hSignalEvent, numWaitEvents, phWaitEvents,
launchParams);
return flushImmediate(ret, true);
@ -179,13 +179,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
// Immediate-command-list variant of appendLaunchKernelIndirect.
// When flush-task submission is enabled it first calls checkAvailableSpace(),
// then forwards all arguments to the CommandListCoreFamily implementation and
// hands that result to flushImmediate(), whose return value is returned.
// NOTE(review): the parameter line and the forwarding call each appear twice -
// once with the pre-rename name (hKernel) and once with the post-rename name
// (kernelHandle) introduced by this commit.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelIndirect(
ze_kernel_handle_t hKernel, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_kernel_handle_t kernelHandle, const ze_group_count_t *pDispatchArgumentsBuffer,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
if (this->isFlushTaskSubmissionEnabled) {
checkAvailableSpace();
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(hKernel, pDispatchArgumentsBuffer,
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
hSignalEvent, numWaitEvents, phWaitEvents);
// The append result is passed through flushImmediate rather than returned directly.
return flushImmediate(ret, true);
}

View File

@ -134,7 +134,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::Device *neoDevice = device->getNEODevice();
UNRECOVERABLE_IF(kernel == nullptr);
const auto functionImmutableData = kernel->getImmutableData();
const auto kernelImmutableData = kernel->getImmutableData();
auto &kernelDescriptor = kernel->getKernelDescriptor();
if (kernelDescriptor.kernelAttributes.flags.isInvalid) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
@ -142,8 +142,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
auto functionPreemptionMode = obtainFunctionPreemptionMode(kernel);
commandListPreemptionMode = std::min(commandListPreemptionMode, functionPreemptionMode);
auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);
commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode);
kernel->patchGlobalOffset();
if (launchParams.isIndirect && threadGroupDimensions) {
@ -287,9 +287,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
}
// Attach Function residency to our CommandList residency
// Attach kernel residency to our CommandList residency
{
commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation());
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
auto &residencyContainer = kernel->getResidencyContainer();
for (auto resource : residencyContainer) {
commandContainer.addToResidencyContainer(resource);
@ -299,7 +299,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
// Store PrintfBuffer from a kernel
{
if (kernelDescriptor.kernelAttributes.flags.usesPrintf) {
storePrintfFunction(kernel);
storePrintfKernel(kernel);
}
}

View File

@ -152,16 +152,16 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeout)
return ZE_RESULT_SUCCESS;
}
// Calls printPrintfOutput() on every kernel pointer stored in the queue's
// printf container, in index order, and then empties the container.
// NOTE(review): pre-rename (printFunctionsPrintfOutput / printfFunctionContainer)
// and post-rename (printKernelsPrintfOutput / printfKernelContainer) lines are
// both present here.
void CommandQueueImp::printFunctionsPrintfOutput() {
size_t size = this->printfFunctionContainer.size();
void CommandQueueImp::printKernelsPrintfOutput() {
size_t size = this->printfKernelContainer.size();
for (size_t i = 0; i < size; i++) {
this->printfFunctionContainer[i]->printPrintfOutput();
this->printfKernelContainer[i]->printPrintfOutput();
}
// Entries are dropped after printing, so the same entries are not printed again on the next call.
this->printfFunctionContainer.clear();
this->printfKernelContainer.clear();
}
void CommandQueueImp::postSyncOperations() {
printFunctionsPrintfOutput();
printKernelsPrintfOutput();
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() && NEO::DebugManager.flags.DebuggerLogBitmask.get()) {
device->getL0Debugger()->printTrackedAddresses(csr->getOsContext().getContextId());

View File

@ -909,9 +909,9 @@ void CommandQueueHw<gfxCoreFamily>::collectPrintfContentsFromAllCommandsLists(
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
this->printfFunctionContainer.insert(this->printfFunctionContainer.end(),
commandList->getPrintfFunctionContainer().begin(),
commandList->getPrintfFunctionContainer().end());
this->printfKernelContainer.insert(this->printfKernelContainer.end(),
commandList->getPrintfKernelContainer().begin(),
commandList->getPrintfKernelContainer().end());
}
}

View File

@ -87,14 +87,14 @@ struct CommandQueueImp : public CommandQueue {
ze_result_t synchronizeByPollingForTaskCount(uint64_t timeout);
void printFunctionsPrintfOutput();
void printKernelsPrintfOutput();
void postSyncOperations();
CommandBufferManager buffers;
NEO::HeapContainer heapContainer;
ze_command_queue_desc_t desc;
std::vector<Kernel *> printfFunctionContainer;
std::vector<Kernel *> printfKernelContainer;
Device *device = nullptr;
NEO::CommandStreamReceiver *csr = nullptr;

View File

@ -33,7 +33,7 @@ struct Module : _ze_module_handle_t {
virtual Device *getDevice() const = 0;
virtual ze_result_t createKernel(const ze_kernel_desc_t *desc,
ze_kernel_handle_t *phFunction) = 0;
ze_kernel_handle_t *kernelHandle) = 0;
virtual ze_result_t destroy() = 0;
virtual ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) = 0;
virtual ze_result_t getFunctionPointer(const char *pKernelName, void **pfnFunction) = 0;
@ -45,7 +45,7 @@ struct Module : _ze_module_handle_t {
ze_module_handle_t *phModules,
ze_module_build_log_handle_t *phLinkLog) = 0;
virtual const KernelImmutableData *getKernelImmutableData(const char *functionName) const = 0;
virtual const KernelImmutableData *getKernelImmutableData(const char *kernelName) const = 0;
virtual const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const = 0;
virtual uint32_t getMaxGroupSize() const = 0;
virtual bool isDebugEnabled() const = 0;

View File

@ -664,9 +664,9 @@ void ModuleImp::passDebugData() {
}
}
const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *functionName) const {
const KernelImmutableData *ModuleImp::getKernelImmutableData(const char *kernelName) const {
for (auto &kernelImmData : kernelImmDatas) {
if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(functionName) == 0) {
if (kernelImmData->getDescriptor().kernelMetadata.kernelName.compare(kernelName) == 0) {
return kernelImmData.get();
}
}
@ -742,7 +742,7 @@ void ModuleImp::updateBuildLog(NEO::Device *neoDevice) {
}
ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc,
ze_kernel_handle_t *phFunction) {
ze_kernel_handle_t *kernelHandle) {
ze_result_t res;
if (!isFullyLinked) {
return ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED;
@ -750,7 +750,7 @@ ze_result_t ModuleImp::createKernel(const ze_kernel_desc_t *desc,
auto kernel = Kernel::create(productFamily, this, desc, &res);
if (res == ZE_RESULT_SUCCESS) {
*phFunction = kernel->toHandle();
*kernelHandle = kernel->toHandle();
}
return res;

View File

@ -89,7 +89,7 @@ struct ModuleImp : public Module {
ze_result_t destroy() override;
ze_result_t createKernel(const ze_kernel_desc_t *desc,
ze_kernel_handle_t *phFunction) override;
ze_kernel_handle_t *kernelHandle) override;
ze_result_t getNativeBinary(size_t *pSize, uint8_t *pModuleNativeBinary) override;
@ -107,7 +107,7 @@ struct ModuleImp : public Module {
ze_result_t getDebugInfo(size_t *pDebugDataSize, uint8_t *pDebugData) override;
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override;
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override;
const std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmutableDataVector() const override { return kernelImmDatas; }

View File

@ -110,7 +110,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
~MockModule() override {
}
// Mock override: the name argument is not used; the fixture's
// mockKernelImmData is returned for every lookup.
// NOTE(review): the signature appears with both the old (functionName) and
// new (kernelName) parameter name.
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override {
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override {
return mockKernelImmData;
}

View File

@ -12,7 +12,7 @@
namespace L0 {
namespace ult {
void MultiTileCommandListAppendLaunchFunctionFixture::setUp() {
void MultiTileCommandListAppendLaunchKernelFixture::setUp() {
DebugManager.flags.EnableImplicitScaling.set(1);
MultiDeviceFixture::numRootDevices = 1u;
@ -35,14 +35,14 @@ void MultiTileCommandListAppendLaunchFunctionFixture::setUp() {
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
}
// Fixture teardown: calls destroy() on the command list and on the context,
// then delegates to MultiDeviceModuleFixture::tearDown().
// NOTE(review): the definition line appears under both the old
// (...LaunchFunctionFixture) and new (...LaunchKernelFixture) class name.
void MultiTileCommandListAppendLaunchFunctionFixture::tearDown() {
void MultiTileCommandListAppendLaunchKernelFixture::tearDown() {
commandList->destroy();
contextImp->destroy();
MultiDeviceModuleFixture::tearDown();
}
void MultiTileImmediateCommandListAppendLaunchFunctionFixture::setUp() {
void MultiTileImmediateCommandListAppendLaunchKernelFixture::setUp() {
DebugManager.flags.EnableImplicitScaling.set(1);
MultiDeviceFixture::numRootDevices = 1u;
@ -61,7 +61,7 @@ void MultiTileImmediateCommandListAppendLaunchFunctionFixture::setUp() {
contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext));
}
void MultiTileImmediateCommandListAppendLaunchFunctionFixture::tearDown() {
void MultiTileImmediateCommandListAppendLaunchKernelFixture::tearDown() {
contextImp->destroy();
MultiDeviceModuleFixture::tearDown();

View File

@ -18,7 +18,7 @@ struct Device;
namespace ult {
struct MultiTileCommandListAppendLaunchFunctionFixture : public MultiDeviceModuleFixture {
struct MultiTileCommandListAppendLaunchKernelFixture : public MultiDeviceModuleFixture {
void setUp();
void tearDown();
@ -28,7 +28,7 @@ struct MultiTileCommandListAppendLaunchFunctionFixture : public MultiDeviceModul
VariableBackup<bool> backup{&NEO::ImplicitScaling::apiSupport, true};
};
struct MultiTileImmediateCommandListAppendLaunchFunctionFixture : public MultiDeviceModuleFixture {
struct MultiTileImmediateCommandListAppendLaunchKernelFixture : public MultiDeviceModuleFixture {
void setUp();
void tearDown();

View File

@ -27,9 +27,9 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
// Test setup: runs DeviceFixture::setUp() and then fills the shared dispatch
// arguments with group counts X=1, Y=2, Z=3.
// NOTE(review): the three assignments appear once for the old member name
// (dispatchFunctionArguments) and once for the new one (dispatchKernelArguments).
void SetUp() override {
DeviceFixture::setUp();
dispatchFunctionArguments.groupCountX = 1u;
dispatchFunctionArguments.groupCountY = 2u;
dispatchFunctionArguments.groupCountZ = 3u;
dispatchKernelArguments.groupCountX = 1u;
dispatchKernelArguments.groupCountY = 2u;
dispatchKernelArguments.groupCountZ = 3u;
}
void TearDown() override {
@ -44,18 +44,18 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
}
std::vector<void *> isaBuffers;
ze_group_count_t dispatchFunctionArguments;
ze_group_count_t dispatchKernelArguments;
void *buffer = nullptr;
void initializeFunction(WhiteBox<::L0::Kernel> &function,
WhiteBox<::L0::KernelImmutableData> &functionData,
L0::Device *device) {
void initializeKernel(WhiteBox<::L0::Kernel> &kernel,
WhiteBox<::L0::KernelImmutableData> &kernelData,
L0::Device *device) {
uint32_t isaSize = 4096;
void *isaBuffer = malloc(isaSize);
isaBuffers.push_back(isaBuffer);
functionData.device = device;
kernelData.device = device;
if (!buffer) {
buffer = alignedMalloc(isaSize, 64);
}
@ -70,22 +70,22 @@ class CommandListCreateGen9 : public DeviceFixture, public testing::Test {
if (isaBuffer != nullptr) {
memcpy_s(allocation->getUnderlyingBuffer(), allocation->getUnderlyingBufferSize(), isaBuffer, isaSize);
}
functionData.isaGraphicsAllocation.reset(allocation);
kernelData.isaGraphicsAllocation.reset(allocation);
uint32_t crossThreadDataSize = 128;
function.crossThreadData.reset(new uint8_t[crossThreadDataSize]);
function.crossThreadDataSize = crossThreadDataSize;
kernel.crossThreadData.reset(new uint8_t[crossThreadDataSize]);
kernel.crossThreadDataSize = crossThreadDataSize;
uint32_t perThreadDataSize = 128;
function.perThreadDataForWholeThreadGroup = static_cast<uint8_t *>(alignedMalloc(perThreadDataSize, 32));
function.perThreadDataSize = perThreadDataSize;
kernel.perThreadDataForWholeThreadGroup = static_cast<uint8_t *>(alignedMalloc(perThreadDataSize, 32));
kernel.perThreadDataSize = perThreadDataSize;
function.kernelImmData = &functionData;
kernel.kernelImmData = &kernelData;
}
// Test helper: resets the isaGraphicsAllocation member of the given
// immutable data to nullptr.
// NOTE(review): old (cleanupFunction / functionData) and new
// (cleanupKernel / kernelData) spellings are both present.
void cleanupFunction(WhiteBox<::L0::KernelImmutableData> &functionData) {
functionData.isaGraphicsAllocation.reset(nullptr);
void cleanupKernel(WhiteBox<::L0::KernelImmutableData> &kernelData) {
kernelData.isaGraphicsAllocation.reset(nullptr);
}
};
@ -102,99 +102,99 @@ GEN9TEST_F(CommandListCreateGen9, WhenGettingCommandListPreemptionModeThenMatche
}
// Checks that appending a kernel whose descriptor sets
// requiresDisabledMidThreadPreemption moves the command list's preemption mode
// from MidThread to ThreadGroup, and that the mode is still ThreadGroup after
// close().
// NOTE(review): each renamed statement appears twice - the old "func*/function*"
// spelling followed by the new "kernel*" spelling from this commit.
GEN9TEST_F(CommandListCreateGen9, GivenDisabledMidThreadPreemptionWhenLaunchingKernelThenThreadGroupModeSet) {
WhiteBox<::L0::KernelImmutableData> funcInfoThreadGroupData = {};
WhiteBox<::L0::KernelImmutableData> kernelInfoThreadGroupData = {};
NEO::KernelDescriptor kernelDescriptor;
funcInfoThreadGroupData.kernelDescriptor = &kernelDescriptor;
WhiteBox<::L0::Kernel> functionThreadGroup;
kernelInfoThreadGroupData.kernelDescriptor = &kernelDescriptor;
WhiteBox<::L0::Kernel> kernelThreadGroup;
// The flag under test is set on the descriptor before the kernel is initialized.
funcInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
kernelInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
initializeFunction(functionThreadGroup, funcInfoThreadGroupData, device);
initializeKernel(kernelThreadGroup, kernelInfoThreadGroupData, device);
ze_result_t returnValue;
auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
// Precondition: a freshly created list is expected to report MidThread.
EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode());
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(functionThreadGroup.toHandle(),
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernelThreadGroup.toHandle(),
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
// Closing the list must not change the mode.
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
cleanupFunction(funcInfoThreadGroupData);
cleanupKernel(kernelInfoThreadGroupData);
// The command list is a raw pointer here and is released manually.
delete commandList;
}
// Checks that appending a kernel whose descriptor sets both
// requiresDisabledMidThreadPreemption and usesFencesForReadWriteImages, with the
// waDisableLSQCROPERFforOCL workaround flag enabled on the device's hardware
// info, moves the command list's preemption mode from MidThread to MidBatch,
// and that the mode is still MidBatch after close().
// NOTE(review): each renamed statement appears twice - the old "func*/function*"
// spelling followed by the new "kernel*" spelling from this commit.
GEN9TEST_F(CommandListCreateGen9, GivenUsesFencesForReadWriteImagesWhenLaunchingKernelThenMidBatchModeSet) {
WhiteBox<::L0::KernelImmutableData> funcInfoMidBatchData = {};
WhiteBox<::L0::KernelImmutableData> kernelInfoMidBatchData = {};
NEO::KernelDescriptor kernelDescriptor;
funcInfoMidBatchData.kernelDescriptor = &kernelDescriptor;
WhiteBox<::L0::Kernel> functionMidBatch;
kernelInfoMidBatchData.kernelDescriptor = &kernelDescriptor;
WhiteBox<::L0::Kernel> kernelMidBatch;
funcInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
funcInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.usesFencesForReadWriteImages = 1;
kernelInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
kernelInfoMidBatchData.kernelDescriptor->kernelAttributes.flags.usesFencesForReadWriteImages = 1;
// The workaround flag is written into the device's mutable hardware info for this test.
device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->workaroundTable.flags.waDisableLSQCROPERFforOCL = true;
initializeFunction(functionMidBatch, funcInfoMidBatchData, device);
initializeKernel(kernelMidBatch, kernelInfoMidBatchData, device);
ze_result_t returnValue;
auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
// Precondition: a freshly created list is expected to report MidThread.
EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode());
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(functionMidBatch.toHandle(),
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernelMidBatch.toHandle(),
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(NEO::PreemptionMode::MidBatch, commandList->getCommandListPreemptionMode());
auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
// Closing the list must not change the mode.
EXPECT_EQ(NEO::PreemptionMode::MidBatch, commandList->getCommandListPreemptionMode());
cleanupFunction(funcInfoMidBatchData);
cleanupKernel(kernelInfoMidBatchData);
delete commandList;
}
// Checks that the command list's preemption mode is not raised again once it
// has been lowered: a first kernel with requiresDisabledMidThreadPreemption set
// brings the list to ThreadGroup, and appending a second kernel whose descriptor
// flags are left untouched by this test keeps it at ThreadGroup, including
// after close().
// NOTE(review): each renamed statement appears twice - the old "func*/function*"
// spelling followed by the new "kernel*" spelling from this commit.
GEN9TEST_F(CommandListCreateGen9, WhenCommandListHasLowerPreemptionLevelThenDoNotIncreaseAgain) {
// First kernel: descriptor requests disabled mid-thread preemption.
WhiteBox<::L0::KernelImmutableData> funcInfoThreadGroupData = {};
WhiteBox<::L0::KernelImmutableData> kernelInfoThreadGroupData = {};
NEO::KernelDescriptor kernelDescriptor;
funcInfoThreadGroupData.kernelDescriptor = &kernelDescriptor;
WhiteBox<::L0::Kernel> functionThreadGroup;
kernelInfoThreadGroupData.kernelDescriptor = &kernelDescriptor;
WhiteBox<::L0::Kernel> kernelThreadGroup;
funcInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
kernelInfoThreadGroupData.kernelDescriptor->kernelAttributes.flags.requiresDisabledMidThreadPreemption = 1;
initializeFunction(functionThreadGroup, funcInfoThreadGroupData, device);
initializeKernel(kernelThreadGroup, kernelInfoThreadGroupData, device);
// Second kernel: uses a separate descriptor (kernelDescriptor2) on which no flag is set here.
WhiteBox<::L0::KernelImmutableData> funcInfoMidThreadData = {};
WhiteBox<::L0::KernelImmutableData> kernelInfoMidThreadData = {};
NEO::KernelDescriptor kernelDescriptor2;
funcInfoMidThreadData.kernelDescriptor = &kernelDescriptor2;
kernelInfoMidThreadData.kernelDescriptor = &kernelDescriptor2;
WhiteBox<::L0::Kernel> functionMidThread;
WhiteBox<::L0::Kernel> kernelMidThread;
initializeFunction(functionMidThread, funcInfoMidThreadData, device);
initializeKernel(kernelMidThread, kernelInfoMidThreadData, device);
ze_result_t returnValue;
auto commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
// Precondition: a freshly created list is expected to report MidThread.
EXPECT_EQ(NEO::PreemptionMode::MidThread, commandList->getCommandListPreemptionMode());
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(functionThreadGroup.toHandle(),
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernelThreadGroup.toHandle(),
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
// Appending the second kernel must leave the already-lowered mode in place.
commandList->appendLaunchKernel(functionMidThread.toHandle(),
&dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernelMidThread.toHandle(),
&dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(NEO::PreemptionMode::ThreadGroup, commandList->getCommandListPreemptionMode());
cleanupFunction(funcInfoThreadGroupData);
cleanupFunction(funcInfoMidThreadData);
cleanupKernel(kernelInfoThreadGroupData);
cleanupKernel(kernelInfoMidThreadData);
delete commandList;
}
} // namespace ult

View File

@ -81,23 +81,23 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
}
// Test hook: stores the event handle passed by the caller in
// appendEventMultipleKernelIndirectEventHandleValue, then forwards every
// argument unchanged to the base-class implementation and returns its result.
ze_result_t appendLaunchMultipleKernelsIndirect(uint32_t numKernels,
                                                const ze_kernel_handle_t *kernelHandles,
                                                const uint32_t *pNumLaunchArguments,
                                                const ze_group_count_t *pLaunchArgumentsBuffer,
                                                ze_event_handle_t hEvent,
                                                uint32_t numWaitEvents,
                                                ze_event_handle_t *phWaitEvents) override {
    appendEventMultipleKernelIndirectEventHandleValue = hEvent;
    return BaseClass::appendLaunchMultipleKernelsIndirect(numKernels, kernelHandles, pNumLaunchArguments, pLaunchArgumentsBuffer,
                                                          hEvent, numWaitEvents, phWaitEvents);
}
// Test hook: stores the event handle passed by the caller in
// appendEventKernelIndirectEventHandleValue, then forwards every argument
// unchanged to the base-class implementation and returns its result.
ze_result_t appendLaunchKernelIndirect(ze_kernel_handle_t kernelHandle,
                                       const ze_group_count_t *pDispatchArgumentsBuffer,
                                       ze_event_handle_t hEvent, uint32_t numWaitEvents,
                                       ze_event_handle_t *phWaitEvents) override {
    appendEventKernelIndirectEventHandleValue = hEvent;
    return BaseClass::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
                                                 hEvent, numWaitEvents, phWaitEvents);
}
@ -163,29 +163,29 @@ struct MockCommandList : public CommandList {
// Mocked CommandList entry points. ADDMETHOD_NOBASE is defined outside this file;
// NOTE(review): presumably each invocation generates an override that returns the
// given default (ZE_RESULT_SUCCESS here) without calling a base class - confirm.
ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ());
ADDMETHOD_NOBASE(appendLaunchKernel, ze_result_t, ZE_RESULT_SUCCESS,
                 (ze_kernel_handle_t kernelHandle,
                  const ze_group_count_t *threadGroupDimensions,
                  ze_event_handle_t hEvent, uint32_t numWaitEvents,
                  ze_event_handle_t *phWaitEvents,
                  const CmdListKernelLaunchParams &launchParams));
ADDMETHOD_NOBASE(appendLaunchCooperativeKernel, ze_result_t, ZE_RESULT_SUCCESS,
                 (ze_kernel_handle_t kernelHandle,
                  const ze_group_count_t *launchKernelArgs,
                  ze_event_handle_t hSignalEvent,
                  uint32_t numWaitEvents,
                  ze_event_handle_t *phWaitEvents));
ADDMETHOD_NOBASE(appendLaunchKernelIndirect, ze_result_t, ZE_RESULT_SUCCESS,
                 (ze_kernel_handle_t kernelHandle,
                  const ze_group_count_t *pDispatchArgumentsBuffer,
                  ze_event_handle_t hEvent,
                  uint32_t numWaitEvents,
                  ze_event_handle_t *phWaitEvents));
ADDMETHOD_NOBASE(appendLaunchMultipleKernelsIndirect, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t numFunctions,
const ze_kernel_handle_t *phFunctions,
(uint32_t numKernels,
const ze_kernel_handle_t *kernelHandles,
const uint32_t *pNumLaunchArguments,
const ze_group_count_t *pLaunchArgumentsBuffer,
ze_event_handle_t hEvent,
@ -505,14 +505,14 @@ class MockCommandListForAppendLaunchKernel : public WhiteBox<::L0::CommandListCo
public:
CmdListHelper cmdListHelper;
ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel,
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents,
const CmdListKernelLaunchParams &launchParams) override {
const auto kernel = Kernel::fromHandle(hKernel);
const auto kernel = Kernel::fromHandle(kernelHandle);
cmdListHelper.isaAllocation = kernel->getIsaAllocation();
cmdListHelper.residencyContainer = kernel->getResidencyContainer();
cmdListHelper.groupSize = kernel->getGroupSize();

View File

@ -28,7 +28,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using BaseClass::csr;
using BaseClass::device;
using BaseClass::preemptionCmdSyncProgramming;
using BaseClass::printfFunctionContainer;
using BaseClass::printfKernelContainer;
using BaseClass::submitBatchBuffer;
using BaseClass::synchronizeByPollingForTaskCount;
using BaseClass::taskCount;
@ -62,7 +62,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>;
using BaseClass::commandStream;
using BaseClass::prepareAndSubmitBatchBuffer;
using BaseClass::printfFunctionContainer;
using BaseClass::printfKernelContainer;
using L0::CommandQueue::activeSubDevices;
using L0::CommandQueue::internalUsage;
using L0::CommandQueue::multiReturnPointCommandList;

View File

@ -20,9 +20,9 @@ struct MockDeviceForRebuildBuilins : public Mock<DeviceImp> {
MockModuleForRebuildBuiltins(Device *device, ModuleType type) : ModuleImp(device, nullptr, type) {}
// Mock override: ignores desc, writes a newly allocated Mock<Kernel> to
// *kernelHandle and reports success.
// NOTE(review): the allocation is not freed here; presumably the caller
// destroys the returned kernel - confirm.
ze_result_t createKernel(const ze_kernel_desc_t *desc,
                         ze_kernel_handle_t *kernelHandle) override {
    *kernelHandle = new Mock<Kernel>();
    return ZE_RESULT_SUCCESS;
}
};

View File

@ -43,11 +43,11 @@ struct Mock<Module> : public Module {
// Constructs the mock module by forwarding to WhiteBox with an explicit module type.
Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type) : WhiteBox(device, moduleBuildLog, type) {}
// Convenience overload: delegates with ModuleType::User.
Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog) : Mock(device, moduleBuildLog, ModuleType::User){};

// Mocked Module entry points. ADDMETHOD_NOBASE / ADDMETHOD_CONST_NOBASE are defined
// outside this file; NOTE(review): presumably each generates an override returning
// the listed default value - confirm.
ADDMETHOD_NOBASE(createKernel, ze_result_t, ZE_RESULT_SUCCESS, (const ze_kernel_desc_t *desc, ze_kernel_handle_t *kernelHandle));
ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ());
ADDMETHOD_NOBASE(getFunctionPointer, ze_result_t, ZE_RESULT_SUCCESS, (const char *pKernelName, void **pfnFunction));
ADDMETHOD_NOBASE(getNativeBinary, ze_result_t, ZE_RESULT_SUCCESS, (size_t * pSize, uint8_t *pModuleNativeBinary));
ADDMETHOD_CONST_NOBASE(getKernelImmutableData, const L0::KernelImmutableData *, nullptr, (const char *kernelName));
ADDMETHOD_CONST_NOBASE(getMaxGroupSize, uint32_t, 256, ());
ADDMETHOD_NOBASE(getKernelNames, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, const char **pNames));
ADDMETHOD_NOBASE(performDynamicLink, ze_result_t, ZE_RESULT_SUCCESS,
@ -91,7 +91,7 @@ struct MockModule : public L0::ModuleImp {
~MockModule() override = default;
// Mock override: returns the stored kernelImmData pointer for any kernel name;
// the kernelName argument is not inspected.
const KernelImmutableData *getKernelImmutableData(const char *kernelName) const override {
    return kernelImmData;
}

View File

@ -200,8 +200,8 @@ HWTEST_F(TestBuiltinFunctionsLibImplDefault, givenRebuildPrecompiledKernelsDebug
MockModuleForRebuildBuiltins(Device *device) : ModuleImp(device, nullptr, ModuleType::Builtin) {}
// Mock override: ignores desc, writes nullptr to *kernelHandle and reports success,
// so no kernel object is created.
ze_result_t createKernel(const ze_kernel_desc_t *desc,
                         ze_kernel_handle_t *kernelHandle) override {
    *kernelHandle = nullptr;
    return ZE_RESULT_SUCCESS;
}
};

View File

@ -262,14 +262,14 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenSignalEventWhenAppendLaunchMultipl
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
const ze_kernel_handle_t launchFn = kernel->toHandle();
const ze_kernel_handle_t launchKernels = kernel->toHandle();
uint32_t *numLaunchArgs;
ze_device_mem_alloc_desc_t deviceDesc = {};
returnValue = context->allocDeviceMem(
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = commandList->appendLaunchMultipleKernelsIndirect(1, &launchFn, numLaunchArgs, nullptr, event->toHandle(), 0, nullptr);
returnValue = commandList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, numLaunchArgs, nullptr, event->toHandle(), 0, nullptr);
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_EQ(event->toHandle(), commandList->appendEventMultipleKernelIndirectEventHandleValue);

View File

@ -72,11 +72,11 @@ TEST(zeCommandListAppendWriteGlobalTimestamp, whenCalledThenRedirectedToObject)
// Calls the C API entry point with handles of a mock command list and a mock kernel
// and expects the call to succeed.
TEST(zeCommandListAppendLaunchKernel, whenCalledThenRedirectedToObject) {
    MockCommandList commandList;
    Mock<::L0::Kernel> kernel;
    ze_group_count_t dispatchKernelArguments;

    auto result =
        zeCommandListAppendLaunchKernel(commandList.toHandle(), kernel.toHandle(),
                                        &dispatchKernelArguments, nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
TEST(zeCommandListAppendEventReset, whenCalledThenRedirectedToObject) {

View File

@ -193,8 +193,8 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size());
EXPECT_EQ(kernel.get(), commandList->getPrintfFunctionContainer()[0]);
EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
EXPECT_EQ(kernel.get(), commandList->getPrintfKernelContainer()[0]);
}
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListMultipleTimesThenKernelIsStoredOnce) {
@ -208,12 +208,12 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size());
EXPECT_EQ(kernel.get(), commandList->getPrintfFunctionContainer()[0]);
EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
EXPECT_EQ(kernel.get(), commandList->getPrintfKernelContainer()[0]);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->getPrintfFunctionContainer().size());
EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
}
HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) {
@ -617,8 +617,8 @@ HWTEST_F(CommandListAppendLaunchKernel, givenCommandListWhenResetCalledThenState
commandList->commandContainer.getResidencyContainer().size());
ASSERT_EQ(commandListControl->commandContainer.getDeallocationContainer().size(),
commandList->commandContainer.getDeallocationContainer().size());
ASSERT_EQ(commandListControl->getPrintfFunctionContainer().size(),
commandList->getPrintfFunctionContainer().size());
ASSERT_EQ(commandListControl->getPrintfKernelContainer().size(),
commandList->getPrintfKernelContainer().size());
ASSERT_EQ(commandListControl->commandContainer.getCommandStream()->getUsed(), commandList->commandContainer.getCommandStream()->getUsed());
ASSERT_EQ(commandListControl->commandContainer.slmSize, commandList->commandContainer.slmSize);

View File

@ -1276,9 +1276,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
}
using MultiTileCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileCommandListAppendLaunchFunctionFixture>;
using MultiTileCommandListAppendLaunchKernelXeHpCoreTest = Test<MultiTileCommandListAppendLaunchKernelFixture>;
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingEnabledWhenAppendingKernelWithEventThenAllEventPacketsAreUsed) {
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchKernelXeHpCoreTest, givenImplicitScalingEnabledWhenAppendingKernelWithEventThenAllEventPacketsAreUsed) {
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
@ -1307,7 +1307,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTes
EXPECT_EQ(4u, commandList->partitionCount);
}
HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperativeKernelWhenAppendingKernelsThenDoNotUseImplicitScaling, IsAtLeastXeHpCore) {
HWTEST2_F(MultiTileCommandListAppendLaunchKernelXeHpCoreTest, givenCooperativeKernelWhenAppendingKernelsThenDoNotUseImplicitScaling, IsAtLeastXeHpCore) {
ze_group_count_t groupCount{1, 1, 1};
auto commandListWithNonCooperativeKernel = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
@ -1342,7 +1342,7 @@ HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperative
EXPECT_TRUE(cmd->getWorkloadPartitionEnable());
}
HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest,
HWTEST2_F(MultiTileCommandListAppendLaunchKernelXeHpCoreTest,
givenRegularCommandListWhenSynchronizationRequiredThenExpectJumpingBbStartCommandToSecondary, IsAtLeastXeHpCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

View File

@ -128,12 +128,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMult
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
ze_result_t returnValue;
auto commandList = std::unique_ptr<L0::CommandList>(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
const ze_kernel_handle_t launchFn = kernel->toHandle();
const ze_kernel_handle_t launchKernels = kernel->toHandle();
uint32_t *numLaunchArgs;
ze_device_mem_alloc_desc_t deviceDesc = {};
auto result = context->allocDeviceMem(
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
result = commandList->appendLaunchMultipleKernelsIndirect(1, &launchFn, numLaunchArgs, nullptr, nullptr, 0, nullptr);
result = commandList->appendLaunchMultipleKernelsIndirect(1, &launchKernels, numLaunchArgs, nullptr, nullptr, 0, nullptr);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
*numLaunchArgs = 0;
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@ -156,13 +156,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenAppendLaunchMult
using MI_MATH = typename FamilyType::MI_MATH;
ze_result_t returnValue;
auto commandList = std::unique_ptr<L0::CommandList>(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
const ze_kernel_handle_t launchFn[3] = {kernel->toHandle(), kernel->toHandle(), kernel->toHandle()};
const ze_kernel_handle_t launchKernels[3] = {kernel->toHandle(), kernel->toHandle(), kernel->toHandle()};
uint32_t *numLaunchArgs;
const uint32_t numKernels = 3;
ze_device_mem_alloc_desc_t deviceDesc = {};
auto result = context->allocDeviceMem(
device->toHandle(), &deviceDesc, 16384u, 4096u, reinterpret_cast<void **>(&numLaunchArgs));
result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchFn, numLaunchArgs, nullptr, nullptr, 0, nullptr);
result = commandList->appendLaunchMultipleKernelsIndirect(numKernels, launchKernels, numLaunchArgs, nullptr, nullptr, 0, nullptr);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
*numLaunchArgs = 2;
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
@ -710,9 +710,9 @@ HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWit
context->freeMem(alloc);
}
using MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileImmediateCommandListAppendLaunchFunctionFixture>;
using MultiTileImmediateCommandListAppendLaunchKernelXeHpCoreTest = Test<MultiTileImmediateCommandListAppendLaunchKernelFixture>;
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListThenDoNotAddSelfCleanup, IsAtLeastXeHpCore) {
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchKernelXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListThenDoNotAddSelfCleanup, IsAtLeastXeHpCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
@ -775,7 +775,7 @@ HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenIm
EXPECT_EQ(cmdList.end(), itorSemaphoreWait);
}
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListWithoutFlushTaskThenUseSecondaryBuffer, IsAtLeastXeHpCore) {
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchKernelXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListWithoutFlushTaskThenUseSecondaryBuffer, IsAtLeastXeHpCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

View File

@ -63,7 +63,7 @@ TEST_F(CommandQueueCreate, whenCreatingCommandQueueThenItIsInitialized) {
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
}
TEST_F(CommandQueueCreate, whenSynchronizeByPollingTaskCountThenCallsPrintOutputOnPrintfFunctionsStoredAndClearsFunctionContainer) {
TEST_F(CommandQueueCreate, whenSynchronizeByPollingTaskCountThenCallsPrintOutputOnPrintfKernelsStoredAndClearsKernelContainer) {
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
@ -76,12 +76,12 @@ TEST_F(CommandQueueCreate, whenSynchronizeByPollingTaskCountThenCallsPrintOutput
Mock<Kernel> kernel1, kernel2;
commandQueue->printfFunctionContainer.push_back(&kernel1);
commandQueue->printfFunctionContainer.push_back(&kernel2);
commandQueue->printfKernelContainer.push_back(&kernel1);
commandQueue->printfKernelContainer.push_back(&kernel2);
commandQueue->synchronizeByPollingForTaskCount(0u);
EXPECT_EQ(0u, commandQueue->printfFunctionContainer.size());
EXPECT_EQ(0u, commandQueue->printfKernelContainer.size());
EXPECT_EQ(1u, kernel1.printPrintfOutputCalledTimes);
EXPECT_EQ(1u, kernel2.printPrintfOutputCalledTimes);
@ -255,9 +255,9 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
ASSERT_NE(nullptr, commandList);
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
const size_t numHandles = 100;
ze_command_list_handle_t cmdListHandles[numHandles];
@ -305,9 +305,9 @@ HWTEST_F(CommandQueueCreate, givenLogicalStateHelperWhenExecutingThenMergeStates
auto commandList = std::unique_ptr<L0::ult::CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
commandList->nonImmediateLogicalStateHelper.reset(mockCmdListLogicalStateHelper);
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
ze_command_list_handle_t cmdListHandles = commandList->toHandle();
@ -341,9 +341,9 @@ HWTEST_F(CommandQueueCreate, givenLogicalStateHelperAndImmediateCmdListWhenExecu
auto commandList = std::unique_ptr<L0::ult::CommandList>(whiteboxCast(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)));
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
ze_command_list_handle_t cmdListHandles = commandList->toHandle();
@ -367,9 +367,9 @@ HWTEST2_F(CommandQueueCreate, givenGpuHangInReservingLinearStreamWhenExecutingCo
auto commandList = std::unique_ptr<CommandList>(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
ASSERT_NE(nullptr, commandList);
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
ze_command_list_handle_t cmdListHandles[1] = {commandList->toHandle()};

View File

@ -745,9 +745,9 @@ HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAnd
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
kernel.residencyContainer.push_back(gpuAlloc);
ze_group_count_t dispatchFunctionArguments{1, 1, 1};
ze_group_count_t dispatchKernelArguments{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr, launchParams);
commandList->appendLaunchKernel(kernel.toHandle(), &dispatchKernelArguments, nullptr, 0, nullptr, launchParams);
auto deviceImp = static_cast<DeviceImp *>(device);
auto pageFaultCmdQueue = whiteboxCast(deviceImp->pageFaultCommandList->cmdQImmediate);

View File

@ -6,7 +6,7 @@
# Sources for the kernel unit tests. test_kernel.cpp replaces the former
# test_function.cpp entry, following the function -> kernel rename.
target_sources(${TARGET_NAME} PRIVATE
               ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
               ${CMAKE_CURRENT_SOURCE_DIR}/test_kernel.cpp
               ${CMAKE_CURRENT_SOURCE_DIR}/test_kernel_2.cpp
               ${CMAKE_CURRENT_SOURCE_DIR}/test_sampler_patch_value.cpp
)

View File

@ -20,26 +20,26 @@ TEST_F(KernelImp, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSizeAn
uint32_t *crossThreadData =
reinterpret_cast<uint32_t *>(alignedMalloc(sizeof(uint32_t[6]), 32));
WhiteBox<::L0::KernelImmutableData> funcInfo = {};
WhiteBox<::L0::KernelImmutableData> kernelInfo = {};
NEO::KernelDescriptor descriptor;
funcInfo.kernelDescriptor = &descriptor;
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[0] = 0 * sizeof(uint32_t);
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[1] = 1 * sizeof(uint32_t);
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[2] = 2 * sizeof(uint32_t);
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[0] = 3 * sizeof(uint32_t);
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[1] = 4 * sizeof(uint32_t);
funcInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[2] = 5 * sizeof(uint32_t);
kernelInfo.kernelDescriptor = &descriptor;
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[0] = 0 * sizeof(uint32_t);
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[1] = 1 * sizeof(uint32_t);
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.globalWorkSize[2] = 2 * sizeof(uint32_t);
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[0] = 3 * sizeof(uint32_t);
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[1] = 4 * sizeof(uint32_t);
kernelInfo.kernelDescriptor->payloadMappings.dispatchTraits.numWorkGroups[2] = 5 * sizeof(uint32_t);
Mock<Kernel> function;
function.kernelImmData = &funcInfo;
function.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData));
function.crossThreadDataSize = sizeof(uint32_t[6]);
function.groupSize[0] = 2;
function.groupSize[1] = 3;
function.groupSize[2] = 5;
Mock<Kernel> kernel;
kernel.kernelImmData = &kernelInfo;
kernel.crossThreadData.reset(reinterpret_cast<uint8_t *>(crossThreadData));
kernel.crossThreadDataSize = sizeof(uint32_t[6]);
kernel.groupSize[0] = 2;
kernel.groupSize[1] = 3;
kernel.groupSize[2] = 5;
function.KernelImp::setGroupCount(7, 11, 13);
auto crossThread = function.KernelImp::getCrossThreadData();
kernel.KernelImp::setGroupCount(7, 11, 13);
auto crossThread = kernel.KernelImp::getCrossThreadData();
ASSERT_NE(nullptr, crossThread);
const uint32_t *globalWorkSizes = reinterpret_cast<const uint32_t *>(crossThread);
EXPECT_EQ(2U * 7U, globalWorkSizes[0]);
@ -51,30 +51,30 @@ TEST_F(KernelImp, GivenCrossThreadDataThenIsCorrectlyPatchedWithGlobalWorkSizeAn
EXPECT_EQ(11U, numGroups[1]);
EXPECT_EQ(13U, numGroups[2]);
function.crossThreadData.release();
kernel.crossThreadData.release();
alignedFree(crossThreadData);
}
// For SIMD sizes 1, 8, 16 and 32, sets a group size of (simd, 1, 1) and checks the
// thread execution mask: maxNBitValue(32) for SIMD 1, maxNBitValue(simd) otherwise.
TEST_F(KernelImp, givenExecutionMaskWithoutReminderWhenProgrammingItsValueThenSetValidNumberOfBits) {
    NEO::KernelDescriptor descriptor = {};
    WhiteBox<KernelImmutableData> kernelInfo = {};
    kernelInfo.kernelDescriptor = &descriptor;

    Mock<Module> module(device, nullptr);
    Mock<Kernel> kernel;
    kernel.kernelImmData = &kernelInfo;
    kernel.module = &module;

    const std::array<uint32_t, 4> testedSimd = {{1, 8, 16, 32}};

    for (auto simd : testedSimd) {
        // The descriptor is shared with the kernel through kernelInfo, so the SIMD
        // size must be updated before setGroupSize is called.
        descriptor.kernelAttributes.simdSize = simd;
        kernel.KernelImp::setGroupSize(simd, 1, 1);

        if (simd == 1) {
            EXPECT_EQ(maxNBitValue(32), kernel.KernelImp::getThreadExecutionMask());
        } else {
            EXPECT_EQ(maxNBitValue(simd), kernel.KernelImp::getThreadExecutionMask());
        }
    }
}
@ -82,20 +82,20 @@ TEST_F(KernelImp, givenExecutionMaskWithoutReminderWhenProgrammingItsValueThenSe
TEST_F(KernelImp, WhenSuggestingGroupSizeThenClampToMaxGroupSize) {
DebugManagerStateRestore restorer;
WhiteBox<KernelImmutableData> funcInfo = {};
WhiteBox<KernelImmutableData> kernelInfo = {};
NEO::KernelDescriptor descriptor;
funcInfo.kernelDescriptor = &descriptor;
kernelInfo.kernelDescriptor = &descriptor;
NEO::DebugManager.flags.EnableComputeWorkSizeND.set(false);
Mock<Module> module(device, nullptr);
module.getMaxGroupSizeResult = 8;
Mock<Kernel> function;
function.kernelImmData = &funcInfo;
function.module = &module;
Mock<Kernel> kernel;
kernel.kernelImmData = &kernelInfo;
kernel.module = &module;
uint32_t groupSize[3];
function.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2);
kernel.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(8U, groupSize[0]);
EXPECT_EQ(1U, groupSize[1]);
EXPECT_EQ(1U, groupSize[2]);
@ -118,9 +118,9 @@ INSTANTIATE_TEST_CASE_P(, KernelImpSuggestGroupSize,
TEST_P(KernelImpSuggestGroupSize, WhenSuggestingGroupThenProperGroupSizeChosen) {
DebugManagerStateRestore restorer;
WhiteBox<KernelImmutableData> funcInfo = {};
WhiteBox<KernelImmutableData> kernelInfo = {};
NEO::KernelDescriptor descriptor;
funcInfo.kernelDescriptor = &descriptor;
kernelInfo.kernelDescriptor = &descriptor;
NEO::DebugManager.flags.EnableComputeWorkSizeND.set(false);
@ -128,105 +128,105 @@ TEST_P(KernelImpSuggestGroupSize, WhenSuggestingGroupThenProperGroupSizeChosen)
uint32_t size = GetParam();
Mock<Kernel> function;
function.kernelImmData = &funcInfo;
function.module = &module;
Mock<Kernel> kernel;
kernel.kernelImmData = &kernelInfo;
kernel.module = &module;
uint32_t groupSize[3];
function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2);
kernel.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, size % groupSize[0]);
EXPECT_EQ(0U, 1U % groupSize[1]);
EXPECT_EQ(0U, 1U % groupSize[2]);
function.KernelImp::suggestGroupSize(size, size, 1, groupSize, groupSize + 1, groupSize + 2);
kernel.KernelImp::suggestGroupSize(size, size, 1, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, size % groupSize[0]);
EXPECT_EQ(0U, size % groupSize[1]);
EXPECT_EQ(0U, 1U % groupSize[2]);
function.KernelImp::suggestGroupSize(size, size, size, groupSize, groupSize + 1,
groupSize + 2);
kernel.KernelImp::suggestGroupSize(size, size, size, groupSize, groupSize + 1,
groupSize + 2);
EXPECT_EQ(0U, size % groupSize[0]);
EXPECT_EQ(0U, size % groupSize[1]);
EXPECT_EQ(0U, size % groupSize[2]);
function.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2);
kernel.KernelImp::suggestGroupSize(size, 1, 1, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, size % groupSize[0]);
EXPECT_EQ(0U, 1U % groupSize[1]);
EXPECT_EQ(0U, 1U % groupSize[2]);
function.KernelImp::suggestGroupSize(1, size, 1, groupSize, groupSize + 1, groupSize + 2);
kernel.KernelImp::suggestGroupSize(1, size, 1, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, 1U % groupSize[0]);
EXPECT_EQ(0U, size % groupSize[1]);
EXPECT_EQ(0U, 1U % groupSize[2]);
function.KernelImp::suggestGroupSize(1, 1, size, groupSize, groupSize + 1, groupSize + 2);
kernel.KernelImp::suggestGroupSize(1, 1, size, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, 1U % groupSize[0]);
EXPECT_EQ(0U, 1U % groupSize[1]);
EXPECT_EQ(0U, size % groupSize[2]);
function.KernelImp::suggestGroupSize(1, size, size, groupSize, groupSize + 1, groupSize + 2);
kernel.KernelImp::suggestGroupSize(1, size, size, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, 1U % groupSize[0]);
EXPECT_EQ(0U, size % groupSize[1]);
EXPECT_EQ(0U, size % groupSize[2]);
function.KernelImp::suggestGroupSize(size, 1, size, groupSize, groupSize + 1, groupSize + 2);
kernel.KernelImp::suggestGroupSize(size, 1, size, groupSize, groupSize + 1, groupSize + 2);
EXPECT_EQ(0U, size % groupSize[0]);
EXPECT_EQ(0U, 1U % groupSize[1]);
EXPECT_EQ(0U, size % groupSize[2]);
}
// A zero in any of the three group-size dimensions must be rejected with
// ZE_RESULT_ERROR_INVALID_ARGUMENT.
TEST_F(KernelImp, GivenInvalidValuesWhenSettingGroupSizeThenInvalidArgumentErrorIsReturned) {
    Mock<Kernel> kernel;
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, kernel.KernelImp::setGroupSize(0U, 1U, 1U));
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, kernel.KernelImp::setGroupSize(1U, 0U, 1U));
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, kernel.KernelImp::setGroupSize(1U, 1U, 0U));
}
// Expects ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION when all three per-dimension
// maxima reported by the device are requested together, and when any single
// dimension exceeds its maximum by one.
TEST_F(KernelImp, givenSetGroupSizeWithGreaterGroupSizeThanAllowedThenCorrectErrorCodeIsReturned) {
    WhiteBox<KernelImmutableData> kernelInfo = {};
    NEO::KernelDescriptor descriptor;
    kernelInfo.kernelDescriptor = &descriptor;

    Mock<Module> module(device, nullptr);
    Mock<Kernel> kernel;
    kernel.kernelImmData = &kernelInfo;
    kernel.module = &module;

    // Per-dimension limits come from the device info of the fixture's device.
    uint32_t maxGroupSizeX = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[0]);
    uint32_t maxGroupSizeY = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[1]);
    uint32_t maxGroupSizeZ = static_cast<uint32_t>(device->getDeviceInfo().maxWorkItemSizes[2]);

    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(maxGroupSizeX, maxGroupSizeY, maxGroupSizeZ));
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(maxGroupSizeX + 1U, 1U, 1U));
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(1U, maxGroupSizeY + 1U, 1U));
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, kernel.KernelImp::setGroupSize(1U, 1U, maxGroupSizeZ + 1U));
}
// NOTE(review): this span is a rendered diff hunk, so every renamed statement
// appears twice: the pre-rename form (`function` / `funcInfo`) immediately
// followed by its post-rename form (`kernel` / `kernelInfo`).
//
// Sets the group size twice on a mock kernel (16x16x1, then 8x32x1), snapshots
// the per-thread data after each call, and expects both snapshots to be equal.
// NOTE(review): going by the test name, the default-constructed
// KernelDescriptor presumably reports zero local-id channels - confirm against
// the KernelDescriptor definition.
TEST_F(KernelImp, GivenNumChannelsZeroWhenSettingGroupSizeThenLocalIdsNotGenerated) {
WhiteBox<KernelImmutableData> funcInfo = {};
WhiteBox<KernelImmutableData> kernelInfo = {};
NEO::KernelDescriptor descriptor;
funcInfo.kernelDescriptor = &descriptor;
kernelInfo.kernelDescriptor = &descriptor;
Mock<Module> module(device, nullptr);
Mock<Kernel> function;
function.kernelImmData = &funcInfo;
function.module = &module;
Mock<Kernel> kernel;
kernel.kernelImmData = &kernelInfo;
kernel.module = &module;
// First group-size configuration; the qualified call targets KernelImp::setGroupSize directly.
function.KernelImp::setGroupSize(16U, 16U, 1U);
kernel.KernelImp::setGroupSize(16U, 16U, 1U);
// Copy the per-thread data bytes as they are after the first setGroupSize call.
std::vector<char> memBefore;
{
auto perThreadData =
reinterpret_cast<const char *>(function.KernelImp::getPerThreadData());
reinterpret_cast<const char *>(kernel.KernelImp::getPerThreadData());
memBefore.assign(perThreadData,
perThreadData + function.KernelImp::getPerThreadDataSize());
perThreadData + kernel.KernelImp::getPerThreadDataSize());
}
// Second configuration with different X/Y dimensions (8x32x1 instead of 16x16x1).
function.KernelImp::setGroupSize(8U, 32U, 1U);
kernel.KernelImp::setGroupSize(8U, 32U, 1U);
// Copy the per-thread data bytes again after the second setGroupSize call.
std::vector<char> memAfter;
{
auto perThreadData =
reinterpret_cast<const char *>(function.KernelImp::getPerThreadData());
reinterpret_cast<const char *>(kernel.KernelImp::getPerThreadData());
memAfter.assign(perThreadData,
perThreadData + function.KernelImp::getPerThreadDataSize());
perThreadData + kernel.KernelImp::getPerThreadDataSize());
}
// Changing the group size must leave the per-thread data contents and size unchanged.
EXPECT_EQ(memAfter, memBefore);
@ -253,12 +253,12 @@ class KernelImpSuggestMaxCooperativeGroupCountTests : public KernelImp {
uint32_t dssCount;
uint32_t availableSlm;
uint32_t maxBarrierCount;
WhiteBox<::L0::KernelImmutableData> funcInfo;
WhiteBox<::L0::KernelImmutableData> kernelInfo;
NEO::KernelDescriptor kernelDescriptor;
void SetUp() override {
KernelImp::SetUp();
funcInfo.kernelDescriptor = &kernelDescriptor;
kernelInfo.kernelDescriptor = &kernelDescriptor;
auto &hardwareInfo = device->getHwInfo();
auto &hwHelper = device->getHwHelper();
availableThreadCount = hwHelper.calculateAvailableThreadCount(hardwareInfo, numGrf);
@ -270,16 +270,16 @@ class KernelImpSuggestMaxCooperativeGroupCountTests : public KernelImp {
availableSlm = dssCount * KB * hardwareInfo.capabilityTable.slmSize;
maxBarrierCount = static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice());
funcInfo.kernelDescriptor->kernelAttributes.simdSize = simd;
funcInfo.kernelDescriptor->kernelAttributes.numGrfRequired = numGrf;
kernelInfo.kernelDescriptor->kernelAttributes.simdSize = simd;
kernelInfo.kernelDescriptor->kernelAttributes.numGrfRequired = numGrf;
}
uint32_t getMaxWorkGroupCount() {
funcInfo.kernelDescriptor->kernelAttributes.slmInlineSize = usedSlm;
funcInfo.kernelDescriptor->kernelAttributes.barrierCount = usesBarriers;
kernelInfo.kernelDescriptor->kernelAttributes.slmInlineSize = usedSlm;
kernelInfo.kernelDescriptor->kernelAttributes.barrierCount = usesBarriers;
Mock<Kernel> kernel;
kernel.kernelImmData = &funcInfo;
kernel.kernelImmData = &kernelInfo;
auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
kernel.module = module.get();

View File

@ -529,7 +529,7 @@ HWTEST_F(ModuleTest, GivenIncorrectNameWhenCreatingKernelThenResultErrorInvalidA
ze_kernel_handle_t kernelHandle;
ze_kernel_desc_t kernelDesc = {};
kernelDesc.pKernelName = "nonexistent_function";
kernelDesc.pKernelName = "nonexistent_kernel";
ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);

View File

@ -10,7 +10,7 @@
namespace L0 {
namespace ult {
void onEnterCommandListAppendLaunchFunction(
void onEnterCommandListAppendLaunchKernel(
ze_command_list_append_launch_kernel_params_t *params,
ze_result_t result,
void *pTracerUserData,
@ -18,7 +18,7 @@ void onEnterCommandListAppendLaunchFunction(
int a = 0;
a++;
}
void onExitCommandListAppendLaunchFunction(
void onExitCommandListAppendLaunchKernel(
ze_command_list_append_launch_kernel_params_t *params,
ze_result_t result,
void *pTracerUserData,
@ -168,8 +168,8 @@ TEST_F(ZeApiTracingCoreTests, WhenCreateTracerAndsetCallbacksAndEnableTracingAnd
zet_core_callbacks_t prologCbs = {};
zet_core_callbacks_t epilogCbs = {};
prologCbs.CommandList.pfnAppendLaunchKernelCb = onEnterCommandListAppendLaunchFunction;
epilogCbs.CommandList.pfnAppendLaunchKernelCb = onExitCommandListAppendLaunchFunction;
prologCbs.CommandList.pfnAppendLaunchKernelCb = onEnterCommandListAppendLaunchKernel;
epilogCbs.CommandList.pfnAppendLaunchKernelCb = onExitCommandListAppendLaunchKernel;
result = zetTracerExpSetPrologues(apiTracerHandle, &prologCbs);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);