Remove device enqueue part 6

- isParentKernel, peekParentKernel, parentKernel
- structs: AUBParentKernelFixture, MockParentKernel,
ParentKernelCommandQueueFixture

Related-To: NEO-6559
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2022-01-13 15:27:58 +00:00
committed by Compute-Runtime-Automation
parent d9aae805c7
commit 59683ec491
27 changed files with 24 additions and 895 deletions

View File

@ -98,7 +98,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
} }
if (AuxTranslationMode::Builtin == auxTranslationMode) { if (AuxTranslationMode::Builtin == auxTranslationMode) {
UNRECOVERABLE_IF(kernel->isParentKernel);
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux); dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
} }
@ -127,8 +126,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
return; return;
} }
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
TagNodeBase *hwTimeStamps = nullptr; TagNodeBase *hwTimeStamps = nullptr;
CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver(); CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership(); auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership();
@ -333,11 +330,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent()); updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
if (blockQueue) { if (blockQueue) {
if (parentKernel) {
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM;
}
enqueueBlocked(commandType, enqueueBlocked(commandType,
surfacesForResidency, surfacesForResidency,
numSurfaceForResidency, numSurfaceForResidency,
@ -414,13 +406,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
hwTimeStamps = event->getHwTimeStampNode(); hwTimeStamps = event->getHwTimeStampNode();
} }
if (auto parentKernel = multiDispatchInfo.peekParentKernel()) {
parentKernel->createReflectionSurface();
parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
parentKernel->patchEventPool(context->getDefaultDeviceQueue());
parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get());
}
if (event && this->isPerfCountersEnabled()) { if (event && this->isPerfCountersEnabled()) {
hwPerfCounter = event->getHwPerfCounterNode(); hwPerfCounter = event->getHwPerfCounterNode();
} }
@ -761,7 +746,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
getSliceCount(), //sliceCount getSliceCount(), //sliceCount
blocking, //blocking blocking, //blocking
shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, //dcFlush shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, //dcFlush
multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel(), //useSLM multiDispatchInfo.usesSlm(), //useSLM
true, //guardCommandBufferWithPipeControl true, //guardCommandBufferWithPipeControl
commandType == CL_COMMAND_NDRANGE_KERNEL, //GSBA32BitRequired commandType == CL_COMMAND_NDRANGE_KERNEL, //GSBA32BitRequired
requiresCoherency, //requiresCoherency requiresCoherency, //requiresCoherency
@ -905,7 +890,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
} }
PreemptionMode preemptionMode = ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo); PreemptionMode preemptionMode = ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo);
bool slmUsed = multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel(); bool slmUsed = multiDispatchInfo.usesSlm();
command = std::make_unique<CommandComputeKernel>(*this, command = std::make_unique<CommandComputeKernel>(*this,
blockedCommandsData, blockedCommandsData,
allSurfaces, allSurfaces,

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -43,10 +43,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
auto &kernel = *pKernel; auto &kernel = *pKernel;
const auto &kernelInfo = kernel.getKernelInfo(); const auto &kernelInfo = kernel.getKernelInfo();
if (kernel.isParentKernel && !this->context->getDefaultDeviceQueue()) {
return CL_INVALID_OPERATION;
}
if (!kernel.isPatched()) { if (!kernel.isPatched()) {
if (event) { if (event) {
*event = nullptr; *event = nullptr;

View File

@ -129,12 +129,6 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf
} }
// clang-format on // clang-format on
if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) {
if (heapType == IndirectHeap::SURFACE_STATE) {
expectedSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
}
}
if (ih == nullptr) if (ih == nullptr)
ih = &commandQueue.getIndirectHeap(heapType, expectedSize); ih = &commandQueue.getIndirectHeap(heapType, expectedSize);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -49,7 +49,6 @@ class HardwareInterface {
CommandQueue &commandQueue, CommandQueue &commandQueue,
const MultiDispatchInfo &multiDispatchInfo, const MultiDispatchInfo &multiDispatchInfo,
size_t &totalInterfaceDescriptorTableSize, size_t &totalInterfaceDescriptorTableSize,
Kernel *parentKernel,
IndirectHeap *dsh, IndirectHeap *dsh,
LinearStream *commandStream); LinearStream *commandStream);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -73,7 +73,6 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
LinearStream *commandStream = nullptr; LinearStream *commandStream = nullptr;
IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
auto parentKernel = multiDispatchInfo.peekParentKernel();
auto mainKernel = multiDispatchInfo.peekMainKernel(); auto mainKernel = multiDispatchInfo.peekMainKernel();
auto preemptionMode = ClPreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), multiDispatchInfo); auto preemptionMode = ClPreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), multiDispatchInfo);
@ -125,8 +124,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
size_t totalInterfaceDescriptorTableSize = sizeof(INTERFACE_DESCRIPTOR_DATA); size_t totalInterfaceDescriptorTableSize = sizeof(INTERFACE_DESCRIPTOR_DATA);
getDefaultDshSpace(offsetInterfaceDescriptorTable, commandQueue, multiDispatchInfo, totalInterfaceDescriptorTableSize, getDefaultDshSpace(offsetInterfaceDescriptorTable, commandQueue, multiDispatchInfo, totalInterfaceDescriptorTableSize, dsh, commandStream);
parentKernel, dsh, commandStream);
// Program media interface descriptor load // Program media interface descriptor load
HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad( HardwareCommandsHelper<GfxFamily>::sendMediaInterfaceDescriptorLoad(
@ -255,22 +253,13 @@ void HardwareInterface<GfxFamily>::dispatchKernelCommands(CommandQueue &commandQ
template <typename GfxFamily> template <typename GfxFamily>
void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo,
bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh) { bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh) {
auto parentKernel = multiDispatchInfo.peekParentKernel();
if (blockedQueue) { if (blockedQueue) {
size_t dshSize = 0; size_t dshSize = 0;
size_t colorCalcSize = 0; size_t colorCalcSize = 0;
size_t sshSize = HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredSSH(multiDispatchInfo); size_t sshSize = HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredSSH(multiDispatchInfo);
bool iohEqualsDsh = false; bool iohEqualsDsh = false;
if (parentKernel) { dshSize = HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredDSH(multiDispatchInfo);
dshSize = commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize();
sshSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
iohEqualsDsh = true;
colorCalcSize = static_cast<size_t>(commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize);
} else {
dshSize = HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredDSH(multiDispatchInfo);
}
commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh);
dsh->getSpace(colorCalcSize); dsh->getSpace(colorCalcSize);
@ -284,12 +273,6 @@ void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueu
HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredIOH(multiDispatchInfo), ioh); HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredIOH(multiDispatchInfo), ioh);
} }
} else { } else {
if (parentKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) {
commandQueue.releaseIndirectHeap(IndirectHeap::SURFACE_STATE);
// clean reserved bindless offsets
ssh = &getIndirectHeap<GfxFamily, IndirectHeap::SURFACE_STATE>(commandQueue, multiDispatchInfo);
ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace());
}
dsh = &getIndirectHeap<GfxFamily, IndirectHeap::DYNAMIC_STATE>(commandQueue, multiDispatchInfo); dsh = &getIndirectHeap<GfxFamily, IndirectHeap::DYNAMIC_STATE>(commandQueue, multiDispatchInfo);
ioh = &getIndirectHeap<GfxFamily, IndirectHeap::INDIRECT_OBJECT>(commandQueue, multiDispatchInfo); ioh = &getIndirectHeap<GfxFamily, IndirectHeap::INDIRECT_OBJECT>(commandQueue, multiDispatchInfo);
ssh = &getIndirectHeap<GfxFamily, IndirectHeap::SURFACE_STATE>(commandQueue, multiDispatchInfo); ssh = &getIndirectHeap<GfxFamily, IndirectHeap::SURFACE_STATE>(commandQueue, multiDispatchInfo);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2019-2021 Intel Corporation * Copyright (C) 2019-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -19,18 +19,13 @@ inline void HardwareInterface<GfxFamily>::getDefaultDshSpace(
CommandQueue &commandQueue, CommandQueue &commandQueue,
const MultiDispatchInfo &multiDispatchInfo, const MultiDispatchInfo &multiDispatchInfo,
size_t &totalInterfaceDescriptorTableSize, size_t &totalInterfaceDescriptorTableSize,
Kernel *parentKernel,
IndirectHeap *dsh, IndirectHeap *dsh,
LinearStream *commandStream) { LinearStream *commandStream) {
size_t numDispatches = multiDispatchInfo.size(); size_t numDispatches = multiDispatchInfo.size();
totalInterfaceDescriptorTableSize *= numDispatches; totalInterfaceDescriptorTableSize *= numDispatches;
if (!parentKernel) { dsh->getSpace(totalInterfaceDescriptorTableSize);
dsh->getSpace(totalInterfaceDescriptorTableSize);
} else {
dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed());
}
} }
template <typename GfxFamily> template <typename GfxFamily>

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2021 Intel Corporation * Copyright (C) 2021-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -24,13 +24,8 @@ inline void HardwareInterface<GfxFamily>::getDefaultDshSpace(
CommandQueue &commandQueue, CommandQueue &commandQueue,
const MultiDispatchInfo &multiDispatchInfo, const MultiDispatchInfo &multiDispatchInfo,
size_t &totalInterfaceDescriptorTableSize, size_t &totalInterfaceDescriptorTableSize,
Kernel *parentKernel,
IndirectHeap *dsh, IndirectHeap *dsh,
LinearStream *commandStream) { LinearStream *commandStream) {
if (parentKernel) {
dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed());
}
} }
template <typename GfxFamily> template <typename GfxFamily>

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -72,7 +72,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
// Enlarge local copy of SSH by 1 SS // Enlarge local copy of SSH by 1 SS
GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily); GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel)) { if (!gtpinHelper.addSurfaceState(pKernel)) {
// Kernel with no SSH or Kernel EM, not supported // Kernel with no SSH or Kernel EM, not supported
return; return;
} }
@ -117,7 +117,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
auto rootDeviceIndex = device.getRootDeviceIndex(); auto rootDeviceIndex = device.getRootDeviceIndex();
auto pMultiDeviceKernel = castToObjectOrAbort<MultiDeviceKernel>(kernel); auto pMultiDeviceKernel = castToObjectOrAbort<MultiDeviceKernel>(kernel);
auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize() == 0) { if (pKernel->getSurfaceStateHeapSize() == 0) {
// Kernel with no SSH, not supported // Kernel with no SSH, not supported
return; return;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -20,7 +20,7 @@ bool GTPinHwHelperHw<GfxFamily>::addSurfaceState(Kernel *pKernel) {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
size_t sshSize = pKernel->getSurfaceStateHeapSize(); size_t sshSize = pKernel->getSurfaceStateHeapSize();
if ((sshSize == 0) || pKernel->isParentKernel) { if (sshSize == 0) {
// Kernels which do not use SSH or use Execution Model are not supported (yet) // Kernels which do not use SSH or use Execution Model are not supported (yet)
return false; return false;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -33,10 +33,6 @@ Kernel *MultiDispatchInfo::peekMainKernel() const {
return mainKernel ? mainKernel : dispatchInfos.begin()->getKernel(); return mainKernel ? mainKernel : dispatchInfos.begin()->getKernel();
} }
Kernel *MultiDispatchInfo::peekParentKernel() const {
return (mainKernel && mainKernel->isParentKernel) ? mainKernel : nullptr;
}
void MultiDispatchInfo::backupUnifiedMemorySyncRequirement() { void MultiDispatchInfo::backupUnifiedMemorySyncRequirement() {
for (const auto &dispatchInfo : dispatchInfos) { for (const auto &dispatchInfo : dispatchInfos) {
dispatchInfo.getKernel()->setUnifiedMemorySyncRequirement(true); dispatchInfo.getKernel()->setUnifiedMemorySyncRequirement(true);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -187,7 +187,6 @@ struct MultiDispatchInfo {
redescribedSurfaces.push_back(memObj.release()); redescribedSurfaces.push_back(memObj.release());
} }
Kernel *peekParentKernel() const;
Kernel *peekMainKernel() const; Kernel *peekMainKernel() const;
void setBuiltinOpParams(const BuiltinOpParams &builtinOpParams) { void setBuiltinOpParams(const BuiltinOpParams &builtinOpParams) {

View File

@ -140,20 +140,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
return completionStamp; return completionStamp;
} }
auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
bool executionModelKernel = kernel->isParentKernel;
auto devQueue = commandQueue.getContext().getDefaultDeviceQueue();
auto bcsCsrForAuxTranslation = commandQueue.getBcsForAuxTranslation(); auto bcsCsrForAuxTranslation = commandQueue.getBcsForAuxTranslation();
auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership(); auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership();
if (executionModelKernel) {
while (!devQueue->isEMCriticalSectionFree())
;
devQueue->resetDeviceQueue();
devQueue->acquireEMCriticalSection();
}
IndirectHeap *dsh = kernelOperation->dsh.get(); IndirectHeap *dsh = kernelOperation->dsh.get();
IndirectHeap *ioh = kernelOperation->ioh.get(); IndirectHeap *ioh = kernelOperation->ioh.get();
IndirectHeap *ssh = kernelOperation->ssh.get(); IndirectHeap *ssh = kernelOperation->ssh.get();

View File

@ -68,8 +68,7 @@ class Surface;
uint32_t Kernel::dummyPatchLocation = 0xbaddf00d; uint32_t Kernel::dummyPatchLocation = 0xbaddf00d;
Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg)
: isParentKernel(kernelInfoArg.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue), : executionEnvironment(programArg->getExecutionEnvironment()),
executionEnvironment(programArg->getExecutionEnvironment()),
program(programArg), program(programArg),
clDevice(clDeviceArg), clDevice(clDeviceArg),
kernelInfo(kernelInfoArg) { kernelInfo(kernelInfoArg) {
@ -262,9 +261,6 @@ cl_int Kernel::initialize() {
program->getContextPtr()->setResolvesRequiredInKernels(true); program->getContextPtr()->setResolvesRequiredInKernels(true);
} }
if (isParentKernel) {
program->allocateBlockPrivateSurfaces(*pClDevice);
}
if (program->isKernelDebugEnabled() && isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) { if (program->isKernelDebugEnabled() && isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) {
debugEnabled = true; debugEnabled = true;
} }
@ -1791,129 +1787,6 @@ void Kernel::unsetArg(uint32_t argIndex) {
void Kernel::createReflectionSurface() { void Kernel::createReflectionSurface() {
auto pClDevice = &clDevice; auto pClDevice = &clDevice;
if (this->isParentKernel && kernelReflectionSurface == nullptr) {
auto &hwInfo = pClDevice->getHardwareInfo();
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
BlockKernelManager *blockManager = program->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
ObjectCounts objectCount;
getParentObjectCounts(objectCount);
uint32_t parentImageCount = objectCount.imageCount;
uint32_t parentSamplerCount = objectCount.samplerCount;
size_t maxConstantBufferSize = 0;
std::vector<IGIL_KernelCurbeParams> *curbeParamsForBlocks = new std::vector<IGIL_KernelCurbeParams>[blockCount];
uint64_t *tokenMask = new uint64_t[blockCount];
uint32_t *sshTokenOffsetsFromKernelData = new uint32_t[blockCount];
size_t kernelReflectionSize = alignUp(sizeof(IGIL_KernelDataHeader) + blockCount * sizeof(IGIL_KernelAddressData), sizeof(void *));
uint32_t kernelDataOffset = static_cast<uint32_t>(kernelReflectionSize);
uint32_t parentSSHAlignedSize = alignUp(this->kernelInfo.heapInfo.SurfaceStateHeapSize, hwHelper.getBindingTableStateAlignement());
uint32_t btOffset = parentSSHAlignedSize;
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
size_t samplerStateAndBorderColorSize = 0;
uint32_t firstSSHTokenIndex = 0;
ReflectionSurfaceHelper::getCurbeParams(curbeParamsForBlocks[i], tokenMask[i], firstSSHTokenIndex, *pBlockInfo, hwInfo);
maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast<size_t>(pBlockInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize));
samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(hwInfo);
samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment);
samplerStateAndBorderColorSize += pBlockInfo->getBorderColorStateSize();
samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, sizeof(void *));
sshTokenOffsetsFromKernelData[i] = offsetof(IGIL_KernelData, m_data) + sizeof(IGIL_KernelCurbeParams) * firstSSHTokenIndex;
kernelReflectionSize += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParamsForBlocks[i].size(), sizeof(void *));
kernelReflectionSize += parentSamplerCount * sizeof(IGIL_SamplerParams) + samplerStateAndBorderColorSize;
}
maxConstantBufferSize = alignUp(maxConstantBufferSize, sizeof(void *));
kernelReflectionSize += blockCount * alignUp(maxConstantBufferSize, sizeof(void *));
kernelReflectionSize += parentImageCount * sizeof(IGIL_ImageParamters);
kernelReflectionSize += parentSamplerCount * sizeof(IGIL_ParentSamplerParams);
kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(
{pClDevice->getRootDeviceIndex(), kernelReflectionSize,
GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER,
pClDevice->getDeviceBitfield()});
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
uint32_t newKernelDataOffset = ReflectionSurfaceHelper::setKernelData(kernelReflectionSurface->getUnderlyingBuffer(),
kernelDataOffset,
curbeParamsForBlocks[i],
tokenMask[i],
maxConstantBufferSize,
parentSamplerCount,
*pBlockInfo,
hwInfo);
uint32_t offset = static_cast<uint32_t>(offsetof(IGIL_KernelDataHeader, m_data) + sizeof(IGIL_KernelAddressData) * i);
uint32_t samplerHeapOffset = static_cast<uint32_t>(alignUp(kernelDataOffset + sizeof(IGIL_KernelData) + curbeParamsForBlocks[i].size() * sizeof(IGIL_KernelCurbeParams), sizeof(void *)));
uint32_t samplerHeapSize = static_cast<uint32_t>(alignUp(pBlockInfo->getSamplerStateArraySize(hwInfo), Sampler::samplerStateArrayAlignment) + pBlockInfo->getBorderColorStateSize());
uint32_t constantBufferOffset = alignUp(samplerHeapOffset + samplerHeapSize, sizeof(void *));
uint32_t samplerParamsOffset = 0;
if (parentSamplerCount) {
samplerParamsOffset = newKernelDataOffset - sizeof(IGIL_SamplerParams) * parentSamplerCount;
IGIL_SamplerParams *pSamplerParams = (IGIL_SamplerParams *)ptrOffset(kernelReflectionSurface->getUnderlyingBuffer(), samplerParamsOffset);
uint32_t sampler = 0;
const auto &args = pBlockInfo->kernelDescriptor.payloadMappings.explicitArgs;
for (uint32_t argID = 0; argID < args.size(); argID++) {
if (args[argID].is<ArgDescriptor::ArgTSampler>()) {
pSamplerParams[sampler].m_ArgID = argID;
pSamplerParams[sampler].m_SamplerStateOffset = args[argID].as<ArgDescSampler>().bindful;
sampler++;
}
}
}
ReflectionSurfaceHelper::setKernelAddressData(kernelReflectionSurface->getUnderlyingBuffer(),
offset,
kernelDataOffset,
samplerHeapOffset,
constantBufferOffset,
samplerParamsOffset,
sshTokenOffsetsFromKernelData[i] + kernelDataOffset,
btOffset,
*pBlockInfo,
hwInfo);
if (samplerHeapSize > 0) {
void *pDst = ptrOffset(kernelReflectionSurface->getUnderlyingBuffer(), samplerHeapOffset);
const void *pSrc = ptrOffset(pBlockInfo->heapInfo.pDsh, pBlockInfo->getBorderColorOffset());
memcpy_s(pDst, samplerHeapSize, pSrc, samplerHeapSize);
}
void *pDst = ptrOffset(kernelReflectionSurface->getUnderlyingBuffer(), constantBufferOffset);
const char *pSrc = pBlockInfo->crossThreadData;
memcpy_s(pDst, pBlockInfo->getConstantBufferSize(), pSrc, pBlockInfo->getConstantBufferSize());
btOffset += pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset;
kernelDataOffset = newKernelDataOffset;
}
uint32_t samplerOffset = 0;
if (parentSamplerCount) {
samplerOffset = kernelDataOffset + parentImageCount * sizeof(IGIL_ImageParamters);
}
ReflectionSurfaceHelper::setKernelDataHeader(kernelReflectionSurface->getUnderlyingBuffer(), blockCount, parentImageCount, parentSamplerCount, kernelDataOffset, samplerOffset);
delete[] curbeParamsForBlocks;
delete[] tokenMask;
delete[] sshTokenOffsetsFromKernelData;
// Patch constant values once after reflection surface creation
patchBlocksCurbeWithConstantValues();
}
if (DebugManager.flags.ForceDispatchScheduler.get()) { if (DebugManager.flags.ForceDispatchScheduler.get()) {
if (kernelReflectionSurface == nullptr) { if (kernelReflectionSurface == nullptr) {
kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties( kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(
@ -1927,7 +1800,6 @@ void Kernel::createReflectionSurface() {
void Kernel::getParentObjectCounts(ObjectCounts &objectCount) { void Kernel::getParentObjectCounts(ObjectCounts &objectCount) {
objectCount.imageCount = 0; objectCount.imageCount = 0;
objectCount.samplerCount = 0; objectCount.samplerCount = 0;
DEBUG_BREAK_IF(!isParentKernel);
for (const auto &arg : this->kernelArguments) { for (const auto &arg : this->kernelArguments) {
if (arg.type == SAMPLER_OBJ) { if (arg.type == SAMPLER_OBJ) {
@ -1942,22 +1814,6 @@ bool Kernel::hasPrintfOutput() const {
return kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf; return kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf;
} }
size_t Kernel::getInstructionHeapSizeForExecutionModel() const {
BlockKernelManager *blockManager = program->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
size_t totalSize = 0;
if (isParentKernel) {
totalSize = kernelBinaryAlignment - 1; // for initial alignment
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
totalSize += pBlockInfo->heapInfo.KernelHeapSize;
totalSize = alignUp(totalSize, kernelBinaryAlignment);
}
}
return totalSize;
}
void Kernel::patchBlocksCurbeWithConstantValues() { void Kernel::patchBlocksCurbeWithConstantValues() {
auto rootDeviceIndex = clDevice.getRootDeviceIndex(); auto rootDeviceIndex = clDevice.getRootDeviceIndex();
BlockKernelManager *blockManager = program->getBlockKernelManager(); BlockKernelManager *blockManager = program->getBlockKernelManager();
@ -2622,10 +2478,6 @@ void Kernel::setReflectionSurfaceBlockBtOffset(uint32_t blockID, uint32_t offset
ReflectionSurfaceHelper::setKernelAddressDataBtOffset(getKernelReflectionSurface()->getUnderlyingBuffer(), blockID, offset); ReflectionSurfaceHelper::setKernelAddressDataBtOffset(getKernelReflectionSurface()->getUnderlyingBuffer(), blockID, offset);
} }
bool Kernel::checkIfIsParentKernelAndBlocksUsesPrintf() {
return isParentKernel && getProgram()->getBlockKernelManager()->getIfBlockUsesPrintf();
}
uint64_t Kernel::getKernelStartOffset( uint64_t Kernel::getKernelStartOffset(
const bool localIdsGenerationByRuntime, const bool localIdsGenerationByRuntime,
const bool kernelUsesLocalIds, const bool kernelUsesLocalIds,

View File

@ -236,8 +236,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
return kernelReflectionSurface; return kernelReflectionSurface;
} }
size_t getInstructionHeapSizeForExecutionModel() const;
// Helpers // Helpers
cl_int setArg(uint32_t argIndex, uint32_t argValue); cl_int setArg(uint32_t argIndex, uint32_t argValue);
cl_int setArg(uint32_t argIndex, uint64_t argValue); cl_int setArg(uint32_t argIndex, uint64_t argValue);
@ -324,7 +322,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
uint32_t allBufferArgsStateful = CL_TRUE; uint32_t allBufferArgsStateful = CL_TRUE;
bool isBuiltIn = false; bool isBuiltIn = false;
const bool isParentKernel;
uint32_t getThreadArbitrationPolicy() const { uint32_t getThreadArbitrationPolicy() const {
return threadArbitrationPolicy; return threadArbitrationPolicy;
@ -333,8 +330,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
return executionType; return executionType;
} }
bool checkIfIsParentKernelAndBlocksUsesPrintf();
bool is32Bit() const { bool is32Bit() const {
return kernelInfo.kernelDescriptor.kernelAttributes.gpuPointerSize == 4; return kernelInfo.kernelDescriptor.kernelAttributes.gpuPointerSize == 4;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -38,7 +38,7 @@ PrintfHandler *PrintfHandler::create(const MultiDispatchInfo &multiDispatchInfo,
} }
auto mainKernel = multiDispatchInfo.peekMainKernel(); auto mainKernel = multiDispatchInfo.peekMainKernel();
if (mainKernel != nullptr) { if (mainKernel != nullptr) {
if (mainKernel->checkIfIsParentKernelAndBlocksUsesPrintf() || mainKernel->getImplicitArgs()) { if (mainKernel->getImplicitArgs()) {
return new PrintfHandler(device); return new PrintfHandler(device);
} }
} }

View File

@ -1,5 +1,5 @@
# #
# Copyright (C) 2018-2021 Intel Corporation # Copyright (C) 2018-2022 Intel Corporation
# #
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
# #
@ -8,7 +8,6 @@ target_sources(igdrcl_aub_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.cpp
${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/aub_parent_kernel_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/run_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/run_kernel_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/simple_arg_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/simple_arg_fixture.h

View File

@ -1,37 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h"
#include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
namespace NEO {
static const char programFile[] = "simple_block_kernel";
static const char kernelName[] = "kernel_reflection";
// AUB test fixture that combines the command-enqueue AUB fixture with the
// hello-world kernel fixture in order to build the kernel named by `kernelName`
// from the program named by `programFile` (both declared just above in this
// header), compiled with "-cl-std=CL2.0".
class AUBParentKernelFixture : public CommandEnqueueAUBFixture,
public HelloWorldKernelFixture,
public testing::Test {
public:
// Keep the multi-argument HelloWorldKernelFixture::SetUp overload visible
// alongside the zero-argument override declared below.
using HelloWorldKernelFixture::SetUp;
void SetUp() override {
// NOTE(review): presumably skips the test when OpenCL 2.1 is not supported
// by defaultHwInfo - confirm against the macro definition.
REQUIRE_OCL_21_OR_SKIP(defaultHwInfo);
CommandEnqueueAUBFixture::SetUp();
ASSERT_NE(nullptr, pClDevice);
HelloWorldKernelFixture::SetUp(pClDevice, programFile, kernelName, "-cl-std=CL2.0");
}
void TearDown() override {
// A skipped test does not run the base-fixture teardown below.
// NOTE(review): presumably because SetUp bailed out before the base
// fixtures were initialised - confirm.
if (IsSkipped()) {
return;
}
// Torn down in reverse order of the SetUp calls.
HelloWorldKernelFixture::TearDown();
CommandEnqueueAUBFixture::TearDown();
}
};
} // namespace NEO

View File

@ -19,7 +19,6 @@ set(IGDRCL_SRCS_tests_fixtures
${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/enqueue_handler_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_handler_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/execution_model_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/hello_world_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_kernel_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/image_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_fixture.cpp

View File

@ -1,39 +0,0 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "opencl/source/device_queue/device_queue.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
struct ParentKernelCommandQueueFixture : public CommandQueueHwFixture,
testing::Test {
void SetUp() override {
device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr, rootDeviceIndex)};
CommandQueueHwFixture::SetUp(device, 0);
}
void TearDown() override {
CommandQueueHwFixture::TearDown();
delete device;
}
std::unique_ptr<KernelOperation> createBlockedCommandsData(CommandQueue &commandQueue) {
auto commandStream = new LinearStream();
auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver();
gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1);
return std::make_unique<KernelOperation>(commandStream, *gpgpuCsr.getInternalAllocationStorage());
}
const uint32_t rootDeviceIndex = 0u;
};

View File

@ -2033,23 +2033,6 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenLowMemoryConditionOccursThe
injectFailures(allocBufferFunc); injectFailures(allocBufferFunc);
} }
// Checks that GTPinHwHelper::addSurfaceState reports false for a parent kernel
// and leaves the kernel's surface state heap size unchanged.
TEST_F(GTPinTests, givenParentKernelWhenGtPinAddingSurfaceStateThenItIsNotAddedAndFalseIsReturned) {
    const GFXCORE_FAMILY coreFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily;
    GTPinHwHelper &helper = GTPinHwHelper::get(coreFamily);

    std::unique_ptr<MockParentKernel> kernel(MockParentKernel::create(*pContext));

    // Give the kernel a 64-byte local surface state heap.
    kernel->sshLocalSize = 64;
    kernel->pSshLocal.reset(new char[64]);

    const size_t sshSizeBefore = kernel->getSurfaceStateHeapSize();

    const bool surfaceAdded = helper.addSurfaceState(kernel.get());
    EXPECT_FALSE(surfaceAdded);

    const size_t sshSizeAfter = kernel->getSurfaceStateHeapSize();
    EXPECT_EQ(sshSizeAfter, sshSizeBefore);
}
TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) {
cl_kernel kernel = nullptr; cl_kernel kernel = nullptr;
cl_program pProgram = nullptr; cl_program pProgram = nullptr;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -266,46 +266,32 @@ TEST_F(DispatchInfoTest, WhenSettingValuesInDispatchInfoThenThoseValuesAreSet) {
EXPECT_EQ(swgs, dispatchInfo.getStartOfWorkgroups()); EXPECT_EQ(swgs, dispatchInfo.getStartOfWorkgroups());
} }
TEST_F(DispatchInfoTest, givenKernelWhenMultiDispatchInfoIsCreatedThenQueryParentAndMainKernel) { TEST_F(DispatchInfoTest, givenKernelWhenMultiDispatchInfoIsCreatedThenQueryMainKernel) {
std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*pContext));
std::unique_ptr<MockKernel> baseKernel(MockKernel::create(*pDevice, pProgram)); std::unique_ptr<MockKernel> baseKernel(MockKernel::create(*pDevice, pProgram));
std::unique_ptr<MockKernel> builtInKernel(MockKernel::create(*pDevice, pProgram)); std::unique_ptr<MockKernel> builtInKernel(MockKernel::create(*pDevice, pProgram));
builtInKernel->isBuiltIn = true; builtInKernel->isBuiltIn = true;
DispatchInfo parentKernelDispatchInfo(pClDevice, parentKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1});
DispatchInfo baseDispatchInfo(pClDevice, baseKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo baseDispatchInfo(pClDevice, baseKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1});
DispatchInfo builtInDispatchInfo(pClDevice, builtInKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo builtInDispatchInfo(pClDevice, builtInKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1});
{
MultiDispatchInfo multiDispatchInfo(parentKernel.get());
multiDispatchInfo.push(parentKernelDispatchInfo);
EXPECT_EQ(parentKernel.get(), multiDispatchInfo.peekParentKernel());
EXPECT_EQ(parentKernel.get(), multiDispatchInfo.peekMainKernel());
}
{ {
MultiDispatchInfo multiDispatchInfo(baseKernel.get()); MultiDispatchInfo multiDispatchInfo(baseKernel.get());
multiDispatchInfo.push(builtInDispatchInfo); multiDispatchInfo.push(builtInDispatchInfo);
EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); // dont pick builtin kernel EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); // dont pick builtin kernel
multiDispatchInfo.push(baseDispatchInfo); multiDispatchInfo.push(baseDispatchInfo);
EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel());
} }
{ {
MultiDispatchInfo multiDispatchInfo; MultiDispatchInfo multiDispatchInfo;
EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
EXPECT_EQ(nullptr, multiDispatchInfo.peekMainKernel()); EXPECT_EQ(nullptr, multiDispatchInfo.peekMainKernel());
multiDispatchInfo.push(builtInDispatchInfo); multiDispatchInfo.push(builtInDispatchInfo);
EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
EXPECT_EQ(builtInKernel.get(), multiDispatchInfo.peekMainKernel()); EXPECT_EQ(builtInKernel.get(), multiDispatchInfo.peekMainKernel());
} }
{ {
MultiDispatchInfo multiDispatchInfo; MultiDispatchInfo multiDispatchInfo;
multiDispatchInfo.push(parentKernelDispatchInfo);
multiDispatchInfo.push(baseDispatchInfo); multiDispatchInfo.push(baseDispatchInfo);
multiDispatchInfo.push(builtInDispatchInfo); multiDispatchInfo.push(builtInDispatchInfo);

View File

@ -27,7 +27,6 @@ set(IGDRCL_SRCS_tests_kernel
${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_transformable_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_transformable_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/debug_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debug_kernel_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/parent_kernel_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/substitute_kernel_heap_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/substitute_kernel_heap_tests.cpp
) )

View File

@ -13,7 +13,6 @@
#include "opencl/source/kernel/kernel.h" #include "opencl/source/kernel/kernel.h"
#include "opencl/source/program/printf_handler.h" #include "opencl/source/program/printf_handler.h"
#include "opencl/source/sampler/sampler.h" #include "opencl/source/sampler/sampler.h"
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/fixtures/platform_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h"
@ -32,21 +31,6 @@
using namespace NEO; using namespace NEO;
// A kernel that is not a parent kernel must not get a reflection surface from
// createReflectionSurface().
TEST(KernelReflectionSurfaceTestSingle, GivenNonParentKernelWhenCreatingKernelReflectionSurfaceThenKernelReflectionSurfaceIsNotCreated) {
    MockClDevice clDevice{new MockDevice};
    MockProgram mockProgram(toClDeviceVector(clDevice));
    KernelInfo kernelInfo;
    MockKernel mockKernel(&mockProgram, kernelInfo, clDevice);

    EXPECT_FALSE(mockKernel.isParentKernel);

    mockKernel.createReflectionSurface();

    EXPECT_EQ(nullptr, mockKernel.getKernelReflectionSurface());
}
class ReflectionSurfaceHelperTest : public testing::TestWithParam<std::tuple<const IGIL_KernelCurbeParams, const IGIL_KernelCurbeParams, bool>> { class ReflectionSurfaceHelperTest : public testing::TestWithParam<std::tuple<const IGIL_KernelCurbeParams, const IGIL_KernelCurbeParams, bool>> {
protected: protected:
@ -678,168 +662,3 @@ TEST_F(ReflectionSurfaceHelperFixture, GivenUndefinedOffsetsWhenPatchingBlocksCu
EXPECT_THAT(patchedValues, MemCompare(reference.get(), 10 * sizeof(IGIL_KernelDataHeader) - constBufferOffset)); EXPECT_THAT(patchedValues, MemCompare(reference.get(), 10 * sizeof(IGIL_KernelDataHeader) - constBufferOffset));
} }
// Fixture for the constant-value patching tests; it only forwards SetUp and
// TearDown to ClDeviceFixture.
class ReflectionSurfaceConstantValuesPatchingTest : public ClDeviceFixture,
                                                    public ::testing::Test {
  public:
    void SetUp() override { ClDeviceFixture::SetUp(); }

    void TearDown() override { ClDeviceFixture::TearDown(); }
};
// With a global surface set on the program, patching the block's curbe must
// write that surface's GPU address at the block's global-variables offset.
TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryWhenReflectionSurfaceIsPatchedWithConstantValuesThenProgramGlobalMemoryAddressIsPatched) {
    MockContext context(pClDevice);

    MockParentKernel::CreateParams params{};
    params.addChildGlobalMemory = true;
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context, params));

    auto memoryManager = pDevice->getMemoryManager();

    // graphicsMemory is released by Program
    GraphicsAllocation *globalMemory = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});
    parentKernel->mockProgram->setGlobalSurface(globalMemory);

    // Allocate reflectionSurface, 2 * 4096 should be enough
    GraphicsAllocation *reflectionSurface = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize});
    parentKernel->setReflectionSurface(reflectionSurface);

    void *surfaceBase = reflectionSurface->getUnderlyingBuffer();
    memset(surfaceBase, 0, reflectionSurface->getUnderlyingBufferSize());

    // Describe a single kernel whose constant buffer starts right after the headers.
    const uint32_t constBufferOffset = static_cast<uint32_t>(alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)));
    auto *kernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(surfaceBase);
    kernelHeader->m_numberOfKernels = 1;
    kernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset;

    parentKernel->patchBlocksCurbeWithConstantValues();

    auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0);
    const uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless;

    auto *patchedCurbe = reinterpret_cast<uint64_t *>(ptrOffset(surfaceBase, constBufferOffset + blockPatchOffset));
    EXPECT_EQ(globalMemory->getGpuAddressToPatch(), *patchedCurbe);
}
// Without a global surface on the program, the block's global-variables slot
// must read back as zero after patching.
TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryAndProgramWithoutGlobalMemortWhenReflectionSurfaceIsPatchedWithConstantValuesThenZeroAddressIsPatched) {
    MockContext context(pClDevice);

    MockParentKernel::CreateParams params{};
    params.addChildGlobalMemory = true;
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context, params));

    auto memoryManager = pDevice->getMemoryManager();

    // Drop any global surface the program may already hold.
    if (auto existingSurface = parentKernel->mockProgram->getGlobalSurface(pClDevice->getRootDeviceIndex())) {
        memoryManager->freeGraphicsMemory(existingSurface);
        parentKernel->mockProgram->setGlobalSurface(nullptr);
    }

    // Allocate reflectionSurface, 2 * 4096 should be enough
    GraphicsAllocation *reflectionSurface = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize});
    parentKernel->setReflectionSurface(reflectionSurface);

    void *surfaceBase = reflectionSurface->getUnderlyingBuffer();
    memset(surfaceBase, 0, reflectionSurface->getUnderlyingBufferSize());

    // Describe a single kernel whose constant buffer starts right after the headers.
    const uint32_t constBufferOffset = static_cast<uint32_t>(alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)));
    auto *kernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(surfaceBase);
    kernelHeader->m_numberOfKernels = 1;
    kernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset;

    parentKernel->patchBlocksCurbeWithConstantValues();

    auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0);
    const uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless;

    auto *patchedCurbe = reinterpret_cast<uint64_t *>(ptrOffset(surfaceBase, constBufferOffset + blockPatchOffset));
    EXPECT_EQ(0u, *patchedCurbe);
}
// With a constant surface set on the program, patching the block's curbe must
// write that surface's GPU address at the block's global-constants offset and
// leave the bytes around that slot untouched (still zero).
TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemoryWhenReflectionSurfaceIsPatchedWithConstantValuesThenProgramConstantMemoryAddressIsPatched) {

    MockContext context(pClDevice);
    MockParentKernel::CreateParams createParams{};
    createParams.addChildConstantMemory = true;
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context, createParams));

    // graphicsMemory is released by Program
    GraphicsAllocation *constantMemory = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize});

    parentKernel->mockProgram->setConstantSurface(constantMemory);

    // Allocate reflectionSurface, 2 * 4096 should be enough
    GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize});
    parentKernel->setReflectionSurface(reflectionSurface);

    memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize());

    // Describe a single kernel whose constant buffer starts right after the headers.
    const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t));

    IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
    pKernelHeader->m_numberOfKernels = 1;
    pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset;

    parentKernel->patchBlocksCurbeWithConstantValues();

    auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0);

    uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless;

    uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset);
    uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset);

    EXPECT_EQ(constantMemory->getGpuAddressToPatch(), *pCurbeToPatch);

    // Array form of unique_ptr so the buffer is released with delete[];
    // make_unique<char[]> value-initializes the elements, i.e. the buffer is all zeros.
    auto zeroMemory = std::make_unique<char[]>(4096);
    // memory before is not written
    EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbe, std::min(4096u, blockPatchOffset)));

    // memory after is not written
    EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbeToPatch + 1, std::min(4096u, 8192u - constBufferOffset - blockPatchOffset - (uint32_t)sizeof(uint64_t))));
}
// Without a constant surface on the program, the block's global-constants slot
// must read back as zero after patching, and the bytes around it stay zero.
TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemoryAndProgramWithoutConstantMemortWhenReflectionSurfaceIsPatchedWithConstantValuesThenZeroAddressIsPatched) {

    MockContext context(pClDevice);
    MockParentKernel::CreateParams createParams{};
    createParams.addChildConstantMemory = true;
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context, createParams));

    // Drop any constant surface the program may already hold.
    if (parentKernel->mockProgram->getConstantSurface(pClDevice->getRootDeviceIndex())) {
        pDevice->getMemoryManager()->freeGraphicsMemory(parentKernel->mockProgram->getConstantSurface(pClDevice->getRootDeviceIndex()));
        parentKernel->mockProgram->setConstantSurface(nullptr);
    }

    // Allocate reflectionSurface, 2 * 4096 should be enough
    GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize});
    parentKernel->setReflectionSurface(reflectionSurface);

    memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize());

    // Describe a single kernel whose constant buffer starts right after the headers.
    const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t));

    IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface->getUnderlyingBuffer());
    pKernelHeader->m_numberOfKernels = 1;
    pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset;

    parentKernel->patchBlocksCurbeWithConstantValues();

    auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0);

    uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless;

    uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset);
    uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset);

    EXPECT_EQ(0u, *pCurbeToPatch);

    // Array form of unique_ptr so the buffer is released with delete[];
    // make_unique<char[]> value-initializes the elements, i.e. the buffer is all zeros.
    auto zeroMemory = std::make_unique<char[]>(4096);
    // memory before is not written
    EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbe, std::min(4096u, blockPatchOffset)));

    // memory after is not written
    EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbeToPatch + 1, std::min(4096u, 8192u - constBufferOffset - blockPatchOffset - (uint32_t)sizeof(uint64_t))));
}

View File

@ -34,7 +34,6 @@
#include "opencl/source/mem_obj/image.h" #include "opencl/source/mem_obj/image.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/execution_model_fixture.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h"
@ -2182,13 +2181,6 @@ TEST(KernelInfoTest, givenHwHelperWhenCreatingKernelAllocationThenCorrectPadding
clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation()); clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation());
} }
// A regular (non-parent) kernel reports an execution-model instruction heap size of zero.
TEST(KernelTest, givenNormalKernelWhenGettingInstructionHeapSizeForExecutionModelThenZeroIsReturned) {
    auto clDevice = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
    MockKernelWithInternals kernelWithInternals(*clDevice);

    const auto heapSize = kernelWithInternals.mockKernel->getInstructionHeapSizeForExecutionModel();

    EXPECT_EQ(0u, heapSize);
}
TEST(KernelTest, WhenSettingKernelArgThenBuiltinDispatchInfoBuilderIsUsed) { TEST(KernelTest, WhenSettingKernelArgThenBuiltinDispatchInfoBuilderIsUsed) {
struct MockBuiltinDispatchBuilder : BuiltinDispatchInfoBuilder { struct MockBuiltinDispatchBuilder : BuiltinDispatchInfoBuilder {
using BuiltinDispatchInfoBuilder::BuiltinDispatchInfoBuilder; using BuiltinDispatchInfoBuilder::BuiltinDispatchInfoBuilder;

View File

@ -1,146 +0,0 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include <memory>
using namespace NEO;
// Kernel subclass that makes the kernel argument list and
// getParentObjectCounts() reachable from tests.
class MockKernelWithArgumentAccess : public Kernel {
  public:
    // Publicly nameable alias-by-inheritance of Kernel::ObjectCounts.
    class ObjectCountsPublic : public Kernel::ObjectCounts {
    };

    MockKernelWithArgumentAccess(Program *programArg, KernelInfo &kernelInfoArg, ClDevice &clDeviceArg)
        : Kernel(programArg, kernelInfoArg, clDeviceArg) {
    }

    // Mutable access to the kernel's argument descriptors.
    std::vector<SimpleKernelArgInfo> &getKernelArguments() {
        return kernelArguments;
    }

    // Forwards to Kernel::getParentObjectCounts, filling objectCount.
    void getParentObjectCountsPublic(MockKernelWithArgumentAccess::ObjectCountsPublic &objectCount) {
        getParentObjectCounts(objectCount);
    }
};
// Adds one sampler argument and one image argument to a kernel and checks
// that getParentObjectCounts() reports exactly one of each.
TEST(ParentKernelTest, WhenArgsAddedThenObjectCountsAreIncremented) {
    // The device is a stack object declared first, as in the other tests of this
    // file, so it is destroyed after the program and kernel that were built from it
    // (previously it was deleted by hand while both were still alive).
    MockClDevice device{new MockDevice};
    MockProgram program(toClDeviceVector(device));
    KernelInfo info;
    info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;

    MockKernelWithArgumentAccess kernel(&program, info, device);

    std::vector<Kernel::SimpleKernelArgInfo> &args = kernel.getKernelArguments();

    // One sampler and one image argument.
    Kernel::SimpleKernelArgInfo argInfo;
    argInfo.type = Kernel::kernelArgType::SAMPLER_OBJ;
    args.push_back(argInfo);
    argInfo.type = Kernel::kernelArgType::IMAGE_OBJ;
    args.push_back(argInfo);

    MockKernelWithArgumentAccess::ObjectCountsPublic objectCounts;

    kernel.getParentObjectCountsPublic(objectCounts);

    EXPECT_EQ(1u, objectCounts.imageCount);
    EXPECT_EQ(1u, objectCounts.samplerCount);
}
// patchBlocksSimdSize() must write the block's max SIMD size into the parent's
// cross-thread data at the offset recorded in childrenKernelsIdOffset[0].
TEST(ParentKernelTest, WhenPatchingBlocksSimdSizeThenPatchIsAppliedCorrectly) {
    MockClDevice clDevice{new MockDevice};
    MockContext context(&clDevice);

    MockParentKernel::CreateParams params{};
    params.addChildSimdSize = true;
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context, params));
    MockProgram *mockProgram = parentKernel->mockProgram;

    parentKernel->patchBlocksSimdSize();

    const auto simdSizeOffset = parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second;
    auto *patchedSimdSize = reinterpret_cast<uint32_t *>(ptrOffset(parentKernel->getCrossThreadData(), simdSizeOffset));

    EXPECT_EQ(mockProgram->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *patchedSimdSize);
}
// The kernel info of a MockParentKernel reports device-side enqueue.
TEST(ParentKernelTest, GivenParentKernelWhenCheckingForDeviceEnqueueThenTrueIsReturned) {
    MockClDevice clDevice{new MockDevice};
    MockContext context(&clDevice);
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context));

    const auto &kernelInfo = parentKernel->getKernelInfo();

    EXPECT_TRUE(kernelInfo.hasDeviceEnqueue());
}
// The kernel info of a regular MockKernelWithInternals does not report device-side enqueue.
TEST(ParentKernelTest, GivenNormalKernelWhenCheckingForDeviceEnqueueThenFalseIsReturned) {
    MockClDevice clDevice{new MockDevice};
    MockKernelWithInternals kernelWithInternals(clDevice);

    EXPECT_FALSE(kernelWithInternals.kernelInfo.hasDeviceEnqueue());
}
// initialize() must leave the block's max SIMD size in the parent's cross-thread
// data at the offset recorded in childrenKernelsIdOffset[0].
TEST(ParentKernelTest, WhenInitializingParentKernelThenBlocksSimdSizeIsPatched) {
    MockClDevice clDevice{new MockDevice};
    MockContext context(&clDevice);

    MockParentKernel::CreateParams params{};
    params.addChildSimdSize = true;
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context, params));
    MockProgram *mockProgram = parentKernel->mockProgram;

    parentKernel->initialize();

    const auto simdSizeOffset = parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second;
    auto *patchedSimdSize = reinterpret_cast<uint32_t *>(ptrOffset(parentKernel->getCrossThreadData(), simdSizeOffset));

    EXPECT_EQ(mockProgram->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *patchedSimdSize);
}
// Registers an extra block kernel that requests private memory and checks that
// initialize() leaves a private surface for that (last) block.
TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsAllocated) {
    MockClDevice clDevice{new MockDevice};
    MockContext context(&clDevice);

    MockParentKernel::CreateParams params{};
    params.addChildSimdSize = true;
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context, params));
    MockProgram *mockProgram = parentKernel->mockProgram;

    auto blockInfo = new MockKernelInfo();
    blockInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;

    // Lay the block's surfaces out back to back, 8 bytes apart, in its cross-thread data.
    uint32_t blockOffset = 0;

    blockInfo->setDeviceSideEnqueueDefaultQueueSurface(8, blockOffset);
    blockOffset += 8;

    blockInfo->setDeviceSideEnqueueEventPoolSurface(8, blockOffset);
    blockOffset += 8;

    blockInfo->setPrivateMemory(1000, false, 8, blockOffset);
    blockOffset += 8;

    blockInfo->setLocalIds({0, 0, 0});

    blockInfo->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;

    blockInfo->setDeviceSideEnqueueBlockInterfaceDescriptorOffset(0);

    // The DSH is released in ~MockParentKernel, which walks every registered block.
    blockInfo->heapInfo.pDsh = (void *)new uint64_t[64];
    blockInfo->heapInfo.DynamicStateHeapSize = 64 * sizeof(uint64_t);

    blockInfo->setCrossThreadDataSize(blockOffset);
    blockInfo->crossThreadData = new char[blockOffset];

    mockProgram->blockKernelManager->addBlockKernelInfo(blockInfo);

    parentKernel->initialize();

    auto blockManager = mockProgram->getBlockKernelManager();
    EXPECT_NE(nullptr, blockManager->getPrivateSurface(mockProgram->getBlockKernelManager()->getCount() - 1));
}

View File

@ -409,144 +409,6 @@ class MockKernelWithInternals {
char dshLocal[128]; char dshLocal[128];
std::vector<Kernel::SimpleKernelArgInfo> defaultKernelArguments; std::vector<Kernel::SimpleKernelArgInfo> defaultKernelArguments;
}; };
// Kernel mock that stands in for a "parent" kernel (one that uses device-side
// enqueue). create() builds the parent kernel info, a MockProgram, and one
// block kernel info registered in the program's block kernel manager.
class MockParentKernel : public Kernel {
  public:
    // Optional surfaces/entries that create() adds to the parent or to the block.
    struct CreateParams {
        bool addChildSimdSize = false;
        bool addChildGlobalMemory = false;
        bool addChildConstantMemory = false;
        bool addPrintfForParent = false;
        bool addPrintfForBlock = false;
    };
    using Kernel::auxTranslationRequired;
    using Kernel::kernelInfo;
    using Kernel::patchBlocksCurbeWithConstantValues;
    using Kernel::pImplicitArgs;
    using Kernel::pSshLocal;
    using Kernel::sshLocalSize;

    // Creates a parent kernel with all CreateParams options left at their defaults.
    static MockParentKernel *create(Context &context) {
        CreateParams createParams{};
        return create(context, createParams);
    }

    // Creates a parent kernel plus one block kernel according to createParams.
    // The caller owns the returned object; its destructor releases the kernel
    // info, the blocks' DSH buffers and the internal reference on the program.
    static MockParentKernel *create(Context &context, const CreateParams &createParams) {
        auto clDevice = context.getDevice(0);

        auto info = new MockKernelInfo();
        const size_t crossThreadSize = 160;
        uint32_t crossThreadOffset = 0;
        uint32_t crossThreadOffsetBlock = 0;

        info->setLocalIds({0, 0, 0});

        info->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
        info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
        info->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
        info->kernelDescriptor.kernelAttributes.simdSize = 32;

        // Parent surfaces are laid out back to back, 8 bytes apart.
        info->setDeviceSideEnqueueDefaultQueueSurface(8, crossThreadOffset);
        crossThreadOffset += 8;

        info->setDeviceSideEnqueueEventPoolSurface(8, crossThreadOffset);
        crossThreadOffset += 8;

        if (createParams.addPrintfForParent) {
            info->setPrintfSurface(8, crossThreadOffset);
            crossThreadOffset += 8;
        }

        ClDeviceVector deviceVector;
        deviceVector.push_back(clDevice);
        MockProgram *mockProgram = new MockProgram(&context, false, deviceVector);

        if (createParams.addChildSimdSize) {
            info->childrenKernelsIdOffset.push_back({0, crossThreadOffset});
        }

        UNRECOVERABLE_IF(crossThreadSize < crossThreadOffset + 8);
        info->crossThreadData = new char[crossThreadSize];

        auto parent = new MockParentKernel(mockProgram, *info);
        parent->crossThreadData = new char[crossThreadSize];
        memset(parent->crossThreadData, 0, crossThreadSize);
        parent->crossThreadDataSize = crossThreadSize;
        parent->mockKernelInfo = info;

        auto infoBlock = new MockKernelInfo();
        infoBlock->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;

        infoBlock->setDeviceSideEnqueueDefaultQueueSurface(8, crossThreadOffsetBlock);
        crossThreadOffsetBlock += 8;

        // Use the block's own running offset here; the parent's offset would place
        // this surface on top of the next block surface added below.
        infoBlock->setDeviceSideEnqueueEventPoolSurface(8, crossThreadOffsetBlock);
        crossThreadOffsetBlock += 8;

        if (createParams.addPrintfForBlock) {
            infoBlock->setPrintfSurface(8, crossThreadOffsetBlock);
            crossThreadOffsetBlock += 8;
        }

        if (createParams.addChildGlobalMemory) {
            infoBlock->setGlobalVariablesSurface(8, crossThreadOffsetBlock);
            crossThreadOffsetBlock += 8;
        }

        if (createParams.addChildConstantMemory) {
            infoBlock->setGlobalConstantsSurface(8, crossThreadOffsetBlock);
            crossThreadOffsetBlock += 8;
        }

        infoBlock->setLocalIds({0, 0, 0});

        infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true;
        infoBlock->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
        infoBlock->kernelDescriptor.kernelAttributes.simdSize = 32;

        infoBlock->setDeviceSideEnqueueBlockInterfaceDescriptorOffset(0);

        // Released in ~MockParentKernel.
        infoBlock->heapInfo.pDsh = (void *)new uint64_t[64];
        infoBlock->heapInfo.DynamicStateHeapSize = 64 * sizeof(uint64_t);

        // The block's cross-thread data is at least as large as the parent's.
        size_t crossThreadDataSize = crossThreadOffsetBlock > crossThreadSize ? crossThreadOffsetBlock : crossThreadSize;
        infoBlock->crossThreadData = new char[crossThreadDataSize];
        infoBlock->setCrossThreadDataSize(static_cast<uint16_t>(crossThreadDataSize));

        mockProgram->blockKernelManager->addBlockKernelInfo(infoBlock);
        parent->mockProgram = mockProgram;

        return parent;
    }

    MockParentKernel(Program *programArg, const KernelInfo &kernelInfoArg) : Kernel(programArg, kernelInfoArg, *programArg->getDevices()[0]) {
    }

    ~MockParentKernel() override {
        delete &kernelInfo;
        // Free the DSH buffer of every block registered in the program.
        BlockKernelManager *blockManager = program->getBlockKernelManager();

        for (uint32_t i = 0; i < blockManager->getCount(); i++) {
            const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i);
            delete[] (uint64_t *)blockInfo->heapInfo.pDsh;
        }

        if (mockProgram) {
            mockProgram->decRefInternal();
        }
    }

    Context *getContext() {
        return &mockProgram->getContext();
    }

    void setReflectionSurface(GraphicsAllocation *reflectionSurface) {
        kernelReflectionSurface = reflectionSurface;
    }

    // Set by create(); null when the object is built directly through the
    // constructor, which is what the destructor's null check guards.
    MockProgram *mockProgram = nullptr;
    KernelInfo *mockKernelInfo = nullptr;
};
class MockDebugKernel : public MockKernel { class MockDebugKernel : public MockKernel {
public: public:
MockDebugKernel(Program *program, const KernelInfo &kernelInfo, ClDevice &clDeviceArg) : MockKernel(program, kernelInfo, clDeviceArg) { MockDebugKernel(Program *program, const KernelInfo &kernelInfo, ClDevice &clDeviceArg) : MockKernel(program, kernelInfo, clDeviceArg) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -256,73 +256,6 @@ HWTEST_F(PrintfHandlerTests, givenPrintfHandlerWhenEnqueueIsBlockedThenDontUsePr
EXPECT_FALSE(cmdQ.isQueueBlocked()); EXPECT_FALSE(cmdQ.isQueueBlocked());
} }
// Parent without printf, block with printf: PrintfHandler::create must return a handler.
TEST_F(PrintfHandlerTests, givenParentKernelWithoutPrintfAndBlockKernelWithPrintfWhenPrintfHandlerCreateCalledThenResultIsAnObject) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockContext context(clDevice.get());

    MockParentKernel::CreateParams params{};
    params.addPrintfForParent = false;
    params.addPrintfForBlock = true;
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context, params));

    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), parentKernel.get());

    std::unique_ptr<PrintfHandler> handler(PrintfHandler::create(multiDispatchInfo, *clDevice));

    ASSERT_NE(nullptr, handler.get());
}
// No printf on parent or block, but the kernel has implicit args:
// PrintfHandler::create must still return a handler.
TEST_F(PrintfHandlerTests, givenKernelWithImplicitArgsButWithoutPrintfWhenPrintfHandlerCreateCalledThenResultIsAnObject) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockContext context(clDevice.get());

    MockParentKernel::CreateParams params{};
    params.addPrintfForParent = false;
    params.addPrintfForBlock = false;
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context, params));

    // Attach implicit args and reset them to an empty value.
    parentKernel->pImplicitArgs = std::make_unique<ImplicitArgs>();
    *parentKernel->pImplicitArgs = {};

    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), parentKernel.get());

    std::unique_ptr<PrintfHandler> handler(PrintfHandler::create(multiDispatchInfo, *clDevice));

    ASSERT_NE(nullptr, handler.get());
}
// Neither parent nor block uses printf: PrintfHandler::create must return nullptr.
TEST_F(PrintfHandlerTests, givenParentKernelAndBlockKernelWithoutPrintfWhenPrintfHandlerCreateCalledThenResaultIsNullptr) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockContext context(clDevice.get());

    MockParentKernel::CreateParams params{};
    params.addPrintfForBlock = false;
    params.addPrintfForParent = false;
    std::unique_ptr<MockParentKernel> kernelWithoutPrintf(MockParentKernel::create(context, params));

    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), kernelWithoutPrintf.get());

    std::unique_ptr<PrintfHandler> handler(PrintfHandler::create(multiDispatchInfo, *clDevice));

    ASSERT_EQ(nullptr, handler.get());
}
// Parent with printf, block without printf: PrintfHandler::create must return a handler.
TEST_F(PrintfHandlerTests, givenParentKernelWithPrintfAndBlockKernelWithoutPrintfWhenPrintfHandlerCreateCalledThenResaultIsAnObject) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockContext context(clDevice.get());

    MockParentKernel::CreateParams params{};
    params.addPrintfForBlock = false;
    params.addPrintfForParent = true;
    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(context, params));

    MockMultiDispatchInfo multiDispatchInfo(clDevice.get(), parentKernel.get());

    std::unique_ptr<PrintfHandler> handler(PrintfHandler::create(multiDispatchInfo, *clDevice));

    ASSERT_NE(nullptr, handler);
}
TEST_F(PrintfHandlerTests, givenMultiDispatchInfoWithMultipleKernelsWhenCreatingAndDispatchingPrintfHandlerThenPickMainKernel) { TEST_F(PrintfHandlerTests, givenMultiDispatchInfoWithMultipleKernelsWhenCreatingAndDispatchingPrintfHandlerThenPickMainKernel) {
MockContext context; MockContext context;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)); auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));