diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 399f65a2e6..ccb132dacb 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -98,7 +98,6 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount } if (AuxTranslationMode::Builtin == auxTranslationMode) { - UNRECOVERABLE_IF(kernel->isParentKernel); dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux); } @@ -127,8 +126,6 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, return; } - Kernel *parentKernel = multiDispatchInfo.peekParentKernel(); - TagNodeBase *hwTimeStamps = nullptr; CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver(); auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership(); @@ -333,11 +330,6 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, updateFromCompletionStamp(completionStamp, eventBuilder.getEvent()); if (blockQueue) { - if (parentKernel) { - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM; - } - enqueueBlocked(commandType, surfacesForResidency, numSurfaceForResidency, @@ -414,13 +406,6 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf hwTimeStamps = event->getHwTimeStampNode(); } - if (auto parentKernel = multiDispatchInfo.peekParentKernel()) { - parentKernel->createReflectionSurface(); - parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue()); - parentKernel->patchEventPool(context->getDefaultDeviceQueue()); - parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get()); - } - if (event && this->isPerfCountersEnabled()) { hwPerfCounter = event->getHwPerfCounterNode(); } @@ -761,7 +746,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( getSliceCount(), //sliceCount blocking, //blocking shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, //dcFlush - multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel(), //useSLM + multiDispatchInfo.usesSlm(), //useSLM true, //guardCommandBufferWithPipeControl commandType == CL_COMMAND_NDRANGE_KERNEL, //GSBA32BitRequired requiresCoherency, //requiresCoherency @@ -905,7 +890,7 @@ void CommandQueueHw::enqueueBlocked( } PreemptionMode preemptionMode = ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo); - bool slmUsed = multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel(); + bool slmUsed = multiDispatchInfo.usesSlm(); command = std::make_unique(*this, blockedCommandsData, allSurfaces, diff --git a/opencl/source/command_queue/enqueue_kernel.h b/opencl/source/command_queue/enqueue_kernel.h index ce830d6cec..a86176888c 100644 --- a/opencl/source/command_queue/enqueue_kernel.h +++ b/opencl/source/command_queue/enqueue_kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -43,10 +43,6 @@ cl_int CommandQueueHw::enqueueKernel( auto &kernel = *pKernel; const auto &kernelInfo = kernel.getKernelInfo(); - if (kernel.isParentKernel && !this->context->getDefaultDeviceQueue()) { - return CL_INVALID_OPERATION; - } - if (!kernel.isPatched()) { if (event) { *event = nullptr; diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index 2ed3207c9d..aa6323623b 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -129,12 +129,6 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf } // clang-format on - if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) { - if (heapType == IndirectHeap::SURFACE_STATE) { - expectedSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - } - } - if (ih == nullptr) ih = &commandQueue.getIndirectHeap(heapType, expectedSize); diff --git a/opencl/source/command_queue/hardware_interface.h b/opencl/source/command_queue/hardware_interface.h index 9ea53591d2..0b6905dbf6 100644 --- a/opencl/source/command_queue/hardware_interface.h +++ b/opencl/source/command_queue/hardware_interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -49,7 +49,6 @@ class HardwareInterface { CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, size_t &totalInterfaceDescriptorTableSize, - Kernel *parentKernel, IndirectHeap *dsh, LinearStream *commandStream); diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index fab9005519..3aa7588a09 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -73,7 +73,6 @@ void HardwareInterface::dispatchWalker( LinearStream *commandStream = nullptr; IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; - auto parentKernel = multiDispatchInfo.peekParentKernel(); auto mainKernel = multiDispatchInfo.peekMainKernel(); auto preemptionMode = ClPreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), multiDispatchInfo); @@ -125,8 +124,7 @@ void HardwareInterface::dispatchWalker( size_t totalInterfaceDescriptorTableSize = sizeof(INTERFACE_DESCRIPTOR_DATA); - getDefaultDshSpace(offsetInterfaceDescriptorTable, commandQueue, multiDispatchInfo, totalInterfaceDescriptorTableSize, - parentKernel, dsh, commandStream); + getDefaultDshSpace(offsetInterfaceDescriptorTable, commandQueue, multiDispatchInfo, totalInterfaceDescriptorTableSize, dsh, commandStream); // Program media interface descriptor load HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( @@ -255,22 +253,13 @@ void HardwareInterface::dispatchKernelCommands(CommandQueue &commandQ template void HardwareInterface::obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh) { - auto parentKernel = multiDispatchInfo.peekParentKernel(); - if (blockedQueue) { size_t dshSize = 0; size_t colorCalcSize = 0; size_t sshSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); bool iohEqualsDsh = false; - if (parentKernel) { - dshSize = commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize(); - sshSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); - iohEqualsDsh = true; - colorCalcSize = static_cast(commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize); - } else { - dshSize = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); - } + dshSize = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, dshSize, dsh); dsh->getSpace(colorCalcSize); @@ -284,12 +273,6 @@ void HardwareInterface::obtainIndirectHeaps(CommandQueue &commandQueu HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo), ioh); } } else { - if (parentKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) { - commandQueue.releaseIndirectHeap(IndirectHeap::SURFACE_STATE); - // clean reserved bindless offsets - ssh = &getIndirectHeap(commandQueue, multiDispatchInfo); - ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace()); - } dsh = &getIndirectHeap(commandQueue, multiDispatchInfo); ioh = &getIndirectHeap(commandQueue, multiDispatchInfo); ssh = &getIndirectHeap(commandQueue, multiDispatchInfo); diff --git a/opencl/source/command_queue/hardware_interface_bdw_and_later.inl b/opencl/source/command_queue/hardware_interface_bdw_and_later.inl index 183d85c58f..fa235de64b 100644 --- a/opencl/source/command_queue/hardware_interface_bdw_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_bdw_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -19,18 +19,13 @@ inline void HardwareInterface::getDefaultDshSpace( CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, size_t &totalInterfaceDescriptorTableSize, - Kernel *parentKernel, IndirectHeap *dsh, LinearStream *commandStream) { size_t numDispatches = multiDispatchInfo.size(); totalInterfaceDescriptorTableSize *= numDispatches; - if (!parentKernel) { - dsh->getSpace(totalInterfaceDescriptorTableSize); - } else { - dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed()); - } + dsh->getSpace(totalInterfaceDescriptorTableSize); } template diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index 03fa671394..c05fc446e8 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -24,13 +24,8 @@ inline void HardwareInterface::getDefaultDshSpace( CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, size_t &totalInterfaceDescriptorTableSize, - Kernel *parentKernel, IndirectHeap *dsh, LinearStream *commandStream) { - - if (parentKernel) { - dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed()); - } } template diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp index c31e185fab..72804eef26 100644 --- a/opencl/source/gtpin/gtpin_callbacks.cpp +++ b/opencl/source/gtpin/gtpin_callbacks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -72,7 +72,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) { // Enlarge local copy of SSH by 1 SS GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); - if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel)) { + if (!gtpinHelper.addSurfaceState(pKernel)) { // Kernel with no SSH or Kernel EM, not supported return; } @@ -117,7 +117,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { auto rootDeviceIndex = device.getRootDeviceIndex(); auto pMultiDeviceKernel = castToObjectOrAbort(kernel); auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); - if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize() == 0) { + if (pKernel->getSurfaceStateHeapSize() == 0) { // Kernel with no SSH, not supported return; } diff --git a/opencl/source/gtpin/gtpin_hw_helper.inl b/opencl/source/gtpin/gtpin_hw_helper.inl index df5728e064..bdfdf0b8fa 100644 --- a/opencl/source/gtpin/gtpin_hw_helper.inl +++ b/opencl/source/gtpin/gtpin_hw_helper.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -20,7 +20,7 @@ bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel) { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; size_t sshSize = pKernel->getSurfaceStateHeapSize(); - if ((sshSize == 0) || pKernel->isParentKernel) { + if (sshSize == 0) { // Kernels which do not use SSH or use Execution Model are not supported (yet) return false; } diff --git a/opencl/source/helpers/dispatch_info.cpp b/opencl/source/helpers/dispatch_info.cpp index 2f5e053a7c..7dc22c293d 100644 --- a/opencl/source/helpers/dispatch_info.cpp +++ b/opencl/source/helpers/dispatch_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -33,10 +33,6 @@ Kernel *MultiDispatchInfo::peekMainKernel() const { return mainKernel ? mainKernel : dispatchInfos.begin()->getKernel(); } -Kernel *MultiDispatchInfo::peekParentKernel() const { - return (mainKernel && mainKernel->isParentKernel) ? mainKernel : nullptr; -} - void MultiDispatchInfo::backupUnifiedMemorySyncRequirement() { for (const auto &dispatchInfo : dispatchInfos) { dispatchInfo.getKernel()->setUnifiedMemorySyncRequirement(true); diff --git a/opencl/source/helpers/dispatch_info.h b/opencl/source/helpers/dispatch_info.h index 24451f1fe1..0dd1be958e 100644 --- a/opencl/source/helpers/dispatch_info.h +++ b/opencl/source/helpers/dispatch_info.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -187,7 +187,6 @@ struct MultiDispatchInfo { redescribedSurfaces.push_back(memObj.release()); } - Kernel *peekParentKernel() const; Kernel *peekMainKernel() const; void setBuiltinOpParams(const BuiltinOpParams &builtinOpParams) { diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 9b8b4f0707..103a4a1ed2 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -140,20 +140,10 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate return completionStamp; } auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver(); - bool executionModelKernel = kernel->isParentKernel; - auto devQueue = commandQueue.getContext().getDefaultDeviceQueue(); auto bcsCsrForAuxTranslation = commandQueue.getBcsForAuxTranslation(); auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership(); - if (executionModelKernel) { - while (!devQueue->isEMCriticalSectionFree()) - ; - - devQueue->resetDeviceQueue(); - devQueue->acquireEMCriticalSection(); - } - IndirectHeap *dsh = kernelOperation->dsh.get(); IndirectHeap *ioh = kernelOperation->ioh.get(); IndirectHeap *ssh = kernelOperation->ssh.get(); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 72c9a65e8c..07af05afcb 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -68,8 +68,7 @@ class Surface; uint32_t Kernel::dummyPatchLocation = 0xbaddf00d; Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) - : isParentKernel(kernelInfoArg.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue), - executionEnvironment(programArg->getExecutionEnvironment()), + : executionEnvironment(programArg->getExecutionEnvironment()), program(programArg), clDevice(clDeviceArg), kernelInfo(kernelInfoArg) { @@ -262,9 +261,6 @@ cl_int Kernel::initialize() { program->getContextPtr()->setResolvesRequiredInKernels(true); } - if (isParentKernel) { - program->allocateBlockPrivateSurfaces(*pClDevice); - } if (program->isKernelDebugEnabled() && isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful)) { debugEnabled = true; } @@ -1791,129 +1787,6 @@ void Kernel::unsetArg(uint32_t argIndex) { void Kernel::createReflectionSurface() { auto pClDevice = &clDevice; - if (this->isParentKernel && kernelReflectionSurface == nullptr) { - auto &hwInfo = pClDevice->getHardwareInfo(); - auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); - BlockKernelManager *blockManager = program->getBlockKernelManager(); - uint32_t blockCount = static_cast(blockManager->getCount()); - - ObjectCounts objectCount; - getParentObjectCounts(objectCount); - uint32_t parentImageCount = objectCount.imageCount; - uint32_t parentSamplerCount = objectCount.samplerCount; - size_t maxConstantBufferSize = 0; - - std::vector *curbeParamsForBlocks = new std::vector[blockCount]; - - uint64_t *tokenMask = new uint64_t[blockCount]; - uint32_t *sshTokenOffsetsFromKernelData = new uint32_t[blockCount]; - - size_t kernelReflectionSize = alignUp(sizeof(IGIL_KernelDataHeader) + blockCount * sizeof(IGIL_KernelAddressData), sizeof(void *)); - uint32_t kernelDataOffset = static_cast(kernelReflectionSize); - uint32_t parentSSHAlignedSize = alignUp(this->kernelInfo.heapInfo.SurfaceStateHeapSize, hwHelper.getBindingTableStateAlignement()); - uint32_t btOffset = parentSSHAlignedSize; - - for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - size_t samplerStateAndBorderColorSize = 0; - - uint32_t firstSSHTokenIndex = 0; - - ReflectionSurfaceHelper::getCurbeParams(curbeParamsForBlocks[i], tokenMask[i], firstSSHTokenIndex, *pBlockInfo, hwInfo); - - maxConstantBufferSize = std::max(maxConstantBufferSize, static_cast(pBlockInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize)); - - samplerStateAndBorderColorSize = pBlockInfo->getSamplerStateArraySize(hwInfo); - samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, Sampler::samplerStateArrayAlignment); - samplerStateAndBorderColorSize += pBlockInfo->getBorderColorStateSize(); - samplerStateAndBorderColorSize = alignUp(samplerStateAndBorderColorSize, sizeof(void *)); - - sshTokenOffsetsFromKernelData[i] = offsetof(IGIL_KernelData, m_data) + sizeof(IGIL_KernelCurbeParams) * firstSSHTokenIndex; - - kernelReflectionSize += alignUp(sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams) * curbeParamsForBlocks[i].size(), sizeof(void *)); - kernelReflectionSize += parentSamplerCount * sizeof(IGIL_SamplerParams) + samplerStateAndBorderColorSize; - } - - maxConstantBufferSize = alignUp(maxConstantBufferSize, sizeof(void *)); - kernelReflectionSize += blockCount * alignUp(maxConstantBufferSize, sizeof(void *)); - kernelReflectionSize += parentImageCount * sizeof(IGIL_ImageParamters); - kernelReflectionSize += parentSamplerCount * sizeof(IGIL_ParentSamplerParams); - kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties( - {pClDevice->getRootDeviceIndex(), kernelReflectionSize, - GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, - pClDevice->getDeviceBitfield()}); - - for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - uint32_t newKernelDataOffset = ReflectionSurfaceHelper::setKernelData(kernelReflectionSurface->getUnderlyingBuffer(), - kernelDataOffset, - curbeParamsForBlocks[i], - tokenMask[i], - maxConstantBufferSize, - parentSamplerCount, - *pBlockInfo, - hwInfo); - - uint32_t offset = static_cast(offsetof(IGIL_KernelDataHeader, m_data) + sizeof(IGIL_KernelAddressData) * i); - - uint32_t samplerHeapOffset = static_cast(alignUp(kernelDataOffset + sizeof(IGIL_KernelData) + curbeParamsForBlocks[i].size() * sizeof(IGIL_KernelCurbeParams), sizeof(void *))); - uint32_t samplerHeapSize = static_cast(alignUp(pBlockInfo->getSamplerStateArraySize(hwInfo), Sampler::samplerStateArrayAlignment) + pBlockInfo->getBorderColorStateSize()); - uint32_t constantBufferOffset = alignUp(samplerHeapOffset + samplerHeapSize, sizeof(void *)); - - uint32_t samplerParamsOffset = 0; - if (parentSamplerCount) { - samplerParamsOffset = newKernelDataOffset - sizeof(IGIL_SamplerParams) * parentSamplerCount; - IGIL_SamplerParams *pSamplerParams = (IGIL_SamplerParams *)ptrOffset(kernelReflectionSurface->getUnderlyingBuffer(), samplerParamsOffset); - uint32_t sampler = 0; - const auto &args = pBlockInfo->kernelDescriptor.payloadMappings.explicitArgs; - for (uint32_t argID = 0; argID < args.size(); argID++) { - if (args[argID].is()) { - - pSamplerParams[sampler].m_ArgID = argID; - pSamplerParams[sampler].m_SamplerStateOffset = args[argID].as().bindful; - sampler++; - } - } - } - - ReflectionSurfaceHelper::setKernelAddressData(kernelReflectionSurface->getUnderlyingBuffer(), - offset, - kernelDataOffset, - samplerHeapOffset, - constantBufferOffset, - samplerParamsOffset, - sshTokenOffsetsFromKernelData[i] + kernelDataOffset, - btOffset, - *pBlockInfo, - hwInfo); - - if (samplerHeapSize > 0) { - void *pDst = ptrOffset(kernelReflectionSurface->getUnderlyingBuffer(), samplerHeapOffset); - const void *pSrc = ptrOffset(pBlockInfo->heapInfo.pDsh, pBlockInfo->getBorderColorOffset()); - memcpy_s(pDst, samplerHeapSize, pSrc, samplerHeapSize); - } - - void *pDst = ptrOffset(kernelReflectionSurface->getUnderlyingBuffer(), constantBufferOffset); - const char *pSrc = pBlockInfo->crossThreadData; - memcpy_s(pDst, pBlockInfo->getConstantBufferSize(), pSrc, pBlockInfo->getConstantBufferSize()); - - btOffset += pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset; - kernelDataOffset = newKernelDataOffset; - } - - uint32_t samplerOffset = 0; - if (parentSamplerCount) { - samplerOffset = kernelDataOffset + parentImageCount * sizeof(IGIL_ImageParamters); - } - ReflectionSurfaceHelper::setKernelDataHeader(kernelReflectionSurface->getUnderlyingBuffer(), blockCount, parentImageCount, parentSamplerCount, kernelDataOffset, samplerOffset); - delete[] curbeParamsForBlocks; - delete[] tokenMask; - delete[] sshTokenOffsetsFromKernelData; - - // Patch constant values once after reflection surface creation - patchBlocksCurbeWithConstantValues(); - } - if (DebugManager.flags.ForceDispatchScheduler.get()) { if (kernelReflectionSurface == nullptr) { kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties( @@ -1927,7 +1800,6 @@ void Kernel::createReflectionSurface() { void Kernel::getParentObjectCounts(ObjectCounts &objectCount) { objectCount.imageCount = 0; objectCount.samplerCount = 0; - DEBUG_BREAK_IF(!isParentKernel); for (const auto &arg : this->kernelArguments) { if (arg.type == SAMPLER_OBJ) { @@ -1942,22 +1814,6 @@ bool Kernel::hasPrintfOutput() const { return kernelInfo.kernelDescriptor.kernelAttributes.flags.usesPrintf; } -size_t Kernel::getInstructionHeapSizeForExecutionModel() const { - BlockKernelManager *blockManager = program->getBlockKernelManager(); - uint32_t blockCount = static_cast(blockManager->getCount()); - - size_t totalSize = 0; - if (isParentKernel) { - totalSize = kernelBinaryAlignment - 1; // for initial alignment - for (uint32_t i = 0; i < blockCount; i++) { - const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); - totalSize += pBlockInfo->heapInfo.KernelHeapSize; - totalSize = alignUp(totalSize, kernelBinaryAlignment); - } - } - return totalSize; -} - void Kernel::patchBlocksCurbeWithConstantValues() { auto rootDeviceIndex = clDevice.getRootDeviceIndex(); BlockKernelManager *blockManager = program->getBlockKernelManager(); @@ -2622,10 +2478,6 @@ void Kernel::setReflectionSurfaceBlockBtOffset(uint32_t blockID, uint32_t offset ReflectionSurfaceHelper::setKernelAddressDataBtOffset(getKernelReflectionSurface()->getUnderlyingBuffer(), blockID, offset); } -bool Kernel::checkIfIsParentKernelAndBlocksUsesPrintf() { - return isParentKernel && getProgram()->getBlockKernelManager()->getIfBlockUsesPrintf(); -} - uint64_t Kernel::getKernelStartOffset( const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 3f6c6417eb..1e14fcdd4f 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -236,8 +236,6 @@ class Kernel : public ReferenceTrackedObject { return kernelReflectionSurface; } - size_t getInstructionHeapSizeForExecutionModel() const; - // Helpers cl_int setArg(uint32_t argIndex, uint32_t argValue); cl_int setArg(uint32_t argIndex, uint64_t argValue); @@ -324,7 +322,6 @@ class Kernel : public ReferenceTrackedObject { uint32_t allBufferArgsStateful = CL_TRUE; bool isBuiltIn = false; - const bool isParentKernel; uint32_t getThreadArbitrationPolicy() const { return threadArbitrationPolicy; @@ -333,8 +330,6 @@ class Kernel : public ReferenceTrackedObject { return executionType; } - bool checkIfIsParentKernelAndBlocksUsesPrintf(); - bool is32Bit() const { return kernelInfo.kernelDescriptor.kernelAttributes.gpuPointerSize == 4; } diff --git a/opencl/source/program/printf_handler.cpp b/opencl/source/program/printf_handler.cpp index a91220a535..e074991560 100644 --- a/opencl/source/program/printf_handler.cpp +++ b/opencl/source/program/printf_handler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -38,7 +38,7 @@ PrintfHandler *PrintfHandler::create(const MultiDispatchInfo &multiDispatchInfo, } auto mainKernel = multiDispatchInfo.peekMainKernel(); if (mainKernel != nullptr) { - if (mainKernel->checkIfIsParentKernelAndBlocksUsesPrintf() || mainKernel->getImplicitArgs()) { + if (mainKernel->getImplicitArgs()) { return new PrintfHandler(device); } } diff --git a/opencl/test/unit_test/aub_tests/fixtures/CMakeLists.txt b/opencl/test/unit_test/aub_tests/fixtures/CMakeLists.txt index 396aaaeafc..f386ec1cdb 100644 --- a/opencl/test/unit_test/aub_tests/fixtures/CMakeLists.txt +++ b/opencl/test/unit_test/aub_tests/fixtures/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2021 Intel Corporation +# Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -8,7 +8,6 @@ target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_fixture.h - ${CMAKE_CURRENT_SOURCE_DIR}/aub_parent_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/run_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/simple_arg_fixture.h diff --git a/opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h b/opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h deleted file mode 100644 index 7fdd9d7dc6..0000000000 --- a/opencl/test/unit_test/aub_tests/fixtures/aub_parent_kernel_fixture.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2018-2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once -#include "opencl/test/unit_test/aub_tests/command_queue/command_enqueue_fixture.h" -#include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" -#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" - -namespace NEO { -static const char programFile[] = "simple_block_kernel"; -static const char kernelName[] = "kernel_reflection"; - -class AUBParentKernelFixture : public CommandEnqueueAUBFixture, - public HelloWorldKernelFixture, - public testing::Test { - public: - using HelloWorldKernelFixture::SetUp; - - void SetUp() override { - REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); - CommandEnqueueAUBFixture::SetUp(); - ASSERT_NE(nullptr, pClDevice); - HelloWorldKernelFixture::SetUp(pClDevice, programFile, kernelName, "-cl-std=CL2.0"); - } - void TearDown() override { - if (IsSkipped()) { - return; - } - HelloWorldKernelFixture::TearDown(); - CommandEnqueueAUBFixture::TearDown(); - } -}; -} // namespace NEO diff --git a/opencl/test/unit_test/fixtures/CMakeLists.txt b/opencl/test/unit_test/fixtures/CMakeLists.txt index 960782106f..675ea05b7d 100644 --- a/opencl/test/unit_test/fixtures/CMakeLists.txt +++ b/opencl/test/unit_test/fixtures/CMakeLists.txt @@ -19,7 +19,6 @@ set(IGDRCL_SRCS_tests_fixtures ${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device_instrumentation_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_handler_fixture.h - ${CMAKE_CURRENT_SOURCE_DIR}/execution_model_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/hello_world_kernel_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/image_fixture.cpp diff --git a/opencl/test/unit_test/fixtures/execution_model_fixture.h b/opencl/test/unit_test/fixtures/execution_model_fixture.h deleted file mode 100644 index 92751ad475..0000000000 --- a/opencl/test/unit_test/fixtures/execution_model_fixture.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2018-2022 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once - -#include "shared/test/common/helpers/debug_manager_state_restore.h" - -#include "opencl/source/device_queue/device_queue.h" -#include "opencl/test/unit_test/command_queue/command_queue_fixture.h" -#include "opencl/test/unit_test/mocks/mock_cl_device.h" -#include "opencl/test/unit_test/mocks/mock_kernel.h" -#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" - -struct ParentKernelCommandQueueFixture : public CommandQueueHwFixture, - testing::Test { - - void SetUp() override { - device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex)}; - CommandQueueHwFixture::SetUp(device, 0); - } - void TearDown() override { - CommandQueueHwFixture::TearDown(); - delete device; - } - - std::unique_ptr createBlockedCommandsData(CommandQueue &commandQueue) { - auto commandStream = new LinearStream(); - - auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver(); - gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1); - - return std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); - } - const uint32_t rootDeviceIndex = 0u; -}; diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index 8662b1b322..908abe1cf0 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -2033,23 +2033,6 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenLowMemoryConditionOccursThe injectFailures(allocBufferFunc); } -TEST_F(GTPinTests, givenParentKernelWhenGtPinAddingSurfaceStateThenItIsNotAddedAndFalseIsReturned) { - GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; - GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); - std::unique_ptr parentKernel(MockParentKernel::create(*pContext)); - - parentKernel->sshLocalSize = 64; - parentKernel->pSshLocal.reset(new char[64]); - - size_t sizeSurfaceStates1 = parentKernel->getSurfaceStateHeapSize(); - - bool surfaceAdded = gtpinHelper.addSurfaceState(parentKernel.get()); - EXPECT_FALSE(surfaceAdded); - - size_t sizeSurfaceStates2 = parentKernel->getSurfaceStateHeapSize(); - EXPECT_EQ(sizeSurfaceStates2, sizeSurfaceStates1); -} - TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { cl_kernel kernel = nullptr; cl_program pProgram = nullptr; diff --git a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp index 7482eeee95..60ba84ddcf 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -266,46 +266,32 @@ TEST_F(DispatchInfoTest, WhenSettingValuesInDispatchInfoThenThoseValuesAreSet) { EXPECT_EQ(swgs, dispatchInfo.getStartOfWorkgroups()); } -TEST_F(DispatchInfoTest, givenKernelWhenMultiDispatchInfoIsCreatedThenQueryParentAndMainKernel) { - std::unique_ptr parentKernel(MockParentKernel::create(*pContext)); +TEST_F(DispatchInfoTest, givenKernelWhenMultiDispatchInfoIsCreatedThenQueryMainKernel) { std::unique_ptr baseKernel(MockKernel::create(*pDevice, pProgram)); std::unique_ptr builtInKernel(MockKernel::create(*pDevice, pProgram)); builtInKernel->isBuiltIn = true; - DispatchInfo parentKernelDispatchInfo(pClDevice, parentKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo baseDispatchInfo(pClDevice, baseKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); DispatchInfo builtInDispatchInfo(pClDevice, builtInKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); - { - MultiDispatchInfo multiDispatchInfo(parentKernel.get()); - multiDispatchInfo.push(parentKernelDispatchInfo); - EXPECT_EQ(parentKernel.get(), multiDispatchInfo.peekParentKernel()); - EXPECT_EQ(parentKernel.get(), multiDispatchInfo.peekMainKernel()); - } - { MultiDispatchInfo multiDispatchInfo(baseKernel.get()); multiDispatchInfo.push(builtInDispatchInfo); - EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel()); EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); // dont pick builtin kernel multiDispatchInfo.push(baseDispatchInfo); - EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel()); EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); } { MultiDispatchInfo multiDispatchInfo; - EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel()); EXPECT_EQ(nullptr, multiDispatchInfo.peekMainKernel()); multiDispatchInfo.push(builtInDispatchInfo); - EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel()); EXPECT_EQ(builtInKernel.get(), multiDispatchInfo.peekMainKernel()); } { MultiDispatchInfo multiDispatchInfo; - multiDispatchInfo.push(parentKernelDispatchInfo); multiDispatchInfo.push(baseDispatchInfo); multiDispatchInfo.push(builtInDispatchInfo); diff --git a/opencl/test/unit_test/kernel/CMakeLists.txt b/opencl/test/unit_test/kernel/CMakeLists.txt index f2782d1012..54c1066157 100644 --- a/opencl/test/unit_test/kernel/CMakeLists.txt +++ b/opencl/test/unit_test/kernel/CMakeLists.txt @@ -27,7 +27,6 @@ set(IGDRCL_SRCS_tests_kernel ${CMAKE_CURRENT_SOURCE_DIR}/kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_transformable_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/debug_kernel_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/parent_kernel_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/substitute_kernel_heap_tests.cpp ) diff --git a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp index 99fd368685..5d41c834a8 100644 --- a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp @@ -13,7 +13,6 @@ #include "opencl/source/kernel/kernel.h" #include "opencl/source/program/printf_handler.h" #include "opencl/source/sampler/sampler.h" -#include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/platform_fixture.h" @@ -32,21 +31,6 @@ using namespace NEO; -TEST(KernelReflectionSurfaceTestSingle, GivenNonParentKernelWhenCreatingKernelReflectionSurfaceThenKernelReflectionSurfaceIsNotCreated) { - MockClDevice device{new MockDevice}; - MockProgram program(toClDeviceVector(device)); - KernelInfo info; - MockKernel kernel(&program, info, device); - - EXPECT_FALSE(kernel.isParentKernel); - - kernel.createReflectionSurface(); - - auto reflectionSurface = kernel.getKernelReflectionSurface(); - - EXPECT_EQ(nullptr, reflectionSurface); -} - class ReflectionSurfaceHelperTest : public testing::TestWithParam> { protected: @@ -678,168 +662,3 @@ TEST_F(ReflectionSurfaceHelperFixture, GivenUndefinedOffsetsWhenPatchingBlocksCu EXPECT_THAT(patchedValues, MemCompare(reference.get(), 10 * sizeof(IGIL_KernelDataHeader) - constBufferOffset)); } - -class ReflectionSurfaceConstantValuesPatchingTest : public ClDeviceFixture, - public ::testing::Test { - public: - void SetUp() override { - ClDeviceFixture::SetUp(); - } - void TearDown() override { - ClDeviceFixture::TearDown(); - } -}; - -TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryWhenReflectionSurfaceIsPatchedWithConstantValuesThenProgramGlobalMemoryAddressIsPatched) { - - MockContext context(pClDevice); - MockParentKernel::CreateParams createParams{}; - createParams.addChildGlobalMemory = true; - std::unique_ptr parentKernel(MockParentKernel::create(context, createParams)); - - // graphicsMemory is released by Program - GraphicsAllocation *globalMemory = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); - - parentKernel->mockProgram->setGlobalSurface(globalMemory); - - // Allocte reflectionSurface, 2 * 4096 should be enough - GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize}); - parentKernel->setReflectionSurface(reflectionSurface); - - memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); - - const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); - IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); - pKernelHeader->m_numberOfKernels = 1; - pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; - - parentKernel->patchBlocksCurbeWithConstantValues(); - - auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); - - uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless; - - uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); - - EXPECT_EQ(globalMemory->getGpuAddressToPatch(), *pCurbe); -} - -TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithGlobalMemoryAndProgramWithoutGlobalMemortWhenReflectionSurfaceIsPatchedWithConstantValuesThenZeroAddressIsPatched) { - - MockContext context(pClDevice); - MockParentKernel::CreateParams createParams{}; - createParams.addChildGlobalMemory = true; - std::unique_ptr parentKernel(MockParentKernel::create(context, createParams)); - - if (parentKernel->mockProgram->getGlobalSurface(pClDevice->getRootDeviceIndex())) { - pDevice->getMemoryManager()->freeGraphicsMemory(parentKernel->mockProgram->getGlobalSurface(pClDevice->getRootDeviceIndex())); - parentKernel->mockProgram->setGlobalSurface(nullptr); - } - - // Allocte reflectionSurface, 2 * 4096 should be enough - GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize}); - parentKernel->setReflectionSurface(reflectionSurface); - - memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); - - const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); - IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); - pKernelHeader->m_numberOfKernels = 1; - pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; - - parentKernel->patchBlocksCurbeWithConstantValues(); - - auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); - - uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless; - uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); - - EXPECT_EQ(0u, *pCurbe); -} - -TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemoryWhenReflectionSurfaceIsPatchedWithConstantValuesThenProgramConstantMemoryAddressIsPatched) { - - MockContext context(pClDevice); - MockParentKernel::CreateParams createParams{}; - createParams.addChildConstantMemory = true; - std::unique_ptr parentKernel(MockParentKernel::create(context, createParams)); - - // graphicsMemory is released by Program - GraphicsAllocation *constantMemory = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}); - - parentKernel->mockProgram->setConstantSurface(constantMemory); - - // Allocte reflectionSurface, 2 * 4096 should be enough - GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize}); - parentKernel->setReflectionSurface(reflectionSurface); - - memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); - - const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); - IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); - pKernelHeader->m_numberOfKernels = 1; - pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; - - parentKernel->patchBlocksCurbeWithConstantValues(); - - auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); - - uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless; - - uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset); - uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); - - EXPECT_EQ(constantMemory->getGpuAddressToPatch(), *pCurbeToPatch); - - std::unique_ptr zeroMemory = std::unique_ptr(new char[4096]); - memset(zeroMemory.get(), 0, 4096); - // memory before is not written - EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbe, std::min(4096u, blockPatchOffset))); - - //memory after is not written - EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbeToPatch + 1, std::min(4096u, 8192u - constBufferOffset - blockPatchOffset - (uint32_t)sizeof(uint64_t)))); -} - -TEST_F(ReflectionSurfaceConstantValuesPatchingTest, GivenBlockWithConstantMemoryAndProgramWithoutConstantMemortWhenReflectionSurfaceIsPatchedWithConstantValuesThenZeroAddressIsPatched) { - - MockContext context(pClDevice); - MockParentKernel::CreateParams createParams{}; - createParams.addChildConstantMemory = true; - std::unique_ptr parentKernel(MockParentKernel::create(context, createParams)); - - if (parentKernel->mockProgram->getConstantSurface(pClDevice->getRootDeviceIndex())) { - pDevice->getMemoryManager()->freeGraphicsMemory(parentKernel->mockProgram->getConstantSurface(pClDevice->getRootDeviceIndex())); - parentKernel->mockProgram->setConstantSurface(nullptr); - } - - // Allocte reflectionSurface, 2 * 4096 should be enough - GraphicsAllocation *reflectionSurface = pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), 2 * MemoryConstants::pageSize}); - parentKernel->setReflectionSurface(reflectionSurface); - - memset(reflectionSurface->getUnderlyingBuffer(), 0, reflectionSurface->getUnderlyingBufferSize()); - - const uint32_t constBufferOffset = (uint32_t)alignUp(sizeof(IGIL_KernelDataHeader) + sizeof(IGIL_KernelAddressData) + sizeof(IGIL_KernelData) + sizeof(IGIL_KernelCurbeParams), sizeof(uint64_t)); - IGIL_KernelDataHeader *pKernelHeader = reinterpret_cast(reflectionSurface->getUnderlyingBuffer()); - pKernelHeader->m_numberOfKernels = 1; - pKernelHeader->m_data[0].m_ConstantBufferOffset = constBufferOffset; - - parentKernel->patchBlocksCurbeWithConstantValues(); - - auto *blockInfo = parentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0); - - uint32_t blockPatchOffset = blockInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless; - - uint64_t *pCurbe = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset); - uint64_t *pCurbeToPatch = (uint64_t *)ptrOffset(reflectionSurface->getUnderlyingBuffer(), constBufferOffset + blockPatchOffset); - - EXPECT_EQ(0u, *pCurbeToPatch); - - std::unique_ptr zeroMemory = std::unique_ptr(new char[4096]); - memset(zeroMemory.get(), 0, 4096); - - // memory before is not written - EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbe, std::min(4096u, blockPatchOffset))); - - //memory after is not written - EXPECT_THAT(zeroMemory.get(), MemCompare(pCurbeToPatch + 1, std::min(4096u, 8192u - constBufferOffset - blockPatchOffset - (uint32_t)sizeof(uint64_t)))); -} diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 433b3bd138..35672ad4c1 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -34,7 +34,6 @@ #include "opencl/source/mem_obj/image.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/fixtures/device_host_queue_fixture.h" -#include "opencl/test/unit_test/fixtures/execution_model_fixture.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/helpers/gtest_helpers.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" @@ -2182,13 +2181,6 @@ TEST(KernelInfoTest, givenHwHelperWhenCreatingKernelAllocationThenCorrectPadding clDevice->getMemoryManager()->freeGraphicsMemory(mockKernel->kernelInfo.getGraphicsAllocation()); } -TEST(KernelTest, givenNormalKernelWhenGettingInstructionHeapSizeForExecutionModelThenZeroIsReturned) { - auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); - MockKernelWithInternals kernel(*device); - - EXPECT_EQ(0u, kernel.mockKernel->getInstructionHeapSizeForExecutionModel()); -} - TEST(KernelTest, WhenSettingKernelArgThenBuiltinDispatchInfoBuilderIsUsed) { struct MockBuiltinDispatchBuilder : BuiltinDispatchInfoBuilder { using BuiltinDispatchInfoBuilder::BuiltinDispatchInfoBuilder; diff --git a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp deleted file mode 100644 index d50eeeb65b..0000000000 --- a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (C) 2018-2022 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/test/common/mocks/mock_device.h" -#include "shared/test/common/test_macros/test.h" - -#include "opencl/test/unit_test/mocks/mock_cl_device.h" -#include "opencl/test/unit_test/mocks/mock_kernel.h" -#include "opencl/test/unit_test/mocks/mock_program.h" - -#include - -using namespace NEO; - -class MockKernelWithArgumentAccess : public Kernel { - public: - std::vector &getKernelArguments() { - return kernelArguments; - } - - class ObjectCountsPublic : public Kernel::ObjectCounts { - }; - - MockKernelWithArgumentAccess(Program *programArg, KernelInfo &kernelInfoArg, ClDevice &clDeviceArg) : Kernel(programArg, kernelInfoArg, clDeviceArg) { - } - - void getParentObjectCountsPublic(MockKernelWithArgumentAccess::ObjectCountsPublic &objectCount) { - getParentObjectCounts(objectCount); - } -}; - -TEST(ParentKernelTest, WhenArgsAddedThenObjectCountsAreIncremented) { - MockClDevice *device = new MockClDevice{new MockDevice}; - MockProgram program(toClDeviceVector(*device)); - KernelInfo info; - info.kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true; - - MockKernelWithArgumentAccess kernel(&program, info, *device); - - std::vector &args = kernel.getKernelArguments(); - - Kernel::SimpleKernelArgInfo argInfo; - argInfo.type = Kernel::kernelArgType::SAMPLER_OBJ; - args.push_back(argInfo); - argInfo.type = Kernel::kernelArgType::IMAGE_OBJ; - args.push_back(argInfo); - - MockKernelWithArgumentAccess::ObjectCountsPublic objectCounts; - - kernel.getParentObjectCountsPublic(objectCounts); - - EXPECT_EQ(1u, objectCounts.imageCount); - EXPECT_EQ(1u, objectCounts.samplerCount); - - delete device; -} - -TEST(ParentKernelTest, WhenPatchingBlocksSimdSizeThenPatchIsAppliedCorrectly) { - MockClDevice device{new MockDevice}; - MockContext context(&device); - MockParentKernel::CreateParams createParams{}; - createParams.addChildSimdSize = true; - std::unique_ptr parentKernel(MockParentKernel::create(context, createParams)); - MockProgram *program = (MockProgram *)parentKernel->mockProgram; - - parentKernel->patchBlocksSimdSize(); - - void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second); - uint32_t *simdSize = reinterpret_cast(blockSimdSize); - - EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); -} - -TEST(ParentKernelTest, GivenParentKernelWhenCheckingForDeviceEnqueueThenTrueIsReturned) { - MockClDevice device{new MockDevice}; - MockContext context(&device); - std::unique_ptr parentKernel(MockParentKernel::create(context)); - - EXPECT_TRUE(parentKernel->getKernelInfo().hasDeviceEnqueue()); -} - -TEST(ParentKernelTest, GivenNormalKernelWhenCheckingForDeviceEnqueueThenFalseIsReturned) { - MockClDevice device{new MockDevice}; - MockKernelWithInternals kernel(device); - - EXPECT_FALSE(kernel.kernelInfo.hasDeviceEnqueue()); -} - -TEST(ParentKernelTest, WhenInitializingParentKernelThenBlocksSimdSizeIsPatched) { - MockClDevice device{new MockDevice}; - MockContext context(&device); - MockParentKernel::CreateParams createParams{}; - createParams.addChildSimdSize = true; - std::unique_ptr parentKernel(MockParentKernel::create(context, createParams)); - MockProgram *program = (MockProgram *)parentKernel->mockProgram; - - parentKernel->initialize(); - - void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(), parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second); - uint32_t *simdSize = reinterpret_cast(blockSimdSize); - - EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); -} - -TEST(ParentKernelTest, WhenInitializingParentKernelThenPrivateMemoryForBlocksIsAllocated) { - MockClDevice device{new MockDevice}; - MockContext context(&device); - MockParentKernel::CreateParams createParams{}; - createParams.addChildSimdSize = true; - std::unique_ptr parentKernel(MockParentKernel::create(context, createParams)); - MockProgram *program = (MockProgram *)parentKernel->mockProgram; - - auto infoBlock = new MockKernelInfo(); - infoBlock->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; - - uint32_t crossThreadOffsetBlock = 0; - infoBlock->setDeviceSideEnqueueDefaultQueueSurface(8, crossThreadOffsetBlock); - crossThreadOffsetBlock += 8; - - infoBlock->setDeviceSideEnqueueEventPoolSurface(8, crossThreadOffsetBlock); - crossThreadOffsetBlock += 8; - - infoBlock->setPrivateMemory(1000, false, 8, crossThreadOffsetBlock); - crossThreadOffsetBlock += 8; - - infoBlock->setLocalIds({0, 0, 0}); - - infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true; - infoBlock->setDeviceSideEnqueueBlockInterfaceDescriptorOffset(0); - - infoBlock->heapInfo.pDsh = (void *)new uint64_t[64]; - infoBlock->heapInfo.DynamicStateHeapSize = 64 * sizeof(uint64_t); - - infoBlock->setCrossThreadDataSize(crossThreadOffsetBlock); - infoBlock->crossThreadData = new char[crossThreadOffsetBlock]; - - program->blockKernelManager->addBlockKernelInfo(infoBlock); - - parentKernel->initialize(); - - EXPECT_NE(nullptr, program->getBlockKernelManager()->getPrivateSurface(program->getBlockKernelManager()->getCount() - 1)); -} \ No newline at end of file diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index 95241f6b6f..ad7d219dda 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -409,144 +409,6 @@ class MockKernelWithInternals { char dshLocal[128]; std::vector defaultKernelArguments; }; - -class MockParentKernel : public Kernel { - public: - struct CreateParams { - bool addChildSimdSize = false; - bool addChildGlobalMemory = false; - bool addChildConstantMemory = false; - bool addPrintfForParent = false; - bool addPrintfForBlock = false; - }; - using Kernel::auxTranslationRequired; - using Kernel::kernelInfo; - using Kernel::patchBlocksCurbeWithConstantValues; - using Kernel::pImplicitArgs; - using Kernel::pSshLocal; - using Kernel::sshLocalSize; - static MockParentKernel *create(Context &context) { - CreateParams createParams{}; - return create(context, createParams); - } - static MockParentKernel *create(Context &context, const CreateParams &createParams) { - auto clDevice = context.getDevice(0); - - auto info = new MockKernelInfo(); - const size_t crossThreadSize = 160; - uint32_t crossThreadOffset = 0; - uint32_t crossThreadOffsetBlock = 0; - - info->setLocalIds({0, 0, 0}); - - info->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; - info->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true; - info->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; - info->kernelDescriptor.kernelAttributes.simdSize = 32; - - info->setDeviceSideEnqueueDefaultQueueSurface(8, crossThreadOffset); - crossThreadOffset += 8; - - info->setDeviceSideEnqueueEventPoolSurface(8, crossThreadOffset); - crossThreadOffset += 8; - - if (createParams.addPrintfForParent) { - info->setPrintfSurface(8, crossThreadOffset); - crossThreadOffset += 8; - } - - ClDeviceVector deviceVector; - deviceVector.push_back(clDevice); - MockProgram *mockProgram = new MockProgram(&context, false, deviceVector); - - if (createParams.addChildSimdSize) { - info->childrenKernelsIdOffset.push_back({0, crossThreadOffset}); - } - - UNRECOVERABLE_IF(crossThreadSize < crossThreadOffset + 8); - info->crossThreadData = new char[crossThreadSize]; - - auto parent = new MockParentKernel(mockProgram, *info); - parent->crossThreadData = new char[crossThreadSize]; - memset(parent->crossThreadData, 0, crossThreadSize); - parent->crossThreadDataSize = crossThreadSize; - parent->mockKernelInfo = info; - - auto infoBlock = new MockKernelInfo(); - - infoBlock->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; - - infoBlock->setDeviceSideEnqueueDefaultQueueSurface(8, crossThreadOffsetBlock); - crossThreadOffsetBlock += 8; - - infoBlock->setDeviceSideEnqueueEventPoolSurface(8, crossThreadOffset); - crossThreadOffsetBlock += 8; - - if (createParams.addPrintfForBlock) { - infoBlock->setPrintfSurface(8, crossThreadOffsetBlock); - crossThreadOffsetBlock += 8; - } - - if (createParams.addChildGlobalMemory) { - infoBlock->setGlobalVariablesSurface(8, crossThreadOffsetBlock); - crossThreadOffsetBlock += 8; - } - - if (createParams.addChildConstantMemory) { - infoBlock->setGlobalConstantsSurface(8, crossThreadOffsetBlock); - crossThreadOffsetBlock += 8; - } - - infoBlock->setLocalIds({0, 0, 0}); - - infoBlock->kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue = true; - infoBlock->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; - infoBlock->kernelDescriptor.kernelAttributes.simdSize = 32; - - infoBlock->setDeviceSideEnqueueBlockInterfaceDescriptorOffset(0); - - infoBlock->heapInfo.pDsh = (void *)new uint64_t[64]; - infoBlock->heapInfo.DynamicStateHeapSize = 64 * sizeof(uint64_t); - - size_t crossThreadDataSize = crossThreadOffsetBlock > crossThreadSize ? crossThreadOffsetBlock : crossThreadSize; - infoBlock->crossThreadData = new char[crossThreadDataSize]; - infoBlock->setCrossThreadDataSize(static_cast(crossThreadDataSize)); - - mockProgram->blockKernelManager->addBlockKernelInfo(infoBlock); - parent->mockProgram = mockProgram; - - return parent; - } - - MockParentKernel(Program *programArg, const KernelInfo &kernelInfoArg) : Kernel(programArg, kernelInfoArg, *programArg->getDevices()[0]) { - } - - ~MockParentKernel() override { - delete &kernelInfo; - BlockKernelManager *blockManager = program->getBlockKernelManager(); - - for (uint32_t i = 0; i < blockManager->getCount(); i++) { - const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i); - delete[](uint64_t *) blockInfo->heapInfo.pDsh; - } - - if (mockProgram) { - mockProgram->decRefInternal(); - } - } - - Context *getContext() { - return &mockProgram->getContext(); - } - - void setReflectionSurface(GraphicsAllocation *reflectionSurface) { - kernelReflectionSurface = reflectionSurface; - } - - MockProgram *mockProgram; - KernelInfo *mockKernelInfo = nullptr; -}; - class MockDebugKernel : public MockKernel { public: MockDebugKernel(Program *program, const KernelInfo &kernelInfo, ClDevice &clDeviceArg) : MockKernel(program, kernelInfo, clDeviceArg) { diff --git a/opencl/test/unit_test/program/printf_handler_tests.cpp b/opencl/test/unit_test/program/printf_handler_tests.cpp index 07b2424aea..8e376ba95e 100644 --- a/opencl/test/unit_test/program/printf_handler_tests.cpp +++ b/opencl/test/unit_test/program/printf_handler_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -256,73 +256,6 @@ HWTEST_F(PrintfHandlerTests, givenPrintfHandlerWhenEnqueueIsBlockedThenDontUsePr EXPECT_FALSE(cmdQ.isQueueBlocked()); } -TEST_F(PrintfHandlerTests, givenParentKernelWithoutPrintfAndBlockKernelWithPrintfWhenPrintfHandlerCreateCalledThenResultIsAnObject) { - - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); - MockContext context(device.get()); - MockParentKernel::CreateParams createParams{}; - createParams.addPrintfForParent = false; - createParams.addPrintfForBlock = true; - std::unique_ptr parentKernelWithoutPrintf(MockParentKernel::create(context, createParams)); - - MockMultiDispatchInfo multiDispatchInfo(device.get(), parentKernelWithoutPrintf.get()); - - std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); - - ASSERT_NE(nullptr, printfHandler.get()); -} - -TEST_F(PrintfHandlerTests, givenKernelWithImplicitArgsButWithoutPrintfWhenPrintfHandlerCreateCalledThenResultIsAnObject) { - - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); - MockContext context(device.get()); - MockParentKernel::CreateParams createParams{}; - createParams.addPrintfForParent = false; - createParams.addPrintfForBlock = false; - std::unique_ptr parentKernelWithoutPrintf(MockParentKernel::create(context, createParams)); - - parentKernelWithoutPrintf->pImplicitArgs = std::make_unique(); - - *parentKernelWithoutPrintf->pImplicitArgs = {}; - - MockMultiDispatchInfo multiDispatchInfo(device.get(), parentKernelWithoutPrintf.get()); - - std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); - - ASSERT_NE(nullptr, printfHandler.get()); -} - -TEST_F(PrintfHandlerTests, givenParentKernelAndBlockKernelWithoutPrintfWhenPrintfHandlerCreateCalledThenResaultIsNullptr) { - - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); - MockContext context(device.get()); - MockParentKernel::CreateParams createParams{}; - createParams.addPrintfForBlock = false; - createParams.addPrintfForParent = false; - std::unique_ptr blockKernelWithoutPrintf(MockParentKernel::create(context, createParams)); - - MockMultiDispatchInfo multiDispatchInfo(device.get(), blockKernelWithoutPrintf.get()); - - std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); - - ASSERT_EQ(nullptr, printfHandler.get()); -} -TEST_F(PrintfHandlerTests, givenParentKernelWithPrintfAndBlockKernelWithoutPrintfWhenPrintfHandlerCreateCalledThenResaultIsAnObject) { - - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); - MockContext context(device.get()); - MockParentKernel::CreateParams createParams{}; - createParams.addPrintfForBlock = false; - createParams.addPrintfForParent = true; - std::unique_ptr parentKernelWithPrintfBlockKernelWithoutPrintf(MockParentKernel::create(context, createParams)); - - MockMultiDispatchInfo multiDispatchInfo(device.get(), parentKernelWithPrintfBlockKernelWithoutPrintf.get()); - - std::unique_ptr printfHandler(PrintfHandler::create(multiDispatchInfo, *device)); - - ASSERT_NE(nullptr, printfHandler); -} - TEST_F(PrintfHandlerTests, givenMultiDispatchInfoWithMultipleKernelsWhenCreatingAndDispatchingPrintfHandlerThenPickMainKernel) { MockContext context; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr));