From 7ec69c33f9ee797699f5f4882b690cce400eee9a Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Mon, 23 Nov 2020 18:01:38 +0000 Subject: [PATCH] Store SSH per root device in Kernel Related-To: NEO-5001 Signed-off-by: Mateusz Jablonski --- opencl/source/command_queue/command_queue.cpp | 2 +- opencl/source/command_queue/enqueue_common.h | 5 +- opencl/source/command_queue/gpgpu_walker.h | 2 +- .../command_queue/hardware_interface_base.inl | 3 +- opencl/source/gtpin/gtpin_callbacks.cpp | 12 ++-- opencl/source/gtpin/gtpin_hw_helper.h | 8 +-- opencl/source/gtpin/gtpin_hw_helper.inl | 16 +++--- .../source/helpers/hardware_commands_helper.h | 4 +- .../helpers/hardware_commands_helper_base.inl | 12 ++-- opencl/source/kernel/kernel.cpp | 55 ++++++++++--------- opencl/source/kernel/kernel.h | 14 ++--- opencl/source/program/printf_handler.cpp | 2 +- .../accelerators/media_image_arg_tests.cpp | 4 +- ...cl_mem_locally_uncached_resource_tests.cpp | 3 +- .../unit_test/built_ins/built_in_tests.cpp | 16 +++--- .../command_queue/command_queue_tests.cpp | 8 +-- .../command_queue/dispatch_walker_tests.cpp | 2 +- .../command_queue/enqueue_handler_tests.cpp | 2 +- .../get_size_required_buffer_tests.cpp | 6 +- .../get_size_required_image_tests.cpp | 12 ++-- .../sync_buffer_handler_tests.cpp | 2 +- .../device_queue/device_queue_hw_tests.cpp | 8 +-- .../enqueue_execution_model_kernel_tests.cpp | 4 +- .../parent_kernel_dispatch_tests.cpp | 4 +- .../scheduler_dispatch_tests.cpp | 6 +- .../submit_blocked_parent_kernel_tests.cpp | 14 ++--- .../fixtures/execution_model_fixture.h | 3 +- .../gen8/scheduler_dispatch_tests_gen8.cpp | 2 +- opencl/test/unit_test/gtpin/gtpin_tests.cpp | 40 +++++++------- .../hardware_commands_helper_tests.cpp | 8 +-- .../helpers/hardware_commands_helper_tests.h | 2 +- .../kernel/kernel_arg_buffer_tests.cpp | 6 +- .../kernel/kernel_arg_pipe_tests.cpp | 6 +- .../unit_test/kernel/kernel_arg_svm_tests.cpp | 30 +++++----- opencl/test/unit_test/kernel/kernel_tests.cpp | 44 +++++++-------- .../kernel/kernel_transformable_tests.cpp | 18 +++--- .../mem_obj/buffer_set_arg_tests.cpp | 6 +- .../unit_test/mem_obj/image_set_arg_tests.cpp | 38 ++++++------- .../memory_manager/memory_manager_tests.cpp | 6 +- opencl/test/unit_test/mocks/mock_kernel.h | 16 +++--- 40 files changed, 231 insertions(+), 220 deletions(-) diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index e61c0e633f..a30e30b157 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -530,7 +530,7 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) { DEBUG_BREAK_IF(!kernel->requiresSshForBuffers()); - auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap()), + auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap(device->getRootDeviceIndex())), kernel->getKernelInfo().patchInfo.pAllocateSystemThreadSurface->Offset); void *addressToPatch = reinterpret_cast(debugSurface->getGpuAddress()); size_t sizeToPatch = debugSurface->getUnderlyingBufferSize(); diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 221708dcd2..f5e9a6ae42 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -330,7 +330,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, if (blockQueue) { if (parentKernel) { - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, device->getRootDeviceIndex()); blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM; } @@ -534,7 +534,8 @@ void CommandQueueHw::processDeviceEnqueue(DeviceQueueHw *d TagNode *hwTimeStamps, bool &blocking) { auto parentKernel = multiDispatchInfo.peekParentKernel(); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + auto rootDeviceIndex = devQueueHw->getDevice().getRootDeviceIndex(); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); bool isCcsUsed = EngineHelpers::isCcs(gpgpuEngine->osContext->getEngineType()); uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1; diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index 32fb391d2f..d0817f6713 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -200,7 +200,7 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) { if (heapType == IndirectHeap::SURFACE_STATE) { - expectedSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + expectedSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, commandQueue.getDevice().getRootDeviceIndex()); } else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT) { DeviceQueueHw *pDevQueue = castToObject>(commandQueue.getContext().getDefaultDeviceQueue()); diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index 55c0f3350d..d21218a80d 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -248,6 +248,7 @@ template void HardwareInterface::obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh) { auto parentKernel = multiDispatchInfo.peekParentKernel(); + auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex(); if (blockedQueue) { size_t dshSize = 0; @@ -257,7 +258,7 @@ void HardwareInterface::obtainIndirectHeaps(CommandQueue &commandQueu if (parentKernel) { dshSize = commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize(); - sshSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + sshSize += HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); iohEqualsDsh = true; colorCalcSize = static_cast(commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize); } else { diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp index 533c5c87d7..9a185bc9a8 100644 --- a/opencl/source/gtpin/gtpin_callbacks.cpp +++ b/opencl/source/gtpin/gtpin_callbacks.cpp @@ -66,9 +66,10 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) { size_t gtpinBTI = pKernel->getNumberOfBindingTableStates(); // Enlarge local copy of SSH by 1 SS auto device = pKernel->getDevices()[0]; + auto rootDeviceIndex = device->getRootDeviceIndex(); GFXCORE_FAMILY genFamily = device->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); - if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel)) { + if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex)) { // Kernel with no SSH or Kernel EM, not supported return; } @@ -103,8 +104,10 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) { void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { if (isGTPinInitialized) { + auto pCmdQ = reinterpret_cast(pCmdQueue); + auto &device = pCmdQ->getDevice(); auto pKernel = castToObjectOrAbort(kernel); - if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize() == 0) { + if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize(device.getRootDeviceIndex()) == 0) { // Kernel with no SSH, not supported return; } @@ -132,14 +135,13 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { if (!resource) { return; } - auto &device = *pKernel->getDevices()[0]; GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1; - void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI); + void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, device.getRootDeviceIndex()); cl_mem buffer = (cl_mem)resource; auto pBuffer = castToObjectOrAbort(buffer); - pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device.getDevice()); + pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device); } } diff --git a/opencl/source/gtpin/gtpin_hw_helper.h b/opencl/source/gtpin/gtpin_hw_helper.h index 7ff896be0f..bcf49ed333 100644 --- a/opencl/source/gtpin/gtpin_hw_helper.h +++ b/opencl/source/gtpin/gtpin_hw_helper.h @@ -15,8 +15,8 @@ class GTPinHwHelper { public: static GTPinHwHelper &get(GFXCORE_FAMILY gfxCore); virtual uint32_t getGenVersion() = 0; - virtual bool addSurfaceState(Kernel *pKernel) = 0; - virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0; + virtual bool addSurfaceState(Kernel *pKernel, uint32_t rootDeviceIndex) = 0; + virtual void *getSurfaceState(Kernel *pKernel, size_t bti, uint32_t rootDeviceIndex) = 0; protected: GTPinHwHelper(){}; @@ -30,8 +30,8 @@ class GTPinHwHelperHw : public GTPinHwHelper { return gtpinHwHelper; } uint32_t getGenVersion() override; - bool addSurfaceState(Kernel *pKernel) override; - void *getSurfaceState(Kernel *pKernel, size_t bti) override; + bool addSurfaceState(Kernel *pKernel, uint32_t rootDeviceIndex) override; + void *getSurfaceState(Kernel *pKernel, size_t bti, uint32_t rootDeviceIndex) override; private: GTPinHwHelperHw(){}; diff --git a/opencl/source/gtpin/gtpin_hw_helper.inl b/opencl/source/gtpin/gtpin_hw_helper.inl index a74719ef8a..e481d22294 100644 --- a/opencl/source/gtpin/gtpin_hw_helper.inl +++ b/opencl/source/gtpin/gtpin_hw_helper.inl @@ -15,11 +15,11 @@ namespace NEO { template -bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel) { +bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel, uint32_t rootDeviceIndex) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; - size_t sshSize = pKernel->getSurfaceStateHeapSize(); + size_t sshSize = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); if ((sshSize == 0) || pKernel->isParentKernel) { // Kernels which do not use SSH or use Execution Model are not supported (yet) return false; @@ -29,7 +29,7 @@ bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel) { size_t sizeToEnlarge = ssSize + btsSize; size_t currBTOffset = pKernel->getBindingTableOffset(); size_t currSurfaceStateSize = currBTOffset; - char *pSsh = static_cast(pKernel->getSurfaceStateHeap()); + char *pSsh = static_cast(pKernel->getSurfaceStateHeap(rootDeviceIndex)); char *pNewSsh = new char[sshSize + sizeToEnlarge]; memcpy_s(pNewSsh, sshSize + sizeToEnlarge, pSsh, currSurfaceStateSize); RENDER_SURFACE_STATE *pSS = reinterpret_cast(pNewSsh + currSurfaceStateSize); @@ -40,19 +40,19 @@ bool GTPinHwHelperHw::addSurfaceState(Kernel *pKernel) { BINDING_TABLE_STATE *pNewBTS = reinterpret_cast(pNewSsh + newSurfaceStateSize + currBTCount * btsSize); *pNewBTS = GfxFamily::cmdInitBindingTableState; pNewBTS->setSurfaceStatePointer((uint64_t)currBTOffset); - pKernel->resizeSurfaceStateHeap(pNewSsh, sshSize + sizeToEnlarge, currBTCount + 1, newSurfaceStateSize); + pKernel->resizeSurfaceStateHeap(rootDeviceIndex, pNewSsh, sshSize + sizeToEnlarge, currBTCount + 1, newSurfaceStateSize); return true; } template -void *GTPinHwHelperHw::getSurfaceState(Kernel *pKernel, size_t bti) { +void *GTPinHwHelperHw::getSurfaceState(Kernel *pKernel, size_t bti, uint32_t rootDeviceIndex) { using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; - if ((nullptr == pKernel->getSurfaceStateHeap()) || (bti >= pKernel->getNumberOfBindingTableStates())) { + if ((nullptr == pKernel->getSurfaceStateHeap(rootDeviceIndex)) || (bti >= pKernel->getNumberOfBindingTableStates())) { return nullptr; } - auto *pBts = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), (pKernel->getBindingTableOffset() + bti * sizeof(BINDING_TABLE_STATE)))); - auto pSurfaceState = ptrOffset(pKernel->getSurfaceStateHeap(), pBts->getSurfaceStatePointer()); + auto *pBts = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), (pKernel->getBindingTableOffset() + bti * sizeof(BINDING_TABLE_STATE)))); + auto pSurfaceState = ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pBts->getSurfaceStatePointer()); return pSurfaceState; } diff --git a/opencl/source/helpers/hardware_commands_helper.h b/opencl/source/helpers/hardware_commands_helper.h index 07c1339c1d..31d52c68c8 100644 --- a/opencl/source/helpers/hardware_commands_helper.h +++ b/opencl/source/helpers/hardware_commands_helper.h @@ -126,7 +126,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { const Kernel &kernel, size_t localWorkSize = 256); static size_t getSizeRequiredSSH( - const Kernel &kernel); + const Kernel &kernel, uint32_t rootDeviceIndex); static size_t getTotalSizeRequiredDSH( const MultiDispatchInfo &multiDispatchInfo); @@ -135,7 +135,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { static size_t getTotalSizeRequiredSSH( const MultiDispatchInfo &multiDispatchInfo); - static size_t getSshSizeForExecutionModel(const Kernel &kernel); + static size_t getSshSizeForExecutionModel(const Kernel &kernel, uint32_t rootDeviceIndex); static void setInterfaceDescriptorOffset( WALKER_TYPE *walkerCmd, uint32_t &interfaceDescriptorIndex); diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index 4038a91657..be7aa425cd 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -75,9 +75,9 @@ size_t HardwareCommandsHelper::getSizeRequiredIOH( template size_t HardwareCommandsHelper::getSizeRequiredSSH( - const Kernel &kernel) { + const Kernel &kernel, uint32_t rootDeviceIndex) { typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE; - auto sizeSSH = kernel.getSurfaceStateHeapSize(); + auto sizeSSH = kernel.getSurfaceStateHeapSize(rootDeviceIndex); sizeSSH += sizeSSH ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0; return sizeSSH; } @@ -112,11 +112,11 @@ size_t HardwareCommandsHelper::getTotalSizeRequiredIOH( template size_t HardwareCommandsHelper::getTotalSizeRequiredSSH( const MultiDispatchInfo &multiDispatchInfo) { - return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel()); }); + return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel(), dispatchInfo.getClDevice().getRootDeviceIndex()); }); } template -size_t HardwareCommandsHelper::getSshSizeForExecutionModel(const Kernel &kernel) { +size_t HardwareCommandsHelper::getSshSizeForExecutionModel(const Kernel &kernel, uint32_t rootDeviceIndex) { typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE; size_t totalSize = 0; @@ -136,7 +136,7 @@ size_t HardwareCommandsHelper::getSshSizeForExecutionModel(const Kern SchedulerKernel &scheduler = kernel.getContext().getSchedulerKernel(); - totalSize += getSizeRequiredSSH(scheduler); + totalSize += getSizeRequiredSSH(scheduler, rootDeviceIndex); totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); @@ -237,7 +237,7 @@ size_t HardwareCommandsHelper::sendIndirectState( kernel.patchBindlessSurfaceStateOffsets(device, ssh.getUsed()); auto dstBindingTablePointer = EncodeSurfaceState::pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0, - kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(), + kernel.getSurfaceStateHeap(rootDeviceIndex), kernel.getSurfaceStateHeapSize(rootDeviceIndex), kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset()); // Copy our sampler state if it exists diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 3e7fef9029..130262cb37 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -140,7 +140,7 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic uint32_t sshOffset = patch.SurfaceStateHeapOffset; auto rootDeviceIndex = allocation.getRootDeviceIndex(); void *crossThreadData = getCrossThreadData(rootDeviceIndex); - void *ssh = getSurfaceStateHeap(); + void *ssh = getSurfaceStateHeap(rootDeviceIndex); if (crossThreadData != nullptr) { auto pp = ptrOffset(crossThreadData, crossThreadDataOffset); uintptr_t addressToPatch = reinterpret_cast(ptrToPatchInCrossThreadData); @@ -235,13 +235,14 @@ cl_int Kernel::initialize() { } // allocate our own SSH, if necessary - sshLocalSize = heapInfo.SurfaceStateHeapSize; + kernelDeviceInfos[rootDeviceIndex].sshLocalSize = heapInfo.SurfaceStateHeapSize; - if (sshLocalSize) { - pSshLocal = std::make_unique(sshLocalSize); + if (kernelDeviceInfos[rootDeviceIndex].sshLocalSize) { + kernelDeviceInfos[rootDeviceIndex].pSshLocal = std::make_unique(kernelDeviceInfos[rootDeviceIndex].sshLocalSize); // copy the ssh into our local copy - memcpy_s(pSshLocal.get(), sshLocalSize, heapInfo.pSsh, sshLocalSize); + memcpy_s(kernelDeviceInfos[rootDeviceIndex].pSshLocal.get(), kernelDeviceInfos[rootDeviceIndex].sshLocalSize, + heapInfo.pSsh, kernelDeviceInfos[rootDeviceIndex].sshLocalSize); } numberOfBindingTableStates = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Count : 0; localBindingTableOffset = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Offset : 0; @@ -287,7 +288,7 @@ cl_int Kernel::initialize() { if (patchInfo.pAllocateStatelessEventPoolSurface) { if (requiresSshForBuffers()) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0); } @@ -296,7 +297,7 @@ cl_int Kernel::initialize() { if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { if (requiresSshForBuffers()) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0); } @@ -783,8 +784,8 @@ void Kernel::setStartOffset(uint32_t offset) { this->startOffset = offset; } -void *Kernel::getSurfaceStateHeap() const { - return kernelInfo.usesSsh ? pSshLocal.get() : nullptr; +void *Kernel::getSurfaceStateHeap(uint32_t rootDeviceIndex) const { + return kernelInfo.usesSsh ? kernelDeviceInfos[rootDeviceIndex].pSshLocal.get() : nullptr; } size_t Kernel::getDynamicStateHeapSize() const { @@ -795,9 +796,9 @@ const void *Kernel::getDynamicStateHeap() const { return kernelInfo.heapInfo.pDsh; } -size_t Kernel::getSurfaceStateHeapSize() const { +size_t Kernel::getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const { return kernelInfo.usesSsh - ? sshLocalSize + ? kernelDeviceInfos[rootDeviceIndex].sshLocalSize : 0; } @@ -805,9 +806,9 @@ size_t Kernel::getNumberOfBindingTableStates() const { return numberOfBindingTableStates; } -void Kernel::resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) { - pSshLocal.reset(static_cast(pNewSsh)); - sshLocalSize = static_cast(newSshSize); +void Kernel::resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) { + kernelDeviceInfos[rootDeviceIndex].pSshLocal.reset(static_cast(pNewSsh)); + kernelDeviceInfos[rootDeviceIndex].sshLocalSize = static_cast(newSshSize); numberOfBindingTableStates = newBindingTableCount; localBindingTableOffset = newBindingTableOffset; } @@ -882,7 +883,7 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G if (requiresSshForBuffers()) { const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; - auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); + auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0); } if (!kernelArguments[argIndex].isPatched) { @@ -913,7 +914,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio if (requiresSshForBuffers()) { const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; - auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); + auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); size_t allocSize = 0; size_t offset = 0; if (svmAlloc != nullptr) { @@ -1317,7 +1318,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, } if (requiresSshForBuffers()) { - auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); + auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, getDevice().getDevice()); } @@ -1342,7 +1343,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize); if (requiresSshForBuffers()) { - auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); + auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0); } @@ -1391,7 +1392,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex, auto graphicsAllocation = pipe->getGraphicsAllocation(getDevice().getRootDeviceIndex()); if (requiresSshForBuffers()) { - auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); + auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, pipe->getSize(), pipe->getCpuAddress(), 0, graphicsAllocation, 0, 0); @@ -1429,7 +1430,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, storeKernelArg(argIndex, IMAGE_OBJ, clMemObj, argVal, argSize); - auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); + auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); DEBUG_BREAK_IF(!kernelArgInfo.isImage); // Sets SS structure @@ -2250,7 +2251,7 @@ void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) { static_cast(devQueue->getQueueBuffer()->getGpuAddressToPatch())); } if (requiresSshForBuffers()) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, devQueue->getQueueBuffer()->getUnderlyingBufferSize(), (void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0); @@ -2272,7 +2273,7 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) { } if (requiresSshForBuffers()) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, devQueue->getEventPoolBuffer()->getUnderlyingBufferSize(), (void *)devQueue->getEventPoolBuffer()->getGpuAddress(), 0, devQueue->getEventPoolBuffer(), 0, 0); @@ -2298,13 +2299,14 @@ bool Kernel::usesSyncBuffer() { } void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset) { + auto rootDeviceIndex = device.getRootDeviceIndex(); auto &patchInfo = kernelInfo.patchInfo; - auto bufferPatchAddress = ptrOffset(getCrossThreadData(device.getRootDeviceIndex()), patchInfo.pAllocateSyncBuffer->DataParamOffset); + auto bufferPatchAddress = ptrOffset(getCrossThreadData(rootDeviceIndex), patchInfo.pAllocateSyncBuffer->DataParamOffset); patchWithRequiredSize(bufferPatchAddress, patchInfo.pAllocateSyncBuffer->DataParamSize, ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)); if (requiresSshForBuffers()) { - auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap()), + auto surfaceState = ptrOffset(reinterpret_cast(getSurfaceStateHeap(rootDeviceIndex)), patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset); auto addressToPatch = gfxAllocation->getUnderlyingBuffer(); auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize(); @@ -2353,10 +2355,11 @@ void Kernel::resolveArgs() { } } } + auto rootDeviceIndex = getDevice().getRootDeviceIndex(); if (canTransformImageTo2dArray) { - imageTransformer->transformImagesTo2dArray(kernelInfo, kernelArguments, getSurfaceStateHeap()); + imageTransformer->transformImagesTo2dArray(kernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex)); } else if (imageTransformer->didTransform()) { - imageTransformer->transformImagesTo3d(kernelInfo, kernelArguments, getSurfaceStateHeap()); + imageTransformer->transformImagesTo3d(kernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex)); } } diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 2cf9b651bf..e2f6d89e07 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -153,18 +153,18 @@ class Kernel : public BaseObject<_cl_kernel> { size_t *paramValueSizeRet) const; const void *getKernelHeap() const; - void *getSurfaceStateHeap() const; + void *getSurfaceStateHeap(uint32_t rootDeviceIndex) const; const void *getDynamicStateHeap() const; size_t getKernelHeapSize() const; - size_t getSurfaceStateHeapSize() const; + size_t getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const; size_t getDynamicStateHeapSize() const; size_t getNumberOfBindingTableStates() const; size_t getBindingTableOffset() const { return localBindingTableOffset; } - void resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset); + void resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset); void substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize); bool isKernelHeapSubstituted() const; @@ -524,8 +524,6 @@ class Kernel : public BaseObject<_cl_kernel> { size_t numberOfBindingTableStates = 0u; size_t localBindingTableOffset = 0u; - std::unique_ptr pSshLocal; - uint32_t sshLocalSize = 0u; GraphicsAllocation *kernelReflectionSurface = nullptr; @@ -550,13 +548,15 @@ class Kernel : public BaseObject<_cl_kernel> { bool debugEnabled = false; uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet; - struct KernelDeviceInfo { + struct KernelDeviceInfo : public NonCopyableClass { + std::unique_ptr pSshLocal; + uint32_t sshLocalSize = 0u; char *crossThreadData = nullptr; uint32_t crossThreadDataSize = 0u; GraphicsAllocation *privateSurface = nullptr; uint64_t privateSurfaceSize = 0u; }; - StackVec kernelDeviceInfos; + std::vector kernelDeviceInfos; }; } // namespace NEO diff --git a/opencl/source/program/printf_handler.cpp b/opencl/source/program/printf_handler.cpp index c12052d804..44ade48fd1 100644 --- a/opencl/source/program/printf_handler.cpp +++ b/opencl/source/program/printf_handler.cpp @@ -62,7 +62,7 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) patchWithRequiredSize(printfPatchAddress, kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->DataParamSize, (uintptr_t)printfSurface->getGpuAddressToPatch()); if (kernel->requiresSshForBuffers()) { - auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap()), + auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap(rootDeviceIndex)), kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset); void *addressToPatch = printfSurface->getUnderlyingBuffer(); size_t sizeToPatch = printfSurface->getUnderlyingBufferSize(); diff --git a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp index 83971ffe2a..87f57c7a24 100644 --- a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp +++ b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp @@ -81,7 +81,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingMediaImageArgThenArgsSetCorrectly) { typedef typename FamilyType::MEDIA_SURFACE_STATE MEDIA_SURFACE_STATE; auto pSurfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); srcImage->setMediaImageArg(const_cast(pSurfaceState), pClDevice->getRootDeviceIndex()); @@ -109,7 +109,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingKernelArgImageThenArgsSetCorrectly) { ASSERT_EQ(CL_SUCCESS, retVal); auto pSurfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); uint64_t surfaceAddress = pSurfaceState->getSurfaceBaseAddress(); diff --git a/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp b/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp index 8417b075c1..7cd82259fc 100644 --- a/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp +++ b/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp @@ -27,8 +27,9 @@ namespace clMemLocallyUncachedResourceTests { template uint32_t argMocs(Kernel &kernel, size_t argIndex) { + auto rootDeviceIndex = kernel.getDevices()[0]->getRootDeviceIndex(); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap(); + auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap(rootDeviceIndex); auto surfaceStateHeapAddressOffset = kernel.getKernelInfo().kernelArgInfo[argIndex].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset)); return surfaceState->getMemoryObjectControlState(); diff --git a/opencl/test/unit_test/built_ins/built_in_tests.cpp b/opencl/test/unit_test/built_ins/built_in_tests.cpp index 57e24fad99..62fa4102b1 100644 --- a/opencl/test/unit_test/built_ins/built_in_tests.cpp +++ b/opencl/test/unit_test/built_ins/built_in_tests.cpp @@ -484,12 +484,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, givenAuxTranslationKernelWhenSettingKe // read args auto argNum = 0; auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); - auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); + auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); - sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); + sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); @@ -499,12 +499,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, givenAuxTranslationKernelWhenSettingKe // write args auto argNum = 1; auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); - auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); + auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); - sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); + sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); @@ -541,7 +541,7 @@ HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSet { // read arg auto argNum = 0; - auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); + auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode()); @@ -550,7 +550,7 @@ HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSet { // write arg auto argNum = 1; - auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(); + auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode()); @@ -586,7 +586,7 @@ HWTEST_F(BuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSet { // read arg auto argNum = 0; - auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); + auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode()); @@ -595,7 +595,7 @@ HWTEST_F(BuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSet { // write arg auto argNum = 1; - auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(); + auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode()); diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index f17abddff5..d02c49191a 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -1038,7 +1038,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsC std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); MockCommandQueue cmdQ(context.get(), pClDevice, 0); - kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset); + kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset, rootDeviceIndex); kernel->getAllocatedKernelInfo()->usesSsh = true; auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); @@ -1047,7 +1047,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsC auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation(); ASSERT_NE(nullptr, debugSurface); - RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(); + RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(rootDeviceIndex); EXPECT_EQ(debugSurface->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } @@ -1058,7 +1058,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSet std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); MockCommandQueue cmdQ(context.get(), pClDevice, 0); - kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset); + kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset, rootDeviceIndex); kernel->getAllocatedKernelInfo()->usesSsh = true; auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver(); commandStreamReceiver.allocateDebugSurface(SipKernel::maxDbgSurfaceSize); @@ -1068,7 +1068,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSet cmdQ.setupDebugSurface(kernel.get()); EXPECT_EQ(debugSurface, commandStreamReceiver.getDebugSurfaceAllocation()); - RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(); + RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(rootDeviceIndex); EXPECT_EQ(debugSurface->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); } diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index 04bf812539..87e1bda34e 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -733,7 +733,7 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredH auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, kernel, Math::computeTotalElementsCount(localWorkgroupSize)); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(kernel, rootDeviceIndex); EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace()); EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace()); diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index 0047af8e33..e875952a5a 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -578,7 +578,7 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs kernel->initialize(); auto bindingTableState = reinterpret_cast( - ptrOffset(kernel->getSurfaceStateHeap(), sPatchBindingTableState.Offset)); + ptrOffset(kernel->getSurfaceStateHeap(rootDeviceIndex), sPatchBindingTableState.Offset)); bindingTableState->setSurfaceStatePointer(0); auto mockCmdQ = clUniquePtr(new MockCommandQueueHw(context, pClDevice, 0)); diff --git a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp index 8a2468cb44..e79b6b52f1 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp @@ -400,7 +400,7 @@ HWTEST_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSize builder.buildDispatchInfos(multiDispatchInfo); builder.buildDispatchInfos(multiDispatchInfo); - auto sizeSSH = multiDispatchInfo.begin()->getKernel()->getSurfaceStateHeapSize(); + auto sizeSSH = multiDispatchInfo.begin()->getKernel()->getSurfaceStateHeapSize(rootDeviceIndex); sizeSSH += sizeSSH ? FamilyType::BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0; sizeSSH = alignUp(sizeSSH, MemoryConstants::cacheLineSize); @@ -439,7 +439,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenHelloWorldKernelWhenEnqueingKernelThenH auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*KernelFixture::pKernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *KernelFixture::pKernel, workSize[0]); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel, rootDeviceIndex); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -478,7 +478,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenKernelWithSimpleArgWhenEnqueingKernelTh auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*KernelFixture::pKernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *KernelFixture::pKernel, workSize[0]); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel, rootDeviceIndex); EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); EXPECT_EQ(0u, expectedSizeDSH % 64); diff --git a/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp index d636cce3f7..3d00f31654 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp @@ -93,7 +93,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingImageThenHeapsAndCommandBufferCons auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -140,7 +140,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel.get()); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel.get()); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel.get()); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel.get()); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel.get(), rootDeviceIndex); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -197,7 +197,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageNonBlockingThenHeapsAndComman auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -252,7 +252,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageBlockingThenHeapsAndCommandBu auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -307,7 +307,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageNonBlockingThenHeapsAndComman auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -362,7 +362,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageBlockingThenHeapsAndCommandBu auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel); auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); - auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); + auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); // Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended. expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); diff --git a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp index f308c2ba2e..aa988999ef 100644 --- a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp @@ -180,7 +180,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBuffer pClDevice->allocateSyncBufferHandler(); auto syncBufferHandler = getSyncBufferHandler(); - auto surfaceState = reinterpret_cast(ptrOffset(kernel->getSurfaceStateHeap(), + auto surfaceState = reinterpret_cast(ptrOffset(kernel->getSurfaceStateHeap(rootDeviceIndex), sPatchAllocateSyncBuffer.SurfaceStateHeapOffset)); auto bufferAddress = syncBufferHandler->graphicsAllocation->getGpuAddress(); surfaceState->setSurfaceBaseAddress(bufferAddress + 1); diff --git a/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp b/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp index 4015d48e44..76bdc1e888 100644 --- a/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp +++ b/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp @@ -537,7 +537,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSetiingIUpIndirectState auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); ASSERT_NE(nullptr, dsh); - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); + size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel), rootDeviceIndex); auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); auto usedBeforeSSH = ssh->getUsed(); @@ -565,7 +565,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); ASSERT_NE(nullptr, dsh); - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); + size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel), rootDeviceIndex); auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); @@ -593,7 +593,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE); ASSERT_NE(nullptr, dsh); - size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); + size_t surfaceStateHeapSize = HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel), rootDeviceIndex); auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); @@ -631,7 +631,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCall } auto surfaceStateHeapSize = - HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel)); + HardwareCommandsHelper::getSshSizeForExecutionModel(const_cast(*pKernel), rootDeviceIndex); auto ssh = std::make_unique(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize); devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false); diff --git a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp index 49e1e87bf9..ca9564c6df 100644 --- a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -297,7 +297,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu BlockKernelManager *blockManager = pProgram->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); - size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize(); + size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); @@ -340,7 +340,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE))); } - blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize()); + blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize(rootDeviceIndex)); } delete blockKernel; diff --git a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp index 25119134d0..464acd730c 100644 --- a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp +++ b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp @@ -132,7 +132,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace()); size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); - size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); + size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel, rootDeviceIndex); EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace()); } @@ -162,7 +162,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue ASSERT_NE(nullptr, blockedCommandsData); size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper::getDefaultSshUsage(); - size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel); + size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel, rootDeviceIndex); size_t sshUsed = blockedCommandsData->ssh->getUsed(); diff --git a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp index 04d2229b1a..27a2a1f419 100644 --- a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp +++ b/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp @@ -53,7 +53,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched EXPECT_NE(nullptr, executionModelDsh); - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); LinearStream &commandStream = getCommandStream(*pCmdQ, CsrDependencies(), @@ -174,7 +174,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); SchedulerKernel &scheduler = context->getSchedulerKernel(); - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); @@ -209,7 +209,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, GivenEarlyReturnSet SchedulerKernel &scheduler = context->getSchedulerKernel(); - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSizeRequiredSSH(scheduler); + size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSizeRequiredSSH(scheduler, rootDeviceIndex); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); LinearStream &commandStream = getCommandStream(*pCmdQ, CsrDependencies(), diff --git a/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp b/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp index 172d5cb04e..7c73e59880 100644 --- a/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp @@ -97,7 +97,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritca dsh->getSpace(mockDevQueue.getDshOffset()); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()}); auto blockedCommandData = std::make_unique(new LinearStream(cmdStreamAllocation), @@ -162,7 +162,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); @@ -203,7 +203,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); @@ -241,7 +241,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentK *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); @@ -282,7 +282,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); blockedCommandData->setHeaps(dsh, ioh, ssh); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM; PreemptionMode preemptionMode = device->getPreemptionMode(); @@ -308,7 +308,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQue MockCommandQueue cmdQ(context, device, properties); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); size_t heapSize = 20; @@ -362,7 +362,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhen parentKernel->createReflectionSurface(); context->setDefaultDeviceQueue(&mockDevQueue); - size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minSizeSSHForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); size_t heapSize = 20; diff --git a/opencl/test/unit_test/fixtures/execution_model_fixture.h b/opencl/test/unit_test/fixtures/execution_model_fixture.h index 912ca179bf..63318b225c 100644 --- a/opencl/test/unit_test/fixtures/execution_model_fixture.h +++ b/opencl/test/unit_test/fixtures/execution_model_fixture.h @@ -109,7 +109,7 @@ struct ParentKernelCommandQueueFixture : public CommandQueueHwFixture, testing::Test { void SetUp() override { - device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; + device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex)}; CommandQueueHwFixture::SetUp(device, 0); } void TearDown() override { @@ -125,4 +125,5 @@ struct ParentKernelCommandQueueFixture : public CommandQueueHwFixture, return std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); } + const uint32_t rootDeviceIndex = 0u; }; diff --git a/opencl/test/unit_test/gen8/scheduler_dispatch_tests_gen8.cpp b/opencl/test/unit_test/gen8/scheduler_dispatch_tests_gen8.cpp index c55257a7cf..20c007cbb7 100644 --- a/opencl/test/unit_test/gen8/scheduler_dispatch_tests_gen8.cpp +++ b/opencl/test/unit_test/gen8/scheduler_dispatch_tests_gen8.cpp @@ -31,7 +31,7 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta DeviceQueueHw *pDevQueueHw = castToObject>(pDevQueue); SchedulerKernel &scheduler = context->getSchedulerKernel(); - size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel); + size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex); // Setup heaps in pCmdQ MultiDispatchInfo multiDispatchinfo(&scheduler); diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index 20917137a7..1a54501083 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -160,7 +160,8 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture { executionEnvironment->memoryManager.reset(memoryManager); initPlatform(); pDevice = pPlatform->getClDevice(0); - cl_device_id device = (cl_device_id)pDevice; + rootDeviceIndex = pDevice->getRootDeviceIndex(); + cl_device_id device = pDevice; ContextFixture::SetUp(1, &device); driverServices.bufferAllocate = nullptr; @@ -193,6 +194,7 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture { driver_services_t driverServices; gtpin::ocl::gtpin_events_t gtpinCallbacks; MockMemoryManagerWithFailures *memoryManager = nullptr; + uint32_t rootDeviceIndex = std::numeric_limits::max(); }; typedef Test GTPinTests; @@ -1279,7 +1281,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenG // Verify that when SSH is removed then during kernel execution // GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete callbacks are not called. - pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); + pKernel->resizeSurfaceStateHeap(rootDeviceIndex, nullptr, 0, 0, 0); int prevCount2 = KernelSubmitCallbackCount; int prevCount3 = CommandBufferCreateCallbackCount; @@ -1392,7 +1394,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenBlockedKernelWithoutSSHIsUs // Verify that when SSH is removed then during kernel execution // GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete callbacks are not called. - pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); + pKernel->resizeSurfaceStateHeap(rootDeviceIndex, nullptr, 0, 0, 0); cl_event userEvent = clCreateUserEvent(context, &retVal); EXPECT_EQ(CL_SUCCESS, retVal); @@ -2167,15 +2169,15 @@ TEST_F(GTPinTests, givenParentKernelWhenGtPinAddingSurfaceStateThenItIsNotAddedA std::unique_ptr parentKernel(MockParentKernel::create(*pContext)); parentKernel->mockKernelInfo->usesSsh = true; - parentKernel->sshLocalSize = 64; - parentKernel->pSshLocal.reset(new char[64]); + parentKernel->kernelDeviceInfos[rootDeviceIndex].sshLocalSize = 64; + parentKernel->kernelDeviceInfos[rootDeviceIndex].pSshLocal.reset(new char[64]); - size_t sizeSurfaceStates1 = parentKernel->getSurfaceStateHeapSize(); + size_t sizeSurfaceStates1 = parentKernel->getSurfaceStateHeapSize(rootDeviceIndex); - bool surfaceAdded = gtpinHelper.addSurfaceState(parentKernel.get()); + bool surfaceAdded = gtpinHelper.addSurfaceState(parentKernel.get(), rootDeviceIndex); EXPECT_FALSE(surfaceAdded); - size_t sizeSurfaceStates2 = parentKernel->getSurfaceStateHeapSize(); + size_t sizeSurfaceStates2 = parentKernel->getSurfaceStateHeapSize(rootDeviceIndex); EXPECT_EQ(sizeSurfaceStates2, sizeSurfaceStates1); } @@ -2225,47 +2227,47 @@ TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { size_t numBTS1 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(2u, numBTS1); - size_t sizeSurfaceStates1 = pKernel->getSurfaceStateHeapSize(); + size_t sizeSurfaceStates1 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); EXPECT_NE(0u, sizeSurfaceStates1); size_t offsetBTS1 = pKernel->getBindingTableOffset(); EXPECT_NE(0u, offsetBTS1); GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily; GTPinHwHelper >pinHelper = GTPinHwHelper::get(genFamily); - void *pSS1 = gtpinHelper.getSurfaceState(pKernel, 0); + void *pSS1 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex); EXPECT_NE(nullptr, pSS1); // Enlarge SSH by one SURFACE STATE element - bool surfaceAdded = gtpinHelper.addSurfaceState(pKernel); + bool surfaceAdded = gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex); EXPECT_TRUE(surfaceAdded); size_t numBTS2 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(numBTS1 + 1, numBTS2); - size_t sizeSurfaceStates2 = pKernel->getSurfaceStateHeapSize(); + size_t sizeSurfaceStates2 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); EXPECT_GT(sizeSurfaceStates2, sizeSurfaceStates1); size_t offsetBTS2 = pKernel->getBindingTableOffset(); EXPECT_GT(offsetBTS2, offsetBTS1); - void *pSS2 = gtpinHelper.getSurfaceState(pKernel, 0); + void *pSS2 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex); EXPECT_NE(pSS2, pSS1); - pSS2 = gtpinHelper.getSurfaceState(pKernel, numBTS2); + pSS2 = gtpinHelper.getSurfaceState(pKernel, numBTS2, rootDeviceIndex); EXPECT_EQ(nullptr, pSS2); // Remove kernel's SSH - pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); + pKernel->resizeSurfaceStateHeap(rootDeviceIndex, nullptr, 0, 0, 0); // Try to enlarge SSH once again, this time the operation must fail - surfaceAdded = gtpinHelper.addSurfaceState(pKernel); + surfaceAdded = gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex); EXPECT_FALSE(surfaceAdded); size_t numBTS3 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numBTS3); - size_t sizeSurfaceStates3 = pKernel->getSurfaceStateHeapSize(); + size_t sizeSurfaceStates3 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex); EXPECT_EQ(0u, sizeSurfaceStates3); size_t offsetBTS3 = pKernel->getBindingTableOffset(); EXPECT_EQ(0u, offsetBTS3); - void *pSS3 = gtpinHelper.getSurfaceState(pKernel, 0); + void *pSS3 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex); EXPECT_EQ(nullptr, pSS3); // Cleanup @@ -2396,7 +2398,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOnKernelSubitIsCalledThenCo std::unique_ptr cmdQ(new MockCommandQueue(context.get(), pDevice, nullptr)); std::unique_ptr pKernel(new MockKernel(pProgramm.get(), *pKernelInfo)); - pKernel->setSshLocal(nullptr, sizeof(surfaceStateHeap)); + pKernel->setSshLocal(nullptr, sizeof(surfaceStateHeap), rootDeviceIndex); kernelOffset = 0x1234; EXPECT_NE(pKernel->getStartOffset(), kernelOffset); diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 38c18a0a37..04b4f53b93 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -363,7 +363,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes auto usedAfterSSH = ssh.getUsed(); auto sizeRequiredDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); auto sizeRequiredIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel, localWorkSize); - auto sizeRequiredSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel); + auto sizeRequiredSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); EXPECT_GE(sizeRequiredDSH, usedAfterDSH - usedBeforeDSH); EXPECT_GE(sizeRequiredIOH, usedAfterIOH - usedBeforeIOH); @@ -1005,7 +1005,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd } mockKernelWithInternal->mockKernel->setCrossThreadData(mockKernelWithInternal->crossThreadData, sizeof(mockKernelWithInternal->crossThreadData)); - mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal)); + mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal), rootDeviceIndex); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); @@ -1096,12 +1096,12 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, WhenGettingSizeR totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries; auto &scheduler = pContext->getSchedulerKernel(); - auto schedulerSshSize = scheduler.getSurfaceStateHeapSize(); + auto schedulerSshSize = scheduler.getSurfaceStateHeapSize(rootDeviceIndex); totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0); totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - EXPECT_EQ(totalSize, HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel)); + EXPECT_EQ(totalSize, HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel, rootDeviceIndex)); } static const char *binaryFile = "simple_block_kernel"; diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h index 110ed65755..731abf1056 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h @@ -46,7 +46,7 @@ struct HardwareCommandsTest : ClDeviceFixture, template size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) { return EncodeSurfaceState::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0, - srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(), + srcKernel.getSurfaceStateHeap(rootDeviceIndex), srcKernel.getSurfaceStateHeapSize(rootDeviceIndex), srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset()); } }; diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp index 3ee16a3343..b0c4ad48d5 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp @@ -58,7 +58,7 @@ TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgument EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->requiresCoherency()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); delete buffer; } @@ -76,11 +76,11 @@ HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumen EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(pKernel->requiresCoherency()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress); diff --git a/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp index ea61c47c23..43db09eb00 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp @@ -110,7 +110,7 @@ TEST_F(KernelArgPipeTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsA auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); delete pipe; } @@ -127,11 +127,11 @@ HWTEST_F(KernelArgPipeTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArguments auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); diff --git a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp index 47e87bbfb3..6040a74d83 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp @@ -101,7 +101,7 @@ TEST_F(KernelArgSvmTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAr auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); delete[] svmPtr; } @@ -115,11 +115,11 @@ HWTEST_F(KernelArgSvmTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsA auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); @@ -154,7 +154,7 @@ TEST_F(KernelArgSvmTest, GivenValidSvmAllocStatelessWhenSettingKernelArgThenArgu auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); delete[] svmPtr; } @@ -170,11 +170,11 @@ HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArg auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); @@ -196,7 +196,7 @@ HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThen typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); @@ -214,7 +214,7 @@ HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetAr typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); void *surfaceAddress = reinterpret_cast(surfaceState->getSurfaceBaseAddress()); @@ -237,7 +237,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { svmPtr.resize(256); pKernel->setCrossThreadData(nullptr, sizeof(void *)); - pKernel->setSshLocal(nullptr, rendSurfSize); + pKernel->setSshLocal(nullptr, rendSurfSize, rootDeviceIndex); pKernelInfo->requiresSshForBuffers = true; pKernelInfo->usesSsh = true; { @@ -254,8 +254,8 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { ASSERT_GE(pKernel->getCrossThreadDataSize(rootDeviceIndex), sizeof(void *)); *reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)) = 0U; - ASSERT_GE(pKernel->getSurfaceStateHeapSize(), rendSurfSize); - RENDER_SURFACE_STATE *surfState = reinterpret_cast(pKernel->getSurfaceStateHeap()); + ASSERT_GE(pKernel->getSurfaceStateHeapSize(rootDeviceIndex), rendSurfSize); + RENDER_SURFACE_STATE *surfState = reinterpret_cast(pKernel->getSurfaceStateHeap(rootDeviceIndex)); memset(surfState, 0, rendSurfSize); pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, patch); @@ -278,7 +278,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { // when cross thread and ssh data is not available then should not do anything pKernel->setCrossThreadData(nullptr, 0); - pKernel->setSshLocal(nullptr, 0); + pKernel->setSshLocal(nullptr, 0, rootDeviceIndex); pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, patch); } } @@ -389,7 +389,7 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN kai.offsetBufferOffset = kai.kernelArgPatchInfoVector[0].size; this->pKernel->setCrossThreadData(nullptr, kai.offsetBufferOffset + sizeof(uint32_t)); - this->pKernel->setSshLocal(nullptr, rendSurfSize); + this->pKernel->setSshLocal(nullptr, rendSurfSize, rootDeviceIndex); this->pKernelInfo->requiresSshForBuffers = true; this->pKernelInfo->usesSsh = true; { @@ -405,8 +405,8 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN *expectedPointerPatchPtr = reinterpret_cast(0U); *expectedOffsetPatchPtr = 0U; - ASSERT_GE(this->pKernel->getSurfaceStateHeapSize(), rendSurfSize); - RENDER_SURFACE_STATE *surfState = reinterpret_cast(this->pKernel->getSurfaceStateHeap()); + ASSERT_GE(this->pKernel->getSurfaceStateHeapSize(rootDeviceIndex), rendSurfSize); + RENDER_SURFACE_STATE *surfState = reinterpret_cast(this->pKernel->getSurfaceStateHeap(rootDeviceIndex)); memset(surfState, 0, rendSurfSize); TypeParam::setArg(*this->pKernel, 0U, ptrToPatch, sizeToPatch, svmAlloc); diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 1caef7ee1a..28dabfbc6d 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -736,13 +736,13 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); auto bufferAddress = pKernel->kernelDeviceInfos[pDevice->getRootDeviceIndex()].privateSurface->getGpuAddress(); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -766,7 +766,7 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv char buffer[16]; MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer)); - MockContext context; + MockContext context(pClDevice); MockProgram program(&context, false, toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); @@ -779,8 +779,8 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); - EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap(rootDeviceIndex)); program.setConstantSurface(nullptr); delete pKernel; @@ -984,11 +984,11 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1025,8 +1025,8 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); - EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap(rootDeviceIndex)); program.setGlobalSurface(nullptr); delete pKernel; @@ -1156,11 +1156,11 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1197,8 +1197,8 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); - EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap()); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); + EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap(rootDeviceIndex)); program.setConstantSurface(nullptr); delete pKernel; @@ -1238,11 +1238,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenK ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1291,7 +1291,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenE typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1363,7 +1363,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); if (pClDevice->areOcl21FeaturesSupported() == false) { - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); } else { } @@ -1442,11 +1442,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1493,11 +1493,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe pKernel->patchDefaultDeviceQueue(pDevQueue); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -1537,7 +1537,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); - EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex)); delete pKernel; } diff --git a/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp b/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp index a13c1c736d..0b02cff30c 100644 --- a/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp @@ -21,6 +21,7 @@ using namespace NEO; class KernelTransformableTest : public ::testing::Test { public: void SetUp() override { + rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); pKernelInfo = std::make_unique(); KernelArgPatchInfo kernelArgPatchInfo; @@ -74,6 +75,7 @@ class KernelTransformableTest : public ::testing::Test { std::unique_ptr image; SKernelBinaryHeaderCommon kernelHeader; char surfaceStateHeap[0x80]; + uint32_t rootDeviceIndex = std::numeric_limits::max(); }; HWTEST_F(KernelTransformableTest, givenKernelThatCannotTranformImagesWithTwoTransformableImagesAndTwoTransformableSamplersWhenAllArgsAreSetThenImagesAreNotTransformed) { @@ -93,7 +95,7 @@ HWTEST_F(KernelTransformableTest, givenKernelThatCannotTranformImagesWithTwoTran pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(); + auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, firstSurfaceState->getSurfaceType()); @@ -120,7 +122,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(); + auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType()); @@ -147,7 +149,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(); + auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); @@ -179,7 +181,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithOneTransformableImageAndTwoTran pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(); + auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); @@ -201,7 +203,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithImages2dAndTwoTransformableSamp pKernelInfo->kernelArgInfo[2].isTransformable = true; pKernelInfo->kernelArgInfo[3].isTransformable = true; - auto ssh = pKernel->getSurfaceStateHeap(); + auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); @@ -233,7 +235,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(); + auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); @@ -265,7 +267,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithNonTransformableSamplersWhenRes pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(); + auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); @@ -303,7 +305,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithoutSamplersAndTransformableImag pKernel->setArg(2, sizeof(clImage), &clImage); pKernel->setArg(3, sizeof(clImage), &clImage); - auto ssh = pKernel->getSurfaceStateHeap(); + auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex); auto firstSurfaceState = reinterpret_cast(ptrOffset(ssh, firstImageOffset)); auto secondSurfaceState = reinterpret_cast(ptrOffset(ssh, secondImageOffset)); diff --git a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp index cec23012e1..39b01c90eb 100644 --- a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp @@ -125,7 +125,7 @@ HWTEST_F(BufferSetArgTest, givenSetArgBufferWhenNullArgStatefulThenProgramNullSu using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); pKernelInfo->requiresSshForBuffers = true; @@ -145,7 +145,7 @@ HWTEST_F(BufferSetArgTest, givenSetKernelArgOnReadOnlyBufferThatIsMisalingedWhen using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); pKernelInfo->requiresSshForBuffers = true; @@ -186,7 +186,7 @@ HWTEST_F(BufferSetArgTest, givenSetArgBufferWithNullArgStatelessThenDontProgramN HWTEST_F(BufferSetArgTest, givenNonPureStatefulArgWhenRenderCompressedBufferIsSetThenSetNonAuxMode) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex()); graphicsAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); graphicsAllocation->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), graphicsAllocation->getUnderlyingBuffer(), buffer->getSize(), false)); diff --git a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp index 7c45dc5f38..72f980b23d 100644 --- a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp @@ -120,7 +120,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgImageThenSurfaceBaseAddressIsSetCo typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); srcImage->setImageArg(const_cast(surfaceState), false, 0, pClDevice->getRootDeviceIndex()); @@ -195,7 +195,7 @@ HWTEST_F(ImageSetArgTest, givenCubeMapIndexWhenSetKernelArgImageIsCalledThenModi src2dImage->setCubeFaceIndex(cubeFaceIndex); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); src2dImage->setImageArg(const_cast(surfaceState), false, 0, pClDevice->getRootDeviceIndex()); @@ -298,7 +298,7 @@ HWTEST_F(ImageSetArgTest, givenNonCubeMapIndexWhenSetKernelArgImageIsCalledThenD typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(srcImage->getCubeFaceIndex(), __GMM_NO_CUBE_MAP); @@ -327,7 +327,7 @@ HWTEST_F(ImageSetArgTest, givenOffsetedBufferWhenSetKernelArgImageIscalledThenFu typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto graphicsAllocation = srcAllocation; @@ -357,7 +357,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgThenPropertiesAreSetCorrectly) { ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); size_t rPitch = srcImage->getImageDesc().image_row_pitch; @@ -408,7 +408,7 @@ HWTEST_F(ImageSetArgTest, givenImage2DWithMipMapsWhenSetKernelArgIsCalledThenMip ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ((uint32_t)mipLevel, surfaceState->getSurfaceMinLod()); EXPECT_EQ((uint32_t)mipCount, surfaceState->getMipCountLod() + 1); @@ -429,7 +429,7 @@ HWTEST_F(ImageSetArgTest, Given2dArrayWhenSettingKernelArgThenPropertiesAreSetCo ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -477,7 +477,7 @@ HWTEST_F(ImageSetArgTest, Given1dArrayWhenSettingKernelArgThenPropertiesAreSetCo ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); @@ -533,7 +533,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithoutUnifiedAuxC ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_FALSE(Image::isDepthFormat(image->getImageFormat())); @@ -569,7 +569,7 @@ HWTEST_F(ImageSetArgTest, givenDepthFormatWhenSetArgIsCalledThenProgramAuxFields ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); @@ -600,7 +600,7 @@ HWTEST_F(ImageSetArgTest, givenMultisampledR32Floatx8x24DepthStencilFormatWhenSe retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); @@ -627,7 +627,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationAndRenderCompressionWhenSetArgOnMult retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() == @@ -657,7 +657,7 @@ HWTEST_F(ImageSetArgTest, givenDepthFormatAndRenderCompressionWhenSetArgOnMultis retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat())); @@ -692,7 +692,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapa retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); @@ -723,7 +723,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapa retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_NE(0u, surfaceState->getAuxiliarySurfaceBaseAddress()); @@ -758,7 +758,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapa retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj); ASSERT_EQ(CL_SUCCESS, retVal); - auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(), + auto surfaceState = reinterpret_cast(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); EXPECT_EQ(pitchValue, surfaceState->getAuxiliarySurfacePitch()); @@ -793,7 +793,7 @@ HWTEST_F(ImageSetArgTest, GivenImageFrom1dBufferWhenSettingKernelArgThenProperti ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); auto image = castToObject(imageFromBuffer); @@ -837,7 +837,7 @@ HWTEST_F(ImageSetArgTest, GivenImageWithClLuminanceFormatWhenSettingKernelArgThe ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); //for CL_LUMINANCE format we override channels to RED to be spec complaint. EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectRed()); @@ -963,7 +963,7 @@ HWTEST_F(ImageMediaBlockSetArgTest, WhenSettingKernelArgImageThenPropertiesAreCo ASSERT_EQ(CL_SUCCESS, retVal); auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), + ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap)); size_t rPitch = srcImage->getImageDesc().image_row_pitch; diff --git a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp index 3c3b108d93..7cc8a9755b 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -544,7 +544,7 @@ TEST_F(MemoryAllocatorTest, givenStatelessKernelWithPrintfWhenPrintfSurfaceIsCre EXPECT_EQ(allocationAddress, *(uintptr_t *)printfPatchAddress); - EXPECT_EQ(0u, kernel.mockKernel->getSurfaceStateHeapSize()); + EXPECT_EQ(0u, kernel.mockKernel->getSurfaceStateHeapSize(rootDeviceIndex)); delete printfHandler; } @@ -575,11 +575,11 @@ HWTEST_F(MemoryAllocatorTest, givenStatefulKernelWithPrintfWhenPrintfSurfaceIsCr auto printfAllocation = printfHandler->getSurface(); auto allocationAddress = printfAllocation->getGpuAddress(); - EXPECT_NE(0u, kernel.mockKernel->getSurfaceStateHeapSize()); + EXPECT_NE(0u, kernel.mockKernel->getSurfaceStateHeapSize(device->getRootDeviceIndex())); typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( - ptrOffset(kernel.mockKernel->getSurfaceStateHeap(), + ptrOffset(kernel.mockKernel->getSurfaceStateHeap(device->getRootDeviceIndex()), kernel.mockKernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index 34d94e2e57..d21e4dbef8 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -43,7 +43,6 @@ class MockKernel : public Kernel { using Kernel::numberOfBindingTableStates; using Kernel::patchBufferOffset; using Kernel::patchWithImplicitSurface; - using Kernel::sshLocalSize; using Kernel::svmAllocationsRequireCacheFlush; using Kernel::threadArbitrationPolicy; using Kernel::unifiedMemoryControls; @@ -181,15 +180,15 @@ class MockKernel : public Kernel { kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = static_cast(mockCrossThreadData.size()); } - void setSshLocal(const void *sshPattern, uint32_t newSshSize) { - sshLocalSize = newSshSize; + void setSshLocal(const void *sshPattern, uint32_t newSshSize, uint32_t rootDeviceIndex) { + kernelDeviceInfos[rootDeviceIndex].sshLocalSize = newSshSize; if (newSshSize == 0) { - pSshLocal.reset(nullptr); + kernelDeviceInfos[rootDeviceIndex].pSshLocal.reset(nullptr); } else { - pSshLocal = std::make_unique(newSshSize); + kernelDeviceInfos[rootDeviceIndex].pSshLocal = std::make_unique(newSshSize); if (sshPattern) { - memcpy_s(pSshLocal.get(), newSshSize, sshPattern, newSshSize); + memcpy_s(kernelDeviceInfos[rootDeviceIndex].pSshLocal.get(), newSshSize, sshPattern, newSshSize); } } } @@ -291,7 +290,7 @@ class MockKernelWithInternals { mockProgram = new MockProgram(context, false, deviceVector); mockKernel = new MockKernel(mockProgram, kernelInfo); mockKernel->setCrossThreadData(&crossThreadData, sizeof(crossThreadData)); - mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal)); + mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal), deviceArg.getRootDeviceIndex()); if (addDefaultArg) { defaultKernelArguments.resize(2); @@ -358,10 +357,9 @@ class MockKernelWithInternals { class MockParentKernel : public Kernel { public: using Kernel::auxTranslationRequired; + using Kernel::kernelDeviceInfos; using Kernel::kernelInfo; using Kernel::patchBlocksCurbeWithConstantValues; - using Kernel::pSshLocal; - using Kernel::sshLocalSize; static MockParentKernel *create(Context &context, bool addChildSimdSize = false, bool addChildGlobalMemory = false, bool addChildConstantMemory = false, bool addPrintfForParent = true, bool addPrintfForBlock = true) { auto clDevice = context.getDevice(0);