// Patch summary (git unified diff touching four files: opencl/source/kernel/kernel.cpp,
// opencl/source/kernel/kernel.h, opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp and
// opencl/test/unit_test/mocks/mock_kernel.h).
//
// kernel.cpp
//   * Adds Kernel::hasDirectStatelessAccessToSharedBuffer() const. It walks the argument
//     indices 0 .. getKernelArgsNumber()-1 and returns true as soon as it finds an argument that
//       - has kernelArguments.at(i).type == BUFFER_OBJ,
//       - whose explicit-arg descriptor reports !isPureStateful(), and
//       - whose object obtained from getKernelArg(i) is non-null and whose
//         getMultiGraphicsAllocation().getAllocationType() is
//         GraphicsAllocation::AllocationType::SHARED_BUFFER.
//     It returns false when no argument matches.
//   * Kernel::updateAuxTranslationRequired(): inside the allowStatelessCompression(...) branch,
//     setAuxTranslationRequired(true) is now also triggered when
//     hasDirectStatelessAccessToSharedBuffer() is true, in addition to the two existing
//     host-memory checks.
//
// kernel.h
//   * Declares the new const member function next to the existing
//     hasDirectStatelessAccessToHostMemory() / hasIndirectStatelessAccessToHostMemory() declarations.
//
// mock_kernel.h
//   * Adds `using Kernel::hasDirectStatelessAccessToSharedBuffer;` to MockKernel.
//
// kernel_arg_buffer_tests.cpp
//   * New test 1: marks a MockBuffer's allocation as SHARED_BUFFER, sets it as arg 0 for both
//     pureStatefulBufferAccess values {false, true}, and expects
//     hasDirectStatelessAccessToSharedBuffer() == !pureStatefulBufferAccess.
//   * New test 2: sets the EnableStatelessCompression debug flag to 1, sets a SHARED_BUFFER
//     MockBuffer as arg 0, and expects isAuxTranslationRequired() to be false before and true
//     after updateAuxTranslationRequired() is called.
//
// NOTE(review): the hunks below are run together on three long lines (original newlines,
// indentation and blank context lines are not recoverable from this text), so this patch
// presumably cannot be applied as-is; regenerate it from the repository before use.
// NOTE(review): `arg.as()`, `castToObject(getKernelArg(i))` and
// `class Kernel : public ReferenceTrackedObject` appear without template arguments. It looks
// like angle-bracket content was stripped during extraction - TODO confirm against the
// upstream sources before relying on these lines.
diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index fc9dbfcd18..c89315ae5e 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -2530,6 +2530,19 @@ void Kernel::fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &ke } } +bool Kernel::hasDirectStatelessAccessToSharedBuffer() const { + for (uint32_t i = 0; i < getKernelArgsNumber(); i++) { + const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[i]; + if (BUFFER_OBJ == kernelArguments.at(i).type && !arg.as().isPureStateful()) { + auto buffer = castToObject(getKernelArg(i)); + if (buffer && buffer->getMultiGraphicsAllocation().getAllocationType() == GraphicsAllocation::AllocationType::SHARED_BUFFER) { + return true; + } + } + } + return false; +} + bool Kernel::hasDirectStatelessAccessToHostMemory() const { for (uint32_t i = 0; i < getKernelArgsNumber(); i++) { const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[i]; @@ -2848,7 +2861,9 @@ bool Kernel::requiresLimitedWorkgroupSize() const { void Kernel::updateAuxTranslationRequired() { const auto &hwInfoConfig = *HwInfoConfig::get(getDevice().getHardwareInfo().platform.eProductFamily); if (hwInfoConfig.allowStatelessCompression(getDevice().getHardwareInfo())) { - if (hasDirectStatelessAccessToHostMemory() || hasIndirectStatelessAccessToHostMemory()) { + if (hasDirectStatelessAccessToHostMemory() || + hasIndirectStatelessAccessToHostMemory() || + hasDirectStatelessAccessToSharedBuffer()) { setAuxTranslationRequired(true); } } diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 733e7f437c..dc410c900d 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -514,6 +514,7 @@ class Kernel : public ReferenceTrackedObject { void resolveArgs(); void reconfigureKernel(); + bool hasDirectStatelessAccessToSharedBuffer() const; bool hasDirectStatelessAccessToHostMemory() const; bool 
hasIndirectStatelessAccessToHostMemory() const; diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp index 6ea17e39a6..add7b72885 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp @@ -333,6 +333,23 @@ TEST_F(KernelArgBufferTest, givenBufferWhenHasDirectStatelessAccessToHostMemoryI } } +TEST_F(KernelArgBufferTest, givenSharedBufferWhenHasDirectStatelessAccessToSharedBufferIsCalledThenReturnCorrectValue) { + MockBuffer buffer; + buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::SHARED_BUFFER); + + auto val = (cl_mem)&buffer; + auto pVal = &val; + + for (auto pureStatefulBufferAccess : {false, true}) { + pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess); + + auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToSharedBuffer()); + } +} + TEST_F(KernelArgBufferTest, givenBufferInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) { MockBuffer buffer; buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); @@ -462,6 +479,28 @@ TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslatio } } +TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToSharedBufferWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) { + DebugManagerStateRestore debugRestorer; + DebugManager.flags.EnableStatelessCompression.set(1); + + MockBuffer buffer; + buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::SHARED_BUFFER); + + auto val = (cl_mem)&buffer; + auto pVal = &val; + + auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal); + 
EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_TRUE(pKernel->hasDirectStatelessAccessToSharedBuffer()); + + EXPECT_FALSE(pKernel->isAuxTranslationRequired()); + + pKernel->updateAuxTranslationRequired(); + + EXPECT_TRUE(pKernel->isAuxTranslationRequired()); +} + TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) { DebugManagerStateRestore debugRestorer; DebugManager.flags.EnableStatelessCompression.set(1); diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index fdbec23583..84f34700f3 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -107,6 +107,7 @@ class MockKernel : public Kernel { using Kernel::getDevice; using Kernel::getHardwareInfo; using Kernel::hasDirectStatelessAccessToHostMemory; + using Kernel::hasDirectStatelessAccessToSharedBuffer; using Kernel::hasIndirectStatelessAccessToHostMemory; using Kernel::isSchedulerKernel; using Kernel::kernelArgHandlers;