# Patch summary (free-form text ahead of the first "diff --git" header).
# - enqueue_common.h: enqueueHandler calls kernel->updateAuxTranslationRequired()
#   immediately before testing kernel->isAuxTranslationRequired().
# - kernel.h / kernel.cpp: adds setAuxTranslationRequired(bool), declares
#   updateAuxTranslationRequired(), and adds hasDirectStatelessAccessToHostMemory(),
#   which returns true when a BUFFER_OBJ argument whose kernelArgInfo entry is not
#   pureStatefulBufferAccess is backed by a BUFFER_HOST_MEMORY allocation.
# - kernel_extra.cpp: empty definition of Kernel::updateAuxTranslationRequired().
# - kernel_arg_buffer_tests.cpp / mock_kernel.h: unit tests for the above and the
#   using-declaration that exposes hasDirectStatelessAccessToHostMemory on MockKernel.
diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h
index d15644e08f..ec8d774ebb 100644
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@@ -65,6 +65,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
         forceDispatchScheduler(multiDispatchInfo);
     } else {
         auto rootDeviceIndex = device->getRootDeviceIndex();
+        kernel->updateAuxTranslationRequired();
         if (kernel->isAuxTranslationRequired()) {
             auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
             builtInLock.takeOwnership(builder);
diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp
index 32015db9c9..ee761f818b 100644
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@@ -2464,6 +2464,18 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF
     }
 }
 
+bool Kernel::hasDirectStatelessAccessToHostMemory() const {
+    for (uint32_t i = 0; i < getKernelArgsNumber(); i++) {
+        if (BUFFER_OBJ == kernelArguments.at(i).type && !getDefaultKernelInfo().kernelArgInfo.at(i).pureStatefulBufferAccess) {
+            auto buffer = castToObject<Buffer>(getKernelArg(i));
+            if (buffer && buffer->getMultiGraphicsAllocation().getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
 void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out, uint32_t rootDeviceIndex) const {
     if (false == HwHelper::cacheFlushAfterWalkerSupported(getHardwareInfo(rootDeviceIndex))) {
         return;
diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h
index 83efde294b..e379dd319b 100644
--- a/opencl/source/kernel/kernel.h
+++ b/opencl/source/kernel/kernel.h
@@ -114,6 +114,8 @@ class Kernel : public BaseObject<_cl_kernel> {
     }
 
     bool isAuxTranslationRequired() const { return auxTranslationRequired; }
+    void setAuxTranslationRequired(bool onOff) { auxTranslationRequired = onOff; }
+    void updateAuxTranslationRequired();
 
     char *getCrossThreadData(uint32_t rootDeviceIndex) const {
         return kernelDeviceInfos[rootDeviceIndex].crossThreadData;
@@ -491,6 +493,7 @@ class Kernel : public BaseObject<_cl_kernel> {
     void resolveArgs();
 
     void reconfigureKernel(uint32_t rootDeviceIndex);
+    bool hasDirectStatelessAccessToHostMemory() const;
 
     void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation);
     bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const;
diff --git a/opencl/source/kernel/kernel_extra.cpp b/opencl/source/kernel/kernel_extra.cpp
index 4ecaabeb5e..686d7552f6 100644
--- a/opencl/source/kernel/kernel_extra.cpp
+++ b/opencl/source/kernel/kernel_extra.cpp
@@ -41,4 +41,6 @@ int32_t Kernel::setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t
     return CL_INVALID_VALUE;
 }
 
+void Kernel::updateAuxTranslationRequired() {
+}
 } // namespace NEO
diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp
index b0c4ad48d5..3fa28385a8 100644
--- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp
+++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp
@@ -201,6 +201,59 @@ TEST_F(KernelArgBufferTest, givenNoCacheFlushBufferWhenSettingAsArgThenNotExpect
     EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
 }
 
+TEST_F(KernelArgBufferTest, givenBufferWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
+    MockBuffer buffer;
+    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
+
+    auto val = (cl_mem)&buffer;
+    auto pVal = &val;
+
+    for (auto pureStatefulBufferAccess : {false, true}) {
+        pKernelInfo->kernelArgInfo[0].pureStatefulBufferAccess = pureStatefulBufferAccess;
+
+        auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
+        EXPECT_EQ(CL_SUCCESS, retVal);
+
+        EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
+    }
+}
+
+TEST_F(KernelArgBufferTest, givenBufferInHostMemoryWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnCorrectValue) {
+    MockBuffer buffer;
+    buffer.getGraphicsAllocation(mockRootDeviceIndex)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
+
+    auto val = (cl_mem)&buffer;
+    auto pVal = &val;
+
+    for (auto pureStatefulBufferAccess : {false, true}) {
+        pKernelInfo->kernelArgInfo[0].pureStatefulBufferAccess = pureStatefulBufferAccess;
+
+        auto retVal = pKernel->setArg(0, sizeof(cl_mem *), pVal);
+        EXPECT_EQ(CL_SUCCESS, retVal);
+
+        EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToHostMemory());
+    }
+}
+
+TEST_F(KernelArgBufferTest, givenInvalidMemObjWhenHasDirectStatelessAccessToHostMemoryIsCalledThenReturnFalse) {
+    KernelInfo kernelInfo;
+    MockKernel emptyKernel(pProgram, MockKernel::toKernelInfoContainer(kernelInfo, 0));
+    EXPECT_FALSE(emptyKernel.hasDirectStatelessAccessToHostMemory());
+
+    pKernel->kernelArguments.at(0).type = Kernel::NONE_OBJ;
+    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
+
+    pKernel->kernelArguments.at(0).type = Kernel::BUFFER_OBJ;
+    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
+}
+
+TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslationRequiredReturnsCorrectValue) {
+    for (auto auxTranslationRequired : {false, true}) {
+        pKernel->setAuxTranslationRequired(auxTranslationRequired);
+        EXPECT_EQ(auxTranslationRequired, pKernel->isAuxTranslationRequired());
+    }
+}
+
 class KernelArgBufferFixtureBindless : public KernelArgBufferFixture {
   public:
     void SetUp() {
diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h
index 6ddba217f8..a7bf0e0253 100644
--- a/opencl/test/unit_test/mocks/mock_kernel.h
+++ b/opencl/test/unit_test/mocks/mock_kernel.h
@@ -33,6 +33,7 @@ class MockKernel : public Kernel {
     using Kernel::auxTranslationRequired;
     using Kernel::containsStatelessWrites;
     using Kernel::executionType;
+    using Kernel::hasDirectStatelessAccessToHostMemory;
     using Kernel::isSchedulerKernel;
     using Kernel::kernelArgHandlers;
     using Kernel::kernelArgRequiresCacheFlush;