From 7ab86d44d66845cea488f38b43ba69c478bc7f12 Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Wed, 2 Mar 2022 11:08:18 +0000 Subject: [PATCH] Improve caching in clSetKernelArgSVMPointer Check allocId earlier and also reuse if allocationsCounter did not change from last call. Related-To: NEO-6737 Co-authored-by: Michal Mrozek Signed-off-by: Dominik Dabek --- opencl/source/api/api.cpp | 45 ++++++++++++------- opencl/source/kernel/kernel.cpp | 1 + opencl/source/kernel/kernel.h | 1 + .../cl_set_kernel_arg_svm_pointer_tests.inl | 9 +++- 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index ae643b5e2d..7b6098f3a4 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -4866,13 +4866,41 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, auto retVal = validateObjects(WithCastToInternal(kernel, &pMultiDeviceKernel)); API_ENTER(&retVal); - DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue); - if (CL_SUCCESS != retVal) { TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); return retVal; } + if (argIndex >= pMultiDeviceKernel->getKernelArgsNumber()) { + retVal = CL_INVALID_ARG_INDEX; + TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); + return retVal; + } + + auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager(); + + if (argValue != nullptr) { + if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 && + pMultiDeviceKernel->getKernelArguments()[argIndex].value == argValue) { + bool reuseFromCache = false; + + if (svmManager->allocationsCounter == pMultiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) { + reuseFromCache = true; + } else { + auto svmData = svmManager->getSVMAlloc(argValue); + if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId()) { + reuseFromCache = true; + } + } + if (reuseFromCache) { + TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); + return CL_SUCCESS; + } + } + } + + DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue); + for (const auto &pDevice : pMultiDeviceKernel->getDevices()) { const HardwareInfo &hwInfo = pDevice->getHardwareInfo(); if (!hwInfo.capabilityTable.ftrSvm) { @@ -4882,12 +4910,6 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, } } - if (argIndex >= pMultiDeviceKernel->getKernelArgsNumber()) { - retVal = CL_INVALID_ARG_INDEX; - TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); - return retVal; - } - for (const auto &pDevice : pMultiDeviceKernel->getDevices()) { auto pKernel = pMultiDeviceKernel->getKernel(pDevice->getRootDeviceIndex()); cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo() @@ -4905,7 +4927,6 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, MultiGraphicsAllocation *pSvmAllocs = nullptr; uint32_t allocId = 0u; if (argValue != nullptr) { - auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager(); auto svmData = svmManager->getSVMAlloc(argValue); if (svmData == nullptr) { for (const auto &pDevice : pMultiDeviceKernel->getDevices()) { @@ -4916,12 +4937,6 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, } } } else { - if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 && - pMultiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId() && - pMultiDeviceKernel->getKernelArguments()[argIndex].value == argValue) { - TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); - return CL_SUCCESS; - } pSvmAllocs = &svmData->gpuAllocations; allocId = svmData->getAllocId(); } diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 9f37d8d437..b762ed4107 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -930,6 +930,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t)); kernelArguments[argIndex].allocId = allocId; + kernelArguments[argIndex].allocIdMemoryManagerCounter = allocId ? this->getContext().getSVMAllocsManager()->allocationsCounter.load() : 0u; if (!kernelArguments[argIndex].isPatched) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index b68f155675..caa562b5a2 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -68,6 +68,7 @@ class Kernel : public ReferenceTrackedObject { bool isPatched = false; bool isStatelessUncacheable = false; uint32_t allocId; + uint32_t allocIdMemoryManagerCounter; }; enum class TunningStatus { diff --git a/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl b/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl index bddcc230eb..163f2f9f89 100644 --- a/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl +++ b/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl @@ -6,6 +6,7 @@ */ #include "shared/source/memory_manager/unified_memory_manager.h" +#include "shared/test/common/mocks/mock_svm_manager.h" #include "shared/test/common/test_macros/test.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" @@ -84,7 +85,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenDeviceNotSupportingSvmWhenSettingKern auto retVal = clSetKernelArgSVMPointer( pMultiDeviceKernel.get(), // cl_kernel kernel - (cl_uint)-1, // cl_uint arg_index + 0, // cl_uint arg_index nullptr // const void *arg_value ); EXPECT_EQ(CL_INVALID_OPERATION, retVal); @@ -209,6 +210,8 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndPointerWithInvalidOffsetWhenSet TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKernelArgThenSetArgSvmAllocCalledOnlyWhenNeeded) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); + auto mockSvmManager = reinterpret_cast(pMockKernel->getContext().getSVMAllocsManager()); + if (devInfo.svmCapabilities != 0) { EXPECT_EQ(0u, pMockKernel->setArgSvmAllocCalls); void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); @@ -231,6 +234,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(callCounter, pMockKernel->setArgSvmAllocCalls); + ++mockSvmManager->allocationsCounter; // different pointer - called void *nextPtrSvm = static_cast(ptrSvm) + 1; @@ -241,6 +245,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); + ++mockSvmManager->allocationsCounter; // different allocId - called pMockKernel->kernelArguments[0].allocId = 1; @@ -251,6 +256,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); + ++mockSvmManager->allocationsCounter; // allocId = 0 - called pMockKernel->kernelArguments[0].allocId = 0; @@ -261,6 +267,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); + ++mockSvmManager->allocationsCounter; // same values - not called retVal = clSetKernelArgSVMPointer(