Improve caching in clSetKernelArgSVMPointer

Check allocId earlier, and also reuse the cached argument if
allocationsCounter has not changed since the last call.

Related-To: NEO-6737

Co-authored-by: Michal Mrozek <michal.mrozek@intel.com>

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2022-03-02 11:08:18 +00:00
committed by Compute-Runtime-Automation
parent 385c60948e
commit 7ab86d44d6
4 changed files with 40 additions and 16 deletions

View File

@ -4866,13 +4866,41 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
auto retVal = validateObjects(WithCastToInternal(kernel, &pMultiDeviceKernel));
API_ENTER(&retVal);
DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue);
if (CL_SUCCESS != retVal) {
TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
return retVal;
}
if (argIndex >= pMultiDeviceKernel->getKernelArgsNumber()) {
retVal = CL_INVALID_ARG_INDEX;
TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
return retVal;
}
auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager();
// Fast path: skip re-setting the argument when the same pointer was already
// set for this index and its cached allocation id is still valid.
if (argValue != nullptr) {
    if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 &&
        pMultiDeviceKernel->getKernelArguments()[argIndex].value == argValue) {
        bool reuseFromCache = false;
        if (svmManager->allocationsCounter == pMultiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) {
            // The allocations counter matches the value recorded with the
            // cached argument, so the cached allocation id is reused as is.
            reuseFromCache = true;
        } else {
            // The counter moved on: look the pointer up again and compare ids.
            // getSVMAlloc() may return nullptr (the regular path below handles
            // that case explicitly), so it must be checked before dereferencing.
            auto svmData = svmManager->getSVMAlloc(argValue);
            if (svmData != nullptr &&
                pMultiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId()) {
                reuseFromCache = true;
            }
        }
        if (reuseFromCache) {
            TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
            return CL_SUCCESS;
        }
    }
}
DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue);
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
const HardwareInfo &hwInfo = pDevice->getHardwareInfo();
if (!hwInfo.capabilityTable.ftrSvm) {
@ -4882,12 +4910,6 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
}
}
if (argIndex >= pMultiDeviceKernel->getKernelArgsNumber()) {
retVal = CL_INVALID_ARG_INDEX;
TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
return retVal;
}
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
auto pKernel = pMultiDeviceKernel->getKernel(pDevice->getRootDeviceIndex());
cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo()
@ -4905,7 +4927,6 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
MultiGraphicsAllocation *pSvmAllocs = nullptr;
uint32_t allocId = 0u;
if (argValue != nullptr) {
auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager();
auto svmData = svmManager->getSVMAlloc(argValue);
if (svmData == nullptr) {
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
@ -4916,12 +4937,6 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
}
}
} else {
if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 &&
pMultiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId() &&
pMultiDeviceKernel->getKernelArguments()[argIndex].value == argValue) {
TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
return CL_SUCCESS;
}
pSvmAllocs = &svmData->gpuAllocations;
allocId = svmData->getAllocId();
}

View File

@ -930,6 +930,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t));
kernelArguments[argIndex].allocId = allocId;
kernelArguments[argIndex].allocIdMemoryManagerCounter = allocId ? this->getContext().getSVMAllocsManager()->allocationsCounter.load() : 0u;
if (!kernelArguments[argIndex].isPatched) {
patchedArgumentsNum++;
kernelArguments[argIndex].isPatched = true;

View File

@ -68,6 +68,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
bool isPatched = false;
bool isStatelessUncacheable = false;
uint32_t allocId;
uint32_t allocIdMemoryManagerCounter;
};
enum class TunningStatus {

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/test/common/mocks/mock_svm_manager.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
@ -84,7 +85,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenDeviceNotSupportingSvmWhenSettingKern
auto retVal = clSetKernelArgSVMPointer(
pMultiDeviceKernel.get(), // cl_kernel kernel
(cl_uint)-1, // cl_uint arg_index
0, // cl_uint arg_index
nullptr // const void *arg_value
);
EXPECT_EQ(CL_INVALID_OPERATION, retVal);
@ -209,6 +210,8 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndPointerWithInvalidOffsetWhenSet
TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKernelArgThenSetArgSvmAllocCalledOnlyWhenNeeded) {
const ClDeviceInfo &devInfo = pDevice->getDeviceInfo();
auto mockSvmManager = reinterpret_cast<MockSVMAllocsManager *>(pMockKernel->getContext().getSVMAllocsManager());
if (devInfo.svmCapabilities != 0) {
EXPECT_EQ(0u, pMockKernel->setArgSvmAllocCalls);
void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4);
@ -231,6 +234,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(callCounter, pMockKernel->setArgSvmAllocCalls);
++mockSvmManager->allocationsCounter;
// different pointer - called
void *nextPtrSvm = static_cast<char *>(ptrSvm) + 1;
@ -241,6 +245,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls);
++mockSvmManager->allocationsCounter;
// different allocId - called
pMockKernel->kernelArguments[0].allocId = 1;
@ -251,6 +256,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls);
++mockSvmManager->allocationsCounter;
// allocId = 0 - called
pMockKernel->kernelArguments[0].allocId = 0;
@ -261,6 +267,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer
);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls);
++mockSvmManager->allocationsCounter;
// same values - not called
retVal = clSetKernelArgSVMPointer(