mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Improve caching in clSetKernelArgSVMPointer
Check allocId earlier, and also reuse the cached kernel argument if allocationsCounter has not changed since the last call.

Related-To: NEO-6737
Co-authored-by: Michal Mrozek <michal.mrozek@intel.com>
Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
385c60948e
commit
7ab86d44d6
@ -4866,13 +4866,41 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
auto retVal = validateObjects(WithCastToInternal(kernel, &pMultiDeviceKernel));
|
||||
API_ENTER(&retVal);
|
||||
|
||||
DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue);
|
||||
|
||||
if (CL_SUCCESS != retVal) {
|
||||
TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
if (argIndex >= pMultiDeviceKernel->getKernelArgsNumber()) {
|
||||
retVal = CL_INVALID_ARG_INDEX;
|
||||
TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager();
|
||||
|
||||
if (argValue != nullptr) {
|
||||
if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 &&
|
||||
pMultiDeviceKernel->getKernelArguments()[argIndex].value == argValue) {
|
||||
bool reuseFromCache = false;
|
||||
|
||||
if (svmManager->allocationsCounter == pMultiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) {
|
||||
reuseFromCache = true;
|
||||
} else {
|
||||
auto svmData = svmManager->getSVMAlloc(argValue);
|
||||
if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId()) {
|
||||
reuseFromCache = true;
|
||||
}
|
||||
}
|
||||
if (reuseFromCache) {
|
||||
TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue);
|
||||
|
||||
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
|
||||
const HardwareInfo &hwInfo = pDevice->getHardwareInfo();
|
||||
if (!hwInfo.capabilityTable.ftrSvm) {
|
||||
@ -4882,12 +4910,6 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
}
|
||||
}
|
||||
|
||||
if (argIndex >= pMultiDeviceKernel->getKernelArgsNumber()) {
|
||||
retVal = CL_INVALID_ARG_INDEX;
|
||||
TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
|
||||
auto pKernel = pMultiDeviceKernel->getKernel(pDevice->getRootDeviceIndex());
|
||||
cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo()
|
||||
@ -4905,7 +4927,6 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
MultiGraphicsAllocation *pSvmAllocs = nullptr;
|
||||
uint32_t allocId = 0u;
|
||||
if (argValue != nullptr) {
|
||||
auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager();
|
||||
auto svmData = svmManager->getSVMAlloc(argValue);
|
||||
if (svmData == nullptr) {
|
||||
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
|
||||
@ -4916,12 +4937,6 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 &&
|
||||
pMultiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId() &&
|
||||
pMultiDeviceKernel->getKernelArguments()[argIndex].value == argValue) {
|
||||
TRACING_EXIT(clSetKernelArgSVMPointer, &retVal);
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
pSvmAllocs = &svmData->gpuAllocations;
|
||||
allocId = svmData->getAllocId();
|
||||
}
|
||||
|
@ -930,6 +930,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
|
||||
|
||||
storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t));
|
||||
kernelArguments[argIndex].allocId = allocId;
|
||||
kernelArguments[argIndex].allocIdMemoryManagerCounter = allocId ? this->getContext().getSVMAllocsManager()->allocationsCounter.load() : 0u;
|
||||
if (!kernelArguments[argIndex].isPatched) {
|
||||
patchedArgumentsNum++;
|
||||
kernelArguments[argIndex].isPatched = true;
|
||||
|
@ -68,6 +68,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
||||
bool isPatched = false;
|
||||
bool isStatelessUncacheable = false;
|
||||
uint32_t allocId;
|
||||
uint32_t allocIdMemoryManagerCounter;
|
||||
};
|
||||
|
||||
enum class TunningStatus {
|
||||
|
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/test/common/mocks/mock_svm_manager.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
@ -84,7 +85,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenDeviceNotSupportingSvmWhenSettingKern
|
||||
|
||||
auto retVal = clSetKernelArgSVMPointer(
|
||||
pMultiDeviceKernel.get(), // cl_kernel kernel
|
||||
(cl_uint)-1, // cl_uint arg_index
|
||||
0, // cl_uint arg_index
|
||||
nullptr // const void *arg_value
|
||||
);
|
||||
EXPECT_EQ(CL_INVALID_OPERATION, retVal);
|
||||
@ -209,6 +210,8 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndPointerWithInvalidOffsetWhenSet
|
||||
|
||||
TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKernelArgThenSetArgSvmAllocCalledOnlyWhenNeeded) {
|
||||
const ClDeviceInfo &devInfo = pDevice->getDeviceInfo();
|
||||
auto mockSvmManager = reinterpret_cast<MockSVMAllocsManager *>(pMockKernel->getContext().getSVMAllocsManager());
|
||||
|
||||
if (devInfo.svmCapabilities != 0) {
|
||||
EXPECT_EQ(0u, pMockKernel->setArgSvmAllocCalls);
|
||||
void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4);
|
||||
@ -231,6 +234,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer
|
||||
);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(callCounter, pMockKernel->setArgSvmAllocCalls);
|
||||
++mockSvmManager->allocationsCounter;
|
||||
|
||||
// different pointer - called
|
||||
void *nextPtrSvm = static_cast<char *>(ptrSvm) + 1;
|
||||
@ -241,6 +245,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer
|
||||
);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls);
|
||||
++mockSvmManager->allocationsCounter;
|
||||
|
||||
// different allocId - called
|
||||
pMockKernel->kernelArguments[0].allocId = 1;
|
||||
@ -251,6 +256,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer
|
||||
);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls);
|
||||
++mockSvmManager->allocationsCounter;
|
||||
|
||||
// allocId = 0 - called
|
||||
pMockKernel->kernelArguments[0].allocId = 0;
|
||||
@ -261,6 +267,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer
|
||||
);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls);
|
||||
++mockSvmManager->allocationsCounter;
|
||||
|
||||
// same values - not called
|
||||
retVal = clSetKernelArgSVMPointer(
|
||||
|
Reference in New Issue
Block a user