Optimize setKernelArgSVMPointer

If the same pointer is already set, we don't need to set it again.

Related-To: NEO-6737

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2022-02-25 14:28:18 +00:00
committed by Compute-Runtime-Automation
parent 22ed1be1a3
commit b9d8d8c0fd
19 changed files with 111 additions and 45 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -206,4 +206,31 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndPointerWithInvalidOffsetWhenSet
clSVMFree(pContext, ptrSvm);
}
}
// Sets the same SVM pointer on kernel argument 0 twice through the public API
// and checks that the mock kernel's setArgSvmAllocCalls counter only advances
// for the first call. The body is a no-op on devices reporting no SVM capabilities.
TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKernelArgTwiceThenSetArgSvmAllocCalledOnlyFirstTime) {
    const ClDeviceInfo &deviceInfo = pDevice->getDeviceInfo();
    if (deviceInfo.svmCapabilities == 0) {
        return;
    }

    // Nothing has been set on the kernel yet.
    EXPECT_EQ(0u, pMockKernel->setArgSvmAllocCalls);

    void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4);
    EXPECT_NE(nullptr, ptrSvm);

    // Both calls use identical arguments: kernel, arg_index 0, same arg_value.
    const auto setSameSvmArg = [&]() {
        return clSetKernelArgSVMPointer(pMockMultiDeviceKernel, 0, ptrSvm);
    };

    // First call: the argument is actually set.
    auto retVal = setSameSvmArg();
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, pMockKernel->setArgSvmAllocCalls);

    // Second call with the same pointer: still succeeds, counter stays at 1.
    retVal = setSameSvmArg();
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(1u, pMockKernel->setArgSvmAllocCalls);

    clSVMFree(pContext, ptrSvm);
}
} // namespace ULT

View File

@ -532,7 +532,7 @@ HWTEST2_P(AuxBuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalle
MockBuffer::setAllocationType(gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true);
auto ptr = reinterpret_cast<void *>(gfxAllocation->getGpuAddressToPatch());
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, gfxAllocation);
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, gfxAllocation, 0u);
gmm.reset(gfxAllocation->getDefaultGmm());
}

View File

@ -140,7 +140,7 @@ struct BlitEnqueueTests : public ::testing::Test {
for (uint32_t i = 0; i < allocs.size(); i++) {
auto alloc = allocs[i];
auto ptr = reinterpret_cast<void *>(alloc->getGpuAddressToPatch());
mockKernel->mockKernel->setArgSvmAlloc(i, ptr, alloc);
mockKernel->mockKernel->setArgSvmAlloc(i, ptr, alloc, 0u);
}
}

View File

@ -476,7 +476,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF
mockKernel.mockKernel->initialize();
mockKernel.mockKernel->auxTranslationRequired = true;
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
testing::internal::CaptureStdout();
KernelObjsForAuxTranslation kernelObjects;
@ -579,7 +579,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenKerne
mockKernel.mockKernel->initialize();
mockKernel.mockKernel->auxTranslationRequired = true;
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
testing::internal::CaptureStdout();
@ -606,7 +606,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCall
mockKernel.mockKernel->initialize();
mockKernel.mockKernel->auxTranslationRequired = true;
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
testing::internal::CaptureStdout();
@ -627,7 +627,7 @@ TEST_F(PerformanceHintTest, whenCallingFillWithKernelObjsForAuxTranslationOnNull
mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0);
mockKernel.mockKernel->initialize();
mockKernel.mockKernel->setArgSvmAlloc(0, nullptr, nullptr);
mockKernel.mockKernel->setArgSvmAlloc(0, nullptr, nullptr, 0u);
testing::internal::CaptureStdout();

View File

@ -240,7 +240,7 @@ class GivenCacheFlushAfterWalkerEnabledAndProperSteppingIsSetWhenAllocationRequi
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP));
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u);
cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false;
@ -295,7 +295,7 @@ class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequires
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP));
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0);
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
@ -353,7 +353,7 @@ class GivenCacheFlushAfterWalkerDisabledAndProperSteppingIsSetWhenAllocationRequ
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP));
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u);
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);

View File

@ -266,7 +266,7 @@ class GivenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlush
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield()));
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u);
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
@ -319,7 +319,7 @@ class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequires
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield()));
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u);
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
@ -375,7 +375,7 @@ class GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlus
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield()));
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u);
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);

View File

@ -129,6 +129,7 @@ TEST_F(CloneKernelTest, GivenUnsetArgWhenCloningKernelThenKernelInfoIsCorrect) {
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_FALSE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
}
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
@ -142,6 +143,7 @@ TEST_F(CloneKernelTest, GivenUnsetArgWhenCloningKernelThenKernelInfoIsCorrect) {
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
}
}
@ -162,6 +164,7 @@ TEST_F(CloneKernelTest, GivenArgLocalWhenCloningKernelThenKernelInfoIsCorrect) {
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
}
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
@ -175,6 +178,7 @@ TEST_F(CloneKernelTest, GivenArgLocalWhenCloningKernelThenKernelInfoIsCorrect) {
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
EXPECT_EQ(alignUp(slmSize, 1024), pClonedKernel[rootDeviceIndex]->slmTotalSize);
}
@ -200,6 +204,7 @@ TEST_F(CloneKernelTest, GivenArgBufferWhenCloningKernelThenKernelInfoIsCorrect)
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
}
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
@ -213,6 +218,7 @@ TEST_F(CloneKernelTest, GivenArgBufferWhenCloningKernelThenKernelInfoIsCorrect)
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
@ -240,6 +246,7 @@ TEST_F(CloneKernelTest, GivenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) {
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
EXPECT_EQ(CL_SUCCESS, retVal);
@ -251,6 +258,7 @@ TEST_F(CloneKernelTest, GivenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) {
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
@ -286,6 +294,7 @@ TEST_F(CloneKernelTest, GivenArgImageWhenCloningKernelThenKernelInfoIsCorrect) {
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
EXPECT_EQ(CL_SUCCESS, retVal);
@ -297,6 +306,7 @@ TEST_F(CloneKernelTest, GivenArgImageWhenCloningKernelThenKernelInfoIsCorrect) {
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());
auto &clonedArg = pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescImage>();
@ -340,6 +350,7 @@ TEST_F(CloneKernelTest, GivenArgAcceleratorWhenCloningKernelThenKernelInfoIsCorr
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
EXPECT_EQ(CL_SUCCESS, retVal);
@ -351,6 +362,7 @@ TEST_F(CloneKernelTest, GivenArgAcceleratorWhenCloningKernelThenKernelInfoIsCorr
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());
ASSERT_TRUE(pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).getExtendedTypeInfo().hasVmeExtendedDescriptor);
@ -395,6 +407,7 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect)
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
EXPECT_EQ(CL_SUCCESS, retVal);
@ -406,6 +419,7 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect)
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());
const auto &clonedArg = pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescSampler>();
@ -436,6 +450,7 @@ TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) {
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
}
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
@ -449,6 +464,7 @@ TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) {
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
@ -468,7 +484,7 @@ TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect
multiGraphicsAllocation.addAllocation(svmAlloc);
}
retVal = pSourceMultiDeviceKernel->setArgSvmAlloc(0, memory, &multiGraphicsAllocation);
retVal = pSourceMultiDeviceKernel->setArgSvmAlloc(0, memory, &multiGraphicsAllocation, 1u);
ASSERT_EQ(CL_SUCCESS, retVal);
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
@ -478,6 +494,7 @@ TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
}
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
@ -491,6 +508,7 @@ TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
@ -514,6 +532,7 @@ TEST_F(CloneKernelTest, GivenArgImmediateWhenCloningKernelThenKernelInfoIsCorrec
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
}
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
@ -527,6 +546,7 @@ TEST_F(CloneKernelTest, GivenArgImmediateWhenCloningKernelThenKernelInfoIsCorrec
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
auto pKernelArg = (TypeParam *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescValue>().elements[0].offset);

View File

@ -381,7 +381,7 @@ TEST_F(KernelArgBufferTest, givenGfxAllocationWhenHasDirectStatelessAccessToHost
for (auto pureStatefulBufferAccess : {false, true}) {
pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
@ -397,7 +397,7 @@ TEST_F(KernelArgBufferTest, givenGfxAllocationInHostMemoryWhenHasDirectStateless
for (auto pureStatefulBufferAccess : {false, true}) {
pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToHostMemory());
@ -557,7 +557,7 @@ TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccess
MockGraphicsAllocation gfxAllocation(ptr, 128);
gfxAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());
@ -577,7 +577,7 @@ TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithNoDirectStatelessAcce
void *ptr = &data;
MockGraphicsAllocation gfxAllocation(ptr, 128);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -110,7 +110,7 @@ TEST_F(KernelArgSvmTest, GivenValidSvmAllocWhenSettingKernelArgThenArgumentsAreS
MockGraphicsAllocation svmAlloc(svmPtr, 256);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
auto pKernelArg = (void **)(pKernel->getCrossThreadData() +
@ -126,7 +126,7 @@ TEST_F(KernelArgSvmTest, GivenSvmAllocWithUncacheableWhenSettingKernelArgThenKer
MockGraphicsAllocation svmAlloc(svmPtr.get(), 256);
svmAlloc.setUncacheable(true);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(pKernel->hasUncacheableStatelessArgs());
@ -138,13 +138,13 @@ TEST_F(KernelArgSvmTest, GivenSvmAllocWithoutUncacheableAndKenelWithUncachebleAr
MockGraphicsAllocation svmAlloc(svmPtr.get(), 256);
svmAlloc.setUncacheable(true);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_TRUE(pKernel->hasUncacheableStatelessArgs());
svmAlloc.setUncacheable(false);
pKernel->kernelArguments[0].isStatelessUncacheable = true;
retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc);
retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(pKernel->hasUncacheableStatelessArgs());
}
@ -155,7 +155,7 @@ HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArg
MockGraphicsAllocation svmAlloc(svmPtr, 256);
pKernelInfo->argAsPtr(0).bindful = 0;
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
@ -179,7 +179,7 @@ HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThen
MockGraphicsAllocation svmAlloc(svmPtr.get(), 256);
pKernelInfo->argAsPtr(0).bindful = 0;
pKernel->setArgSvmAlloc(0, offsetedPtr, &svmAlloc);
pKernel->setArgSvmAlloc(0, offsetedPtr, &svmAlloc, 0u);
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
@ -196,7 +196,7 @@ HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetAr
auto systemPointer = reinterpret_cast<void *>(0xfeedbac);
pKernelInfo->argAsPtr(0).bindful = 0;
pKernel->setArgSvmAlloc(0, systemPointer, nullptr);
pKernel->setArgSvmAlloc(0, systemPointer, nullptr, 0u);
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
@ -329,7 +329,7 @@ struct SetArgHandlerSetArgSvm {
struct SetArgHandlerSetArgSvmAlloc {
// Sets kernel argument `argNum` by forwarding `ptrToPatch` and the address of
// `alloc` to Kernel::setArgSvmAlloc. `allocSize` is not used in this body.
// The four-argument call passes 0u as the trailing allocation id.
static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) {
kernel.setArgSvmAlloc(argNum, ptrToPatch, &alloc);
kernel.setArgSvmAlloc(argNum, ptrToPatch, &alloc, 0u);
}
static constexpr bool supportsOffsets() {
@ -420,7 +420,7 @@ TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingAsArgThenDoNotExpe
svmAlloc.setMemObjectsAllocationWithWritableFlags(true);
svmAlloc.setFlushL3Required(false);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
@ -435,7 +435,7 @@ TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingAsArgThenExpectA
svmAlloc.setMemObjectsAllocationWithWritableFlags(false);
svmAlloc.setFlushL3Required(true);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(&svmAlloc, pKernel->kernelArgRequiresCacheFlush[0]);
@ -450,7 +450,7 @@ TEST_F(KernelArgSvmTest, givenNoCacheFlushSvmAllocationWhenSettingAsArgThenNotEx
svmAlloc.setMemObjectsAllocationWithWritableFlags(false);
svmAlloc.setFlushL3Required(false);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
@ -506,7 +506,7 @@ TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenExpectSvm
MockGraphicsAllocation svmAlloc(nullptr, reinterpret_cast<uint64_t>(svmPtr), 256);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
auto pKernelArg = (void **)(pKernel->getCrossThreadData() +

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -66,7 +66,7 @@ TEST_F(PatchedKernelTest, givenKernelWithoutAllArgsSetWhenIsPatchedIsCalledThenR
// Checks that kernel argument 0 starts out unpatched and is reported as patched
// after setArgSvmAlloc is called on it, even with a null pointer and a null allocation.
TEST_F(PatchedKernelTest, givenArgSvmAllocWhenArgIsSetThenArgIsPatched) {
EXPECT_FALSE(kernel->getKernelArguments()[0].isPatched);
kernel->setArgSvmAlloc(0, nullptr, nullptr);
kernel->setArgSvmAlloc(0, nullptr, nullptr, 0u);
EXPECT_TRUE(kernel->getKernelArguments()[0].isPatched);
}

View File

@ -273,7 +273,8 @@ TEST_F(BufferSetArgTest, GivenSvmPointerWhenSettingKernelArgThenAddressToPatchIs
retVal = pKernel->setArgSvmAlloc(
0,
ptrSVM,
pSvmAlloc);
pSvmAlloc,
0u);
ASSERT_EQ(CL_SUCCESS, retVal);
auto pKernelArg = (void **)(pKernel->getCrossThreadData() +

View File

@ -44,4 +44,9 @@ bool MockKernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) con
return false;
}
// Test hook: records each invocation in setArgSvmAllocCalls, then delegates
// unchanged to the base Kernel implementation and returns its status.
cl_int MockKernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t allocId) {
    setArgSvmAllocCalls++;
    const cl_int baseResult = Kernel::setArgSvmAlloc(argIndex, svmPtr, svmAlloc, allocId);
    return baseResult;
}
} // namespace NEO

View File

@ -245,8 +245,11 @@ class MockKernel : public Kernel {
bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const override;
cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t allocId) override;
uint32_t makeResidentCalls = 0;
uint32_t getResidencyCalls = 0;
uint32_t setArgSvmAllocCalls = 0;
bool canKernelTransformImages = true;
bool isPatchedOverride = true;