mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Optimize setKernelArgSVMPointer
If the same pointer is already set, we don't need to set it again.
Related-To: NEO-6737
Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
22ed1be1a3
commit
b9d8d8c0fd
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -206,4 +206,31 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndPointerWithInvalidOffsetWhenSet
|
||||
clSVMFree(pContext, ptrSvm);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that calling clSetKernelArgSVMPointer twice with the same SVM pointer
// for the same argument index reaches the mock kernel's setArgSvmAlloc only on
// the first call; the mock's setArgSvmAllocCalls counter is used as the probe.
TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKernelArgTwiceThenSetArgSvmAllocCalledOnlyFirstTime) {
|
||||
const ClDeviceInfo &devInfo = pDevice->getDeviceInfo();
|
||||
// The body only runs when the device reports a non-zero svmCapabilities value.
if (devInfo.svmCapabilities != 0) {
|
||||
// Baseline: no setArgSvmAlloc call has been counted yet.
EXPECT_EQ(0u, pMockKernel->setArgSvmAllocCalls);
|
||||
void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4);
|
||||
EXPECT_NE(nullptr, ptrSvm);
|
||||
|
||||
// First set of argument 0: expected to succeed and bump the counter to 1.
auto retVal = clSetKernelArgSVMPointer(
|
||||
pMockMultiDeviceKernel, // cl_kernel kernel
|
||||
0, // cl_uint arg_index
|
||||
ptrSvm // const void *arg_value
|
||||
);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(1u, pMockKernel->setArgSvmAllocCalls);
|
||||
|
||||
// Second set with the identical pointer and index: still expected to succeed,
// but the counter must stay at 1.
retVal = clSetKernelArgSVMPointer(
|
||||
pMockMultiDeviceKernel, // cl_kernel kernel
|
||||
0, // cl_uint arg_index
|
||||
ptrSvm // const void *arg_value
|
||||
);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(1u, pMockKernel->setArgSvmAllocCalls);
|
||||
|
||||
clSVMFree(pContext, ptrSvm);
|
||||
}
|
||||
}
|
||||
} // namespace ULT
|
||||
|
@ -532,7 +532,7 @@ HWTEST2_P(AuxBuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalle
|
||||
MockBuffer::setAllocationType(gfxAllocation, pDevice->getRootDeviceEnvironment().getGmmClientContext(), true);
|
||||
|
||||
auto ptr = reinterpret_cast<void *>(gfxAllocation->getGpuAddressToPatch());
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, gfxAllocation);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, gfxAllocation, 0u);
|
||||
|
||||
gmm.reset(gfxAllocation->getDefaultGmm());
|
||||
}
|
||||
|
@ -140,7 +140,7 @@ struct BlitEnqueueTests : public ::testing::Test {
|
||||
for (uint32_t i = 0; i < allocs.size(); i++) {
|
||||
auto alloc = allocs[i];
|
||||
auto ptr = reinterpret_cast<void *>(alloc->getGpuAddressToPatch());
|
||||
mockKernel->mockKernel->setArgSvmAlloc(i, ptr, alloc);
|
||||
mockKernel->mockKernel->setArgSvmAlloc(i, ptr, alloc, 0u);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -476,7 +476,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF
|
||||
|
||||
mockKernel.mockKernel->initialize();
|
||||
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
|
||||
|
||||
testing::internal::CaptureStdout();
|
||||
KernelObjsForAuxTranslation kernelObjects;
|
||||
@ -579,7 +579,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenKerne
|
||||
|
||||
mockKernel.mockKernel->initialize();
|
||||
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
|
||||
|
||||
testing::internal::CaptureStdout();
|
||||
|
||||
@ -606,7 +606,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCall
|
||||
|
||||
mockKernel.mockKernel->initialize();
|
||||
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
|
||||
|
||||
testing::internal::CaptureStdout();
|
||||
|
||||
@ -627,7 +627,7 @@ TEST_F(PerformanceHintTest, whenCallingFillWithKernelObjsForAuxTranslationOnNull
|
||||
mockKernel.kernelInfo.addArgBuffer(0, 0, 0, 0);
|
||||
|
||||
mockKernel.mockKernel->initialize();
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, nullptr, nullptr);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, nullptr, nullptr, 0u);
|
||||
|
||||
testing::internal::CaptureStdout();
|
||||
|
||||
|
@ -240,7 +240,7 @@ class GivenCacheFlushAfterWalkerEnabledAndProperSteppingIsSetWhenAllocationRequi
|
||||
|
||||
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP));
|
||||
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u);
|
||||
|
||||
cmdQ->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false;
|
||||
|
||||
@ -295,7 +295,7 @@ class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequires
|
||||
|
||||
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP));
|
||||
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0);
|
||||
|
||||
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
|
||||
|
||||
@ -353,7 +353,7 @@ class GivenCacheFlushAfterWalkerDisabledAndProperSteppingIsSetWhenAllocationRequ
|
||||
|
||||
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP));
|
||||
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u);
|
||||
|
||||
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
|
||||
|
||||
|
@ -266,7 +266,7 @@ class GivenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlush
|
||||
|
||||
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield()));
|
||||
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u);
|
||||
|
||||
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
|
||||
|
||||
@ -319,7 +319,7 @@ class GivenCacheFlushAfterWalkerAndTimestampPacketsEnabledWhenAllocationRequires
|
||||
|
||||
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield()));
|
||||
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u);
|
||||
|
||||
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
|
||||
|
||||
@ -375,7 +375,7 @@ class GivenCacheFlushAfterWalkerDisabledWhenAllocationRequiresCacheFlushThenFlus
|
||||
|
||||
mockKernel.kernelInfo.kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties(pDevice->getRootDeviceIndex(), true, MemoryConstants::pageSize, AllocationType::INTERNAL_HEAP, pDevice->getDeviceBitfield()));
|
||||
mockKernel.mockKernel->kernelArgRequiresCacheFlush.resize(1);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation);
|
||||
mockKernel.mockKernel->setArgSvmAlloc(0, svm, svmAllocation, 0u);
|
||||
|
||||
cmdQ->enqueueKernel(mockKernel, 1, nullptr, StatickSize3<16, 1, 1>(), StatickSize3<16, 1, 1>(), 0, nullptr, nullptr);
|
||||
|
||||
|
@ -129,6 +129,7 @@ TEST_F(CloneKernelTest, GivenUnsetArgWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_FALSE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
}
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
@ -142,6 +143,7 @@ TEST_F(CloneKernelTest, GivenUnsetArgWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
}
|
||||
}
|
||||
|
||||
@ -162,6 +164,7 @@ TEST_F(CloneKernelTest, GivenArgLocalWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
}
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
@ -175,6 +178,7 @@ TEST_F(CloneKernelTest, GivenArgLocalWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
EXPECT_EQ(alignUp(slmSize, 1024), pClonedKernel[rootDeviceIndex]->slmTotalSize);
|
||||
}
|
||||
@ -200,6 +204,7 @@ TEST_F(CloneKernelTest, GivenArgBufferWhenCloningKernelThenKernelInfoIsCorrect)
|
||||
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
}
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
@ -213,6 +218,7 @@ TEST_F(CloneKernelTest, GivenArgBufferWhenCloningKernelThenKernelInfoIsCorrect)
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
|
||||
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
|
||||
@ -240,6 +246,7 @@ TEST_F(CloneKernelTest, GivenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
@ -251,6 +258,7 @@ TEST_F(CloneKernelTest, GivenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
|
||||
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
|
||||
@ -286,6 +294,7 @@ TEST_F(CloneKernelTest, GivenArgImageWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
@ -297,6 +306,7 @@ TEST_F(CloneKernelTest, GivenArgImageWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());
|
||||
auto &clonedArg = pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescImage>();
|
||||
@ -340,6 +350,7 @@ TEST_F(CloneKernelTest, GivenArgAcceleratorWhenCloningKernelThenKernelInfoIsCorr
|
||||
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
@ -351,6 +362,7 @@ TEST_F(CloneKernelTest, GivenArgAcceleratorWhenCloningKernelThenKernelInfoIsCorr
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());
|
||||
ASSERT_TRUE(pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).getExtendedTypeInfo().hasVmeExtendedDescriptor);
|
||||
@ -395,6 +407,7 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect)
|
||||
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
@ -406,6 +419,7 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect)
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
auto crossThreadData = reinterpret_cast<uint32_t *>(pClonedKernel[rootDeviceIndex]->getCrossThreadData());
|
||||
const auto &clonedArg = pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescSampler>();
|
||||
@ -436,6 +450,7 @@ TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
}
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
@ -449,6 +464,7 @@ TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) {
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
|
||||
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
|
||||
@ -468,7 +484,7 @@ TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect
|
||||
multiGraphicsAllocation.addAllocation(svmAlloc);
|
||||
}
|
||||
|
||||
retVal = pSourceMultiDeviceKernel->setArgSvmAlloc(0, memory, &multiGraphicsAllocation);
|
||||
retVal = pSourceMultiDeviceKernel->setArgSvmAlloc(0, memory, &multiGraphicsAllocation, 1u);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
for (auto &rootDeviceIndex : this->context->getRootDeviceIndices()) {
|
||||
@ -478,6 +494,7 @@ TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect
|
||||
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
}
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
@ -491,6 +508,7 @@ TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
auto pKernelArg = (void **)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
|
||||
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().stateless);
|
||||
@ -514,6 +532,7 @@ TEST_F(CloneKernelTest, GivenArgImmediateWhenCloningKernelThenKernelInfoIsCorrec
|
||||
EXPECT_NE(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(1u, pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_TRUE(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(0u, pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
}
|
||||
|
||||
retVal = pClonedMultiDeviceKernel->cloneKernel(pSourceMultiDeviceKernel.get());
|
||||
@ -527,6 +546,7 @@ TEST_F(CloneKernelTest, GivenArgImmediateWhenCloningKernelThenKernelInfoIsCorrec
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).size, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).size);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getPatchedArgumentsNum(), pClonedKernel[rootDeviceIndex]->getPatchedArgumentsNum());
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched);
|
||||
EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId);
|
||||
|
||||
auto pKernelArg = (TypeParam *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() +
|
||||
pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as<ArgDescValue>().elements[0].offset);
|
||||
|
@ -381,7 +381,7 @@ TEST_F(KernelArgBufferTest, givenGfxAllocationWhenHasDirectStatelessAccessToHost
|
||||
for (auto pureStatefulBufferAccess : {false, true}) {
|
||||
pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
|
||||
@ -397,7 +397,7 @@ TEST_F(KernelArgBufferTest, givenGfxAllocationInHostMemoryWhenHasDirectStateless
|
||||
for (auto pureStatefulBufferAccess : {false, true}) {
|
||||
pKernelInfo->setBufferStateful(0, pureStatefulBufferAccess);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_EQ(!pureStatefulBufferAccess, pKernel->hasDirectStatelessAccessToHostMemory());
|
||||
@ -557,7 +557,7 @@ TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccess
|
||||
MockGraphicsAllocation gfxAllocation(ptr, 128);
|
||||
gfxAllocation.setAllocationType(AllocationType::BUFFER_HOST_MEMORY);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());
|
||||
@ -577,7 +577,7 @@ TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithNoDirectStatelessAcce
|
||||
void *ptr = &data;
|
||||
MockGraphicsAllocation gfxAllocation(ptr, 128);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, ptr, &gfxAllocation, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -110,7 +110,7 @@ TEST_F(KernelArgSvmTest, GivenValidSvmAllocWhenSettingKernelArgThenArgumentsAreS
|
||||
|
||||
MockGraphicsAllocation svmAlloc(svmPtr, 256);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto pKernelArg = (void **)(pKernel->getCrossThreadData() +
|
||||
@ -126,7 +126,7 @@ TEST_F(KernelArgSvmTest, GivenSvmAllocWithUncacheableWhenSettingKernelArgThenKer
|
||||
MockGraphicsAllocation svmAlloc(svmPtr.get(), 256);
|
||||
svmAlloc.setUncacheable(true);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_TRUE(pKernel->hasUncacheableStatelessArgs());
|
||||
@ -138,13 +138,13 @@ TEST_F(KernelArgSvmTest, GivenSvmAllocWithoutUncacheableAndKenelWithUncachebleAr
|
||||
MockGraphicsAllocation svmAlloc(svmPtr.get(), 256);
|
||||
svmAlloc.setUncacheable(true);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_TRUE(pKernel->hasUncacheableStatelessArgs());
|
||||
|
||||
svmAlloc.setUncacheable(false);
|
||||
pKernel->kernelArguments[0].isStatelessUncacheable = true;
|
||||
retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc);
|
||||
retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_FALSE(pKernel->hasUncacheableStatelessArgs());
|
||||
}
|
||||
@ -155,7 +155,7 @@ HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArg
|
||||
MockGraphicsAllocation svmAlloc(svmPtr, 256);
|
||||
|
||||
pKernelInfo->argAsPtr(0).bindful = 0;
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
|
||||
@ -179,7 +179,7 @@ HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThen
|
||||
MockGraphicsAllocation svmAlloc(svmPtr.get(), 256);
|
||||
|
||||
pKernelInfo->argAsPtr(0).bindful = 0;
|
||||
pKernel->setArgSvmAlloc(0, offsetedPtr, &svmAlloc);
|
||||
pKernel->setArgSvmAlloc(0, offsetedPtr, &svmAlloc, 0u);
|
||||
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
||||
@ -196,7 +196,7 @@ HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetAr
|
||||
auto systemPointer = reinterpret_cast<void *>(0xfeedbac);
|
||||
|
||||
pKernelInfo->argAsPtr(0).bindful = 0;
|
||||
pKernel->setArgSvmAlloc(0, systemPointer, nullptr);
|
||||
pKernel->setArgSvmAlloc(0, systemPointer, nullptr, 0u);
|
||||
|
||||
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
|
||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
||||
@ -329,7 +329,7 @@ struct SetArgHandlerSetArgSvm {
|
||||
|
||||
struct SetArgHandlerSetArgSvmAlloc {
|
||||
static void setArg(Kernel &kernel, uint32_t argNum, void *ptrToPatch, size_t allocSize, GraphicsAllocation &alloc) {
|
||||
kernel.setArgSvmAlloc(argNum, ptrToPatch, &alloc);
|
||||
kernel.setArgSvmAlloc(argNum, ptrToPatch, &alloc, 0u);
|
||||
}
|
||||
|
||||
static constexpr bool supportsOffsets() {
|
||||
@ -420,7 +420,7 @@ TEST_F(KernelArgSvmTest, givenWritableSvmAllocationWhenSettingAsArgThenDoNotExpe
|
||||
svmAlloc.setMemObjectsAllocationWithWritableFlags(true);
|
||||
svmAlloc.setFlushL3Required(false);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
|
||||
|
||||
@ -435,7 +435,7 @@ TEST_F(KernelArgSvmTest, givenCacheFlushSvmAllocationWhenSettingAsArgThenExpectA
|
||||
svmAlloc.setMemObjectsAllocationWithWritableFlags(false);
|
||||
svmAlloc.setFlushL3Required(true);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(&svmAlloc, pKernel->kernelArgRequiresCacheFlush[0]);
|
||||
|
||||
@ -450,7 +450,7 @@ TEST_F(KernelArgSvmTest, givenNoCacheFlushSvmAllocationWhenSettingAsArgThenNotEx
|
||||
svmAlloc.setMemObjectsAllocationWithWritableFlags(false);
|
||||
svmAlloc.setFlushL3Required(false);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(nullptr, pKernel->kernelArgRequiresCacheFlush[0]);
|
||||
|
||||
@ -506,7 +506,7 @@ TEST_F(KernelArgSvmTest, givenCpuAddressIsNullWhenGpuAddressIsValidThenExpectSvm
|
||||
|
||||
MockGraphicsAllocation svmAlloc(nullptr, reinterpret_cast<uint64_t>(svmPtr), 256);
|
||||
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
|
||||
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc, 0u);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto pKernelArg = (void **)(pKernel->getCrossThreadData() +
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -66,7 +66,7 @@ TEST_F(PatchedKernelTest, givenKernelWithoutAllArgsSetWhenIsPatchedIsCalledThenR
|
||||
|
||||
TEST_F(PatchedKernelTest, givenArgSvmAllocWhenArgIsSetThenArgIsPatched) {
|
||||
EXPECT_FALSE(kernel->getKernelArguments()[0].isPatched);
|
||||
kernel->setArgSvmAlloc(0, nullptr, nullptr);
|
||||
kernel->setArgSvmAlloc(0, nullptr, nullptr, 0u);
|
||||
EXPECT_TRUE(kernel->getKernelArguments()[0].isPatched);
|
||||
}
|
||||
|
||||
|
@ -273,7 +273,8 @@ TEST_F(BufferSetArgTest, GivenSvmPointerWhenSettingKernelArgThenAddressToPatchIs
|
||||
retVal = pKernel->setArgSvmAlloc(
|
||||
0,
|
||||
ptrSVM,
|
||||
pSvmAlloc);
|
||||
pSvmAlloc,
|
||||
0u);
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto pKernelArg = (void **)(pKernel->getCrossThreadData() +
|
||||
|
@ -44,4 +44,9 @@ bool MockKernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) con
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Mock override of setArgSvmAlloc: increments setArgSvmAllocCalls so tests can
// observe how many times it was invoked, then forwards all four arguments
// unchanged to Kernel::setArgSvmAlloc and returns that call's result.
cl_int MockKernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t allocId) {
|
||||
++setArgSvmAllocCalls;
|
||||
return Kernel::setArgSvmAlloc(argIndex, svmPtr, svmAlloc, allocId);
|
||||
}
|
||||
} // namespace NEO
|
||||
|
@ -245,8 +245,11 @@ class MockKernel : public Kernel {
|
||||
|
||||
bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const override;
|
||||
|
||||
cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc, uint32_t allocId) override;
|
||||
|
||||
uint32_t makeResidentCalls = 0;
|
||||
uint32_t getResidencyCalls = 0;
|
||||
uint32_t setArgSvmAllocCalls = 0;
|
||||
|
||||
bool canKernelTransformImages = true;
|
||||
bool isPatchedOverride = true;
|
||||
|
Reference in New Issue
Block a user