From a56b4133929d2190592dc9399180aa74d05838bd Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Sat, 10 Apr 2021 01:41:54 +0000 Subject: [PATCH] Improve support for L0 uncached device allocations (2) Make sure UNCACHED flags are used in stateful paths. Related-To: NEO-5500 Signed-off-by: Jaime Arteaga --- level_zero/core/source/kernel/kernel_imp.cpp | 25 ++++++++++--------- .../unit_tests/sources/module/test_module.cpp | 14 ++++++++--- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index adb698d829..9a11498cff 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -496,19 +496,20 @@ ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg, val); if (NEO::isValidOffset(arg.bindful) || NEO::isValidOffset(arg.bindless)) { setBufferSurfaceState(argIndex, reinterpret_cast(val), allocation); - } else { - auto allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast(allocation->getGpuAddress())); - if (allocData) { - bool argWasUncacheable = isArgUncached[argIndex]; - bool argIsUncacheable = allocData->allocationFlagsProperty.flags.locallyUncachedResource; - if (argWasUncacheable == false && argIsUncacheable) { - kernelRequiresUncachedMocsCount++; - } else if (argWasUncacheable && argIsUncacheable == false) { - kernelRequiresUncachedMocsCount--; - } - this->setKernelArgUncached(argIndex, argIsUncacheable); - } } + + auto allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast(allocation->getGpuAddress())); + if (allocData) { + bool argWasUncacheable = isArgUncached[argIndex]; + bool argIsUncacheable = allocData->allocationFlagsProperty.flags.locallyUncachedResource; + if (argWasUncacheable == false && argIsUncacheable) { + kernelRequiresUncachedMocsCount++; + } else if (argWasUncacheable && argIsUncacheable == false) { + kernelRequiresUncachedMocsCount--; + } + this->setKernelArgUncached(argIndex, argIsUncacheable); + } + residencyContainer[argIndex] = allocation; return ZE_RESULT_SUCCESS; diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index e5d9593150..1f5bbbb2c6 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -141,6 +141,10 @@ HWTEST2_F(ModuleTest, givenNonPatchedTokenThenSurfaceBaseAddressIsCorrectlySet, using ModuleUncachedBufferTest = Test; +struct KernelImpUncachedTest : public KernelImp { + using KernelImp::kernelRequiresUncachedMocsCount; +}; + HWTEST2_F(ModuleUncachedBufferTest, givenKernelWithNonUncachedArgumentAndPreviouslyNotSetUncachedThenUncachedMocsNotSet, ModuleTestSupport) { ze_kernel_handle_t kernelHandle; @@ -186,7 +190,7 @@ HWTEST2_F(ModuleUncachedBufferTest, EXPECT_EQ(ZE_RESULT_SUCCESS, res); - auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); + auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); void *devicePtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; @@ -202,6 +206,7 @@ HWTEST2_F(ModuleUncachedBufferTest, uint32_t argIndex = 0u; kernelImp->setKernelArgUncached(argIndex, true); + kernelImp->kernelRequiresUncachedMocsCount++; kernelImp->setArgBufferWithAlloc(argIndex, reinterpret_cast(devicePtr), gpuAlloc); EXPECT_FALSE(kernelImp->getKernelRequiresUncachedMocs()); @@ -238,7 +243,7 @@ HWTEST2_F(ModuleUncachedBufferTest, uint32_t argIndex = 0u; kernelImp->setArgBufferWithAlloc(argIndex, reinterpret_cast(devicePtr), gpuAlloc); - EXPECT_FALSE(kernelImp->getKernelRequiresUncachedMocs()); + EXPECT_TRUE(kernelImp->getKernelRequiresUncachedMocs()); Kernel::fromHandle(kernelHandle)->destroy(); @@ -257,7 +262,7 @@ HWTEST2_F(ModuleUncachedBufferTest, EXPECT_EQ(ZE_RESULT_SUCCESS, res); - auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); + auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); void *devicePtr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; @@ -274,8 +279,9 @@ HWTEST2_F(ModuleUncachedBufferTest, uint32_t argIndex = 0u; kernelImp->setKernelArgUncached(argIndex, true); + kernelImp->kernelRequiresUncachedMocsCount++; kernelImp->setArgBufferWithAlloc(argIndex, reinterpret_cast(devicePtr), gpuAlloc); - EXPECT_FALSE(kernelImp->getKernelRequiresUncachedMocs()); + EXPECT_TRUE(kernelImp->getKernelRequiresUncachedMocs()); auto argInfo = kernelImp->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); auto surfaceStateAddressRaw = ptrOffset(kernelImp->getSurfaceStateHeapData(), argInfo.bindful);