From e060536a32cfa43dff41c3e03dc5b0c20b205a86 Mon Sep 17 00:00:00 2001 From: Katarzyna Cencelewska Date: Wed, 4 Nov 2020 14:55:09 +0000 Subject: [PATCH] Add check in allocateGraphicsMemory64kb to not use cpuPtr when isRenderCompressed Related-To: NEO-4532 Signed-off-by: Katarzyna Cencelewska --- .../unit_test/gmm_helper/gmm_helper_tests.cpp | 68 +++++++++++++++++++ .../windows/mock_wddm_memory_manager.h | 16 ++++- .../windows/wddm_memory_manager_tests.cpp | 55 +++++++++++++++ shared/source/gmm_helper/gmm.cpp | 12 ++++ shared/source/gmm_helper/gmm_utils.cpp | 2 - .../source/memory_manager/memory_manager.cpp | 1 + .../windows/wddm_memory_manager.cpp | 6 +- 7 files changed, 152 insertions(+), 8 deletions(-) diff --git a/opencl/test/unit_test/gmm_helper/gmm_helper_tests.cpp b/opencl/test/unit_test/gmm_helper/gmm_helper_tests.cpp index cd4b786c47..b8da76e65c 100644 --- a/opencl/test/unit_test/gmm_helper/gmm_helper_tests.cpp +++ b/opencl/test/unit_test/gmm_helper/gmm_helper_tests.cpp @@ -45,8 +45,10 @@ struct GmmTests : public MockExecutionEnvironmentGmmFixtureTest { void SetUp() override { MockExecutionEnvironmentGmmFixture::SetUp(); rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); + localPlatformDevice = rootDeviceEnvironment->getMutableHardwareInfo(); } RootDeviceEnvironment *rootDeviceEnvironment = nullptr; + HardwareInfo *localPlatformDevice = nullptr; }; TEST(GmmGlTests, givenGmmWhenAskedforCubeFaceIndexThenProperValueIsReturned) { @@ -811,4 +813,70 @@ TEST(GmmHelperTest, givenValidGmmFunctionsWhenCreateGmmHelperWithoutOsInterfaceT EXPECT_EQ(GMM_CLIENT::GMM_OCL_VISTA, passedInputArgs.ClientType); } +using GmmCompressionTest = GmmTests; +TEST_F(GmmCompressionTest, givenEnabledAndPreferredE2ECWhenApplyingForBuffersThenSetValidFlags) { + std::unique_ptr gmm(new Gmm(getGmmClientContext(), nullptr, 1, false)); + gmm->resourceParams = {}; + + localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = true; + + gmm->applyAuxFlagsForBuffer(true); + EXPECT_EQ(1u, gmm->resourceParams.Flags.Info.RenderCompressed); + EXPECT_EQ(1u, gmm->resourceParams.Flags.Gpu.CCS); + EXPECT_EQ(1u, gmm->resourceParams.Flags.Gpu.UnifiedAuxSurface); + EXPECT_TRUE(gmm->isRenderCompressed); +} + +TEST_F(GmmCompressionTest, givenDisabledE2ECAndEnabledDebugFlagWhenApplyingForBuffersThenSetValidFlags) { + DebugManagerStateRestore restore; + Gmm gmm(getGmmClientContext(), nullptr, 1, false); + gmm.resourceParams = {}; + + DebugManager.flags.RenderCompressedBuffersEnabled.set(1); + localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = false; + + gmm.applyAuxFlagsForBuffer(true); + EXPECT_EQ(1u, gmm.resourceParams.Flags.Info.RenderCompressed); + EXPECT_EQ(1u, gmm.resourceParams.Flags.Gpu.CCS); + EXPECT_EQ(1u, gmm.resourceParams.Flags.Gpu.UnifiedAuxSurface); + EXPECT_TRUE(gmm.isRenderCompressed); + + gmm.resourceParams = {}; + gmm.isRenderCompressed = false; + DebugManager.flags.RenderCompressedBuffersEnabled.set(0); + localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = true; + + gmm.applyAuxFlagsForBuffer(true); + EXPECT_EQ(0u, gmm.resourceParams.Flags.Info.RenderCompressed); + EXPECT_EQ(0u, gmm.resourceParams.Flags.Gpu.CCS); + EXPECT_EQ(0u, gmm.resourceParams.Flags.Gpu.UnifiedAuxSurface); + EXPECT_FALSE(gmm.isRenderCompressed); +} + +TEST_F(GmmCompressionTest, givenEnabledAndNotPreferredE2ECWhenApplyingForBuffersThenDontSetValidFlags) { + std::unique_ptr gmm(new Gmm(getGmmClientContext(), nullptr, 1, false)); + gmm->resourceParams = {}; + + localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = true; + + gmm->applyAuxFlagsForBuffer(false); + EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.RenderCompressed); + EXPECT_EQ(0u, gmm->resourceParams.Flags.Gpu.CCS); + EXPECT_EQ(0u, gmm->resourceParams.Flags.Gpu.UnifiedAuxSurface); + EXPECT_FALSE(gmm->isRenderCompressed); +} + +TEST_F(GmmCompressionTest, givenDisabledAndPreferredE2ECWhenApplyingForBuffersThenDontSetValidFlags) { + std::unique_ptr gmm(new Gmm(getGmmClientContext(), nullptr, 1, false)); + gmm->resourceParams = {}; + + localPlatformDevice->capabilityTable.ftrRenderCompressedBuffers = false; + + gmm->applyAuxFlagsForBuffer(true); + EXPECT_EQ(0u, gmm->resourceParams.Flags.Info.RenderCompressed); + EXPECT_EQ(0u, gmm->resourceParams.Flags.Gpu.CCS); + EXPECT_EQ(0u, gmm->resourceParams.Flags.Gpu.UnifiedAuxSurface); + EXPECT_FALSE(gmm->isRenderCompressed); +} + } // namespace NEO diff --git a/opencl/test/unit_test/os_interface/windows/mock_wddm_memory_manager.h b/opencl/test/unit_test/os_interface/windows/mock_wddm_memory_manager.h index b3eaf9009c..7a4ac080b7 100644 --- a/opencl/test/unit_test/os_interface/windows/mock_wddm_memory_manager.h +++ b/opencl/test/unit_test/os_interface/windows/mock_wddm_memory_manager.h @@ -18,9 +18,7 @@ class MockWddmMemoryManager : public MemoryManagerCreate { using BaseClass = WddmMemoryManager; public: - using BaseClass::allocateGraphicsMemory64kb; using BaseClass::allocateGraphicsMemoryForNonSvmHostPtr; - using BaseClass::allocateGraphicsMemoryInDevicePool; using BaseClass::allocateGraphicsMemoryWithGpuVa; using BaseClass::allocateGraphicsMemoryWithProperties; using BaseClass::allocateShareableMemory; @@ -33,6 +31,18 @@ class MockWddmMemoryManager : public MemoryManagerCreate { using MemoryManagerCreate::MemoryManagerCreate; using BaseClass::getHugeGfxMemoryChunkSize; + GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override { + allocationGraphicsMemory64kbCreated = true; + return BaseClass::allocateGraphicsMemory64kb(allocationData); + } + GraphicsAllocation *allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) override { + if (allocateGraphicsMemoryInNonDevicePool) { + status = AllocationStatus::RetryInNonDevicePool; + return nullptr; + } + return BaseClass::allocateGraphicsMemoryInDevicePool(allocationData, status); + } + size_t hugeGfxMemoryChunkSize = BaseClass::getHugeGfxMemoryChunkSize(); size_t getHugeGfxMemoryChunkSize() const override { return hugeGfxMemoryChunkSize; } @@ -63,5 +73,7 @@ class MockWddmMemoryManager : public MemoryManagerCreate { } uint32_t freeGraphicsMemoryImplCalled = 0u; + bool allocationGraphicsMemory64kbCreated = false; + bool allocateGraphicsMemoryInNonDevicePool = false; }; } // namespace NEO diff --git a/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp index 7f281f2019..b9fc18c714 100644 --- a/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp @@ -35,6 +35,8 @@ #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/os_interface/windows/mock_wddm_allocation.h" +#include "mock_gmm_client_context.h" + using namespace NEO; using namespace ::testing; @@ -679,6 +681,59 @@ TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerSizeZeroWhenCreateFromShared memoryManager->freeGraphicsMemory(gpuAllocation); } +HWTEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenAllocateGraphicsMemoryWithSetAllocattionPropertisWithAllocationTypeBufferCompressedIsCalledThenIsRendeCompressedTrueAndGpuMappingIsSetWithGoodAddressRange) { + void *ptr = reinterpret_cast(0x1001); + auto size = MemoryConstants::pageSize; + HardwareInfo hwInfo = *defaultHwInfo; + hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; + rootDeviceEnvironment->setHwInfo(&hwInfo); + + auto memoryManager = std::make_unique(true, false, *executionEnvironment); + memoryManager->allocateGraphicsMemoryInNonDevicePool = true; + auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockRootDeviceIndex, true, size, GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, mockDeviceBitfield}, ptr); + + auto gfxPartition = memoryManager->getGfxPartition(mockRootDeviceIndex); + D3DGPU_VIRTUAL_ADDRESS standard64kbRangeMinimumAddress = gfxPartition->getHeapMinimalAddress(HeapIndex::HEAP_STANDARD64KB); + D3DGPU_VIRTUAL_ADDRESS standard64kbRangeMaximumAddress = gfxPartition->getHeapLimit(HeapIndex::HEAP_STANDARD64KB); + + ASSERT_NE(nullptr, allocation); + EXPECT_TRUE(memoryManager->allocationGraphicsMemory64kbCreated); + EXPECT_TRUE(allocation->getDefaultGmm()->isRenderCompressed); + if ((is32bit || rootDeviceEnvironment->isFullRangeSvm()) && + allocation->getDefaultGmm()->gmmResourceInfo->is64KBPageSuitable()) { + EXPECT_GE(GmmHelper::decanonize(allocation->getGpuAddress()), standard64kbRangeMinimumAddress); + EXPECT_LE(GmmHelper::decanonize(allocation->getGpuAddress()), standard64kbRangeMaximumAddress); + } + + memoryManager->freeGraphicsMemory(allocation); +} + +HWTEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenAllocateGraphicsMemoryWithSetAllocattionPropertisWithAllocationTypeBufferIsCalledThenIsRendeCompressedFalseAndCorrectAddressRange) { + void *ptr = reinterpret_cast(0x1001); + auto size = MemoryConstants::pageSize; + HardwareInfo hwInfo = *defaultHwInfo; + hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; + rootDeviceEnvironment->setHwInfo(&hwInfo); + + auto memoryManager = std::make_unique(false, false, *executionEnvironment); + memoryManager->allocateGraphicsMemoryInNonDevicePool = true; + auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{mockRootDeviceIndex, true, size, GraphicsAllocation::AllocationType::BUFFER, mockDeviceBitfield}, ptr); + + auto gfxPartition = memoryManager->getGfxPartition(mockRootDeviceIndex); + D3DGPU_VIRTUAL_ADDRESS svmRangeMinimumAddress = gfxPartition->getHeapMinimalAddress(HeapIndex::HEAP_SVM); + D3DGPU_VIRTUAL_ADDRESS svmRangeMaximumAddress = gfxPartition->getHeapLimit(HeapIndex::HEAP_SVM); + + ASSERT_NE(nullptr, allocation); + EXPECT_FALSE(memoryManager->allocationGraphicsMemory64kbCreated); + EXPECT_FALSE(allocation->getDefaultGmm()->isRenderCompressed); + if (is32bit || rootDeviceEnvironment->isFullRangeSvm()) { + + EXPECT_GE(GmmHelper::decanonize(allocation->getGpuAddress()), svmRangeMinimumAddress); + EXPECT_LE(GmmHelper::decanonize(allocation->getGpuAddress()), svmRangeMaximumAddress); + } + memoryManager->freeGraphicsMemory(allocation); +} + TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenCreateFromSharedHandleFailsThenReturnNull) { auto osHandle = 1u; auto size = 4096u; diff --git a/shared/source/gmm_helper/gmm.cpp b/shared/source/gmm_helper/gmm.cpp index 1ae4646b14..c37f5bc9c1 100644 --- a/shared/source/gmm_helper/gmm.cpp +++ b/shared/source/gmm_helper/gmm.cpp @@ -123,6 +123,18 @@ void Gmm::setupImageResourceParams(ImageInfo &imgInfo) { applyAuxFlagsForImage(imgInfo); } +void Gmm::applyAuxFlagsForBuffer(bool preferRenderCompression) { + bool allowRenderCompression = HwHelper::renderCompressedBuffersSupported(*clientContext->getHardwareInfo()) && + preferRenderCompression; + + if (allowRenderCompression) { + resourceParams.Flags.Info.RenderCompressed = 1; + resourceParams.Flags.Gpu.CCS = 1; + resourceParams.Flags.Gpu.UnifiedAuxSurface = 1; + isRenderCompressed = true; + } +} + void Gmm::queryImageParams(ImageInfo &imgInfo) { auto imageCount = this->gmmResourceInfo->getArraySize(); imgInfo.size = this->gmmResourceInfo->getSizeAllocation(); diff --git a/shared/source/gmm_helper/gmm_utils.cpp b/shared/source/gmm_helper/gmm_utils.cpp index a5350327c3..497e4d484e 100644 --- a/shared/source/gmm_helper/gmm_utils.cpp +++ b/shared/source/gmm_helper/gmm_utils.cpp @@ -12,6 +12,4 @@ using namespace NEO; void Gmm::applyAuxFlagsForImage(ImageInfo &imgInfo) {} -void Gmm::applyAuxFlagsForBuffer(bool preferRenderCompression) {} - void Gmm::applyMemoryFlags(bool systemMemoryPool, StorageInfo &storageInfo) { this->useSystemMemoryPool = systemMemoryPool; } diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index e1df154a11..647cc38cee 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -476,6 +476,7 @@ GraphicsAllocation *MemoryManager::allocateGraphicsMemory(const AllocationData & return allocation; } bool use32Allocator = heapAssigner.use32BitHeap(allocationData.type); + if (use32Allocator || (force32bitAllocations && allocationData.flags.allow32Bit && is64bit)) { auto hwInfo = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo(); diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index 48aee4142e..6ba6a23200 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -115,13 +115,11 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemory64kb(const Allocati return nullptr; } - auto cpuPtr = lockResource(wddmAllocation.get()); + auto cpuPtr = gmm->isRenderCompressed ? nullptr : lockResource(wddmAllocation.get()); - // 64kb map is not needed auto status = mapGpuVirtualAddress(wddmAllocation.get(), cpuPtr); DEBUG_BREAK_IF(!status); wddmAllocation->setCpuAddress(cpuPtr); - return wddmAllocation.release(); } @@ -204,7 +202,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryWithAlignment(const maxOsContextCount); wddmAllocation->setDriverAllocatedCpuPtr(pSysMem); - gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), pSysMem, sizeAligned, allocationData.flags.uncacheable); + gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), pSysMem, sizeAligned, allocationData.flags.uncacheable, allocationData.flags.preferRenderCompressed, true, allocationData.storageInfo); wddmAllocation->setDefaultGmm(gmm); void *mapPtr = wddmAllocation->getAlignedCpuPtr();