From e1df8f911205b68884d65c2c50aefc65c01485a8 Mon Sep 17 00:00:00 2001 From: John Falkowski Date: Thu, 30 Nov 2023 17:31:22 +0000 Subject: [PATCH] feature: add SetBOChunkingSize debug variable Resolves: NEO-9562 Signed-off-by: John Falkowski --- .../debug_settings/debug_variables_base.inl | 3 +- .../os_interface/linux/drm_memory_manager.cpp | 38 ++++++++-- .../os_interface/linux/drm_memory_manager.h | 1 + shared/test/common/test_files/igdrcl.config | 1 + .../linux/drm_memory_manager_tests.cpp | 72 +++++++++++++++++++ 5 files changed, 108 insertions(+), 7 deletions(-) diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index c6ab1c2709..bb0b24ff20 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -532,7 +532,8 @@ DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPrefetch, false, "Enables prefetchi DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingDevMemPrefetch, false, "Enables prefetching of Device Memory chunks") DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPreferredLocationHint, false, "Enables preferred location advise on chunks") DECLARE_DEBUG_VARIABLE(int32_t, EnableBOChunking, -1, "Enables use of chunking of BOs in the KMD, mask: -1 = default, 0 = no chunking, 1 = shared allocations only, 2 = multi-tile device allocations only, 3 = shared and multi-tile device allocations .") -DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use. Must be a power of two)") +DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use. Must be a power of two") +DECLARE_DEBUG_VARIABLE(int32_t, SetBOChunkingSize, -1, "Size of chunk in bytes: -1 = default, otherwise power of two chunk size in bytes") DECLARE_DEBUG_VARIABLE(int32_t, MinimalAllocationSizeForChunking, -1, "2097152: default, >0: size in B. 
Minimal size an allocation should have to use chunking.")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceAutoGrfCompilationMode, -1, "Adds build option -*-intel-enable-auto-large-GRF-mode to force kernel compilation")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")
diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp
index 0c75c00d34..3f08afc7f7 100644
--- a/shared/source/os_interface/linux/drm_memory_manager.cpp
+++ b/shared/source/os_interface/linux/drm_memory_manager.cpp
@@ -1901,12 +1901,40 @@ BufferObject *DrmMemoryManager::createBufferObjectInMemoryRegion(uint32_t rootDe
     return bo;
 }
 
+// Returns the chunk size in bytes for splitting an allocation of allocSize bytes.
+// The chunk count starts from NumberOfBOChunks; when SetBOChunkingSize is not -1 it is
+// instead allocSize divided by that size (masked down to a chunkThreshold multiple),
+// but never fewer than two. A count of one or less returns chunkThreshold as is.
+size_t DrmMemoryManager::getSizeOfChunk(size_t allocSize) {
+    const size_t minimumChunk = MemoryConstants::chunkThreshold;
+    const size_t alignMask = (~(MemoryConstants::chunkThreshold - 1));
+    // Masks a size down to a chunkThreshold multiple; a zero result becomes chunkThreshold.
+    const auto alignDown = [&](size_t bytes) { return (bytes & alignMask) ? (bytes & alignMask) : minimumChunk; };
+    size_t chunkCount = debugManager.flags.NumberOfBOChunks.get();
+    const auto requestedChunkSize = debugManager.flags.SetBOChunkingSize.get();
+    if (requestedChunkSize != -1) {
+        const size_t fittingChunks = allocSize / alignDown(requestedChunkSize);
+        chunkCount = (fittingChunks < 2) ? 2 : fittingChunks;
+    }
+    if (chunkCount <= 1) {
+        return minimumChunk;
+    }
+    size_t chunkSize = alignDown(allocSize / chunkCount);
+    chunkCount = allocSize / chunkSize;
+    // Drop chunks until the count is a power of two with an aligned size, or only two remain.
+    while ((!Math::isPow2(chunkCount) || (chunkSize & (MemoryConstants::chunkThreshold - 1))) && (chunkCount > 2)) {
+        chunkCount -= 1;
+        chunkSize = allocSize / chunkCount;
+    }
+    return chunkSize;
+}
+
 bool DrmMemoryManager::createDrmChunkedAllocation(Drm *drm, DrmAllocation *allocation, uint64_t boAddress, size_t boSize, size_t maxOsContextCount) {
     auto &storageInfo = allocation->storageInfo;
     auto memoryInfo = drm->getMemoryInfo();
     uint32_t handle = 0;
     auto memoryBanks = static_cast(storageInfo.memoryBanks.to_ulong());
-    uint32_t numOfChunks = 
debugManager.flags.NumberOfBOChunks.get(); + uint32_t numOfChunks = static_cast<uint32_t>(boSize / getSizeOfChunk(boSize)); auto gmm = allocation->getGmm(0u); auto patIndex = drm->getPatIndex(gmm, allocation->getAllocationType(), CacheRegion::Default, CachePolicy::WriteBack, false, !allocation->isAllocatedInLocalMemoryPool()); @@ -1948,9 +1976,7 @@ bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation, (drm->getChunkingMode() & 0x02)) { boTotalChunkSize = allocation->getUnderlyingBufferSize(); - - uint32_t numOfChunks = debugManager.flags.NumberOfBOChunks.get(); - size_t chunkingSize = boTotalChunkSize / numOfChunks; + size_t chunkingSize = getSizeOfChunk(boTotalChunkSize); // Do not chunk for sizes less than chunkThreshold // Do not chunk for single tile device memory @@ -2300,8 +2326,8 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const auto numHandles = GraphicsAllocation::getNumHandlesForKmdSharedAllocation(allocationData.storageInfo.getNumBanks()); bool useChunking = false; - uint32_t numOfChunks = debugManager.flags.NumberOfBOChunks.get(); - size_t chunkingSize = size / numOfChunks; + size_t chunkingSize = getSizeOfChunk(size); + uint32_t numOfChunks = static_cast<uint32_t>(size / getSizeOfChunk(size)); // Dont chunk for sizes less than chunkThreshold or if debugging is enabled if (!executionEnvironment.isDebuggingEnabled() && diff --git a/shared/source/os_interface/linux/drm_memory_manager.h b/shared/source/os_interface/linux/drm_memory_manager.h index 32d73f9de2..1b047b40c6 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.h +++ b/shared/source/os_interface/linux/drm_memory_manager.h @@ -95,6 +95,7 @@ class DrmMemoryManager : public MemoryManager { void createDeviceSpecificMemResources(uint32_t rootDeviceIndex) override; bool allowIndirectAllocationsAsPack(uint32_t rootDeviceIndex) override; Drm &getDrm(uint32_t rootDeviceIndex) const; + size_t getSizeOfChunk(size_t allocSize); protected: void 
registerSharedBoHandleAllocation(DrmAllocation *drmAllocation);
diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config
index 4c2997976e..c37876312d 100644
--- a/shared/test/common/test_files/igdrcl.config
+++ b/shared/test/common/test_files/igdrcl.config
@@ -524,6 +524,7 @@ EnableBOChunkingPrefetch = 0
 EnableBOChunkingDevMemPrefetch = 0
 EnableBOChunkingPreferredLocationHint = 0
 NumberOfBOChunks = 2
+SetBOChunkingSize = -1
 EnableBOChunking = -1
 MinimalAllocationSizeForChunking = -1
 DirectSubmissionControllerMaxTimeout = -1
diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp
index a6585fd5f7..639fd7058a 100644
--- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp
+++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp
@@ -5300,6 +5300,78 @@ TEST_F(DrmMemoryManagerTest, givenPageFaultIsSupportedWhenCallingBindBoOnAllocat
     }
 }
 
+TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenGetSizeOfChunkFor2ChunksThenActualValueReturned) {
+    // Two chunks requested for a 2 MiB allocation: each chunk is expected to be 1 MiB.
+    DebugManagerStateRestore restorer;
+    debugManager.flags.NumberOfBOChunks.set(2);
+    const size_t allocationBytes = 2097152;
+    const size_t expectedChunkBytes = 1048576;
+    EXPECT_EQ(expectedChunkBytes, memoryManager->getSizeOfChunk(allocationBytes));
+}
+
+TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenGetSizeOfChunkFor3ChunksThenCorrectedValueReturned) {
+    // Three is not a power of two, so the expectation is the two-chunk size of 1 MiB.
+    DebugManagerStateRestore restorer;
+    debugManager.flags.NumberOfBOChunks.set(3);
+    const size_t allocationBytes = 2097152;
+    const size_t expectedChunkBytes = 1048576;
+    EXPECT_EQ(expectedChunkBytes, memoryManager->getSizeOfChunk(allocationBytes));
+}
+
+TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenGetSizeOfChunkFor1ChunkThenDefaultMinimumChunkSizeReturned) {
+    // A single chunk is expected to report 65536 bytes, the default minimum named by the test.
+    DebugManagerStateRestore restorer;
+    debugManager.flags.NumberOfBOChunks.set(1);
+    const size_t allocationBytes = 2097152;
+    const size_t expectedChunkBytes = 65536;
+    EXPECT_EQ(expectedChunkBytes, memoryManager->getSizeOfChunk(allocationBytes));
+}
+
+TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenGetSizeOfChunkForTooManyChunksThenDefaultMinimumChunkSizeReturned) {
+    // 10000 chunks would make each chunk tiny; the expectation is the 65536-byte minimum instead.
+    DebugManagerStateRestore restorer;
+    debugManager.flags.NumberOfBOChunks.set(10000);
+    const size_t allocationBytes = 2097152;
+    const size_t expectedChunkBytes = 65536;
+    EXPECT_EQ(expectedChunkBytes, memoryManager->getSizeOfChunk(allocationBytes));
+}
+
+TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenSetChunkSizeThenSameSizeReturned) {
+    // An explicitly requested 65536-byte chunk size is expected to come back unchanged.
+    DebugManagerStateRestore restorer;
+    debugManager.flags.SetBOChunkingSize.set(65536);
+    const size_t allocationBytes = 2097152;
+    const size_t expectedChunkBytes = 65536;
+    EXPECT_EQ(expectedChunkBytes, memoryManager->getSizeOfChunk(allocationBytes));
+}
+
+TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenSetChunkSizeThenCorrectedSizeReturned) {
+    // 100000 is not an accepted chunk size; the expectation is a correction down to 65536.
+    DebugManagerStateRestore restorer;
+    debugManager.flags.SetBOChunkingSize.set(100000);
+    const size_t allocationBytes = 2097152;
+    const size_t expectedChunkBytes = 65536;
+    EXPECT_EQ(expectedChunkBytes, memoryManager->getSizeOfChunk(allocationBytes));
+}
+
+TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenSetChunkSizeTooLargeThenCorrectedSizeReturned) {
+    // A requested size above the 2 MiB allocation is expected to be corrected to half of it.
+    DebugManagerStateRestore restorer;
+    debugManager.flags.SetBOChunkingSize.set(4000000);
+    const size_t allocationBytes = 2097152;
+    const size_t expectedChunkBytes = 1048576;
+    EXPECT_EQ(expectedChunkBytes, memoryManager->getSizeOfChunk(allocationBytes));
+}
+
+TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenSetChunkSizeTooSmallThenCorrectedSizeReturned) {
+    // 4000 is below the minimum; the expectation is a correction up to 65536.
+    DebugManagerStateRestore restorer;
+    debugManager.flags.SetBOChunkingSize.set(4000);
+    const size_t allocationBytes = 2097152;
+    const size_t expectedChunkBytes = 65536;
+    EXPECT_EQ(expectedChunkBytes, memoryManager->getSizeOfChunk(allocationBytes));
+}
+
 TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenCreateBufferObjectInMemoryRegionIsCalledWithoutMemoryInfoThenNullBufferObjectIsReturned) {
     mock->memoryInfo.reset(nullptr);