From 33e8f7377592fa6c5d23f4eb35eff709f187263d Mon Sep 17 00:00:00 2001
From: Maciej Dziuban
Date: Tue, 11 May 2021 09:23:27 +0000
Subject: [PATCH] Enable custom allocation alignments on Linux

Related-To: NEO-5750

Signed-off-by: Maciej Dziuban
---
Reviewer notes (placed after the "---" separator, so they are not part of
the commit message):

* getGpuAddress(): restored the template argument of
  static_cast<size_t>(...). The copy under review read "static_cast(...)",
  which does not compile; size_t is the type of the variable being
  initialised (customAlignment).
* Line breaks and indentation were rebuilt from a whitespace-collapsed copy
  of this patch. Added/removed/context line counts were checked against every
  hunk header and against the diffstat, but the exact whitespace of context
  lines could not be recovered. In particular, the position of the blank
  context line at the end of the heap_allocator.h hunk is a guess. If the
  patch does not apply cleanly, retry with "git apply --ignore-whitespace"
  and confirm against the tree.
* NOTE(review): the From: and Signed-off-by: lines carry no e-mail address.
  Presumably it was stripped the same way as the template argument; restore
  it before running "git am".

 ...ager_allocate_in_device_pool_tests_dg1.cpp | 81 +++++++++++++++++++
 .../test/unit_test/test_files/igdrcl.config   |  1 +
 .../debug_settings/debug_variables_base.inl   |  3 +
 shared/source/memory_manager/gfx_partition.h  |  5 ++
 ...ry_manager_allocate_in_device_pool_dg1.cpp | 12 ++-
 shared/source/utilities/heap_allocator.h      |  6 +-
 .../utilities/heap_allocator_tests.cpp        | 10 +++
 7 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp
index bd7caf3fa4..dd1bfc6856 100644
--- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp
+++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp
@@ -1025,6 +1025,87 @@ TEST_F(DrmMemoryManagerLocalMemoryTest, givenExtendedHeapPreferredAnd2MbAlignmen
     }
 }
 
+TEST_F(DrmMemoryManagerLocalMemoryTest, givenCustomAlignmentWhenAllocatingAllocationBiggerThanTheAlignmentThenAlignProperly) {
+    AllocationData allocationData;
+    allocationData.allFlags = 0;
+    allocationData.flags.allocateMemory = true;
+    allocationData.rootDeviceIndex = rootDeviceIndex;
+    allocationData.type = GraphicsAllocation::AllocationType::BUFFER;
+    allocationData.flags.resource48Bit = true;
+    MemoryManager::AllocationStatus allocationStatus;
+
+    {
+        // size==2MB, use 2MB heap
+        DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(2 * MemoryConstants::megaByte);
+        allocationData.size = 2 * MemoryConstants::megaByte;
+        auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus);
+        ASSERT_NE(nullptr, allocation);
+        EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus);
+        EXPECT_TRUE(isAllocationWithinHeap(*allocation, HeapIndex::HEAP_STANDARD2MB));
+        EXPECT_TRUE(isAligned(allocation->getGpuAddress(), 2 * MemoryConstants::megaByte));
+        memoryManager->freeGraphicsMemory(allocation);
+    }
+
+    {
+        // size > 2MB, use 2MB heap
+        DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(16 * MemoryConstants::megaByte);
+        allocationData.size = 16 * MemoryConstants::megaByte;
+        auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus);
+        ASSERT_NE(nullptr, allocation);
+        EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus);
+        EXPECT_TRUE(isAllocationWithinHeap(*allocation, HeapIndex::HEAP_STANDARD2MB));
+        EXPECT_TRUE(isAligned(allocation->getGpuAddress(), 16 * MemoryConstants::megaByte));
+        memoryManager->freeGraphicsMemory(allocation);
+    }
+
+    {
+        // size < 2MB, use 64KB heap
+        DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(8 * MemoryConstants::pageSize64k);
+        allocationData.size = 8 * MemoryConstants::pageSize64k;
+        auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus);
+        ASSERT_NE(nullptr, allocation);
+        EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus);
+        EXPECT_TRUE(isAllocationWithinHeap(*allocation, HeapIndex::HEAP_STANDARD64KB));
+        EXPECT_TRUE(isAligned(allocation->getGpuAddress(), 8 * MemoryConstants::pageSize64k));
+        memoryManager->freeGraphicsMemory(allocation);
+    }
+}
+
+TEST_F(DrmMemoryManagerLocalMemoryTest, givenCustomAlignmentWhenAllocatingAllocationLessThanTheAlignmentThenIgnoreCustomAlignment) {
+    AllocationData allocationData;
+    allocationData.allFlags = 0;
+    allocationData.size = 3 * MemoryConstants::megaByte;
+    allocationData.flags.allocateMemory = true;
+    allocationData.rootDeviceIndex = rootDeviceIndex;
+    allocationData.type = GraphicsAllocation::AllocationType::BUFFER;
+    allocationData.flags.resource48Bit = true;
+    MemoryManager::AllocationStatus allocationStatus;
+
+    {
+        // Too small allocation, fallback to 64KB heap
+        DebugManager.flags.AlignLocalMemoryVaTo2MB.set(0);
+        DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(32 * MemoryConstants::megaByte);
+        auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus);
+        ASSERT_NE(nullptr, allocation);
+        EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus);
+        EXPECT_TRUE(isAllocationWithinHeap(*allocation, HeapIndex::HEAP_STANDARD64KB));
+        EXPECT_FALSE(isAligned(allocation->getGpuAddress(), 32 * MemoryConstants::megaByte));
+        memoryManager->freeGraphicsMemory(allocation);
+    }
+
+    {
+        // Too small allocation, fallback to 2MB heap
+        DebugManager.flags.AlignLocalMemoryVaTo2MB.set(1);
+        DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.set(32 * MemoryConstants::megaByte);
+        auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus);
+        ASSERT_NE(nullptr, allocation);
+        EXPECT_EQ(MemoryManager::AllocationStatus::Success, allocationStatus);
+        EXPECT_TRUE(isAllocationWithinHeap(*allocation, HeapIndex::HEAP_STANDARD2MB));
+        EXPECT_FALSE(isAligned(allocation->getGpuAddress(), 32 * MemoryConstants::megaByte));
+        memoryManager->freeGraphicsMemory(allocation);
+    }
+}
+
 TEST_F(DrmMemoryManagerLocalMemoryTest, givenUnsupportedTypeWhenAllocatingInDevicePoolThenRetryInNonDevicePoolStatusAndNullptrIsReturned) {
     MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success;
     AllocationData allocData;
diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config
index 1271e5fb9a..ae5affc4ef 100644
--- a/opencl/test/unit_test/test_files/igdrcl.config
+++ b/opencl/test/unit_test/test_files/igdrcl.config
@@ -238,6 +238,7 @@ DebuggerLogBitmask = 0
 GTPinAllocateBufferInSharedMemory = -1
 DeferOsContextInitialization = -1
 DebuggerOptDisable = -1
+ExperimentalEnableCustomLocalMemoryAlignment = 0
 AlignLocalMemoryVaTo2MB = -1
 EngineInstancedSubDevices = 0
 OverrideTimestampPacketSize = -1
diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl
index b9c7d14412..eb210d77be 100644
--- a/shared/source/debug_settings/debug_variables_base.inl
+++ b/shared/source/debug_settings/debug_variables_base.inl
@@ -230,6 +230,9 @@ DECLARE_DEBUG_VARIABLE(int32_t, WaitLoopCount, -1, "-1: use default, >=0: number
 DECLARE_DEBUG_VARIABLE(int32_t, GTPinAllocateBufferInSharedMemory, -1, "Force GTPin to allocate buffer in shared memory")
 DECLARE_DEBUG_VARIABLE(int32_t, AlignLocalMemoryVaTo2MB, -1, "Allow 2MB pages for allocations with size>=2MB. On Linux it means aligned VA, on Windows it means aligned size. -1: default, 0: disabled, 1: enabled")
 
+/*EXPERIMENTAL TOGGLES*/
+DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalEnableCustomLocalMemoryAlignment, 0, "Align local memory allocations to a given value. Works only with allocations at least as big as the value. 0: no effect, 2097152: 2 megabytes, 1073741824: 1 gigabyte")
+
 /*DRIVER TOGGLES*/
 DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceOCL21FeaturesSupport, -1, "-1: default, 0: disable, 1:enable. Force support of OpenCL 2.0 and OpenCL 2.1 API features")
diff --git a/shared/source/memory_manager/gfx_partition.h b/shared/source/memory_manager/gfx_partition.h
index 520aeca3df..864caeb129 100644
--- a/shared/source/memory_manager/gfx_partition.h
+++ b/shared/source/memory_manager/gfx_partition.h
@@ -69,6 +69,10 @@ class GfxPartition {
         return getHeap(heapIndex).allocate(size);
     }
 
+    uint64_t heapAllocateWithCustomAlignment(HeapIndex heapIndex, size_t &size, size_t alignment) {
+        return getHeap(heapIndex).allocateWithCustomAlignment(size, alignment);
+    }
+
     MOCKABLE_VIRTUAL void heapFree(HeapIndex heapIndex, uint64_t ptr, size_t size) {
         getHeap(heapIndex).free(ptr, size);
     }
@@ -129,6 +133,7 @@ class GfxPartition {
         uint64_t getSize() const { return size; }
         uint64_t getLimit() const { return size ? base + size - 1 : 0; }
         uint64_t allocate(size_t &size) { return alloc->allocate(size); }
+        uint64_t allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment) { return alloc->allocateWithCustomAlignment(sizeToAllocate, alignment); }
         void free(uint64_t ptr, size_t size) { alloc->free(ptr, size); }
 
       protected:
diff --git a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp b/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp
index 10cd773a70..3771d8cbcc 100644
--- a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp
+++ b/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp
@@ -144,17 +144,25 @@ uint64_t getGpuAddress(HeapAssigner &heapAssigner, const HardwareInfo &hwInfo, G
         sizeAllocated = 0;
         break;
     default:
-        const bool prefer2MBAlignment = DebugManager.flags.AlignLocalMemoryVaTo2MB.get() != 0 && sizeAllocated >= 2 * MemoryConstants::megaByte;
+        const size_t customAlignment = static_cast<size_t>(DebugManager.flags.ExperimentalEnableCustomLocalMemoryAlignment.get());
+        const bool preferCustomAlignment = customAlignment > 0 && sizeAllocated >= customAlignment;
+        const bool prefer2MBAlignment = DebugManager.flags.AlignLocalMemoryVaTo2MB.get() != 0 &&
+                                        sizeAllocated >= 2 * MemoryConstants::megaByte &&
+                                        (!preferCustomAlignment || customAlignment <= 2 * MemoryConstants::megaByte);
         const bool prefer57bitAddressing = gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0 && !resource48Bit;
 
         auto heapIndex = HeapIndex::HEAP_STANDARD64KB;
+        size_t alignment = 0u;
         if (prefer2MBAlignment) {
             heapIndex = HeapIndex::HEAP_STANDARD2MB;
+        } else if (preferCustomAlignment) {
+            heapIndex = customAlignment > 2 * MemoryConstants::megaByte ? HeapIndex::HEAP_STANDARD2MB : HeapIndex::HEAP_STANDARD64KB;
+            alignment = customAlignment;
         } else if (prefer57bitAddressing) {
             heapIndex = HeapIndex::HEAP_EXTENDED;
         }
 
-        gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocate(heapIndex, sizeAllocated));
+        gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocateWithCustomAlignment(heapIndex, sizeAllocated, alignment));
         break;
     }
     return gpuAddress;
diff --git a/shared/source/utilities/heap_allocator.h b/shared/source/utilities/heap_allocator.h
index f061c9d500..ec1232aeef 100644
--- a/shared/source/utilities/heap_allocator.h
+++ b/shared/source/utilities/heap_allocator.h
@@ -40,10 +40,14 @@ class HeapAllocator {
     }
 
     uint64_t allocate(size_t &sizeToAllocate) {
-        return allocateWithCustomAlignment(sizeToAllocate, this->allocationAlignment);
+        return allocateWithCustomAlignment(sizeToAllocate, 0u);
     }
 
     uint64_t allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment) {
+        if (alignment == 0) {
+            alignment = this->allocationAlignment;
+        }
+
         UNRECOVERABLE_IF(alignment % allocationAlignment != 0); // custom alignment have to be a multiple of allocator alignment
         sizeToAllocate = alignUp(sizeToAllocate, allocationAlignment);
 
diff --git a/shared/test/unit_test/utilities/heap_allocator_tests.cpp b/shared/test/unit_test/utilities/heap_allocator_tests.cpp
index 80ba3016e3..bafe16ede5 100644
--- a/shared/test/unit_test/utilities/heap_allocator_tests.cpp
+++ b/shared/test/unit_test/utilities/heap_allocator_tests.cpp
@@ -1363,3 +1363,13 @@ TEST(HeapAllocatorTest, givenUnalignedFreedChunkAvailableWhenAllocatingMemoryWit
     EXPECT_EQ(1u, heapAllocator.getFreedChunksBig().size());
     EXPECT_EQ(heapSize - 3 * ptrSize - MemoryConstants::pageSize, heapAllocator.getavailableSize());
 }
+
+TEST(HeapAllocatorTest, givenZeroAlignmentPassedWhenAllocatingMemoryWithCustomAlignmentThenUseDefaultAllocatorAlignment) {
+    const uint64_t heapBase = 0x111111llu;
+    const size_t heapSize = 1024u * 4096u;
+    HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, 0);
+
+    size_t ptrSize = 1;
+    uint64_t ptr = heapAllocator.allocateWithCustomAlignment(ptrSize, 0u);
+    EXPECT_EQ(alignUp(heapBase, allocationAlignment), ptr);
+}