From 75f3b416ae70c50aaf77108edd4e8e909856fbbc Mon Sep 17 00:00:00 2001 From: "Milczarek, Slawomir" Date: Wed, 24 Mar 2021 11:37:13 +0000 Subject: [PATCH] Gfx partition with 64KB and 2MB heap granularities Related-To: NEO-5507 Signed-off-by: Milczarek, Slawomir --- .../memory_manager/gfx_partition_tests.inl | 38 ++++++++++--------- .../unit_test/mocks/mock_gfx_partition.cpp | 3 +- .../os_interface/windows/wddm20_tests.cpp | 2 +- .../source/memory_manager/gfx_partition.cpp | 32 +++++++++++----- shared/source/memory_manager/gfx_partition.h | 5 ++- 5 files changed, 50 insertions(+), 30 deletions(-) diff --git a/opencl/test/unit_test/memory_manager/gfx_partition_tests.inl b/opencl/test/unit_test/memory_manager/gfx_partition_tests.inl index 92a21caa91..75ef43f1d0 100644 --- a/opencl/test/unit_test/memory_manager/gfx_partition_tests.inl +++ b/opencl/test/unit_test/memory_manager/gfx_partition_tests.inl @@ -80,17 +80,18 @@ void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t gfxBase += sizeHeap32; } - uint32_t numStandardHeaps = static_cast(HeapIndex::HEAP_STANDARD2MB) - static_cast(HeapIndex::HEAP_STANDARD) + 1; - uint64_t sizeStandard = alignDown((gfxTop - gfxBase) / numStandardHeaps, GfxPartition::heapGranularity); + constexpr uint32_t numStandardHeaps = static_cast(HeapIndex::HEAP_STANDARD2MB) - static_cast(HeapIndex::HEAP_STANDARD) + 1; + constexpr uint64_t maxStandardHeapGranularity = std::max(GfxPartition::heapGranularity, GfxPartition::heapGranularity2MB); + uint64_t maxStandardHeapSize = alignDown((gfxTop - gfxBase) / numStandardHeaps, maxStandardHeapGranularity); EXPECT_TRUE(gfxPartition.heapInitialized(HeapIndex::HEAP_STANDARD)); auto heapStandardBase = gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD); auto heapStandardSize = gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD); EXPECT_TRUE(isAligned(heapStandardBase)); EXPECT_EQ(heapStandardBase, gfxBase); - EXPECT_EQ(heapStandardSize, sizeStandard); + EXPECT_EQ(heapStandardSize, maxStandardHeapSize); - gfxBase += sizeStandard; + gfxBase += maxStandardHeapSize; EXPECT_TRUE(gfxPartition.heapInitialized(HeapIndex::HEAP_STANDARD64KB)); auto heapStandard64KbBase = gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB); auto heapStandard64KbSize = gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB); @@ -98,7 +99,7 @@ void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t EXPECT_EQ(heapStandard64KbBase, heapStandardBase + heapStandardSize); EXPECT_EQ(heapStandard64KbSize, heapStandardSize); - gfxBase += sizeStandard; + gfxBase += maxStandardHeapSize; EXPECT_TRUE(gfxPartition.heapInitialized(HeapIndex::HEAP_STANDARD2MB)); auto heapStandard2MbBase = gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD2MB); auto heapStandard2MbSize = gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD2MB); @@ -107,7 +108,7 @@ void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t EXPECT_EQ(heapStandard2MbSize, heapStandard64KbSize); EXPECT_LE(heapStandard2MbBase + heapStandard2MbSize, gfxTop); - EXPECT_LE(gfxBase + sizeStandard, gfxTop); + EXPECT_LE(gfxBase + maxStandardHeapSize, gfxTop); EXPECT_EQ(gfxPartition.getHeapMinimalAddress(HeapIndex::HEAP_INTERNAL_FRONT_WINDOW), gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL_FRONT_WINDOW)); EXPECT_EQ(gfxPartition.getHeapMinimalAddress(HeapIndex::HEAP_INTERNAL_DEVICE_FRONT_WINDOW), gfxPartition.getHeapBase(HeapIndex::HEAP_INTERNAL_DEVICE_FRONT_WINDOW)); @@ -121,6 +122,7 @@ void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t } const bool isInternalHeapType = heap == HeapIndex::HEAP_INTERNAL || heap == HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY; + const auto heapGranularity = (heap == HeapIndex::HEAP_STANDARD2MB) ? GfxPartition::heapGranularity2MB : GfxPartition::heapGranularity; if (heap == HeapIndex::HEAP_SVM) { EXPECT_EQ(gfxPartition.getHeapMinimalAddress(heap), gfxPartition.getHeapBase(heap)); @@ -128,7 +130,7 @@ void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t EXPECT_EQ(gfxPartition.getHeapMinimalAddress(heap), gfxPartition.getHeapBase(heap) + GfxPartition::internalFrontWindowPoolSize); } else { EXPECT_GT(gfxPartition.getHeapMinimalAddress(heap), gfxPartition.getHeapBase(heap)); - EXPECT_EQ(gfxPartition.getHeapMinimalAddress(heap), gfxPartition.getHeapBase(heap) + GfxPartition::heapGranularity); + EXPECT_EQ(gfxPartition.getHeapMinimalAddress(heap), gfxPartition.getHeapBase(heap) + heapGranularity); } auto ptrBig = gfxPartition.heapAllocate(heap, sizeBig); @@ -137,7 +139,7 @@ void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t if (isInternalHeapType) { EXPECT_EQ(ptrBig, gfxPartition.getHeapBase(heap) + GfxPartition::internalFrontWindowPoolSize); } else { - EXPECT_EQ(ptrBig, gfxPartition.getHeapBase(heap) + GfxPartition::heapGranularity); + EXPECT_EQ(ptrBig, gfxPartition.getHeapBase(heap) + heapGranularity); } gfxPartition.heapFree(heap, ptrBig, sizeBig); @@ -146,7 +148,7 @@ void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t EXPECT_LT(gfxPartition.getHeapBase(heap), ptrSmall); EXPECT_GT(gfxPartition.getHeapLimit(heap), ptrSmall); - EXPECT_EQ(ptrSmall, gfxPartition.getHeapBase(heap) + gfxPartition.getHeapSize(heap) - GfxPartition::heapGranularity - sizeSmall); + EXPECT_EQ(ptrSmall, gfxPartition.getHeapBase(heap) + gfxPartition.getHeapSize(heap) - heapGranularity - sizeSmall); gfxPartition.heapFree(heap, ptrSmall, sizeSmall); } @@ -213,12 +215,13 @@ TEST(GfxPartitionTest, GivenFullRange48BitSvmHeap64KbSplitWhenTestingGfxPartitio uint64_t gfxBase = is32bit ? MemoryConstants::maxSvmAddress + 1 : maxNBitValue(48 - 1) + 1; uint64_t gfxTop = maxNBitValue(48) + 1; - auto numStandardHeaps = static_cast(HeapIndex::HEAP_STANDARD2MB) - static_cast(HeapIndex::HEAP_STANDARD) + 1; - auto heapStandardSize = alignDown((gfxTop - gfxBase - 4 * sizeHeap32) / numStandardHeaps, GfxPartition::heapGranularity); - auto heapStandard64KBSize = alignDown(heapStandardSize / numRootDevices, GfxPartition::heapGranularity); + constexpr auto numStandardHeaps = static_cast(HeapIndex::HEAP_STANDARD2MB) - static_cast(HeapIndex::HEAP_STANDARD) + 1; + constexpr auto maxStandardHeapGranularity = std::max(GfxPartition::heapGranularity, GfxPartition::heapGranularity2MB); + auto maxStandardHeapSize = alignDown((gfxTop - gfxBase - 4 * sizeHeap32) / numStandardHeaps, maxStandardHeapGranularity); + auto heapStandard64KBSize = alignDown(maxStandardHeapSize / numRootDevices, GfxPartition::heapGranularity); EXPECT_EQ(heapStandard64KBSize, gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB)); - EXPECT_EQ(gfxBase + 4 * sizeHeap32 + heapStandardSize + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); + EXPECT_EQ(gfxBase + 4 * sizeHeap32 + maxStandardHeapSize + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); } TEST(GfxPartitionTest, GivenFullRange47BitSvmHeap64KbSplitWhenTestingGfxPartitionThenAllExpectationsAreMet) { @@ -231,12 +234,13 @@ TEST(GfxPartitionTest, GivenFullRange47BitSvmHeap64KbSplitWhenTestingGfxPartitio uint64_t gfxBase = is32bit ? MemoryConstants::maxSvmAddress + 1 : (uint64_t)gfxPartition.getReservedCpuAddressRange(); uint64_t gfxTop = is32bit ? maxNBitValue(47) + 1 : gfxBase + gfxPartition.getReservedCpuAddressRangeSize(); - auto numStandardHeaps = static_cast(HeapIndex::HEAP_STANDARD2MB) - static_cast(HeapIndex::HEAP_STANDARD) + 1; - auto heapStandardSize = alignDown((gfxTop - gfxBase - 4 * sizeHeap32) / numStandardHeaps, GfxPartition::heapGranularity); - auto heapStandard64KBSize = alignDown(heapStandardSize / numRootDevices, GfxPartition::heapGranularity); + constexpr auto numStandardHeaps = static_cast(HeapIndex::HEAP_STANDARD2MB) - static_cast(HeapIndex::HEAP_STANDARD) + 1; + constexpr auto maxStandardHeapGranularity = std::max(GfxPartition::heapGranularity, GfxPartition::heapGranularity2MB); + auto maxStandardHeapSize = alignDown((gfxTop - gfxBase - 4 * sizeHeap32) / numStandardHeaps, maxStandardHeapGranularity); + auto heapStandard64KBSize = alignDown(maxStandardHeapSize / numRootDevices, GfxPartition::heapGranularity); EXPECT_EQ(heapStandard64KBSize, gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB)); - EXPECT_EQ(gfxBase + 4 * sizeHeap32 + heapStandardSize + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); + EXPECT_EQ(gfxBase + 4 * sizeHeap32 + maxStandardHeapSize + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); } class MockOsMemory : public OSMemory { diff --git a/opencl/test/unit_test/mocks/mock_gfx_partition.cpp b/opencl/test/unit_test/mocks/mock_gfx_partition.cpp index d95b0693c0..753f92d1b8 100644 --- a/opencl/test/unit_test/mocks/mock_gfx_partition.cpp +++ b/opencl/test/unit_test/mocks/mock_gfx_partition.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -16,4 +16,5 @@ std::array(HeapIndex::TOTAL_HEAPS)> HeapIndex::HEAP_EXTERNAL, HeapIndex::HEAP_STANDARD, HeapIndex::HEAP_STANDARD64KB, + HeapIndex::HEAP_STANDARD2MB, HeapIndex::HEAP_SVM}}; diff --git a/opencl/test/unit_test/os_interface/windows/wddm20_tests.cpp b/opencl/test/unit_test/os_interface/windows/wddm20_tests.cpp index 48736c137e..505f32312e 100644 --- a/opencl/test/unit_test/os_interface/windows/wddm20_tests.cpp +++ b/opencl/test/unit_test/os_interface/windows/wddm20_tests.cpp @@ -1103,7 +1103,7 @@ TEST_F(WddmGfxPartitionTest, WhenInitializingGfxPartitionThenAllHeapsAreInitiali for (auto heap : MockGfxPartition::allHeapNames) { if (!gfxPartition.heapInitialized(heap)) { - EXPECT_TRUE(heap == HeapIndex::HEAP_SVM || heap == HeapIndex::HEAP_EXTENDED); + EXPECT_TRUE(heap == HeapIndex::HEAP_SVM || heap == HeapIndex::HEAP_STANDARD2MB || heap == HeapIndex::HEAP_EXTENDED); } else { EXPECT_TRUE(gfxPartition.heapInitialized(heap)); } diff --git a/shared/source/memory_manager/gfx_partition.cpp b/shared/source/memory_manager/gfx_partition.cpp index ecc71f167e..eb2a87c3ba 100644 --- a/shared/source/memory_manager/gfx_partition.cpp +++ b/shared/source/memory_manager/gfx_partition.cpp @@ -38,12 +38,17 @@ void GfxPartition::Heap::init(uint64_t base, uint64_t size, size_t allocationAli this->base = base; this->size = size; - // Exclude very first and very last 64K from GPU address range allocation - if (size > 2 * GfxPartition::heapGranularity) { - size -= 2 * GfxPartition::heapGranularity; + auto heapGranularity = GfxPartition::heapGranularity; + if (allocationAlignment > heapGranularity) { + heapGranularity = GfxPartition::heapGranularity2MB; } - alloc = std::make_unique(base + GfxPartition::heapGranularity, size, allocationAlignment); + // Exclude very first and very last 64K from GPU address range allocation + if (size > 2 * heapGranularity) { + size -= 2 * heapGranularity; + } + + alloc = std::make_unique(base + heapGranularity, size, allocationAlignment); } void GfxPartition::Heap::initExternalWithFrontWindow(uint64_t base, uint64_t size) { @@ -184,20 +189,27 @@ bool GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe gfxBase += gfxHeap32Size; } - uint32_t numStandardHeaps = static_cast(HeapIndex::HEAP_STANDARD2MB) - static_cast(HeapIndex::HEAP_STANDARD) + 1; - uint64_t gfxStandardSize = alignDown((gfxTop - gfxBase) / numStandardHeaps, heapGranularity); + constexpr uint32_t numStandardHeaps = static_cast(HeapIndex::HEAP_STANDARD2MB) - static_cast(HeapIndex::HEAP_STANDARD) + 1; + constexpr uint64_t maxStandardHeapGranularity = std::max(GfxPartition::heapGranularity, GfxPartition::heapGranularity2MB); + uint64_t maxStandardHeapSize = alignDown((gfxTop - gfxBase) / numStandardHeaps, maxStandardHeapGranularity); + auto gfxStandardSize = maxStandardHeapSize; heapInit(HeapIndex::HEAP_STANDARD, gfxBase, gfxStandardSize); - gfxBase += gfxStandardSize; + DEBUG_BREAK_IF(!isAligned(getHeapBase(HeapIndex::HEAP_STANDARD))); + + gfxBase += maxStandardHeapSize; // Split HEAP_STANDARD64K among root devices - auto gfxStandard64KBSize = alignDown(gfxStandardSize / numRootDevices, GfxPartition::heapGranularity); + auto gfxStandard64KBSize = alignDown(maxStandardHeapSize / numRootDevices, GfxPartition::heapGranularity); heapInitWithAllocationAlignment(HeapIndex::HEAP_STANDARD64KB, gfxBase + rootDeviceIndex * gfxStandard64KBSize, gfxStandard64KBSize, MemoryConstants::pageSize64k); - gfxBase += gfxStandardSize; + DEBUG_BREAK_IF(!isAligned(getHeapBase(HeapIndex::HEAP_STANDARD64KB))); + + gfxBase += maxStandardHeapSize; // Split HEAP_STANDARD2MB among root devices - auto gfxStandard2MBSize = alignDown(gfxStandardSize / numRootDevices, GfxPartition::heapGranularity); + auto gfxStandard2MBSize = alignDown(maxStandardHeapSize / numRootDevices, GfxPartition::heapGranularity2MB); heapInitWithAllocationAlignment(HeapIndex::HEAP_STANDARD2MB, gfxBase + rootDeviceIndex * gfxStandard2MBSize, gfxStandard2MBSize, 2 * MemoryConstants::megaByte); + DEBUG_BREAK_IF(!isAligned(getHeapBase(HeapIndex::HEAP_STANDARD2MB))); return true; } diff --git a/shared/source/memory_manager/gfx_partition.h b/shared/source/memory_manager/gfx_partition.h index 946fad7fc6..137475dd1d 100644 --- a/shared/source/memory_manager/gfx_partition.h +++ b/shared/source/memory_manager/gfx_partition.h @@ -98,6 +98,8 @@ class GfxPartition { } else if (heapIndex == HeapIndex::HEAP_INTERNAL || heapIndex == HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY) { return getHeapBase(heapIndex) + GfxPartition::internalFrontWindowPoolSize; + } else if (heapIndex == HeapIndex::HEAP_STANDARD2MB) { + return getHeapBase(heapIndex) + GfxPartition::heapGranularity2MB; } return getHeapBase(heapIndex) + GfxPartition::heapGranularity; } @@ -105,7 +107,8 @@ class GfxPartition { bool isLimitedRange() { return getHeap(HeapIndex::HEAP_SVM).getSize() == 0ull; } - static const uint64_t heapGranularity = 2 * MemoryConstants::megaByte; + static constexpr uint64_t heapGranularity = MemoryConstants::pageSize64k; + static constexpr uint64_t heapGranularity2MB = 2 * MemoryConstants::megaByte; static constexpr size_t externalFrontWindowPoolSize = 16 * MemoryConstants::megaByte; static constexpr size_t internalFrontWindowPoolSize = 1 * MemoryConstants::megaByte;