From a34268fc549a04cb4bc10b0a9f7e446368e7a81c Mon Sep 17 00:00:00 2001 From: Igor Venevtsev Date: Wed, 29 Jan 2020 17:48:36 +0100 Subject: [PATCH] Split Standard 64KB heap for multiple root devices Resolves: NEO-4196 Change-Id: Ibd76eb2447791e34ba3e1c27bbc7f5cd09325e98 Signed-off-by: Igor Venevtsev --- core/memory_manager/gfx_partition.cpp | 8 ++-- core/memory_manager/gfx_partition.h | 4 +- .../gfx_partition_init_additional_range.cpp | 4 +- .../os_agnostic_memory_manager.cpp | 2 +- .../os_interface/linux/drm_memory_manager.cpp | 2 +- runtime/os_interface/windows/wddm/wddm.cpp | 6 ++- runtime/os_interface/windows/wddm/wddm.h | 2 +- .../windows/wddm_memory_manager.cpp | 2 +- .../memory_manager/gfx_partition_tests.inl | 42 +++++++++++++++++-- .../mocks/linux/mock_drm_memory_manager.h | 2 +- unit_tests/mocks/mock_memory_manager.h | 2 +- .../linux/drm_memory_manager_tests.cpp | 2 +- .../os_interface/windows/wddm20_tests.cpp | 22 +++++++++- 13 files changed, 79 insertions(+), 21 deletions(-) diff --git a/core/memory_manager/gfx_partition.cpp b/core/memory_manager/gfx_partition.cpp index e25ccb7604..61412ecc4d 100644 --- a/core/memory_manager/gfx_partition.cpp +++ b/core/memory_manager/gfx_partition.cpp @@ -54,7 +54,7 @@ void GfxPartition::freeGpuAddressRange(uint64_t ptr, size_t size) { } } -void GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve, uint32_t rootDeviceIndex) { +void GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve, uint32_t rootDeviceIndex, size_t numRootDevices) { /* * I. 
64-bit builds: @@ -126,7 +126,7 @@ void GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe gfxBase = 0ull; heapInit(HeapIndex::HEAP_SVM, 0ull, 0ull); } else { - initAdditionalRange(gpuAddressSpace, gfxBase, gfxTop, rootDeviceIndex); + initAdditionalRange(gpuAddressSpace, gfxBase, gfxTop, rootDeviceIndex, numRootDevices); } } @@ -140,7 +140,9 @@ void GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe heapInit(HeapIndex::HEAP_STANDARD, gfxBase, gfxStandardSize); gfxBase += gfxStandardSize; - heapInit(HeapIndex::HEAP_STANDARD64KB, gfxBase, gfxStandardSize); + // Split HEAP_STANDARD64KB among root devices + auto gfxStandard64KBSize = alignDown(gfxStandardSize / numRootDevices, GfxPartition::heapGranularity); + heapInit(HeapIndex::HEAP_STANDARD64KB, gfxBase + rootDeviceIndex * gfxStandard64KBSize, gfxStandard64KBSize); } } // namespace NEO diff --git a/core/memory_manager/gfx_partition.h b/core/memory_manager/gfx_partition.h index 39d04795e9..354133a35f 100644 --- a/core/memory_manager/gfx_partition.h +++ b/core/memory_manager/gfx_partition.h @@ -33,7 +33,7 @@ class GfxPartition { GfxPartition(); MOCKABLE_VIRTUAL ~GfxPartition(); - void init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve, uint32_t rootDeviceIndex); + void init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToReserve, uint32_t rootDeviceIndex, size_t numRootDevices); void heapInit(HeapIndex heapIndex, uint64_t base, uint64_t size) { getHeap(heapIndex).init(base, size); @@ -69,7 +69,7 @@ class GfxPartition { static const std::array heapNonSvmNames; protected: - void initAdditionalRange(uint64_t gpuAddressSpace, uint64_t &gfxBase, uint64_t &gfxTop, uint32_t rootDeviceIndex); + void initAdditionalRange(uint64_t gpuAddressSpace, uint64_t &gfxBase, uint64_t &gfxTop, uint32_t rootDeviceIndex, size_t numRootDevices); class Heap { public: diff --git a/core/memory_manager/gfx_partition_init_additional_range.cpp 
b/core/memory_manager/gfx_partition_init_additional_range.cpp index f437d23961..8eca663b4d 100644 --- a/core/memory_manager/gfx_partition_init_additional_range.cpp +++ b/core/memory_manager/gfx_partition_init_additional_range.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,7 +9,7 @@ namespace NEO { -void GfxPartition::initAdditionalRange(uint64_t gpuAddressSpace, uint64_t &gfxBase, uint64_t &gfxTop, uint32_t rootDeviceIndex) { +void GfxPartition::initAdditionalRange(uint64_t gpuAddressSpace, uint64_t &gfxBase, uint64_t &gfxTop, uint32_t rootDeviceIndex, size_t numRootDevices) { UNRECOVERABLE_IF("Invalid GPU Address Range!"); } diff --git a/runtime/memory_manager/os_agnostic_memory_manager.cpp b/runtime/memory_manager/os_agnostic_memory_manager.cpp index 38a0a78fd4..3f7cb0a81c 100644 --- a/runtime/memory_manager/os_agnostic_memory_manager.cpp +++ b/runtime/memory_manager/os_agnostic_memory_manager.cpp @@ -34,7 +34,7 @@ OsAgnosticMemoryManager::OsAgnosticMemoryManager(bool aubUsage, ExecutionEnviron size_t reservedCpuAddressRangeSize = is64bit ? (4 * 4 + 2 * (aubUsage ? 
32 : 4)) * GB : 0; for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) { - getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, reservedCpuAddressRangeSize, rootDeviceIndex); + getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, reservedCpuAddressRangeSize, rootDeviceIndex, gfxPartitions.size()); } } diff --git a/runtime/os_interface/linux/drm_memory_manager.cpp b/runtime/os_interface/linux/drm_memory_manager.cpp index f0a4f259f9..a820399d99 100644 --- a/runtime/os_interface/linux/drm_memory_manager.cpp +++ b/runtime/os_interface/linux/drm_memory_manager.cpp @@ -39,7 +39,7 @@ DrmMemoryManager::DrmMemoryManager(gemCloseWorkerMode mode, validateHostPtrMemory(validateHostPtrMemory) { auto gpuAddressSpace = executionEnvironment.getHardwareInfo()->capabilityTable.gpuAddressSpace; for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) { - getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, getSizeToReserve(), rootDeviceIndex); + getGfxPartition(rootDeviceIndex)->init(gpuAddressSpace, getSizeToReserve(), rootDeviceIndex, gfxPartitions.size()); } MemoryManager::virtualPaddingAvailable = true; if (mode != gemCloseWorkerMode::gemCloseWorkerInactive) { diff --git a/runtime/os_interface/windows/wddm/wddm.cpp b/runtime/os_interface/windows/wddm/wddm.cpp index 2f17d360eb..da2b1a41b9 100644 --- a/runtime/os_interface/windows/wddm/wddm.cpp +++ b/runtime/os_interface/windows/wddm/wddm.cpp @@ -830,7 +830,7 @@ bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredF return status == STATUS_SUCCESS; } -void Wddm::initGfxPartition(GfxPartition &outGfxPartition) const { +void Wddm::initGfxPartition(GfxPartition &outGfxPartition, uint32_t rootDeviceIndex, size_t numRootDevices) const { if (gfxPartition.SVM.Limit != 0) { outGfxPartition.heapInit(HeapIndex::HEAP_SVM, gfxPartition.SVM.Base, gfxPartition.SVM.Limit - gfxPartition.SVM.Base + 1); } else if (is32bit) { @@ -839,7 
+839,9 @@ void Wddm::initGfxPartition(GfxPartition &outGfxPartition) const { outGfxPartition.heapInit(HeapIndex::HEAP_STANDARD, gfxPartition.Standard.Base, gfxPartition.Standard.Limit - gfxPartition.Standard.Base + 1); - outGfxPartition.heapInit(HeapIndex::HEAP_STANDARD64KB, gfxPartition.Standard64KB.Base, gfxPartition.Standard64KB.Limit - gfxPartition.Standard64KB.Base + 1); + // Split HEAP_STANDARD64KB among root devices + auto gfxStandard64KBSize = alignDown((gfxPartition.Standard64KB.Limit - gfxPartition.Standard64KB.Base + 1) / numRootDevices, GfxPartition::heapGranularity); + outGfxPartition.heapInit(HeapIndex::HEAP_STANDARD64KB, gfxPartition.Standard64KB.Base + rootDeviceIndex * gfxStandard64KBSize, gfxStandard64KBSize); for (auto heap : GfxPartition::heap32Names) { outGfxPartition.heapInit(heap, gfxPartition.Heap32[static_cast(heap)].Base, diff --git a/runtime/os_interface/windows/wddm/wddm.h b/runtime/os_interface/windows/wddm/wddm.h index 90842d5082..49bc6f8ba1 100644 --- a/runtime/os_interface/windows/wddm/wddm.h +++ b/runtime/os_interface/windows/wddm/wddm.h @@ -106,7 +106,7 @@ class Wddm { return gfxPartition; } - void initGfxPartition(GfxPartition &outGfxPartition) const; + void initGfxPartition(GfxPartition &outGfxPartition, uint32_t rootDeviceIndex, size_t numRootDevices) const; const std::string &getDeviceRegistryPath() const { return deviceRegistryPath; diff --git a/runtime/os_interface/windows/wddm_memory_manager.cpp b/runtime/os_interface/windows/wddm_memory_manager.cpp index 55588d986e..8bcfc889f7 100644 --- a/runtime/os_interface/windows/wddm_memory_manager.cpp +++ b/runtime/os_interface/windows/wddm_memory_manager.cpp @@ -44,7 +44,7 @@ WddmMemoryManager::WddmMemoryManager(ExecutionEnvironment &executionEnvironment) mallocRestrictions.minAddress = 0u; for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < gfxPartitions.size(); ++rootDeviceIndex) { - getWddm(rootDeviceIndex).initGfxPartition(*getGfxPartition(rootDeviceIndex)); + 
getWddm(rootDeviceIndex).initGfxPartition(*getGfxPartition(rootDeviceIndex), rootDeviceIndex, gfxPartitions.size()); mallocRestrictions.minAddress = std::max(mallocRestrictions.minAddress, getWddm(rootDeviceIndex).getWddmMinAddress()); } } diff --git a/unit_tests/memory_manager/gfx_partition_tests.inl b/unit_tests/memory_manager/gfx_partition_tests.inl index c3d2e9f011..8f6bba0025 100644 --- a/unit_tests/memory_manager/gfx_partition_tests.inl +++ b/unit_tests/memory_manager/gfx_partition_tests.inl @@ -86,7 +86,7 @@ void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t TEST(GfxPartitionTest, testGfxPartitionFullRange48BitSVM) { MockGfxPartition gfxPartition; - gfxPartition.init(maxNBitValue(48), reservedCpuAddressRangeSize, 0); + gfxPartition.init(maxNBitValue(48), reservedCpuAddressRangeSize, 0, 1); uint64_t gfxTop = maxNBitValue(48) + 1; uint64_t gfxBase = MemoryConstants::maxSvmAddress + 1; @@ -96,7 +96,7 @@ TEST(GfxPartitionTest, testGfxPartitionFullRange48BitSVM) { TEST(GfxPartitionTest, testGfxPartitionFullRange47BitSVM) { MockGfxPartition gfxPartition; - gfxPartition.init(maxNBitValue(47), reservedCpuAddressRangeSize, 0); + gfxPartition.init(maxNBitValue(47), reservedCpuAddressRangeSize, 0, 1); uint64_t gfxBase = is32bit ? MemoryConstants::maxSvmAddress + 1 : (uint64_t)gfxPartition.getReservedCpuAddressRange(); uint64_t gfxTop = is32bit ? maxNBitValue(47) + 1 : gfxBase + gfxPartition.getReservedCpuAddressRangeSize(); @@ -107,7 +107,7 @@ TEST(GfxPartitionTest, testGfxPartitionFullRange47BitSVM) { TEST(GfxPartitionTest, testGfxPartitionLimitedRange) { MockGfxPartition gfxPartition; - gfxPartition.init(maxNBitValue(47 - 1), reservedCpuAddressRangeSize, 0); + gfxPartition.init(maxNBitValue(47 - 1), reservedCpuAddressRangeSize, 0, 1); uint64_t gfxBase = is32bit ? 
MemoryConstants::maxSvmAddress + 1 : 0ull; uint64_t gfxTop = maxNBitValue(47 - 1) + 1; @@ -122,5 +122,39 @@ TEST(GfxPartitionTest, testGfxPartitionUnsupportedRange) { } MockGfxPartition gfxPartition; - EXPECT_THROW(gfxPartition.init(maxNBitValue(48 + 1), reservedCpuAddressRangeSize, 0), std::exception); + EXPECT_THROW(gfxPartition.init(maxNBitValue(48 + 1), reservedCpuAddressRangeSize, 0, 1), std::exception); +} + +TEST(GfxPartitionTest, testGfxPartitionFullRange48BitSVMHeap64KBSplit) { + uint32_t rootDeviceIndex = 3; + size_t numRootDevices = 5; + + MockGfxPartition gfxPartition; + gfxPartition.init(maxNBitValue(48), reservedCpuAddressRangeSize, rootDeviceIndex, numRootDevices); + + uint64_t gfxBase = is32bit ? MemoryConstants::maxSvmAddress + 1 : maxNBitValue(48 - 1) + 1; + uint64_t gfxTop = maxNBitValue(48) + 1; + + auto heapStandardSize = (gfxTop - gfxBase - 4 * sizeHeap32) / 2; + auto heapStandard64KBSize = alignDown(heapStandardSize / numRootDevices, GfxPartition::heapGranularity); + + EXPECT_EQ(heapStandard64KBSize, gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB)); + EXPECT_EQ(gfxBase + 4 * sizeHeap32 + heapStandardSize + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); +} + +TEST(GfxPartitionTest, testGfxPartitionFullRange47BitSVMHeap64KBSplit) { + uint32_t rootDeviceIndex = 3; + size_t numRootDevices = 5; + + MockGfxPartition gfxPartition; + gfxPartition.init(maxNBitValue(47), reservedCpuAddressRangeSize, rootDeviceIndex, numRootDevices); + + uint64_t gfxBase = is32bit ? MemoryConstants::maxSvmAddress + 1 : (uint64_t)gfxPartition.getReservedCpuAddressRange(); + uint64_t gfxTop = is32bit ? 
maxNBitValue(47) + 1 : gfxBase + gfxPartition.getReservedCpuAddressRangeSize(); + + auto heapStandardSize = ((gfxTop - gfxBase) - 4 * sizeHeap32) / 2; + auto heapStandard64KBSize = alignDown(heapStandardSize / numRootDevices, GfxPartition::heapGranularity); + + EXPECT_EQ(heapStandard64KBSize, gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB)); + EXPECT_EQ(gfxBase + 4 * sizeHeap32 + heapStandardSize + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); } diff --git a/unit_tests/mocks/linux/mock_drm_memory_manager.h b/unit_tests/mocks/linux/mock_drm_memory_manager.h index b445ce6d42..5adda949e8 100644 --- a/unit_tests/mocks/linux/mock_drm_memory_manager.h +++ b/unit_tests/mocks/linux/mock_drm_memory_manager.h @@ -85,7 +85,7 @@ class TestedDrmMemoryManager : public MemoryManagerCreate { } DrmGemCloseWorker *getgemCloseWorker() { return this->gemCloseWorker.get(); } - void forceLimitedRangeAllocator(uint64_t range) { getGfxPartition(0)->init(range, getSizeToReserve(), 0); } + void forceLimitedRangeAllocator(uint64_t range) { getGfxPartition(0)->init(range, getSizeToReserve(), 0, 1); } void overrideGfxPartition(GfxPartition *newGfxPartition) { gfxPartitions[0].reset(newGfxPartition); } DrmAllocation *allocate32BitGraphicsMemory(size_t size, const void *ptr, GraphicsAllocation::AllocationType allocationType) { diff --git a/unit_tests/mocks/mock_memory_manager.h b/unit_tests/mocks/mock_memory_manager.h index c96f51d218..a1b4970a3f 100644 --- a/unit_tests/mocks/mock_memory_manager.h +++ b/unit_tests/mocks/mock_memory_manager.h @@ -106,7 +106,7 @@ class MockMemoryManager : public MemoryManagerCreate { GraphicsAllocation *allocate32BitGraphicsMemory(size_t size, const void *ptr, GraphicsAllocation::AllocationType allocationType); GraphicsAllocation *allocate32BitGraphicsMemoryImpl(const AllocationData &allocationData) override; - void forceLimitedRangeAllocator(uint32_t rootDeviceIndex, uint64_t range) { 
getGfxPartition(rootDeviceIndex)->init(range, 0, 0); } + void forceLimitedRangeAllocator(uint32_t rootDeviceIndex, uint64_t range) { getGfxPartition(rootDeviceIndex)->init(range, 0, 0, gfxPartitions.size()); } uint32_t freeGraphicsMemoryCalled = 0u; uint32_t unlockResourceCalled = 0u; diff --git a/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp b/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp index 1b598380f4..df89eec310 100644 --- a/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp +++ b/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp @@ -3271,7 +3271,7 @@ TEST_F(DrmMemoryManagerTest, givenSvmCpuAllocationWhenSizeAndAlignmentProvidedBu TEST_F(DrmMemoryManagerTest, DISABLED_givenDrmMemoryManagerAndReleaseGpuRangeIsCalledThenGpuAddressIsDecanonized) { auto mockGfxPartition = std::make_unique(); - mockGfxPartition->init(maxNBitValue(48), 0, 0); + mockGfxPartition->init(maxNBitValue(48), 0, 0, 1); auto size = 2 * MemoryConstants::megaByte; auto gpuAddress = mockGfxPartition->heapAllocate(HeapIndex::HEAP_STANDARD, size); auto gpuAddressCanonized = GmmHelper::canonize(gpuAddress); diff --git a/unit_tests/os_interface/windows/wddm20_tests.cpp b/unit_tests/os_interface/windows/wddm20_tests.cpp index b6f56ff894..742dfd8d8c 100644 --- a/unit_tests/os_interface/windows/wddm20_tests.cpp +++ b/unit_tests/os_interface/windows/wddm20_tests.cpp @@ -1057,7 +1057,7 @@ TEST_F(WddmGfxPartitionTest, initGfxPartition) { ASSERT_FALSE(gfxPartition.heapInitialized(heap)); } - wddm->initGfxPartition(gfxPartition); + wddm->initGfxPartition(gfxPartition, 0, 1); for (auto heap : MockGfxPartition::allHeapNames) { if (!gfxPartition.heapInitialized(heap)) { @@ -1068,6 +1068,26 @@ TEST_F(WddmGfxPartitionTest, initGfxPartition) { } } +TEST_F(WddmGfxPartitionTest, initGfxPartitionHeapStandard64KBSplit) { + struct MockWddm : public Wddm { + using Wddm::gfxPartition; + + MockWddm(RootDeviceEnvironment &rootDeviceEnvironment) : Wddm(rootDeviceEnvironment) {} 
+ }; + + MockWddm wddm(*executionEnvironment->rootDeviceEnvironments[0].get()); + + uint32_t rootDeviceIndex = 3; + size_t numRootDevices = 5; + + MockGfxPartition gfxPartition; + wddm.initGfxPartition(gfxPartition, rootDeviceIndex, numRootDevices); + + auto heapStandard64KBSize = alignDown((wddm.gfxPartition.Standard64KB.Limit - wddm.gfxPartition.Standard64KB.Base + 1) / numRootDevices, GfxPartition::heapGranularity); // per-device slice, same formula as Wddm::initGfxPartition + EXPECT_EQ(heapStandard64KBSize, gfxPartition.getHeapSize(HeapIndex::HEAP_STANDARD64KB)); + EXPECT_EQ(wddm.gfxPartition.Standard64KB.Base + rootDeviceIndex * heapStandard64KBSize, gfxPartition.getHeapBase(HeapIndex::HEAP_STANDARD64KB)); +} + TEST_F(Wddm20Tests, givenWddmWhenOpenAdapterAndForceDeviceIdIsTheSameAsTheExistingDeviceThenReturnTrue) { DebugManagerStateRestore stateRestore; DebugManager.flags.ForceDeviceId.set("1234"); // Existing device Id