From d2ce3badfc191607a6c656725040278a691eda17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Zwoli=C5=84ski?= Date: Wed, 4 Dec 2024 16:21:36 +0000 Subject: [PATCH] fix: bindlessHeapsHelper handle unavailable external heap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR handles the situation in which a component has reserved a front window space for itself in the external heap, so that the Compute Runtime cannot access this area. In such a situation, we perform the following steps: 1. reserve 4GB chunk in heapStandard 2. split our chunk into 2 parts: heapFrontWindow, heapRegular 3. from this point on, map all linearStream allocations in reserved 4GB chunk Patch applies to Windows and WSL. Patch only applies when the bindless global allocator is enabled. Related-To: HSD-16025889919 Signed-off-by: Fabian Zwoliński --- .../source/helpers/bindless_heaps_helper.cpp | 97 ++++++++ shared/source/helpers/bindless_heaps_helper.h | 18 ++ .../source/memory_manager/memory_manager.cpp | 16 ++ shared/source/memory_manager/memory_manager.h | 12 + .../windows/wddm_memory_manager.cpp | 23 +- shared/source/utilities/heap_allocator.h | 4 + .../common/mocks/mock_bindless_heaps_helper.h | 11 + .../test/common/mocks/mock_memory_manager.h | 40 ++++ .../helpers/bindless_heaps_helper_tests.cpp | 220 ++++++++++++++++++ .../memory_manager/memory_manager_tests.cpp | 39 ++++ .../windows/wddm_memory_manager_tests.cpp | 104 +++++++++ .../utilities/heap_allocator_tests.cpp | 12 + 12 files changed, 589 insertions(+), 7 deletions(-) diff --git a/shared/source/helpers/bindless_heaps_helper.cpp b/shared/source/helpers/bindless_heaps_helper.cpp index 471f8cd1b2..726b0640b8 100644 --- a/shared/source/helpers/bindless_heaps_helper.cpp +++ b/shared/source/helpers/bindless_heaps_helper.cpp @@ -10,6 +10,8 @@ #include "shared/source/device/device.h" #include "shared/source/execution_environment/execution_environment.h" #include 
"shared/source/execution_environment/root_device_environment.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/driver_model_type.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/indirect_heap/indirect_heap.h" @@ -18,11 +20,32 @@ #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/os_interface/os_context.h" +#include "shared/source/os_interface/os_interface.h" +#include "shared/source/utilities/heap_allocator.h" namespace NEO { constexpr size_t globalSshAllocationSize = 4 * MemoryConstants::pageSize64k; constexpr size_t borderColorAlphaOffset = alignUp(4 * sizeof(float), MemoryConstants::cacheLineSize); + +constexpr HeapIndex heapIndexForPoolReservedRange = HeapIndex::heapStandard; +constexpr size_t reservedRangeSize = static_cast(4 * MemoryConstants::gigaByte); +constexpr size_t heapFrontWindowSize = GfxPartition::externalFrontWindowPoolSize; +constexpr size_t heapRegularSize = reservedRangeSize - heapFrontWindowSize; + +/* + * __________________________________ STANDARD __________________________________ + * / \ + * / ____________________ Reserved 4GB _____________________ \ + * / / \ \ + * / / \ \ + * |_____________|_________________|_______________________________________|____________| + * | | | | | + * heapFrontWindow heapRegular + * ^ + * reservedRangeBase + */ + using BindlesHeapType = BindlessHeapsHelper::BindlesHeapType; BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsContextCapable) : rootDevice(rootDevice), @@ -34,6 +57,14 @@ BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsConte for (auto heapType = 0; heapType < BindlesHeapType::numHeapTypes; heapType++) { auto size = MemoryConstants::pageSize64k; + + if (heapType == BindlesHeapType::specialSsh) { + if (isReservedMemoryModeAvailable() && + 
!tryReservingMemoryForSpecialSsh(size, MemoryConstants::pageSize64k)) { + useReservedMemory = initializeReservedMemory(); + } + } + auto heapAllocation = getHeapAllocation(size, MemoryConstants::pageSize64k, heapType == BindlesHeapType::specialSsh); UNRECOVERABLE_IF(heapAllocation == nullptr); ssHeapsAllocations.push_back(heapAllocation); @@ -48,12 +79,70 @@ BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsConte memcpy_s(ptrOffset(borderColorStates->getUnderlyingBuffer(), borderColorAlphaOffset), sizeof(borderColorAlpha), borderColorAlpha, sizeof(borderColorDefault)); } +std::optional BindlessHeapsHelper::reserveMemoryRange(size_t size, size_t alignment, HeapIndex heapIndex) { + RootDeviceIndicesContainer rootDeviceIndices; + rootDeviceIndices.pushUnique(rootDeviceIndex); + + uint32_t reservedOnRootDevice = 0; + auto reservedRange = memManager->reserveGpuAddressOnHeap( + 0ull, size, rootDeviceIndices, &reservedOnRootDevice, heapIndex, alignment); + + if (reservedRange.address == 0u) { + return std::nullopt; + } + + reservedRanges.push_back({reservedRange.address, reservedRange.size}); + + return reservedRange; +} + +bool BindlessHeapsHelper::tryReservingMemoryForSpecialSsh(const size_t size, size_t alignment) { + auto heapIndex = memManager->isLocalMemorySupported(rootDeviceIndex) ? 
HeapIndex::heapExternalDeviceFrontWindow : HeapIndex::heapExternalFrontWindow; + auto reservedRange = reserveMemoryRange(size, alignment, heapIndex); + return reservedRange.has_value(); +} + +bool BindlessHeapsHelper::initializeReservedMemory() { + if (reservedMemoryInitialized) { + return true; + } + + auto reservedRangeOpt = reserveMemoryRange(reservedRangeSize, MemoryConstants::pageSize64k, heapIndexForPoolReservedRange); + if (!reservedRangeOpt.has_value()) { + return false; + } + + DEBUG_BREAK_IF((reservedRangeOpt.value().address % MemoryConstants::pageSize64k) != 0); + + auto reservedRange = reservedRangeOpt.value(); + reservedRangeBase = rootDevice->getRootDeviceEnvironmentRef().getGmmHelper()->decanonize(reservedRange.address); + + heapFrontWindow = std::make_unique(reservedRangeBase, heapFrontWindowSize, MemoryConstants::pageSize64k, 0); + heapRegular = std::make_unique(reservedRangeBase + heapFrontWindowSize, heapRegularSize, MemoryConstants::pageSize64k, 0); + + memManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, true, {heapFrontWindow.get(), reservedRangeBase}); + memManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, false, {heapRegular.get(), reservedRangeBase}); + + reservedMemoryInitialized = true; + return true; +} + BindlessHeapsHelper::~BindlessHeapsHelper() { for (auto *allocation : ssHeapsAllocations) { memManager->freeGraphicsMemory(allocation); } memManager->freeGraphicsMemory(borderColorStates); ssHeapsAllocations.clear(); + + for (const auto &range : reservedRanges) { + memManager->freeGpuAddress(range, rootDeviceIndex); + } + reservedRanges.clear(); + + if (reservedMemoryInitialized) { + memManager->removeCustomHeapAllocatorConfig(AllocationType::linearStream, true); + memManager->removeCustomHeapAllocatorConfig(AllocationType::linearStream, false); + } } GraphicsAllocation *BindlessHeapsHelper::getHeapAllocation(size_t heapSize, size_t alignment, bool allocInFrontWindow) { @@ -72,6 +161,14 @@ 
GraphicsAllocation *BindlessHeapsHelper::getHeapAllocation(size_t heapSize, size return allocation; } +bool BindlessHeapsHelper::isReservedMemoryModeAvailable() { + auto osInterface = rootDevice->getRootDeviceEnvironment().osInterface.get(); + if (!osInterface) { + return false; + } + return osInterface->getDriverModel()->getDriverModelType() == NEO::DriverModelType::wddm; +} + void BindlessHeapsHelper::clearStateDirtyForContext(uint32_t osContextId) { std::lock_guard autolock(this->mtx); diff --git a/shared/source/helpers/bindless_heaps_helper.h b/shared/source/helpers/bindless_heaps_helper.h index b2ce1975a6..95414211b4 100644 --- a/shared/source/helpers/bindless_heaps_helper.h +++ b/shared/source/helpers/bindless_heaps_helper.h @@ -13,12 +13,15 @@ #include #include #include +#include #include #include namespace NEO { class IndirectHeap; +struct AddressRange; +class HeapAllocator; namespace BindlessImageSlot { constexpr uint32_t image = 0; @@ -68,6 +71,12 @@ class BindlessHeapsHelper { bool getStateDirtyForContext(uint32_t osContextId); void clearStateDirtyForContext(uint32_t osContextId); + protected: + bool tryReservingMemoryForSpecialSsh(const size_t size, size_t alignment); + std::optional reserveMemoryRange(size_t size, size_t alignment, HeapIndex heapIndex); + bool initializeReservedMemory(); + bool isReservedMemoryModeAvailable(); + protected: Device *rootDevice = nullptr; const size_t surfaceStateSize; @@ -89,5 +98,14 @@ class BindlessHeapsHelper { std::mutex mtx; DeviceBitfield deviceBitfield; bool globalBindlessDsh = false; + + bool useReservedMemory = false; + bool reservedMemoryInitialized = false; + uint64_t reservedRangeBase = 0; + + std::unique_ptr heapFrontWindow; + std::unique_ptr heapRegular; + + std::vector reservedRanges; }; } // namespace NEO diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 4c86dc0444..2b66c0c8cc 100644 --- a/shared/source/memory_manager/memory_manager.cpp 
+++ b/shared/source/memory_manager/memory_manager.cpp @@ -1220,4 +1220,20 @@ bool MemoryManager::usmCompressionSupported(Device *device) { return gfxCoreHelper.usmCompressionSupported(hwInfo); } +void MemoryManager::addCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool, const CustomHeapAllocatorConfig &config) { + customHeapAllocators[{allocationType, isFrontWindowPool}] = config; +} + +std::optional> MemoryManager::getCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool) { + auto it = customHeapAllocators.find({allocationType, isFrontWindowPool}); + if (it != customHeapAllocators.end()) { + return it->second; + } + return std::nullopt; +} + +void MemoryManager::removeCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool) { + customHeapAllocators.erase({allocationType, isFrontWindowPool}); +} + } // namespace NEO diff --git a/shared/source/memory_manager/memory_manager.h b/shared/source/memory_manager/memory_manager.h index f4bfee0192..ac7bdb0a05 100644 --- a/shared/source/memory_manager/memory_manager.h +++ b/shared/source/memory_manager/memory_manager.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,7 @@ class Gmm; class HostPtrManager; class OsContext; class PrefetchManager; +class HeapAllocator; enum AllocationUsage { TEMPORARY_ALLOCATION, @@ -81,6 +83,11 @@ struct VirtualMemoryReservation { size_t reservationTotalSize; }; +struct CustomHeapAllocatorConfig { + HeapAllocator *allocator = nullptr; + uint64_t gpuVaBase = std::numeric_limits::max(); +}; + constexpr size_t paddingBufferSize = 2 * MemoryConstants::megaByte; namespace MemoryTransferHelper { @@ -342,6 +349,10 @@ class MemoryManager { return hostAllocationsSavedForReuseSize; } + void addCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool, const CustomHeapAllocatorConfig &config); + std::optional> getCustomHeapAllocatorConfig(AllocationType allocationType, 
bool isFrontWindowPool); + void removeCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool); + protected: bool getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo); static void overrideAllocationData(AllocationData &allocationData, const AllocationProperties &properties); @@ -416,6 +427,7 @@ class MemoryManager { std::atomic sysMemAllocsSize; size_t hostAllocationsSavedForReuseSize = 0u; mutable std::mutex hostAllocationsReuseMtx; + std::map, CustomHeapAllocatorConfig> customHeapAllocators; }; std::unique_ptr createDeferredDeleter(); diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index 9e7c929770..3c162f68ca 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -40,6 +40,7 @@ #include "shared/source/os_interface/windows/wddm_allocation.h" #include "shared/source/os_interface/windows/wddm_residency_allocations_container.h" #include "shared/source/os_interface/windows/wddm_residency_controller.h" +#include "shared/source/utilities/heap_allocator.h" #include "shared/source/utilities/logger_neo_only.h" #include @@ -587,10 +588,8 @@ GraphicsAllocation *WddmMemoryManager::allocate32BitGraphicsMemoryImpl(const All freeSystemMemory(pSysMem); return nullptr; } - auto baseAddress = getGfxPartition(allocationData.rootDeviceIndex)->getHeapBase(heapAssigners[allocationData.rootDeviceIndex]->get32BitHeapIndex(allocationData.type, false, *hwInfo, allocationData.flags.use32BitFrontWindow)); - UNRECOVERABLE_IF(gmmHelper->canonize(baseAddress) != wddmAllocation->getGpuBaseAddress()); - - wddmAllocation->setGpuBaseAddress(gmmHelper->canonize(baseAddress)); + [[maybe_unused]] auto baseAddress = 
getGfxPartition(allocationData.rootDeviceIndex)->getHeapBase(heapAssigners[allocationData.rootDeviceIndex]->get32BitHeapIndex(allocationData.type, false, *hwInfo, allocationData.flags.use32BitFrontWindow)); + DEBUG_BREAK_IF(gmmHelper->canonize(baseAddress) != wddmAllocation->getGpuBaseAddress()); if (storageInfo.isLockable) { auto lockedPtr = lockResource(wddmAllocation.get()); @@ -1045,9 +1044,16 @@ bool WddmMemoryManager::mapGpuVaForOneHandleAllocation(WddmAllocation *allocatio if (allocation->getReservedGpuVirtualAddress()) { addressToMap = allocation->getReservedGpuVirtualAddress(); } + + auto customHeapAllocatorCfg = getCustomHeapAllocatorConfig(allocation->getAllocationType(), allocation->isAllocInFrontWindowPool()); + auto gfxPartition = getGfxPartition(allocation->getRootDeviceIndex()); - if (allocation->isAllocInFrontWindowPool()) { - auto alignedSize = allocation->getAlignedSize(); + auto alignedSize = allocation->getAlignedSize(); + + if (customHeapAllocatorCfg.has_value()) { + auto &customRange = customHeapAllocatorCfg.value().get(); + addressToMap = customRange.allocator->allocateWithCustomAlignment(alignedSize, MemoryConstants::pageSize64k); + } else if (allocation->isAllocInFrontWindowPool()) { addressToMap = gfxPartition->heapAllocate(heapIndex, alignedSize); } @@ -1067,7 +1073,10 @@ bool WddmMemoryManager::mapGpuVaForOneHandleAllocation(WddmAllocation *allocatio return false; } - if (GfxPartition::isAnyHeap32(heapIndex)) { + if (auto config = customHeapAllocatorCfg; config.has_value() && config->get().gpuVaBase != std::numeric_limits::max()) { + auto gmmHelper = getGmmHelper(allocation->getRootDeviceIndex()); + allocation->setGpuBaseAddress(gmmHelper->canonize(config->get().gpuVaBase)); + } else if (GfxPartition::isAnyHeap32(heapIndex)) { auto gmmHelper = getGmmHelper(allocation->getRootDeviceIndex()); allocation->setGpuBaseAddress(gmmHelper->canonize(gfxPartition->getHeapBase(heapIndex))); } diff --git a/shared/source/utilities/heap_allocator.h 
b/shared/source/utilities/heap_allocator.h index c6d3a30360..669dd6321d 100644 --- a/shared/source/utilities/heap_allocator.h +++ b/shared/source/utilities/heap_allocator.h @@ -58,6 +58,10 @@ class HeapAllocator { double getUsage() const; + uint64_t getBaseAddress() const { + return this->pLeftBound; + } + protected: const uint64_t size; uint64_t availableSize; diff --git a/shared/test/common/mocks/mock_bindless_heaps_helper.h b/shared/test/common/mocks/mock_bindless_heaps_helper.h index c979f16d22..56bdfcf6ea 100644 --- a/shared/test/common/mocks/mock_bindless_heaps_helper.h +++ b/shared/test/common/mocks/mock_bindless_heaps_helper.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/helpers/bindless_heaps_helper.h" +#include "shared/source/memory_manager/memory_manager.h" using namespace NEO; @@ -34,9 +35,17 @@ class MockBindlesHeapsHelper : public BindlessHeapsHelper { using BaseClass::borderColorStates; using BaseClass::globalBindlessDsh; using BaseClass::growHeap; + using BaseClass::heapFrontWindow; + using BaseClass::heapRegular; + using BaseClass::initializeReservedMemory; using BaseClass::isMultiOsContextCapable; + using BaseClass::isReservedMemoryModeAvailable; using BaseClass::memManager; using BaseClass::releasePoolIndex; + using BaseClass::reservedMemoryInitialized; + using BaseClass::reservedRangeBase; + using BaseClass::reservedRanges; + using BaseClass::reserveMemoryRange; using BaseClass::reuseSlotCountThreshold; using BaseClass::rootDeviceIndex; using BaseClass::ssHeapsAllocations; @@ -44,6 +53,8 @@ class MockBindlesHeapsHelper : public BindlessHeapsHelper { using BaseClass::surfaceStateHeaps; using BaseClass::surfaceStateInHeapVectorReuse; using BaseClass::surfaceStateSize; + using BaseClass::tryReservingMemoryForSpecialSsh; + using BaseClass::useReservedMemory; IndirectHeap *specialSsh; IndirectHeap *globalSsh; diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h index 
92d401cb4b..2707b29ee6 100644 --- a/shared/test/common/mocks/mock_memory_manager.h +++ b/shared/test/common/mocks/mock_memory_manager.h @@ -52,6 +52,7 @@ class MockMemoryManager : public MemoryManagerCreate { using MemoryManager::useNonSvmHostPtrAlloc; using OsAgnosticMemoryManager::allocateGraphicsMemoryForImageFromHostPtr; using MemoryManagerCreate::MemoryManagerCreate; + using MemoryManager::customHeapAllocators; using MemoryManager::enable64kbpages; using MemoryManager::executionEnvironment; using MemoryManager::getPreferredAllocationMethod; @@ -102,6 +103,41 @@ class MockMemoryManager : public MemoryManagerCreate { OsAgnosticMemoryManager::freeGraphicsMemoryImpl(gfxAllocation); }; + AddressRange reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, RootDeviceIndicesContainer rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) override { + reserveGpuAddressOnHeapCalled++; + reserveGpuAddressOnHeapParamsPassed.push_back({requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, heap, alignment}); + + bool shouldFail = failReserveGpuAddressOnHeap; + if (!reserveGpuAddressOnHeapFailOnCalls.empty() && + std::find(reserveGpuAddressOnHeapFailOnCalls.begin(), reserveGpuAddressOnHeapFailOnCalls.end(), reserveGpuAddressOnHeapCalled - 1) != reserveGpuAddressOnHeapFailOnCalls.end()) { + shouldFail = true; + } + + if (shouldFail) { + reserveGpuAddressOnHeapResult = AddressRange{0u, 0u}; + } else { + reserveGpuAddressOnHeapResult = OsAgnosticMemoryManager::reserveGpuAddressOnHeap(requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, heap, alignment); + } + return reserveGpuAddressOnHeapResult; + } + + struct ReserveGpuAddressOnHeapParams { + uint64_t requiredStartAddress{}; + size_t size{}; + RootDeviceIndicesContainer rootDeviceIndices{}; + uint32_t *reservedOnRootDeviceIndex{}; + HeapIndex heap{}; + size_t alignment{}; + }; + + StackVec reserveGpuAddressOnHeapParamsPassed{}; + 
StackVec reserveGpuAddressOnHeapFailOnCalls; + + void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override { + freeGpuAddressCalled++; + return OsAgnosticMemoryManager::freeGpuAddress(addressRange, rootDeviceIndex); + } + void *lockResourceImpl(GraphicsAllocation &gfxAllocation) override { lockResourceCalled++; void *pLockedMemory = nullptr; @@ -258,6 +294,8 @@ class MockMemoryManager : public MemoryManagerCreate { uint32_t populateOsHandlesCalled = 0u; uint32_t allocateGraphicsMemoryForNonSvmHostPtrCalled = 0u; uint32_t freeGraphicsMemoryCalled = 0u; + uint32_t reserveGpuAddressOnHeapCalled = 0u; + uint32_t freeGpuAddressCalled = 0u; uint32_t unlockResourceCalled = 0u; uint32_t lockResourceCalled = 0u; uint32_t createGraphicsAllocationFromExistingStorageCalled = 0u; @@ -284,6 +322,7 @@ class MockMemoryManager : public MemoryManagerCreate { bool allocate32BitGraphicsMemoryImplCalled = false; bool allocateForShareableCalled = false; bool failReserveAddress = false; + bool failReserveGpuAddressOnHeap = false; bool failAllocateSystemMemory = false; bool failAllocate32Bit = false; bool failLockResource = false; @@ -314,6 +353,7 @@ class MockMemoryManager : public MemoryManagerCreate { GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtrResult = nullptr; std::unique_ptr lastAllocationProperties = nullptr; std::function validateAllocateProperties = [](const AllocationProperties &) -> void {}; + AddressRange reserveGpuAddressOnHeapResult = AddressRange{0u, 0u}; }; class MockAllocSysMemAgnosticMemoryManager : public OsAgnosticMemoryManager { diff --git a/shared/test/unit_test/helpers/bindless_heaps_helper_tests.cpp b/shared/test/unit_test/helpers/bindless_heaps_helper_tests.cpp index 4860e8e368..dfe1d01caf 100644 --- a/shared/test/unit_test/helpers/bindless_heaps_helper_tests.cpp +++ b/shared/test/unit_test/helpers/bindless_heaps_helper_tests.cpp @@ -5,12 +5,16 @@ * */ +#include "shared/source/gmm_helper/gmm_helper.h" #include 
"shared/source/helpers/bindless_heaps_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/gfx_partition.h" +#include "shared/source/utilities/heap_allocator.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_bindless_heaps_helper.h" #include "shared/test/common/mocks/mock_device.h" +#include "shared/test/common/mocks/mock_driver_model.h" +#include "shared/test/common/mocks/mock_gfx_partition.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" @@ -557,3 +561,219 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenItsCreatedThenSshAll EXPECT_EQ(memoryOperationsIface->isResident(getDevice(), *allocation), MemoryOperationsStatus::success); } } + +TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenDriverModelWDDMThenReservedMemoryModeIsAvailable) { + auto bindlessHeapHelper = std::make_unique(getDevice(), false); + + getDevice()->getRootDeviceEnvironmentRef().osInterface.reset(new NEO::OSInterface()); + + getDevice()->getRootDeviceEnvironmentRef().osInterface->setDriverModel(std::make_unique()); + EXPECT_TRUE(bindlessHeapHelper->isReservedMemoryModeAvailable()); + + getDevice()->getRootDeviceEnvironmentRef().osInterface->setDriverModel(std::make_unique()); + EXPECT_FALSE(bindlessHeapHelper->isReservedMemoryModeAvailable()); +} + +TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenSuccessfullyReservingMemoryRangeThenRangeIsReservedAndStoredAndFreed) { + auto bindlessHeapHelper = std::make_unique(getDevice(), false); + + size_t reservationSize = 1 * MemoryConstants::gigaByte; + size_t alignment = MemoryConstants::pageSize64k; + HeapIndex heapIndex = HeapIndex::heapStandard; + + auto reservedRange = bindlessHeapHelper->reserveMemoryRange(reservationSize, alignment, heapIndex); + 
ASSERT_TRUE(reservedRange.has_value()); + + EXPECT_EQ(reservationSize, reservedRange->size); + + EXPECT_EQ(1u, bindlessHeapHelper->reservedRanges.size()); + EXPECT_EQ(bindlessHeapHelper->reservedRanges[0].address, reservedRange->address); + EXPECT_EQ(bindlessHeapHelper->reservedRanges[0].size, reservedRange->size); + + EXPECT_EQ(1u, memManager->reserveGpuAddressOnHeapCalled); + + bindlessHeapHelper.reset(); + + EXPECT_EQ(1u, memManager->freeGpuAddressCalled); +} + +TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenUnsuccessfullyReservingMemoryRangeThenNoValueIsReturned) { + auto bindlessHeapHelper = std::make_unique(getDevice(), false); + + size_t reservationSize = 1 * MemoryConstants::gigaByte; + size_t alignment = MemoryConstants::pageSize64k; + HeapIndex heapIndex = HeapIndex::heapStandard; + + memManager->failReserveGpuAddressOnHeap = true; + + auto reservedRange = bindlessHeapHelper->reserveMemoryRange(reservationSize, alignment, heapIndex); + EXPECT_FALSE(reservedRange.has_value()); + + EXPECT_EQ(0u, bindlessHeapHelper->reservedRanges.size()); + EXPECT_EQ(1u, memManager->reserveGpuAddressOnHeapCalled); + + bindlessHeapHelper.reset(); + + EXPECT_EQ(0u, memManager->freeGpuAddressCalled); +} + +TEST_F(BindlessHeapsHelperTests, givenLocalMemorySupportWhenReservingMemoryForSpecialSshThenCorrectHeapIsUsed) { + auto gfxPartition = std::make_unique(); + gfxPartition->callHeapAllocate = false; + memManager->gfxPartitions[0] = std::move(gfxPartition); + + std::map localMemSupportedToExpectedHeapIndexMap = { + {false, HeapIndex::heapExternalFrontWindow}, + {true, HeapIndex::heapExternalDeviceFrontWindow}}; + + size_t currentIter = 0u; + + for (auto &[localMemSupported, expectedHeapIndex] : localMemSupportedToExpectedHeapIndexMap) { + auto bindlessHeapHelper = std::make_unique(getDevice(), false); + + size_t reservationSize = MemoryConstants::pageSize64k; + size_t alignment = MemoryConstants::pageSize64k; + + memManager->localMemorySupported = 
{localMemSupported}; + + auto specialSshReservationSuccessful = bindlessHeapHelper->tryReservingMemoryForSpecialSsh(reservationSize, alignment); + EXPECT_TRUE(specialSshReservationSuccessful); + + auto &reserveGpuAddressOnHeapParamsPassed = memManager->reserveGpuAddressOnHeapParamsPassed; + ASSERT_GE(reserveGpuAddressOnHeapParamsPassed.size(), currentIter + 1); + EXPECT_EQ(expectedHeapIndex, reserveGpuAddressOnHeapParamsPassed[currentIter].heap); + + EXPECT_EQ(1u, bindlessHeapHelper->reservedRanges.size()); + EXPECT_EQ(currentIter + 1u, memManager->reserveGpuAddressOnHeapCalled); + + bindlessHeapHelper.reset(); + + EXPECT_EQ(currentIter + 1u, memManager->freeGpuAddressCalled); + currentIter++; + } +} + +TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenSpecialSshReservationFailsThenNoRangeIsReserved) { + memManager->failReserveGpuAddressOnHeap = true; + + auto bindlessHeapHelper = std::make_unique(getDevice(), false); + + size_t reservationSize = MemoryConstants::pageSize64k; + size_t alignment = MemoryConstants::pageSize64k; + + auto specialSshReservationSuccessful = bindlessHeapHelper->tryReservingMemoryForSpecialSsh(reservationSize, alignment); + EXPECT_FALSE(specialSshReservationSuccessful); + + EXPECT_EQ(0u, bindlessHeapHelper->reservedRanges.size()); + EXPECT_EQ(1u, memManager->reserveGpuAddressOnHeapCalled); +} + +TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenReservedMemoryAlreadyInitializedThenEarlyReturnTrue) { + auto bindlessHeapHelper = std::make_unique(getDevice(), false); + + bindlessHeapHelper->reservedMemoryInitialized = true; + memManager->reserveGpuAddressOnHeapCalled = 0u; + + EXPECT_TRUE(bindlessHeapHelper->initializeReservedMemory()); + EXPECT_EQ(0u, memManager->reserveGpuAddressOnHeapCalled); +} + +TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenMemoryReservationFailsDuringInitializationThenInitializationReturnsFalse) { + auto bindlessHeapHelper = std::make_unique(getDevice(), false); + + 
memManager->reserveGpuAddressOnHeapCalled = 0u; + memManager->failReserveGpuAddressOnHeap = true; + + EXPECT_FALSE(bindlessHeapHelper->initializeReservedMemory()); + EXPECT_EQ(1u, memManager->reserveGpuAddressOnHeapCalled); +} + +TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenSuccessfullyInitializingReservedMemoryThenHeapsAndAllocatorsAreConfiguredCorrectly) { + constexpr uint64_t fullHeapSize = 4 * MemoryConstants::gigaByte; + + if (fullHeapSize > std::numeric_limits::max()) { + GTEST_SKIP(); + } + + auto bindlessHeapHelper = std::make_unique(getDevice(), false); + + memManager->reserveGpuAddressOnHeapCalled = 0u; + memManager->customHeapAllocators.clear(); + + // Override gfxPartition to ensure heapStandard has sufficient free/available space for this test. + auto mockGfxPartition = std::make_unique(); + mockGfxPartition->initHeap(HeapIndex::heapStandard, maxNBitValue(56) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k); + memManager->gfxPartitions[0] = std::move(mockGfxPartition); + + EXPECT_TRUE(bindlessHeapHelper->initializeReservedMemory()); + + EXPECT_EQ(1u, memManager->reserveGpuAddressOnHeapCalled); + EXPECT_TRUE(bindlessHeapHelper->reservedMemoryInitialized); + + auto &reserveGpuAddressOnHeapParamsPassed = memManager->reserveGpuAddressOnHeapParamsPassed; + ASSERT_EQ(1u, reserveGpuAddressOnHeapParamsPassed.size()); + + EXPECT_EQ(HeapIndex::heapStandard, reserveGpuAddressOnHeapParamsPassed[0].heap); + EXPECT_EQ(4 * MemoryConstants::gigaByte, reserveGpuAddressOnHeapParamsPassed[0].size); + EXPECT_EQ(MemoryConstants::pageSize64k, reserveGpuAddressOnHeapParamsPassed[0].alignment); + + EXPECT_EQ(rootDevice->getRootDeviceEnvironmentRef().getGmmHelper()->decanonize(memManager->reserveGpuAddressOnHeapResult.address), bindlessHeapHelper->reservedRangeBase); + + ASSERT_EQ(1u, bindlessHeapHelper->reservedRanges.size()); + EXPECT_EQ(memManager->reserveGpuAddressOnHeapResult.address, bindlessHeapHelper->reservedRanges[0].address); + 
EXPECT_EQ(memManager->reserveGpuAddressOnHeapResult.size, bindlessHeapHelper->reservedRanges[0].size); + + constexpr auto expectedFrontWindowSize = GfxPartition::externalFrontWindowPoolSize; + + { + // heapFrontWindow + EXPECT_EQ(bindlessHeapHelper->heapFrontWindow->getBaseAddress(), bindlessHeapHelper->reservedRangeBase); + auto frontWindowSize = bindlessHeapHelper->heapFrontWindow->getLeftSize() + bindlessHeapHelper->heapFrontWindow->getUsedSize(); + EXPECT_EQ(expectedFrontWindowSize, frontWindowSize); + } + + { + // heapRegular + EXPECT_EQ(bindlessHeapHelper->heapRegular->getBaseAddress(), bindlessHeapHelper->heapFrontWindow->getBaseAddress() + expectedFrontWindowSize); + auto expectedRegularSize = 4 * MemoryConstants::gigaByte - expectedFrontWindowSize; + auto heapRegularSize = bindlessHeapHelper->heapRegular->getLeftSize() + bindlessHeapHelper->heapRegular->getUsedSize(); + EXPECT_EQ(expectedRegularSize, heapRegularSize); + } + + EXPECT_EQ(2u, memManager->customHeapAllocators.size()); + + { + // heapFrontWindow + ASSERT_TRUE(memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, true).has_value()); + EXPECT_EQ(bindlessHeapHelper->heapFrontWindow.get(), memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, true)->get().allocator); + EXPECT_EQ(bindlessHeapHelper->reservedRangeBase, memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, true)->get().gpuVaBase); + } + + { + // heapRegular + ASSERT_TRUE(memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, false).has_value()); + EXPECT_EQ(bindlessHeapHelper->heapRegular.get(), memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, false)->get().allocator); + EXPECT_EQ(bindlessHeapHelper->reservedRangeBase, memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, false)->get().gpuVaBase); + } + + bindlessHeapHelper.reset(); + + EXPECT_EQ(1u, memManager->freeGpuAddressCalled); // 1 * 4GB reserved range + EXPECT_EQ(0u, 
memManager->customHeapAllocators.size()); +} + +TEST_F(BindlessHeapsHelperTests, givenReservedMemoryModeAvailableWhenSpecialSshReservationInFrontWindowFailsThenReservedMemoryModeIsUsed) { + auto gfxPartition = std::make_unique(); + gfxPartition->callHeapAllocate = false; + memManager->gfxPartitions[0] = std::move(gfxPartition); + + getDevice()->getRootDeviceEnvironmentRef().osInterface.reset(new NEO::OSInterface()); + getDevice()->getRootDeviceEnvironmentRef().osInterface->setDriverModel(std::make_unique()); + + memManager->reserveGpuAddressOnHeapFailOnCalls.push_back(0u); // Fail reserving memory for special ssh + + auto bindlessHeapHelper = std::make_unique(getDevice(), false); + + EXPECT_TRUE(bindlessHeapHelper->reservedMemoryInitialized); + EXPECT_TRUE(bindlessHeapHelper->useReservedMemory); +} \ No newline at end of file diff --git a/shared/test/unit_test/memory_manager/memory_manager_tests.cpp b/shared/test/unit_test/memory_manager/memory_manager_tests.cpp index f84695f3f0..a21c6f0bd0 100644 --- a/shared/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -13,6 +13,7 @@ #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/program/program_initialization.h" +#include "shared/source/utilities/heap_allocator.h" #include "shared/test/common/compiler_interface/linker_mock.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/fixtures/memory_allocator_fixture.h" @@ -3363,3 +3364,41 @@ TEST(MemoryManagerTest, WhenGettingExtraDevicePropertiesThenPropertiesRemainUnch EXPECT_EQ(moduleId, 0u); EXPECT_EQ(serverType, 0u); } + +TEST(MemoryManagerTest, WhenAddingCustomHeapAllocatorConfigsThenCanRetrieveAndMatchConfigs) { + uint64_t heapBase = 0xAAAAAAAA; + + uint64_t heapFrontStart = 0xAAAABBBB; + uint64_t heapRegularStart = 0xEEEEFFFF; + + size_t heapFrontSize = 1 * 
MemoryConstants::gigaByte; + size_t heapRegularSize = 2 * MemoryConstants::gigaByte; /* Test flow: add two custom heap allocator configs to a MockMemoryManager, read them back, then remove them. */ + + auto allocator1 = std::make_unique(heapFrontStart, heapFrontSize, MemoryConstants::pageSize64k, 0); + auto allocator2 = std::make_unique(heapRegularStart, heapRegularSize, MemoryConstants::pageSize64k, 0); + + MockMemoryManager memoryManager; /* One config is registered per front-window flag value for linearStream; both carry heapBase as gpuVaBase. */ + + memoryManager.addCustomHeapAllocatorConfig(AllocationType::linearStream, true, {allocator1.get(), heapBase}); + memoryManager.addCustomHeapAllocatorConfig(AllocationType::linearStream, false, {allocator2.get(), heapBase}); + + auto config1 = memoryManager.getCustomHeapAllocatorConfig(AllocationType::linearStream, true); + auto config2 = memoryManager.getCustomHeapAllocatorConfig(AllocationType::linearStream, false); + auto configNonExisting = memoryManager.getCustomHeapAllocatorConfig(AllocationType::buffer, false); /* No config was added for AllocationType::buffer, so this lookup is expected to be empty. */ + + EXPECT_TRUE(config1.has_value()); + EXPECT_TRUE(config2.has_value()); + EXPECT_FALSE(configNonExisting.has_value()); + + EXPECT_EQ(allocator1.get(), config1->get().allocator); + EXPECT_EQ(heapBase, config1->get().gpuVaBase); + + EXPECT_EQ(allocator2.get(), config2->get().allocator); + EXPECT_EQ(heapBase, config2->get().gpuVaBase); + + memoryManager.removeCustomHeapAllocatorConfig(AllocationType::linearStream, true); + memoryManager.removeCustomHeapAllocatorConfig(AllocationType::linearStream, false); /* After both removals neither linearStream lookup may return a value. */ + + EXPECT_FALSE(memoryManager.getCustomHeapAllocatorConfig(AllocationType::linearStream, true).has_value()); + EXPECT_FALSE(memoryManager.getCustomHeapAllocatorConfig(AllocationType::linearStream, false).has_value()); +} diff --git a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp index 831800f89d..cdc093c8fb 100644 --- a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp @@ -14,6 +14,7 @@ #include 
"shared/source/os_interface/windows/dxgi_wrapper.h" #include "shared/source/os_interface/windows/wddm/um_km_data_translator.h" #include "shared/source/os_interface/windows/windows_wrapper.h" +#include "shared/source/utilities/heap_allocator.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/execution_environment_helper.h" @@ -4384,3 +4385,106 @@ TEST(WddmMemoryManagerTest3, givenWmmWhenAsyncDeleterIsEnabledAndWaitForDeletion EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter()); debugManager.flags.EnableDeferredDeleter.set(actualDeleterFlag); } + +class WddmMemoryManagerBindlessHeapHelperCustomHeapAllocatorCfgTest : public WddmMemoryManagerSimpleTest { /* Fixture: sets the UseExternalAllocatorForSshAndDsh debug flag, runs the base SetUp, disables wddm->callBaseMapGpuVa and builds two heap allocators, a front-window one followed directly by a regular one. */ + public: + void SetUp() override { + debugManager.flags.UseExternalAllocatorForSshAndDsh.set(1); /* set before the base SetUp runs */ + WddmMemoryManagerSimpleTest::SetUp(); + wddm->callBaseMapGpuVa = false; + + heapBase = alignUp(0xAAAAAAAA, alignment); + heapFrontStart = heapBase; + heapRegularStart = heapFrontStart + heapFrontSize; /* the regular range begins where the front-window range ends */ + + heapFrontWindow = std::make_unique(heapFrontStart, heapFrontSize, alignment, 0); + heapRegular = std::make_unique(heapRegularStart, heapRegularSize, alignment, 0); + } + + void TearDown() override { + WddmMemoryManagerSimpleTest::TearDown(); + } + + DebugManagerStateRestore restore{}; /* NOTE(review): presumably reverts the debug flag changed in SetUp when the fixture is destroyed; confirm against DebugManagerStateRestore. */ + + size_t allocationSize = MemoryConstants::pageSize64k; + size_t alignment = MemoryConstants::pageSize64k; + + size_t heapFrontSize = 1 * MemoryConstants::gigaByte; + size_t heapRegularSize = 2 * MemoryConstants::gigaByte; + + uint64_t heapBase = 0u; + uint64_t heapFrontStart = 0u; + uint64_t heapRegularStart = 0u; + + std::unique_ptr heapFrontWindow; + std::unique_ptr heapRegular; +}; + +TEST_F(WddmMemoryManagerBindlessHeapHelperCustomHeapAllocatorCfgTest, givenCustomHeapAllocatorForFrontWindowWhenAllocatingThenGpuAddressAndBaseAreAssignedByCustomAllocator) { + 
memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, true, {heapFrontWindow.get(), heapBase}); + memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, false, {heapRegular.get(), heapBase}); + + NEO::AllocationProperties properties{mockRootDeviceIndex, true, allocationSize, AllocationType::linearStream, false, mockDeviceBitfield}; + properties.flags.use32BitFrontWindow = 1; /* request the front-window variant of the linearStream allocation */ + properties.alignment = alignment; + + auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(properties)); + ASSERT_NE(nullptr, allocation); + + EXPECT_EQ(heapFrontStart, allocation->getGpuBaseAddress()); + EXPECT_EQ(heapFrontStart, allocation->getGpuAddress()); /* both the GPU base and the GPU address are expected to equal heapFrontStart */ + + EXPECT_LE(allocationSize, allocation->getUnderlyingBufferSize()); + EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); + EXPECT_TRUE(allocation->isAllocInFrontWindowPool()); + + memoryManager->freeGraphicsMemory(allocation); +} + +TEST_F(WddmMemoryManagerBindlessHeapHelperCustomHeapAllocatorCfgTest, givenCustomHeapAllocatorForNonFrontWindowHeapWhenAllocatingThenGpuAddressAndBaseAreAssignedByCustomAllocator) { /* Registers the same two configs as the front-window test but clears use32BitFrontWindow; the GPU base is then expected to be heapBase, the GPU address heapRegularStart, and the allocation must not be flagged as front-window. */ + memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, true, {heapFrontWindow.get(), heapBase}); + memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, false, {heapRegular.get(), heapBase}); + + NEO::AllocationProperties properties{mockRootDeviceIndex, true, allocationSize, AllocationType::linearStream, false, mockDeviceBitfield}; + properties.flags.use32BitFrontWindow = 0; + properties.alignment = alignment; + + auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(properties)); + ASSERT_NE(nullptr, allocation); + + EXPECT_EQ(heapBase, allocation->getGpuBaseAddress()); + EXPECT_EQ(heapRegularStart, allocation->getGpuAddress()); + + EXPECT_LE(allocationSize, allocation->getUnderlyingBufferSize()); + EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); + 
EXPECT_FALSE(allocation->isAllocInFrontWindowPool()); + + memoryManager->freeGraphicsMemory(allocation); +} + +TEST_F(WddmMemoryManagerBindlessHeapHelperCustomHeapAllocatorCfgTest, givenCustomHeapAllocatorCfgWithoutGpuVaBaseWhenAllocatingThenGpuBaseAddressIsNotObtainedFromCfg) { /* Registers both configs with only the allocator pointer assigned; gpuVaBase keeps its default, whose value is not visible in this chunk. The GPU address is still expected to come from the front-window allocator, while the reported GPU base must differ from heapFrontStart. */ + CustomHeapAllocatorConfig cfg1; + cfg1.allocator = heapFrontWindow.get(); + CustomHeapAllocatorConfig cfg2; + cfg2.allocator = heapRegular.get(); + + memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, true, cfg1); + memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, false, cfg2); + + NEO::AllocationProperties properties{mockRootDeviceIndex, true, allocationSize, AllocationType::linearStream, false, mockDeviceBitfield}; + properties.flags.use32BitFrontWindow = 1; + properties.alignment = alignment; + + auto allocation = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(properties)); + ASSERT_NE(nullptr, allocation); + + EXPECT_NE(heapFrontStart, allocation->getGpuBaseAddress()); /* the base is not taken from the config in this case */ + EXPECT_EQ(heapFrontStart, allocation->getGpuAddress()); + + EXPECT_LE(allocationSize, allocation->getUnderlyingBufferSize()); + EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); + EXPECT_TRUE(allocation->isAllocInFrontWindowPool()); + + memoryManager->freeGraphicsMemory(allocation); +} \ No newline at end of file diff --git a/shared/test/unit_test/utilities/heap_allocator_tests.cpp b/shared/test/unit_test/utilities/heap_allocator_tests.cpp index 8fb9659b0f..eae44d2d5f 100644 --- a/shared/test/unit_test/utilities/heap_allocator_tests.cpp +++ b/shared/test/unit_test/utilities/heap_allocator_tests.cpp @@ -1437,3 +1437,15 @@ TEST(HeapAllocatorTest, givenZeroAlignmentPassedWhenAllocatingMemoryWithCustomAl uint64_t ptr = heapAllocator.allocateWithCustomAlignment(ptrSize, 0u); EXPECT_EQ(alignUp(heapBase, allocationAlignment), ptr); } + +TEST(HeapAllocatorTest, whenGetBaseAddressIsCalledThenReturnInitialLeftBoundAddress) { + const uint64_t heapBase = 
0x100000llu; + const size_t heapSize = 1024 * 4096; + HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, sizeThreshold); + + EXPECT_EQ(heapBase, heapAllocator.getBaseAddress()); /* base address reported right after construction */ + + size_t sizeToAlloc = 4096; + heapAllocator.allocate(sizeToAlloc); /* the reported base is expected to be unchanged by an allocation */ + EXPECT_EQ(heapBase, heapAllocator.getBaseAddress()); +} \ No newline at end of file