From a50c0dbecfc2c5dddac82f6d218301aec4ac5e40 Mon Sep 17 00:00:00 2001 From: "Chandio, Bibrak Qamar" Date: Fri, 1 Aug 2025 23:43:17 +0000 Subject: [PATCH] feature: Support for pStart Related-To: NEO-15156, GSD-9939 Support for start address hint in zeVirtualMemReserve. If it fails to reserve at pStart, it falls back to the baseline allocateWithCustomAlignment(...) Signed-off-by: Chandio, Bibrak Qamar --- .../core/source/context/context_imp.cpp | 64 ++-- level_zero/core/source/context/context_imp.h | 6 +- .../unit_tests/fixtures/memory_ipc_fixture.h | 37 ++- .../sources/cmdlist/test_cmdlist_6.cpp | 4 +- .../sources/context/test_context.cpp | 264 ++++++++++++++-- .../unit_tests/sources/event/test_event.cpp | 19 +- .../unit_tests/sources/module/test_module.cpp | 10 +- .../source/memory_manager/gfx_partition.cpp | 50 ++- shared/source/memory_manager/gfx_partition.h | 60 +++- shared/source/memory_manager/memory_manager.h | 1 + .../os_agnostic_memory_manager.cpp | 25 +- .../os_agnostic_memory_manager.h | 1 + .../os_interface/linux/drm_memory_manager.cpp | 70 +++-- .../os_interface/linux/drm_memory_manager.h | 2 + .../source/os_interface/windows/wddm/wddm.cpp | 20 +- .../windows/wddm_memory_manager.cpp | 21 +- .../windows/wddm_memory_manager.h | 1 + shared/source/utilities/heap_allocator.cpp | 97 +++++- shared/source/utilities/heap_allocator.h | 12 +- shared/test/common/mocks/mock_gfx_partition.h | 3 +- .../unit_test/device/neo_device_tests.cpp | 19 +- .../memory_manager/gfx_partition_tests.cpp | 28 +- .../memory_manager/memory_manager_tests.cpp | 109 ++++++- .../linux/drm_memory_manager_tests.cpp | 180 +++++++++-- .../windows/wddm_memory_manager_tests.cpp | 42 +++ .../utilities/heap_allocator_tests.cpp | 295 +++++++++++++++++- 26 files changed, 1257 insertions(+), 183 deletions(-) diff --git a/level_zero/core/source/context/context_imp.cpp b/level_zero/core/source/context/context_imp.cpp index 89cc58aed2..c770cd7245 100644 ---
a/level_zero/core/source/context/context_imp.cpp +++ b/level_zero/core/source/context/context_imp.cpp @@ -1194,45 +1194,65 @@ NEO::VirtualMemoryReservation *ContextImp::findSupportedVirtualReservation(const ze_result_t ContextImp::reserveVirtualMem(const void *pStart, size_t size, void **pptr) { - uint64_t maxCpuVa = 0; - if (this->driverHandle->getMemoryManager()->peek32bit()) { - maxCpuVa = maxNBitValue(32); - } else { - maxCpuVa = NEO::CpuInfo::getInstance().getVirtualAddressSize() == 57u ? maxNBitValue(56) : maxNBitValue(47); - } - bool reserveOnSvmHeap = pStart == nullptr; - if (castToUint64(pStart) <= maxCpuVa) { - reserveOnSvmHeap = true; - } - reserveOnSvmHeap &= contextSettings.enableSvmHeapReservation; - reserveOnSvmHeap &= NEO::debugManager.flags.EnableReservingInSvmRange.get(); + if (alignUp(size, MemoryConstants::pageSize) != size) { + return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; + } NEO::AddressRange addressRange{}; uint32_t reservedOnRootDeviceIndex = 0; uint64_t reservationBase = 0; size_t reservationTotalSize = 0; + bool reserveOnSvmHeap = true; + uint64_t maxCpuVa = 0; + + if (this->driverHandle->getMemoryManager()->peek32bit()) { + maxCpuVa = maxNBitValue(32); + } else { + maxCpuVa = NEO::CpuInfo::getInstance().getVirtualAddressSize() == 57u ? 
maxNBitValue(56) : maxNBitValue(47); + } + + uint64_t requiredStartAddress = castToUint64(pStart); + + if (requiredStartAddress > maxCpuVa) { + reserveOnSvmHeap = false; + } + + reserveOnSvmHeap &= contextSettings.enableSvmHeapReservation; + reserveOnSvmHeap &= NEO::debugManager.flags.EnableReservingInSvmRange.get(); + if (reserveOnSvmHeap) { - if (alignUp(size, MemoryConstants::pageSize) != size) { - return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; - } + reservationTotalSize = alignUp(size, MemoryConstants::pageSize2M) + MemoryConstants::pageSize2M; - addressRange = this->driverHandle->getMemoryManager()->reserveCpuAddressWithZeroBaseRetry(castToUint64(pStart), reservationTotalSize); + addressRange = this->driverHandle->getMemoryManager()->reserveCpuAddressWithZeroBaseRetry(requiredStartAddress, reservationTotalSize); if (addressRange.address == 0) { return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; } DEBUG_BREAK_IF(addressRange.address + reservationTotalSize > maxCpuVa); + reservationBase = addressRange.address; addressRange.address = alignUp(addressRange.address, MemoryConstants::pageSize2M); addressRange.size = size; } else { + + bool useStartAddressHint = (requiredStartAddress != 0ULL); + NEO::HeapIndex heap; size_t pageSize; - if ((getPageAlignedSizeRequired(size, &heap, &pageSize) != size)) { + + auto alignedSize = getPageAlignedSizeRequired(pStart, size, &heap, &pageSize); + + if (!useStartAddressHint && alignedSize != size) { return ZE_RESULT_ERROR_UNSUPPORTED_SIZE; } - addressRange = this->driverHandle->getMemoryManager()->reserveGpuAddressOnHeap(castToUint64(pStart), size, this->driverHandle->rootDeviceIndices, &reservedOnRootDeviceIndex, heap, pageSize); + + if (useStartAddressHint) { + requiredStartAddress = alignUp(requiredStartAddress, pageSize); + } + + addressRange = this->driverHandle->getMemoryManager()->reserveGpuAddressOnHeap(requiredStartAddress, alignedSize, this->driverHandle->rootDeviceIndices, &reservedOnRootDeviceIndex, heap, pageSize); + if 
(addressRange.address == 0) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } @@ -1285,13 +1305,15 @@ ze_result_t ContextImp::freeVirtualMem(const void *ptr, } } -size_t ContextImp::getPageAlignedSizeRequired(size_t size, NEO::HeapIndex *heapRequired, size_t *pageSizeRequired) { +size_t ContextImp::getPageAlignedSizeRequired(const void *pStart, size_t size, NEO::HeapIndex *heapRequired, size_t *pageSizeRequired) { [[maybe_unused]] NEO::HeapIndex heap; - size_t pageSize; - pageSize = this->driverHandle->getMemoryManager()->selectAlignmentAndHeap(size, &heap); + + auto pageSize = this->driverHandle->getMemoryManager()->selectAlignmentAndHeap(reinterpret_cast(pStart), size, &heap); + if (heapRequired) { *heapRequired = heap; } + if (pageSizeRequired) { *pageSizeRequired = pageSize; } diff --git a/level_zero/core/source/context/context_imp.h b/level_zero/core/source/context/context_imp.h index 6ab486e6ea..8f6dba5da5 100644 --- a/level_zero/core/source/context/context_imp.h +++ b/level_zero/core/source/context/context_imp.h @@ -243,7 +243,11 @@ struct ContextImp : Context, NEO::NonCopyableAndNonMovableClass { } } bool isAllocationSuitableForCompression(const StructuresLookupTable &structuresLookupTable, Device &device, size_t allocSize); - size_t getPageAlignedSizeRequired(size_t size, NEO::HeapIndex *heapRequired, size_t *pageSizeRequired); + size_t getPageAlignedSizeRequired(size_t size, NEO::HeapIndex *heapRequired, size_t *pageSizeRequired) { + return getPageAlignedSizeRequired(nullptr, size, heapRequired, pageSizeRequired); + } + + size_t getPageAlignedSizeRequired(const void *pStart, size_t size, NEO::HeapIndex *heapRequired, size_t *pageSizeRequired); NEO::UsmMemAllocPool *getUsmPoolOwningPtr(const void *ptr, NEO::SvmAllocationData *svmData); bool tryFreeViaPooling(const void *ptr, NEO::SvmAllocationData *svmData, NEO::UsmMemAllocPool *usmPool); diff --git a/level_zero/core/test/unit_tests/fixtures/memory_ipc_fixture.h 
b/level_zero/core/test/unit_tests/fixtures/memory_ipc_fixture.h index 2a12ea2761..1cd51eac1f 100644 --- a/level_zero/core/test/unit_tests/fixtures/memory_ipc_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/memory_ipc_fixture.h @@ -215,8 +215,23 @@ class MemoryManagerIpcMock : public NEO::MemoryManager { return {}; } size_t selectAlignmentAndHeap(size_t size, HeapIndex *heap) override { - *heap = HeapIndex::heapStandard; - return MemoryConstants::pageSize64k; + return selectAlignmentAndHeap(0ULL, size, heap); + } + size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) override { + + // Always default to HEAP STANDARD 2MB. + *heap = HeapIndex::heapStandard2MB; + size_t pageSizeAlignment = MemoryConstants::pageSize2M; + + // If the user provides a start address, we try to find the heap and page size alignment based on that address. + if (requiredStartAddress != 0ULL) { + auto rootDeviceIndex = 0u; + auto gfxPartition = getGfxPartition(rootDeviceIndex); + if (gfxPartition->getHeapIndexAndPageSizeBasedOnAddress(requiredStartAddress, *heap, pageSizeAlignment)) { + return pageSizeAlignment; + } + } + return pageSizeAlignment; } void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{}; AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) override { return {}; } @@ -346,8 +361,22 @@ class MemoryManagerIpcImplicitScalingMock : public NEO::MemoryManager { return {}; } size_t selectAlignmentAndHeap(size_t size, HeapIndex *heap) override { - *heap = HeapIndex::heapStandard; - return MemoryConstants::pageSize64k; + return selectAlignmentAndHeap(0ULL, size, heap); + } + size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) override { + // Always default to HEAP STANDARD 2MB. 
+ *heap = HeapIndex::heapStandard2MB; + size_t pageSizeAlignment = MemoryConstants::pageSize2M; + + // If the user provides a start address, we try to find the heap and page size alignment based on that address. + if (requiredStartAddress != 0ULL) { + auto rootDeviceIndex = 0u; + auto gfxPartition = getGfxPartition(rootDeviceIndex); + if (gfxPartition->getHeapIndexAndPageSizeBasedOnAddress(requiredStartAddress, *heap, pageSizeAlignment)) { + return pageSizeAlignment; + } + } + return pageSizeAlignment; } void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{}; AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) override { return {}; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 61cab8ce70..7f9046c446 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -893,7 +893,7 @@ HWTEST_F(CommandListTest, givenComputeCommandListWhenMemoryCopyWithReservedDevic std::make_unique(); void *dstBuffer = nullptr; - size_t size = MemoryConstants::pageSize64k; + size_t size = MemoryConstants::pageSize2M; size_t reservationSize = size * 2; auto res = context->reserveVirtualMem(nullptr, reservationSize, &dstBuffer); @@ -944,7 +944,7 @@ HWTEST_F(CommandListTest, givenComputeCommandListWhenMemoryCopyWithOneReservedDe std::make_unique(); void *dstBuffer = nullptr; - size_t size = MemoryConstants::pageSize64k; + size_t size = MemoryConstants::pageSize2M; size_t reservationSize = size * 2; auto res = context->reserveVirtualMem(nullptr, reservationSize, &dstBuffer); diff --git a/level_zero/core/test/unit_tests/sources/context/test_context.cpp b/level_zero/core/test/unit_tests/sources/context/test_context.cpp index 8d87797b4f..d5fbddac27 100644 --- a/level_zero/core/test/unit_tests/sources/context/test_context.cpp +++ 
b/level_zero/core/test/unit_tests/sources/context/test_context.cpp @@ -1261,24 +1261,93 @@ TEST_F(ContextTest, whenCallingQueryVirtualMemPageSizeCorrectAlignmentIsReturned size_t pagesize = 0u; res = contextImp->queryVirtualMemPageSize(device, size, &pagesize); EXPECT_EQ(ZE_RESULT_SUCCESS, res); - EXPECT_EQ(pagesize, MemoryConstants::pageSize64k); + EXPECT_EQ(pagesize, MemoryConstants::pageSize2M); size = MemoryConstants::pageSize2M - 1000; pagesize = 0u; res = contextImp->queryVirtualMemPageSize(device, size, &pagesize); EXPECT_EQ(ZE_RESULT_SUCCESS, res); - EXPECT_EQ(pagesize, MemoryConstants::pageSize64k); + EXPECT_EQ(pagesize, MemoryConstants::pageSize2M); size = MemoryConstants::pageSize2M + 1000; pagesize = 0u; res = contextImp->queryVirtualMemPageSize(device, size, &pagesize); EXPECT_EQ(ZE_RESULT_SUCCESS, res); - EXPECT_EQ(pagesize, MemoryConstants::pageSize64k); + EXPECT_EQ(pagesize, MemoryConstants::pageSize2M); res = contextImp->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } +TEST_F(ContextTest, whenCallingQueryVirtualMemPageSizeWithStartAddressThenCorrectAlignmentIsReturned) { + ze_context_handle_t hContext; + ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; + + ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + struct MockContextImp : public ContextImp { + using ContextImp::getPageAlignedSizeRequired; + }; + + MockContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); + + size_t size = 1024; + size_t pagesize = 0u; + + const auto &gfxPartition = driverHandle->getMemoryManager()->getGfxPartition(neoDevice->getRootDeviceIndex()); + for (uint32_t heapIndex = static_cast(HeapIndex::heapInternalDeviceMemory); heapIndex < static_cast(HeapIndex::totalHeaps); ++heapIndex) { + if (gfxPartition->isHeapInitialized(static_cast(heapIndex)) == false) { + continue; + } + auto heapBase = gfxPartition->getHeapBase(static_cast(heapIndex)); + auto heapAlignment = 
gfxPartition->getHeapAllocationAlignment(static_cast(heapIndex)); + + void *pStart = reinterpret_cast(heapBase); + + auto alignedSize = contextImp->getPageAlignedSizeRequired(pStart, size, nullptr, &pagesize); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ(pagesize, heapAlignment); + EXPECT_EQ(alignedSize % heapAlignment, 0u); + } + + res = contextImp->destroy(); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); +} + +TEST_F(ContextTest, whenCallingQueryVirtualMemPageSizeWithInvalidStartAddressThenDefaultAlignmentIsReturned) { + ze_context_handle_t hContext; + ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; + + ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + struct MockContextImp : public ContextImp { + using ContextImp::getPageAlignedSizeRequired; + }; + + MockContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); + + size_t size = 1024; + size_t defaultPageSize = 0u; + + auto alignedSize = contextImp->getPageAlignedSizeRequired(nullptr, size, nullptr, &defaultPageSize); + EXPECT_EQ(defaultPageSize, MemoryConstants::pageSize2M); + + const auto &gfxPartition = driverHandle->getMemoryManager()->getGfxPartition(neoDevice->getRootDeviceIndex()); + uint64_t maxHeapLimit = 0; + for (uint32_t heapIndex = static_cast(HeapIndex::heapInternalDeviceMemory); heapIndex < static_cast(HeapIndex::totalHeaps); ++heapIndex) { + maxHeapLimit = std::max(maxHeapLimit, gfxPartition->getHeapLimit(static_cast(heapIndex))); + } + void *pStart = reinterpret_cast(maxHeapLimit + 64); + + size_t pageSize = 0u; + contextImp->getPageAlignedSizeRequired(pStart, alignedSize, nullptr, &pageSize); + EXPECT_EQ(pageSize, defaultPageSize); + + res = contextImp->destroy(); +} + TEST_F(ContextTest, whenCallingPhysicalMemInterfacesThenSuccessIsReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; @@ -1696,6 +1765,7 @@ TEST_F(ContextTest, 
whenCallingVirtualMemoryFreeWithInvalidValuesThenFailuresRet const auto maxCpuVa = NEO::CpuInfo::getInstance().getVirtualAddressSize() == 57u ? maxNBitValue(56) : maxNBitValue(47); pStart = reinterpret_cast(maxCpuVa + 0x1234); + res = contextImp->reserveVirtualMem(pStart, pagesize, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GT(static_cast(driverHandle->getMemoryManager()->getVirtualMemoryReservationMap().size()), 0); @@ -1728,17 +1798,38 @@ class ReserveMemoryManagerMock : public NEO::MemoryManager { if (failReserveGpuAddress) { return {}; } + if (returnNonZeroReserveGpuAddressValue) { + return AddressRange{reserveGpuAddressValue, size}; + } return AddressRange{requiredStartAddress, size}; } AddressRange reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) override { if (failReserveGpuAddress) { return {}; } + if (returnNonZeroReserveGpuAddressValue) { + return AddressRange{reserveGpuAddressValue, size}; + } return AddressRange{requiredStartAddress, size}; } size_t selectAlignmentAndHeap(size_t size, HeapIndex *heap) override { - *heap = HeapIndex::heapStandard; - return MemoryConstants::pageSize64k; + return selectAlignmentAndHeap(0ULL, size, heap); + } + size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) override { + + // Always default to HEAP STANDARD 2MB. + *heap = HeapIndex::heapStandard2MB; + size_t pageSizeAlignment = MemoryConstants::pageSize2M; + + // If the user provides a start address, we try to find the heap and page size alignment based on that address. 
+ if (requiredStartAddress != 0ULL) { + auto rootDeviceIndex = 0u; + auto gfxPartition = getGfxPartition(rootDeviceIndex); + if (gfxPartition->getHeapIndexAndPageSizeBasedOnAddress(requiredStartAddress, *heap, pageSizeAlignment)) { + return pageSizeAlignment; + } + } + return pageSizeAlignment; } void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{}; AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) override { @@ -1794,6 +1885,8 @@ class ReserveMemoryManagerMock : public NEO::MemoryManager { } bool failReserveGpuAddress = true; + uint64_t reserveGpuAddressValue = 42; + bool returnNonZeroReserveGpuAddressValue = false; bool failReserveCpuAddress = true; bool failMapVirtualMemory = true; bool failAllocatePhysicalGraphicsMemory = true; @@ -1842,23 +1935,20 @@ TEST_F(ContextTest, whenCallingVirtualMemReserveWithPStartAboveSvmRangeWithSucce EXPECT_EQ(ZE_RESULT_SUCCESS, res); ContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); + auto reserveMemoryManager = std::make_unique(*neoDevice->executionEnvironment); + auto memoryManager = driverHandle->getMemoryManager(); + reserveMemoryManager->failReserveGpuAddress = false; + driverHandle->setMemoryManager(reserveMemoryManager.get()); const auto maxCpuVa = NEO::CpuInfo::getInstance().getVirtualAddressSize() == 57u ? 
maxNBitValue(56) : maxNBitValue(47); void *pStart = reinterpret_cast(maxCpuVa + 0x1234); size_t size = 4096u; void *ptr = nullptr; - size_t pagesize = 0u; - res = contextImp->queryVirtualMemPageSize(device, size, &pagesize); - EXPECT_EQ(ZE_RESULT_SUCCESS, res); - auto reserveMemoryManager = std::make_unique(*neoDevice->executionEnvironment); - auto memoryManager = driverHandle->getMemoryManager(); - reserveMemoryManager->failReserveGpuAddress = false; - driverHandle->setMemoryManager(reserveMemoryManager.get()); - res = contextImp->reserveVirtualMem(pStart, pagesize, &ptr); + res = contextImp->reserveVirtualMem(pStart, size, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); EXPECT_GT(reserveMemoryManager->getVirtualMemoryReservationMap().size(), 0u); - res = contextImp->freeVirtualMem(ptr, pagesize); + res = contextImp->freeVirtualMem(ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); driverHandle->setMemoryManager(memoryManager); @@ -2019,22 +2109,25 @@ HWTEST2_F(ContextTest, whenCallingVirtualMemoryReservationWhenOutOfMemoryThenOut void *pStart = 0x0; size_t size = 0u; void *ptr = nullptr; - size_t pagesize = 0u; + size_t pageSize = 0u; - res = contextImp->queryVirtualMemPageSize(device, size, &pagesize); + res = contextImp->queryVirtualMemPageSize(device, size, &pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, res); NEO::MemoryManager *failingReserveMemoryManager = new ReserveMemoryManagerMock(*neoDevice->executionEnvironment); auto memoryManager = driverHandle->getMemoryManager(); driverHandle->setMemoryManager(failingReserveMemoryManager); - res = contextImp->reserveVirtualMem(pStart, pagesize, &ptr); + res = contextImp->reserveVirtualMem(pStart, pageSize, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, res); + pStart = reinterpret_cast(0x1234); - res = contextImp->reserveVirtualMem(pStart, pagesize, &ptr); + res = contextImp->reserveVirtualMem(pStart, pageSize, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, res); + const auto maxCpuVa = 
NEO::CpuInfo::getInstance().getVirtualAddressSize() == 57u ? maxNBitValue(56) : maxNBitValue(47); pStart = reinterpret_cast(maxCpuVa + 0x1234); - res = contextImp->reserveVirtualMem(pStart, pagesize, &ptr); + res = contextImp->reserveVirtualMem(pStart, pageSize, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, res); + driverHandle->setMemoryManager(memoryManager); delete failingReserveMemoryManager; @@ -2064,10 +2157,7 @@ TEST_F(ContextTest, whenCallingVirtualMemoryReservationWithInvalidArgumentsThenU driverHandle->setMemoryManager(failingReserveMemoryManager); res = contextImp->reserveVirtualMem(pStart, size, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, res); - const auto maxCpuVa = NEO::CpuInfo::getInstance().getVirtualAddressSize() == 57u ? maxNBitValue(56) : maxNBitValue(47); - pStart = reinterpret_cast(maxCpuVa + 0x1234); - res = contextImp->reserveVirtualMem(pStart, size, &ptr); - EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, res); + driverHandle->setMemoryManager(memoryManager); delete failingReserveMemoryManager; @@ -2111,6 +2201,91 @@ TEST_F(ContextTest, whenCallingVirtualMemoryReservationWithInvalidMultiPageSizeI EXPECT_EQ(ZE_RESULT_SUCCESS, res); } +TEST_F(ContextTest, whenCallingVirtualMemoryReservationOnNonSvmHeapWithUnAlignedSizeInArgumentsThenUnsupportedSizeReturned) { + + DebugManagerStateRestore restore; + NEO::debugManager.flags.EnableReservingInSvmRange.set(0); + + ze_context_handle_t hContext; + ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; + + ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + ContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); + + void *pStart = 0x0; + size_t size = 64u; + void *ptr = nullptr; + size_t pagesize = 0u; + + res = contextImp->queryVirtualMemPageSize(device, size, &pagesize); + + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + NEO::MemoryManager *failingReserveMemoryManager = new 
ReserveMemoryManagerMock(*neoDevice->executionEnvironment); + auto memoryManager = driverHandle->getMemoryManager(); + driverHandle->setMemoryManager(failingReserveMemoryManager); + + size = pagesize * 3 + 10; + res = contextImp->reserveVirtualMem(pStart, size, &ptr); + EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, res); + + size = pagesize + MemoryConstants::pageSize; + res = contextImp->reserveVirtualMem(pStart, size, &ptr); + EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, res); + + driverHandle->setMemoryManager(memoryManager); + delete failingReserveMemoryManager; + + res = contextImp->destroy(); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); +} + +TEST_F(ContextTest, whenCallingVirtualMemoryReservationOnNonSvmHeapWithOutStartAddressHintThenThenSuccessReturned) { + + DebugManagerStateRestore restore; + NEO::debugManager.flags.EnableReservingInSvmRange.set(0); + + ze_context_handle_t hContext; + ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; + + ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + ContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); + + void *pStart = 0x0; + size_t size = 64u; + void *ptr = nullptr; + size_t pagesize = 0u; + + res = contextImp->queryVirtualMemPageSize(device, size, &pagesize); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + auto failingReserveMemoryManager = new ReserveMemoryManagerMock(*neoDevice->executionEnvironment); + auto memoryManager = driverHandle->getMemoryManager(); + failingReserveMemoryManager->failReserveGpuAddress = false; + failingReserveMemoryManager->returnNonZeroReserveGpuAddressValue = true; + driverHandle->setMemoryManager(failingReserveMemoryManager); + + size = pagesize; + res = contextImp->reserveVirtualMem(pStart, size, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + // Delete the allocated reservation from the map + auto &virtualMemoryReservationMap = 
driverHandle->getMemoryManager()->getVirtualMemoryReservationMap(); + if (auto it = virtualMemoryReservationMap.find(ptr); it != virtualMemoryReservationMap.end()) { + delete it->second; + virtualMemoryReservationMap.erase(it); + } + + driverHandle->setMemoryManager(memoryManager); + delete failingReserveMemoryManager; + + res = contextImp->destroy(); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); +} + TEST_F(ContextTest, whenCallingVirtualMemoryReservationWithValidMultiPageSizeInArgumentsThenSuccessReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; @@ -2289,6 +2464,7 @@ HWTEST2_F(ContextTest, Given48BitCpuAddressWidthWhenCallingVirtualMemoryReservat } HWTEST2_F(ContextTest, Given57BitCpuAddressWidthWhenCallingVirtualMemoryReservationCorrectAllocationMethodIsSelected, IsNotMTL) { + MockCpuInfoOverrideVirtualAddressSize overrideCpuInfo(57); ze_context_handle_t hContext; @@ -2316,12 +2492,15 @@ HWTEST2_F(ContextTest, Given57BitCpuAddressWidthWhenCallingVirtualMemoryReservat EXPECT_EQ(ZE_RESULT_SUCCESS, res); pStart = addrToPtr(maxNBitValue(56) + 0x1234); - res = contextImp->reserveVirtualMem(pStart, size, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, res); + reserveMemoryManager->failReserveGpuAddress = false; res = contextImp->reserveVirtualMem(pStart, size, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_NE(ptr, nullptr); + EXPECT_NE(ptr, pStart); + res = contextImp->freeVirtualMem(ptr, size); EXPECT_EQ(ZE_RESULT_SUCCESS, res); @@ -2332,6 +2511,42 @@ HWTEST2_F(ContextTest, Given57BitCpuAddressWidthWhenCallingVirtualMemoryReservat EXPECT_EQ(ZE_RESULT_SUCCESS, res); } +TEST_F(ContextTest, whenCallingVirtualMemoryReservationWithUnAlignedPstartThenNearbyAlignedPstartIsReturned) { + ze_context_handle_t hContext; + ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; + + ze_result_t res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + 
ContextImp *contextImp = static_cast(L0::Context::fromHandle(hContext)); + + auto failingReserveMemoryManager = new ReserveMemoryManagerMock(*neoDevice->executionEnvironment); + auto memoryManager = driverHandle->getMemoryManager(); + driverHandle->setMemoryManager(failingReserveMemoryManager); + failingReserveMemoryManager->failReserveGpuAddress = false; + + void *pStart = 0x0; + size_t size = MemoryConstants::pageSize; + void *ptr = nullptr; + + const auto maxCpuVa = NEO::CpuInfo::getInstance().getVirtualAddressSize() == 57u ? maxNBitValue(56) : maxNBitValue(47); + pStart = reinterpret_cast(maxCpuVa + 0x1234); + + // pStart is not aligned to any pagesize. The reserveVirtualMem will properly align it. + res = contextImp->reserveVirtualMem(pStart, size, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_NE(ptr, nullptr); + EXPECT_NE(ptr, pStart); + res = contextImp->freeVirtualMem(ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + driverHandle->setMemoryManager(memoryManager); + delete failingReserveMemoryManager; + + res = contextImp->destroy(); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); +} + TEST_F(ContextTest, whenCallingPhysicalMemoryAllocateWhenOutOfMemoryThenOutofMemoryReturned) { ze_context_handle_t hContext; ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; @@ -2606,7 +2821,6 @@ TEST_F(ContextTest, whenCallingUnmapVirtualMemoryWithFailedUnmapThenUnknownError // Reset the memory manager to the original one. 
driverHandle->setMemoryManager(memManager); - res = contextImp->destroy(); EXPECT_EQ(ZE_RESULT_SUCCESS, res); } diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 4829ba297e..628087f7a9 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -87,8 +87,23 @@ class MemoryManagerEventPoolFailMock : public NEO::MemoryManager { return {}; } size_t selectAlignmentAndHeap(size_t size, HeapIndex *heap) override { - *heap = HeapIndex::heapStandard; - return MemoryConstants::pageSize64k; + return selectAlignmentAndHeap(0ULL, size, heap); + } + size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) override { + + // Always default to HEAP STANDARD 2MB. + *heap = HeapIndex::heapStandard2MB; + size_t pageSizeAlignment = MemoryConstants::pageSize2M; + + // If the user provides a start address, we try to find the heap and page size alignment based on that address. 
+ if (requiredStartAddress != 0ULL) { + auto rootDeviceIndex = 0u; + auto gfxPartition = getGfxPartition(rootDeviceIndex); + if (gfxPartition->getHeapIndexAndPageSizeBasedOnAddress(requiredStartAddress, *heap, pageSizeAlignment)) { + return pageSizeAlignment; + } + } + return pageSizeAlignment; } void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{}; AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) override { return {}; } diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index 1cf1ca4a65..ab7a402965 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -2692,7 +2692,7 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest, ze_kernel_handle_t kernelHandle; void *ptr = nullptr; - size_t size = MemoryConstants::pageSize64k; + size_t size = MemoryConstants::pageSize2M; size_t reservationSize = size * 2; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); @@ -2777,7 +2777,7 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest, ze_kernel_handle_t kernelHandle; void *ptr = nullptr; - size_t size = MemoryConstants::pageSize64k; + size_t size = MemoryConstants::pageSize2M; size_t reservationSize = size * 2; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); @@ -2856,7 +2856,7 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest, ze_kernel_handle_t kernelHandle; void *ptr = nullptr; - size_t size = MemoryConstants::pageSize64k; + size_t size = MemoryConstants::pageSize2M; size_t reservationSize = size * 2; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); @@ -2911,7 +2911,7 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest, ze_kernel_handle_t kernelHandle; void *ptr = nullptr; - size_t size = MemoryConstants::pageSize64k; + size_t size = MemoryConstants::pageSize2M; size_t 
reservationSize = size * 4; ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = kernelName.c_str(); @@ -2941,7 +2941,7 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest, auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); auto virtualAlloc = svmAllocsManager->getSVMAlloc(ptr); - virtualAlloc->virtualReservationData->mappedAllocations.at(offsetAddress)->mappedAllocation.allocation->setSize((MemoryConstants::gigaByte * 4) - MemoryConstants::pageSize64k); + virtualAlloc->virtualReservationData->mappedAllocations.at(offsetAddress)->mappedAllocation.allocation->setSize((MemoryConstants::gigaByte * 4) - MemoryConstants::pageSize2M); L0::KernelImp *kernel = reinterpret_cast(Kernel::fromHandle(kernelHandle)); kernel->setArgBuffer(0, sizeof(ptr), &ptr); diff --git a/shared/source/memory_manager/gfx_partition.cpp b/shared/source/memory_manager/gfx_partition.cpp index 1fe1779f00..250b89d1d3 100644 --- a/shared/source/memory_manager/gfx_partition.cpp +++ b/shared/source/memory_manager/gfx_partition.cpp @@ -111,24 +111,26 @@ void GfxPartition::Heap::init(uint64_t base, uint64_t size, size_t allocationAli heapGranularity = GfxPartition::heapGranularity2MB; } - // Exclude very first and very last 64K from GPU address range allocation + // Exclude very first and very last page from GPU address range allocation if (size > 2 * heapGranularity) { size -= 2 * heapGranularity; } alloc = std::make_unique(base + heapGranularity, size, allocationAlignment); + initialized = true; } -void GfxPartition::Heap::initExternalWithFrontWindow(uint64_t base, uint64_t size) { +void GfxPartition::Heap::initExternalWithFrontWindow(uint64_t base, uint64_t size, size_t allocationAlignment) { this->base = base; this->size = size; size -= GfxPartition::heapGranularity; - alloc = std::make_unique(base, size, MemoryConstants::pageSize, 0u); + alloc = std::make_unique(base, size, allocationAlignment, 0u); + initialized = true; } -void GfxPartition::Heap::initWithFrontWindow(uint64_t 
base, uint64_t size, uint64_t frontWindowSize) { +void GfxPartition::Heap::initWithFrontWindow(uint64_t base, uint64_t size, uint64_t frontWindowSize, size_t allocationAlignment) { this->base = base; this->size = size; @@ -136,24 +138,38 @@ void GfxPartition::Heap::initWithFrontWindow(uint64_t base, uint64_t size, uint6 size -= GfxPartition::heapGranularity; size -= frontWindowSize; - alloc = std::make_unique(base + frontWindowSize, size, MemoryConstants::pageSize); + alloc = std::make_unique(base + frontWindowSize, size, allocationAlignment); + initialized = true; } -void GfxPartition::Heap::initFrontWindow(uint64_t base, uint64_t size) { +void GfxPartition::Heap::initFrontWindow(uint64_t base, uint64_t size, size_t allocationAlignment) { this->base = base; this->size = size; - alloc = std::make_unique(base, size, MemoryConstants::pageSize, 0u); + alloc = std::make_unique(base, size, allocationAlignment, 0u); + initialized = true; +} + +size_t GfxPartition::Heap::getAllocAlignment() const { + return alloc->getAllocationAlignment(); } uint64_t GfxPartition::Heap::allocate(size_t &size) { return alloc->allocate(size); } +uint64_t GfxPartition::Heap::allocateWithStartAddressHint(const uint64_t requiredStartAddress, size_t &size) { + return alloc->allocateWithStartAddressHint(requiredStartAddress, size); +} + uint64_t GfxPartition::Heap::allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment) { return alloc->allocateWithCustomAlignment(sizeToAllocate, alignment); } +uint64_t GfxPartition::Heap::allocateWithCustomAlignmentWithStartAddressHint(const uint64_t requiredStartAddress, size_t &sizeToAllocate, size_t alignment) { + return alloc->allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, sizeToAllocate, alignment); +} + void GfxPartition::Heap::free(uint64_t ptr, size_t size) { alloc->free(ptr, size); } @@ -248,7 +264,7 @@ bool GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe auto cpuVirtualAddressSize = 
CpuInfo::getInstance().getVirtualAddressSize(); if (cpuVirtualAddressSize == 48 && gpuAddressSpace == maxNBitValue(48)) { gfxBase = maxNBitValue(48 - 1) + 1; - heapInit(HeapIndex::heapSvm, 0ull, gfxBase); + heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, gfxBase, MemoryConstants::pageSize2M); } else if (gpuAddressSpace == maxNBitValue(47)) { if (reservedCpuAddressRangeForHeapSvm.alignedPtr == nullptr) { if (cpuAddressRangeSizeToReserve == 0) { @@ -264,10 +280,10 @@ bool GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe } gfxBase = reinterpret_cast(reservedCpuAddressRangeForHeapSvm.alignedPtr); gfxTop = gfxBase + cpuAddressRangeSizeToReserve; - heapInit(HeapIndex::heapSvm, 0ull, gpuAddressSpace + 1); + heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, gpuAddressSpace + 1, MemoryConstants::pageSize2M); } else if (gpuAddressSpace < maxNBitValue(47)) { gfxBase = 0ull; - heapInit(HeapIndex::heapSvm, 0ull, 0ull); + heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, 0ull, MemoryConstants::pageSize2M); } else { if (!initAdditionalRange(cpuVirtualAddressSize, gpuAddressSpace, gfxBase, gfxTop, rootDeviceIndex, systemMemorySize)) { return false; @@ -277,14 +293,14 @@ bool GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe for (auto heap : GfxPartition::heap32Names) { if (useExternalFrontWindowPool && HeapAssigner::heapTypeExternalWithFrontWindowPool(heap)) { - heapInitExternalWithFrontWindow(heap, gfxBase, gfxHeap32Size); + heapInitExternalWithFrontWindow(heap, gfxBase, gfxHeap32Size, MemoryConstants::pageSize); size_t externalFrontWindowSize = GfxPartition::externalFrontWindowPoolSize; auto allocation = heapAllocate(heap, externalFrontWindowSize); heapInitExternalWithFrontWindow(HeapAssigner::mapExternalWindowIndex(heap), allocation, - externalFrontWindowSize); + externalFrontWindowSize, MemoryConstants::pageSize); } else if (HeapAssigner::isInternalHeap(heap)) { - heapInitWithFrontWindow(heap, 
gfxBase, gfxHeap32Size, GfxPartition::internalFrontWindowPoolSize); - heapInitFrontWindow(HeapAssigner::mapInternalWindowIndex(heap), gfxBase, GfxPartition::internalFrontWindowPoolSize); + heapInitWithFrontWindow(heap, gfxBase, gfxHeap32Size, GfxPartition::internalFrontWindowPoolSize, MemoryConstants::pageSize); + heapInitFrontWindow(HeapAssigner::mapInternalWindowIndex(heap), gfxBase, GfxPartition::internalFrontWindowPoolSize, MemoryConstants::pageSize); } else { heapInit(heap, gfxBase, gfxHeap32Size); } @@ -368,9 +384,9 @@ bool GfxPartition::initAdditionalRange(uint32_t cpuVirtualAddressSize, uint64_t gfxBase = castToUint64(reservedCpuAddressRangeForHeapSvm.alignedPtr); gfxTop = gfxBase + reservedCpuAddressRangeForHeapSvm.sizeToReserve; if (gpuAddressSpace == maxNBitValue(57)) { - heapInit(HeapIndex::heapSvm, 0ull, maxNBitValue(57 - 1) + 1); + heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, maxNBitValue(57 - 1) + 1, MemoryConstants::pageSize2M); } else { - heapInit(HeapIndex::heapSvm, 0ull, maxNBitValue(48) + 1); + heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, maxNBitValue(48) + 1, MemoryConstants::pageSize2M); } if (gpuAddressSpace == maxNBitValue(57)) { @@ -384,7 +400,7 @@ bool GfxPartition::initAdditionalRange(uint32_t cpuVirtualAddressSize, uint64_t // On 48 bit CPU this range is reserved for OS usage, do not reserve gfxBase = maxNBitValue(48 - 1) + 1; // 0x800000000000 gfxTop = maxNBitValue(48) + 1; // 0x1000000000000 - heapInit(HeapIndex::heapSvm, 0ull, gfxBase); + heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0ull, gfxBase, MemoryConstants::pageSize2M); } // Init HEAP_EXTENDED only for 57 bit GPU diff --git a/shared/source/memory_manager/gfx_partition.h b/shared/source/memory_manager/gfx_partition.h index 6a96b6a321..cba2f182ad 100644 --- a/shared/source/memory_manager/gfx_partition.h +++ b/shared/source/memory_manager/gfx_partition.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2024 Intel Corporation + * Copyright (C) 2019-2025 
Intel Corporation * * SPDX-License-Identifier: MIT * @@ -49,26 +49,34 @@ class GfxPartition { getHeap(heapIndex).init(base, size, allocationAlignment); } - void heapInitExternalWithFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size) { - getHeap(heapIndex).initExternalWithFrontWindow(base, size); + void heapInitExternalWithFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size, size_t allocationAlignment) { + getHeap(heapIndex).initExternalWithFrontWindow(base, size, allocationAlignment); } - void heapInitWithFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size, uint64_t frontWindowSize) { - getHeap(heapIndex).initWithFrontWindow(base, size, frontWindowSize); + void heapInitWithFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size, uint64_t frontWindowSize, size_t allocationAlignment) { + getHeap(heapIndex).initWithFrontWindow(base, size, frontWindowSize, allocationAlignment); } - void heapInitFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size) { - getHeap(heapIndex).initFrontWindow(base, size); + void heapInitFrontWindow(HeapIndex heapIndex, uint64_t base, uint64_t size, size_t allocationAlignment) { + getHeap(heapIndex).initFrontWindow(base, size, allocationAlignment); } MOCKABLE_VIRTUAL uint64_t heapAllocate(HeapIndex heapIndex, size_t &size) { return getHeap(heapIndex).allocate(size); } + MOCKABLE_VIRTUAL uint64_t heapAllocateWithStartAddressHint(const uint64_t requiredStartAddress, HeapIndex heapIndex, size_t &size) { + return getHeap(heapIndex).allocateWithStartAddressHint(requiredStartAddress, size); + } + MOCKABLE_VIRTUAL uint64_t heapAllocateWithCustomAlignment(HeapIndex heapIndex, size_t &size, size_t alignment) { return getHeap(heapIndex).allocateWithCustomAlignment(size, alignment); } + MOCKABLE_VIRTUAL uint64_t heapAllocateWithCustomAlignmentWithStartAddressHint(const uint64_t requiredStartAddress, HeapIndex heapIndex, size_t &size, size_t alignment) { + return 
getHeap(heapIndex).allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, size, alignment); + } + MOCKABLE_VIRTUAL void heapFree(HeapIndex heapIndex, uint64_t ptr, size_t size) { getHeap(heapIndex).free(ptr, size); } @@ -83,8 +91,31 @@ class GfxPartition { return getHeap(heapIndex).getLimit(); } + size_t getHeapAllocationAlignment(HeapIndex heapIndex) { + return getHeap(heapIndex).getAllocAlignment(); + } + + bool isHeapInitialized(HeapIndex heapIndex) { + return getHeap(heapIndex).isInitialized(); + } + uint64_t getHeapMinimalAddress(HeapIndex heapIndex); + MOCKABLE_VIRTUAL bool getHeapIndexAndPageSizeBasedOnAddress(uint64_t ptr, HeapIndex &heapIndex, size_t &pageSize) { + for (size_t index = 0; index < heaps.size(); ++index) { + + if (!isHeapInitialized(static_cast(index))) { + continue; + } + if (isAddressInHeapRange(static_cast(index), ptr)) { + heapIndex = static_cast(index); + pageSize = getHeapAllocationAlignment(heapIndex); + return true; + } + } + return false; + } + bool isLimitedRange() { return getHeap(HeapIndex::heapSvm).getSize() == 0ull; } static bool isAnyHeap32(HeapIndex heapIndex) { @@ -110,25 +141,34 @@ class GfxPartition { public: Heap() = default; void init(uint64_t base, uint64_t size, size_t allocationAlignment); - void initExternalWithFrontWindow(uint64_t base, uint64_t size); - void initWithFrontWindow(uint64_t base, uint64_t size, uint64_t frontWindowSize); - void initFrontWindow(uint64_t base, uint64_t size); + void initExternalWithFrontWindow(uint64_t base, uint64_t size, size_t allocationAlignment); + void initWithFrontWindow(uint64_t base, uint64_t size, uint64_t frontWindowSize, size_t allocationAlignment); + void initFrontWindow(uint64_t base, uint64_t size, size_t allocationAlignment); uint64_t getBase() const { return base; } uint64_t getSize() const { return size; } uint64_t getLimit() const { return size ? 
base + size - 1 : 0; } + size_t getAllocAlignment() const; uint64_t allocate(size_t &size); + uint64_t allocateWithStartAddressHint(const uint64_t requiredStartAddress, size_t &size); uint64_t allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment); + uint64_t allocateWithCustomAlignmentWithStartAddressHint(const uint64_t requiredStartAddress, size_t &sizeToAllocate, size_t alignment); void free(uint64_t ptr, size_t size); + bool isInitialized() const { return initialized; } protected: uint64_t base = 0, size = 0; std::unique_ptr alloc; + bool initialized = false; }; Heap &getHeap(HeapIndex heapIndex) { return heaps[static_cast(heapIndex)]; } + bool isAddressInHeapRange(HeapIndex heapIndex, uint64_t ptr) { + return (ptr >= getHeap(heapIndex).getBase()) && (ptr <= getHeap(heapIndex).getLimit()); + } + std::array(HeapIndex::totalHeaps)> heaps; OSMemory::ReservedCpuAddressRange &reservedCpuAddressRangeForHeapSvm; diff --git a/shared/source/memory_manager/memory_manager.h b/shared/source/memory_manager/memory_manager.h index f12185d3b5..f2acddeddb 100644 --- a/shared/source/memory_manager/memory_manager.h +++ b/shared/source/memory_manager/memory_manager.h @@ -262,6 +262,7 @@ class MemoryManager { virtual AddressRange reserveGpuAddress(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex) = 0; virtual AddressRange reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) = 0; virtual size_t selectAlignmentAndHeap(size_t size, HeapIndex *heap) = 0; + virtual size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) = 0; virtual void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) = 0; virtual AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) = 0; 
AddressRange reserveCpuAddressWithZeroBaseRetry(const uint64_t requiredStartAddress, size_t size); diff --git a/shared/source/memory_manager/os_agnostic_memory_manager.cpp b/shared/source/memory_manager/os_agnostic_memory_manager.cpp index 7918048993..f4294b3e6f 100644 --- a/shared/source/memory_manager/os_agnostic_memory_manager.cpp +++ b/shared/source/memory_manager/os_agnostic_memory_manager.cpp @@ -647,12 +647,29 @@ MemoryAllocation *OsAgnosticMemoryManager::createMemoryAllocation(AllocationType } size_t OsAgnosticMemoryManager::selectAlignmentAndHeap(size_t size, HeapIndex *heap) { - *heap = HeapIndex::heapStandard; - return MemoryConstants::pageSize64k; + return selectAlignmentAndHeap(0ULL, size, heap); +} + +size_t OsAgnosticMemoryManager::selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) { + + // Always default to HEAP STANDARD 2MB. + *heap = HeapIndex::heapStandard2MB; + size_t pageSizeAlignment = MemoryConstants::pageSize2M; + + // If the user provides a start address, we try to find the heap and page size alignment based on that address. 
+ if (requiredStartAddress != 0ULL) { + auto rootDeviceIndex = 0u; + auto gfxPartition = getGfxPartition(rootDeviceIndex); + if (gfxPartition->getHeapIndexAndPageSizeBasedOnAddress(requiredStartAddress, *heap, pageSizeAlignment)) { + return pageSizeAlignment; + } + } + + return pageSizeAlignment; } AddressRange OsAgnosticMemoryManager::reserveGpuAddress(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex) { - return reserveGpuAddressOnHeap(requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, HeapIndex::heapStandard, MemoryConstants::pageSize64k); + return reserveGpuAddressOnHeap(requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, HeapIndex::heapStandard2MB, MemoryConstants::pageSize2M); } AddressRange OsAgnosticMemoryManager::reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) { @@ -661,7 +678,7 @@ AddressRange OsAgnosticMemoryManager::reserveGpuAddressOnHeap(const uint64_t req for (auto rootDeviceIndex : rootDeviceIndices) { auto gfxPartition = getGfxPartition(rootDeviceIndex); auto gmmHelper = getGmmHelper(rootDeviceIndex); - gpuVa = gmmHelper->canonize(gfxPartition->heapAllocate(heap, size)); + gpuVa = requiredStartAddress == 0 ? 
gmmHelper->canonize(gfxPartition->heapAllocateWithCustomAlignment(heap, size, alignment)) : gmmHelper->canonize(gfxPartition->heapAllocateWithCustomAlignmentWithStartAddressHint(gmmHelper->decanonize(requiredStartAddress), heap, size, alignment)); if (gpuVa != 0u) { *reservedOnRootDeviceIndex = rootDeviceIndex; break; diff --git a/shared/source/memory_manager/os_agnostic_memory_manager.h b/shared/source/memory_manager/os_agnostic_memory_manager.h index 4873af9a9d..d90b29b6f2 100644 --- a/shared/source/memory_manager/os_agnostic_memory_manager.h +++ b/shared/source/memory_manager/os_agnostic_memory_manager.h @@ -47,6 +47,7 @@ class OsAgnosticMemoryManager : public MemoryManager { AddressRange reserveGpuAddress(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex) override; AddressRange reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) override; size_t selectAlignmentAndHeap(size_t size, HeapIndex *heap) override; + size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) override; void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override; AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) override; void freeCpuAddress(AddressRange addressRange) override; diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index 49926ee176..6dbb449218 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -270,6 +270,12 @@ uint64_t DrmMemoryManager::acquireGpuRangeWithCustomAlignment(size_t &size, uint return gmmHelper->canonize(gfxPartition->heapAllocateWithCustomAlignment(heapIndex, size, alignment)); } 
+uint64_t DrmMemoryManager::acquireGpuRangeWithCustomAlignmentWithStartAddressHint(const uint64_t requiredStartAddress, size_t &size, uint32_t rootDeviceIndex, HeapIndex heapIndex, size_t alignment) { + auto gfxPartition = getGfxPartition(rootDeviceIndex); + auto gmmHelper = getGmmHelper(rootDeviceIndex); + return gmmHelper->canonize(gfxPartition->heapAllocateWithCustomAlignmentWithStartAddressHint(gmmHelper->decanonize(requiredStartAddress), heapIndex, size, alignment)); +} + void DrmMemoryManager::releaseGpuRange(void *address, size_t unmapSize, uint32_t rootDeviceIndex) { uint64_t graphicsAddress = static_cast(reinterpret_cast(address)); auto gmmHelper = getGmmHelper(rootDeviceIndex); @@ -1719,46 +1725,62 @@ uint32_t DrmMemoryManager::getRootDeviceIndex(const Drm *drm) { } size_t DrmMemoryManager::selectAlignmentAndHeap(size_t size, HeapIndex *heap) { - AlignmentSelector::CandidateAlignment alignmentBase = alignmentSelector.selectAlignment(size); - size_t pageSizeAlignment = alignmentBase.alignment; - auto rootDeviceCount = this->executionEnvironment.rootDeviceEnvironments.size(); + return selectAlignmentAndHeap(0ULL, size, heap); +} - // If all devices can support HEAP EXTENDED, then that heap is used, otherwise the HEAP based on the size is used. +size_t DrmMemoryManager::selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) { + + // Always default to HEAP STANDARD 2MB. + *heap = HeapIndex::heapStandard2MB; + size_t pageSizeAlignment = MemoryConstants::pageSize2M; + + // If the user provides a start address, we try to find the heap and page size alignment based on that address. + if (requiredStartAddress != 0ULL) { + auto rootDeviceIndex = 0u; + auto gfxPartition = getGfxPartition(rootDeviceIndex); + if (gfxPartition->getHeapIndexAndPageSizeBasedOnAddress(requiredStartAddress, *heap, pageSizeAlignment)) { + return pageSizeAlignment; + } + } + + // If all devices can support HEAP EXTENDED, then that heap is used. 
+ bool useExtendedHeap = true; + auto rootDeviceCount = this->executionEnvironment.rootDeviceEnvironments.size(); for (auto rootDeviceIndex = 0u; rootDeviceIndex < rootDeviceCount; rootDeviceIndex++) { auto gfxPartition = getGfxPartition(rootDeviceIndex); - if (gfxPartition->getHeapLimit(HeapIndex::heapExtended) > 0) { - auto alignSize = size >= 8 * MemoryConstants::gigaByte && Math::isPow2(size); - if (debugManager.flags.UseHighAlignmentForHeapExtended.get() != -1) { - alignSize = !!debugManager.flags.UseHighAlignmentForHeapExtended.get(); - } - - if (alignSize) { - pageSizeAlignment = Math::prevPowerOfTwo(size); - } - - *heap = HeapIndex::heapExtended; - } else { - pageSizeAlignment = alignmentBase.alignment; - *heap = alignmentBase.heap; + if (!(gfxPartition->getHeapLimit(HeapIndex::heapExtended) > 0)) { + useExtendedHeap = false; break; } } + + if (useExtendedHeap) { + auto alignSize = size >= 8 * MemoryConstants::gigaByte && Math::isPow2(size); + if (debugManager.flags.UseHighAlignmentForHeapExtended.get() != -1) { + alignSize = !!debugManager.flags.UseHighAlignmentForHeapExtended.get(); + } + + if (alignSize) { + pageSizeAlignment = Math::prevPowerOfTwo(size); + } + + *heap = HeapIndex::heapExtended; + } + return pageSizeAlignment; } AddressRange DrmMemoryManager::reserveGpuAddress(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex) { - return reserveGpuAddressOnHeap(requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, HeapIndex::heapStandard, MemoryConstants::pageSize64k); + return reserveGpuAddressOnHeap(requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, HeapIndex::heapStandard2MB, MemoryConstants::pageSize2M); } AddressRange DrmMemoryManager::reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t 
alignment) { uint64_t gpuVa = 0u; *reservedOnRootDeviceIndex = 0; + for (auto rootDeviceIndex : rootDeviceIndices) { - if (heap == HeapIndex::heapExtended) { - gpuVa = acquireGpuRangeWithCustomAlignment(size, rootDeviceIndex, heap, alignment); - } else { - gpuVa = acquireGpuRange(size, rootDeviceIndex, heap); - } + + gpuVa = requiredStartAddress == 0 ? acquireGpuRangeWithCustomAlignment(size, rootDeviceIndex, heap, alignment) : acquireGpuRangeWithCustomAlignmentWithStartAddressHint(requiredStartAddress, size, rootDeviceIndex, heap, alignment); if (gpuVa != 0u) { *reservedOnRootDeviceIndex = rootDeviceIndex; break; diff --git a/shared/source/os_interface/linux/drm_memory_manager.h b/shared/source/os_interface/linux/drm_memory_manager.h index c7fb7060ae..790851fbce 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.h +++ b/shared/source/os_interface/linux/drm_memory_manager.h @@ -77,6 +77,7 @@ class DrmMemoryManager : public MemoryManager { AddressRange reserveGpuAddress(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex) override; AddressRange reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) override; size_t selectAlignmentAndHeap(size_t size, HeapIndex *heap) override; + size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) override; void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override; AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) override; void freeCpuAddress(AddressRange addressRange) override; @@ -149,6 +150,7 @@ class DrmMemoryManager : public MemoryManager { void pushSharedBufferObject(BufferObject *bo); bool setDomainCpu(GraphicsAllocation &graphicsAllocation, bool writeEnable); MOCKABLE_VIRTUAL uint64_t 
acquireGpuRangeWithCustomAlignment(size_t &size, uint32_t rootDeviceIndex, HeapIndex heapIndex, size_t alignment); + MOCKABLE_VIRTUAL uint64_t acquireGpuRangeWithCustomAlignmentWithStartAddressHint(const uint64_t requiredStartAddress, size_t &size, uint32_t rootDeviceIndex, HeapIndex heapIndex, size_t alignment); void emitPinningRequest(BufferObject *bo, const AllocationData &allocationData) const; uint32_t getDefaultDrmContextId(uint32_t rootDeviceIndex) const; OsContextLinux *getDefaultOsContext(uint32_t rootDeviceIndex) const; diff --git a/shared/source/os_interface/windows/wddm/wddm.cpp b/shared/source/os_interface/windows/wddm/wddm.cpp index 51c08cbcb6..0a59beb760 100644 --- a/shared/source/os_interface/windows/wddm/wddm.cpp +++ b/shared/source/os_interface/windows/wddm/wddm.cpp @@ -1258,34 +1258,34 @@ bool Wddm::isGpuHangDetected(OsContext &osContext) { void Wddm::initGfxPartition(GfxPartition &outGfxPartition, uint32_t rootDeviceIndex, size_t numRootDevices, bool useExternalFrontWindowPool) const { if (gfxPartition.SVM.Limit != 0) { - outGfxPartition.heapInit(HeapIndex::heapSvm, gfxPartition.SVM.Base, gfxPartition.SVM.Limit - gfxPartition.SVM.Base + 1); + outGfxPartition.heapInitWithAllocationAlignment(HeapIndex::heapSvm, gfxPartition.SVM.Base, gfxPartition.SVM.Limit - gfxPartition.SVM.Base + 1, MemoryConstants::pageSize64k); } else if (is32bit) { - outGfxPartition.heapInit(HeapIndex::heapSvm, 0x0ull, 4 * MemoryConstants::gigaByte); + outGfxPartition.heapInitWithAllocationAlignment(HeapIndex::heapSvm, 0x0ull, 4 * MemoryConstants::gigaByte, MemoryConstants::pageSize64k); } - outGfxPartition.heapInit(HeapIndex::heapStandard, gfxPartition.Standard.Base, gfxPartition.Standard.Limit - gfxPartition.Standard.Base + 1); + outGfxPartition.heapInitWithAllocationAlignment(HeapIndex::heapStandard, gfxPartition.Standard.Base, gfxPartition.Standard.Limit - gfxPartition.Standard.Base + 1, MemoryConstants::pageSize64k); // Split HEAP_STANDARD64K among root devices auto 
gfxStandard64KBSize = alignDown((gfxPartition.Standard64KB.Limit - gfxPartition.Standard64KB.Base + 1) / numRootDevices, GfxPartition::heapGranularity); - outGfxPartition.heapInit(HeapIndex::heapStandard64KB, gfxPartition.Standard64KB.Base + rootDeviceIndex * gfxStandard64KBSize, gfxStandard64KBSize); + outGfxPartition.heapInitWithAllocationAlignment(HeapIndex::heapStandard64KB, gfxPartition.Standard64KB.Base + rootDeviceIndex * gfxStandard64KBSize, gfxStandard64KBSize, MemoryConstants::pageSize64k); for (auto heap : GfxPartition::heap32Names) { if (useExternalFrontWindowPool && HeapAssigner::heapTypeExternalWithFrontWindowPool(heap)) { outGfxPartition.heapInitExternalWithFrontWindow(heap, gfxPartition.Heap32[static_cast(heap)].Base, - gfxPartition.Heap32[static_cast(heap)].Limit - gfxPartition.Heap32[static_cast(heap)].Base + 1); + gfxPartition.Heap32[static_cast(heap)].Limit - gfxPartition.Heap32[static_cast(heap)].Base + 1, MemoryConstants::pageSize64k); size_t externalFrontWindowSize = GfxPartition::externalFrontWindowPoolSize; outGfxPartition.heapInitExternalWithFrontWindow(HeapAssigner::mapExternalWindowIndex(heap), outGfxPartition.heapAllocate(heap, externalFrontWindowSize), - externalFrontWindowSize); + externalFrontWindowSize, MemoryConstants::pageSize64k); } else if (HeapAssigner::isInternalHeap(heap)) { auto baseAddress = gfxPartition.Heap32[static_cast(heap)].Base >= minAddress ? 
gfxPartition.Heap32[static_cast(heap)].Base : minAddress; outGfxPartition.heapInitWithFrontWindow(heap, baseAddress, gfxPartition.Heap32[static_cast(heap)].Limit - baseAddress + 1, - GfxPartition::internalFrontWindowPoolSize); - outGfxPartition.heapInitFrontWindow(HeapAssigner::mapInternalWindowIndex(heap), baseAddress, GfxPartition::internalFrontWindowPoolSize); + GfxPartition::internalFrontWindowPoolSize, MemoryConstants::pageSize64k); + outGfxPartition.heapInitFrontWindow(HeapAssigner::mapInternalWindowIndex(heap), baseAddress, GfxPartition::internalFrontWindowPoolSize, MemoryConstants::pageSize64k); } else { - outGfxPartition.heapInit(heap, gfxPartition.Heap32[static_cast(heap)].Base, - gfxPartition.Heap32[static_cast(heap)].Limit - gfxPartition.Heap32[static_cast(heap)].Base + 1); + outGfxPartition.heapInitWithAllocationAlignment(heap, gfxPartition.Heap32[static_cast(heap)].Base, + gfxPartition.Heap32[static_cast(heap)].Limit - gfxPartition.Heap32[static_cast(heap)].Base + 1, MemoryConstants::pageSize64k); } } } diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index 5dfa5d506c..7a13006d3b 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -987,13 +987,30 @@ bool WddmMemoryManager::createWddmAllocation(WddmAllocation *allocation, void *r } size_t WddmMemoryManager::selectAlignmentAndHeap(size_t size, HeapIndex *heap) { - AlignmentSelector::CandidateAlignment alignment = alignmentSelector.selectAlignment(size); + return selectAlignmentAndHeap(0ULL, size, heap); +} + +size_t WddmMemoryManager::selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) { + + // Always default to heapStandard64KB. *heap = HeapIndex::heapStandard64KB; + + // If the user provides a start address, we try to find the heap and page size alignment based on that address. 
+ if (requiredStartAddress != 0ULL) { + auto rootDeviceIndex = 0u; + auto gfxPartition = getGfxPartition(rootDeviceIndex); + size_t pageSizeAlignment = 0; + if (gfxPartition->getHeapIndexAndPageSizeBasedOnAddress(requiredStartAddress, *heap, pageSizeAlignment)) { + return pageSizeAlignment; + } + } + + AlignmentSelector::CandidateAlignment alignment = alignmentSelector.selectAlignment(size); return alignment.alignment; } AddressRange WddmMemoryManager::reserveGpuAddress(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex) { - return reserveGpuAddressOnHeap(requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, HeapIndex::heapStandard64KB, MemoryConstants::pageSize64k); + return reserveGpuAddressOnHeap(requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, HeapIndex::heapStandard64KB, MemoryConstants::pageSize2M); } AddressRange WddmMemoryManager::reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) { diff --git a/shared/source/os_interface/windows/wddm_memory_manager.h b/shared/source/os_interface/windows/wddm_memory_manager.h index 8212307728..f2d86e7a66 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.h +++ b/shared/source/os_interface/windows/wddm_memory_manager.h @@ -65,6 +65,7 @@ class WddmMemoryManager : public MemoryManager, NEO::NonCopyableAndNonMovableCla AddressRange reserveGpuAddress(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex) override; AddressRange reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, const RootDeviceIndicesContainer &rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) override; size_t 
selectAlignmentAndHeap(size_t size, HeapIndex *heap) override; + size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) override; void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override; AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) override; void freeCpuAddress(AddressRange addressRange) override; diff --git a/shared/source/utilities/heap_allocator.cpp b/shared/source/utilities/heap_allocator.cpp index eb47eb41c5..0ec930049d 100644 --- a/shared/source/utilities/heap_allocator.cpp +++ b/shared/source/utilities/heap_allocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2024 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,6 +18,62 @@ bool operator<(const HeapChunk &hc1, const HeapChunk &hc2) { return hc1.ptr < hc2.ptr; } +uint64_t HeapAllocator::allocateWithCustomAlignmentWithStartAddressHint(const uint64_t requiredStartAddress, size_t &sizeToAllocate, size_t alignment) { + + if (alignment < this->allocationAlignment) { + alignment = this->allocationAlignment; + } + + UNRECOVERABLE_IF(alignment % allocationAlignment != 0); // custom alignment have to be a multiple of allocator alignment + sizeToAllocate = alignUp(sizeToAllocate, allocationAlignment); + + uint64_t ptrReturn = 0llu; + + { + std::lock_guard lock(mtx); + DBG_LOG(LogAllocationMemoryPool, __FUNCTION__, "Allocator usage == ", this->getUsage()); + if (availableSize < sizeToAllocate) { + return 0llu; + } + + if (requiredStartAddress >= pLeftBound && requiredStartAddress <= pRightBound) { + + const uint64_t misalignment = requiredStartAddress - pLeftBound; + if (pLeftBound + misalignment + sizeToAllocate <= pRightBound) { + if (misalignment) { + storeInFreedChunks(pLeftBound, static_cast(misalignment), freedChunksBig); + pLeftBound += misalignment; + } + ptrReturn = pLeftBound; + pLeftBound += sizeToAllocate; + availableSize -= sizeToAllocate; + } + 
} else { // Try to find in freed chunks + + defragment(); + + if (requiredStartAddress < this->pLeftBound) { + // If between baseAddress and pLeftBound, get from freedChunksBig + ptrReturn = getFromFreedChunksWithStartAddressHint(requiredStartAddress, sizeToAllocate, freedChunksBig); + } else { + // If between pRightBound and heapLimit, get from freedChunksSmall + ptrReturn = getFromFreedChunksWithStartAddressHint(requiredStartAddress, sizeToAllocate, freedChunksSmall); + } + + if (ptrReturn != 0llu) { + availableSize -= sizeToAllocate; + } + } + } + + if (ptrReturn == 0llu) { + return allocateWithCustomAlignment(sizeToAllocate, alignment); + } + + UNRECOVERABLE_IF(!isAligned(ptrReturn, alignment)); + return ptrReturn; +} + uint64_t HeapAllocator::allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment) { if (alignment < this->allocationAlignment) { alignment = this->allocationAlignment; @@ -73,7 +129,7 @@ uint64_t HeapAllocator::allocateWithCustomAlignment(size_t &sizeToAllocate, size } else { availableSize -= sizeToAllocate; } - DEBUG_BREAK_IF(!isAligned(ptrReturn, alignment)); + UNRECOVERABLE_IF(!isAligned(ptrReturn, alignment)); return ptrReturn; } @@ -115,6 +171,43 @@ double HeapAllocator::getUsage() const { return static_cast(size - availableSize) / size; } +uint64_t HeapAllocator::getFromFreedChunksWithStartAddressHint(const uint64_t requiredStartAddress, size_t size, std::vector &freedChunks) { + + for (size_t i = 0; i < freedChunks.size(); i++) { + uint64_t chunkStart = freedChunks[i].ptr; + uint64_t chunkEnd = chunkStart + freedChunks[i].size; + + if (requiredStartAddress >= chunkStart && requiredStartAddress + size <= chunkEnd) { + size_t leadingSize = static_cast(requiredStartAddress - chunkStart); + size_t trailingSize = static_cast(chunkEnd - (requiredStartAddress + size)); + + // Chunk splitting + if (leadingSize > 0) { + + freedChunks[i].size = leadingSize; + + if (trailingSize > 0) { + freedChunks.emplace_back(requiredStartAddress + 
size, trailingSize); + } + } else { + + if (trailingSize > 0) { + + freedChunks[i].ptr = requiredStartAddress + size; + freedChunks[i].size = trailingSize; + } else { + + freedChunks.erase(freedChunks.begin() + i); + } + } + + return requiredStartAddress; + } + } + + return 0llu; +} + uint64_t HeapAllocator::getFromFreedChunks(size_t size, std::vector &freedChunks, size_t &sizeOfFreedChunk, size_t requiredAlignment) { size_t elements = freedChunks.size(); size_t bestFitIndex = -1; diff --git a/shared/source/utilities/heap_allocator.h b/shared/source/utilities/heap_allocator.h index ee2fae6345..42bd1c6721 100644 --- a/shared/source/utilities/heap_allocator.h +++ b/shared/source/utilities/heap_allocator.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -44,6 +44,11 @@ class HeapAllocator { return allocateWithCustomAlignment(sizeToAllocate, 0u); } + uint64_t allocateWithStartAddressHint(const uint64_t requiredStartAddress, size_t &sizeToAllocate) { + return allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, sizeToAllocate, 0u); + } + + uint64_t allocateWithCustomAlignmentWithStartAddressHint(const uint64_t requiredStartAddress, size_t &sizeToAllocate, size_t alignment); uint64_t allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment); MOCKABLE_VIRTUAL void free(uint64_t ptr, size_t size); @@ -62,6 +67,10 @@ class HeapAllocator { return this->baseAddress; } + size_t getAllocationAlignment() const { + return this->allocationAlignment; + } + protected: const uint64_t baseAddress; const uint64_t size; @@ -76,6 +85,7 @@ class HeapAllocator { std::mutex mtx; uint64_t getFromFreedChunks(size_t size, std::vector &freedChunks, size_t &sizeOfFreedChunk, size_t requiredAlignment); + MOCKABLE_VIRTUAL uint64_t getFromFreedChunksWithStartAddressHint(const uint64_t requiredStartAddress, size_t size, std::vector &freedChunks); void 
storeInFreedChunks(uint64_t ptr, size_t size, std::vector &freedChunks) { for (auto &freedChunk : freedChunks) { diff --git a/shared/test/common/mocks/mock_gfx_partition.h b/shared/test/common/mocks/mock_gfx_partition.h index d2581ee783..a3c259994c 100644 --- a/shared/test/common/mocks/mock_gfx_partition.h +++ b/shared/test/common/mocks/mock_gfx_partition.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2024 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -13,6 +13,7 @@ using namespace NEO; class MockGfxPartition : public GfxPartition { public: + using GfxPartition::isAddressInHeapRange; using GfxPartition::osMemory; MockGfxPartition() : GfxPartition(reservedCpuAddressRange) {} diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index a4acdeec01..be30b21d88 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -556,8 +556,23 @@ TEST_F(DeviceGetCapsTest, givenFlagEnabled64kbPagesWhenCallConstructorMemoryMana return {}; } size_t selectAlignmentAndHeap(size_t size, HeapIndex *heap) override { - *heap = HeapIndex::heapStandard; - return MemoryConstants::pageSize64k; + return selectAlignmentAndHeap(0ULL, size, heap); + } + size_t selectAlignmentAndHeap(const uint64_t requiredStartAddress, size_t size, HeapIndex *heap) override { + + // Always default to HEAP STANDARD 2MB. + *heap = HeapIndex::heapStandard2MB; + size_t pageSizeAlignment = MemoryConstants::pageSize2M; + + // If the user provides a start address, we try to find the heap and page size alignment based on that address. 
+ if (requiredStartAddress != 0ULL) { + auto rootDeviceIndex = 0u; + auto gfxPartition = getGfxPartition(rootDeviceIndex); + if (gfxPartition->getHeapIndexAndPageSizeBasedOnAddress(requiredStartAddress, *heap, pageSizeAlignment)) { + return pageSizeAlignment; + } + } + return pageSizeAlignment; } void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{}; AddressRange reserveCpuAddress(const uint64_t requiredStartAddress, size_t size) override { return {}; } diff --git a/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp b/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp index 962581e4c0..184d948f4a 100644 --- a/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp +++ b/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp @@ -66,6 +66,23 @@ using namespace NEO; constexpr size_t reservedCpuAddressRangeSize = static_cast(is64bit ? (6 * 4 * MemoryConstants::gigaByte) : 0); constexpr uint64_t sizeHeap32 = 4 * MemoryConstants::gigaByte; +TEST(GfxPartitionTest, GivenHeapAndAddressInGfxPartitionThenIsAddressInHeapRangeCorrectlyReturns) { + MockGfxPartition gfxPartition; + uint64_t gfxTop = maxNBitValue(48) + 1; + gfxPartition.init(maxNBitValue(48), reservedCpuAddressRangeSize, 0, 1, false, 0u, gfxTop); + + auto heap = HeapIndex::heapStandard64KB; + + auto heapBase = gfxPartition.getHeapBase(heap); + auto heapLimit = gfxPartition.getHeapLimit(heap); + + EXPECT_FALSE(gfxPartition.isAddressInHeapRange(heap, heapBase - 1)); + EXPECT_TRUE(gfxPartition.isAddressInHeapRange(heap, heapBase)); + EXPECT_TRUE(gfxPartition.isAddressInHeapRange(heap, heapBase + MemoryConstants::pageSize)); + EXPECT_TRUE(gfxPartition.isAddressInHeapRange(heap, heapLimit)); + EXPECT_FALSE(gfxPartition.isAddressInHeapRange(heap, heapLimit + 1)); +} + void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t gfxTop, uint64_t svmTop) { if (svmTop) { // SVM should be initialized @@ -130,7 +147,10 @@ void 
testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t } const bool isInternalHeapType = heap == HeapIndex::heapInternal || heap == HeapIndex::heapInternalDeviceMemory; - const auto heapGranularity = (heap == HeapIndex::heapStandard2MB) ? GfxPartition::heapGranularity2MB : GfxPartition::heapGranularity; + auto heapGranularity = (heap == HeapIndex::heapSvm || heap == HeapIndex::heapStandard2MB) ? GfxPartition::heapGranularity2MB : GfxPartition::heapGranularity; + if (is32bit && heap == HeapIndex::heapSvm) { + heapGranularity = GfxPartition::heapGranularity; + } if (heap == HeapIndex::heapSvm) { EXPECT_EQ(gfxPartition.getHeapMinimalAddress(heap), gfxPartition.getHeapBase(heap)); @@ -159,6 +179,12 @@ void testGfxPartition(MockGfxPartition &gfxPartition, uint64_t gfxBase, uint64_t EXPECT_EQ(ptrSmall, gfxPartition.getHeapBase(heap) + gfxPartition.getHeapSize(heap) - heapGranularity - sizeSmall); gfxPartition.heapFree(heap, ptrSmall, sizeSmall); + + uint64_t requiredStartAddress = gfxPartition.getHeapBase(heap) + MemoryConstants::pageSize2M; + auto ptrSmallWithHint = gfxPartition.heapAllocateWithStartAddressHint(requiredStartAddress, heap, sizeSmall); + EXPECT_NE(ptrSmallWithHint, 0ull); + EXPECT_EQ(ptrSmallWithHint, requiredStartAddress); + gfxPartition.heapFree(heap, ptrSmallWithHint, sizeSmall); } } diff --git a/shared/test/unit_test/memory_manager/memory_manager_tests.cpp b/shared/test/unit_test/memory_manager/memory_manager_tests.cpp index a55e282124..6f5bdccddc 100644 --- a/shared/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -289,16 +289,91 @@ TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenGpuAddressIsReserv auto gmmHelper = memoryManager.getGmmHelper(0); HeapIndex heap = HeapIndex::heapStandard64KB; auto alignment = memoryManager.selectAlignmentAndHeap(MemoryConstants::pageSize, &heap); - EXPECT_EQ(heap, HeapIndex::heapStandard); - 
EXPECT_EQ(MemoryConstants::pageSize64k, alignment); + EXPECT_EQ(heap, HeapIndex::heapStandard2MB); + EXPECT_EQ(MemoryConstants::pageSize2M, alignment); auto addressRange = memoryManager.reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, heap, alignment); EXPECT_EQ(0u, rootDeviceIndexReserved); - EXPECT_LE(memoryManager.getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager.getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager.freeGpuAddress(addressRange, 0); } +TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenGpuAddressIsReservedWithStartAddressOnSpecifiedHeapAndFreedThenAddressFromGfxPartitionIsUsed) { + MockExecutionEnvironment executionEnvironment; + OsAgnosticMemoryManager memoryManager(executionEnvironment); + RootDeviceIndicesContainer rootDeviceIndices; + rootDeviceIndices.pushUnique(0); + uint32_t rootDeviceIndexReserved = 10; + auto gmmHelper = memoryManager.getGmmHelper(0); + + const auto &gfxPartition = memoryManager.getGfxPartition(rootDeviceIndices[0]); + HeapIndex heap = HeapIndex::heapStandard64KB; + auto heapBase = gfxPartition->getHeapBase(heap); + const uint64_t requiredStartAddress = heapBase; + + auto alignment = memoryManager.selectAlignmentAndHeap(requiredStartAddress, MemoryConstants::pageSize, &heap); + EXPECT_EQ(heap, HeapIndex::heapStandard64KB); + EXPECT_EQ(alignment, MemoryConstants::pageSize64k); + auto addressRange = memoryManager.reserveGpuAddressOnHeap(requiredStartAddress, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, heap, alignment); + 
EXPECT_EQ(0u, rootDeviceIndexReserved); + EXPECT_NE(requiredStartAddress, addressRange.address); + EXPECT_LE(memoryManager.getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard64KB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard64KB), gmmHelper->decanonize(addressRange.address)); + + memoryManager.freeGpuAddress(addressRange, 0); + + addressRange = memoryManager.reserveGpuAddressOnHeap(requiredStartAddress, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, heap, alignment); + EXPECT_EQ(0u, rootDeviceIndexReserved); + EXPECT_NE(requiredStartAddress, addressRange.address); + EXPECT_LE(memoryManager.getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard64KB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard64KB), gmmHelper->decanonize(addressRange.address)); +} + +TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenGpuAddressIsReservedWithInvalidStartAddressOnSpecifiedHeapAndFreedThenSomeOtherAddressIsUsed) { + + MockExecutionEnvironment executionEnvironment; + OsAgnosticMemoryManager memoryManager(executionEnvironment); + RootDeviceIndicesContainer rootDeviceIndices; + rootDeviceIndices.pushUnique(0); + uint32_t rootDeviceIndexReserved = 10; + auto gmmHelper = memoryManager.getGmmHelper(0); + + const auto &gfxPartition = memoryManager.getGfxPartition(rootDeviceIndices[0]); + HeapIndex heap = HeapIndex::heapStandard64KB; + auto heapLimit = gfxPartition->getHeapLimit(heap); + const uint64_t requiredStartAddress = heapLimit + 64; + size_t alignment = MemoryConstants::pageSize64k; + + auto addressRange = memoryManager.reserveGpuAddressOnHeap(requiredStartAddress, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, heap, alignment); + EXPECT_EQ(0u, rootDeviceIndexReserved); + EXPECT_NE(requiredStartAddress, addressRange.address); + 
EXPECT_LE(memoryManager.getGfxPartition(0)->getHeapBase(heap), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager.getGfxPartition(0)->getHeapLimit(heap), gmmHelper->decanonize(addressRange.address)); + + memoryManager.freeGpuAddress(addressRange, 0); +} + +TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenSelectAlignmentAndHeapWithInvalidStartAddressOnSpecifiedHeapIsCalledThenDefaultHeapAndAlignmentIsUsed) { + MockExecutionEnvironment executionEnvironment; + OsAgnosticMemoryManager memoryManager(executionEnvironment); + RootDeviceIndicesContainer rootDeviceIndices; + rootDeviceIndices.pushUnique(0); + + const auto &gfxPartition = memoryManager.getGfxPartition(rootDeviceIndices[0]); + + uint64_t maxHeapLimit = 0; + for (uint32_t heapIndex = static_cast(HeapIndex::heapInternalDeviceMemory); heapIndex < static_cast(HeapIndex::totalHeaps); ++heapIndex) { + maxHeapLimit = std::max(maxHeapLimit, gfxPartition->getHeapLimit(static_cast(heapIndex))); + } + const uint64_t requiredStartAddress = maxHeapLimit + 64; + + HeapIndex heap = HeapIndex::heapStandard64KB; + auto alignment = memoryManager.selectAlignmentAndHeap(requiredStartAddress, MemoryConstants::pageSize, &heap); + EXPECT_EQ(heap, HeapIndex::heapStandard2MB); + EXPECT_EQ(alignment, MemoryConstants::pageSize2M); +} + TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenGpuAddressIsReservedAndFreedThenAddressFromGfxPartitionIsUsed) { MockExecutionEnvironment executionEnvironment; OsAgnosticMemoryManager memoryManager(executionEnvironment); @@ -306,17 +381,17 @@ TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenGpuAddressIsReserv rootDeviceIndices.pushUnique(0); uint32_t rootDeviceIndexReserved = 10; auto gmmHelper = memoryManager.getGmmHelper(0); - auto addressRange = memoryManager.reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard, MemoryConstants::pageSize64k); + auto addressRange = 
memoryManager.reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); EXPECT_EQ(0u, rootDeviceIndexReserved); - EXPECT_LE(memoryManager.getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager.getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager.freeGpuAddress(addressRange, 0); addressRange = memoryManager.reserveGpuAddress(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved); EXPECT_EQ(0u, rootDeviceIndexReserved); - EXPECT_LE(memoryManager.getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager.getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager.freeGpuAddress(addressRange, 0); } @@ -328,17 +403,17 @@ TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenGpuAddressIsReserv rootDeviceIndices.pushUnique(1); uint32_t rootDeviceIndexReserved = 10; auto gmmHelper = memoryManager.getGmmHelper(1); - auto addressRange = memoryManager.reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard, MemoryConstants::pageSize64k); + auto addressRange = 
memoryManager.reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); EXPECT_EQ(1u, rootDeviceIndexReserved); - EXPECT_LE(memoryManager.getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager.getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager.getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager.getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager.freeGpuAddress(addressRange, 1); addressRange = memoryManager.reserveGpuAddress(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved); EXPECT_EQ(1u, rootDeviceIndexReserved); - EXPECT_LE(memoryManager.getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager.getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager.getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager.getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager.freeGpuAddress(addressRange, 1); } @@ -350,10 +425,10 @@ TEST(OsAgnosticMemoryManager, givenOsAgnosticMemoryManagerWhenGpuAddressReservat rootDeviceIndices.pushUnique(0); uint32_t rootDeviceIndexReserved = 10; // emulate GPU address space exhaust - memoryManager.getGfxPartition(0)->heapInit(HeapIndex::heapStandard, 0x0, 0x10000); - auto addressRange = memoryManager.reserveGpuAddressOnHeap(0ull, (size_t)(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard) * 2), 
rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard, MemoryConstants::pageSize64k); + memoryManager.getGfxPartition(0)->heapInit(HeapIndex::heapStandard2MB, 0x0, 0x10000); + auto addressRange = memoryManager.reserveGpuAddressOnHeap(0ull, (size_t)(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard2MB) * 2), rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); EXPECT_EQ(static_cast(addressRange.address), 0); - addressRange = memoryManager.reserveGpuAddress(0ull, (size_t)(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard) * 2), rootDeviceIndices, &rootDeviceIndexReserved); + addressRange = memoryManager.reserveGpuAddress(0ull, (size_t)(memoryManager.getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard2MB) * 2), rootDeviceIndices, &rootDeviceIndexReserved); EXPECT_EQ(static_cast(addressRange.address), 0); } diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index a968978118..2b2a7cff73 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -324,18 +324,18 @@ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGp rootDeviceIndices.pushUnique(1); uint32_t rootDeviceIndexReserved = 0; auto gmmHelper = memoryManager->getGmmHelper(1); - auto addressRange = memoryManager->reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard, MemoryConstants::pageSize64k); + auto addressRange = memoryManager->reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); EXPECT_EQ(rootDeviceIndexReserved, 1u); - 
EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager->freeGpuAddress(addressRange, 1); addressRange = memoryManager->reserveGpuAddress(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved); EXPECT_EQ(rootDeviceIndexReserved, 1u); - EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager->freeGpuAddress(addressRange, 1); } @@ -345,19 +345,49 @@ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGp rootDeviceIndices.pushUnique(0); uint32_t rootDeviceIndexReserved = 1; auto gmmHelper = memoryManager->getGmmHelper(0); - auto addressRange = memoryManager->reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard, MemoryConstants::pageSize64k); + auto addressRange = memoryManager->reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); - 
EXPECT_LE(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager->freeGpuAddress(addressRange, 0); addressRange = memoryManager->reserveGpuAddress(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved); - EXPECT_LE(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager->freeGpuAddress(addressRange, 0); } +TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGpuAddressIsReservedWithStartAddressOnSpecifiedHeapAndFreedThenAddressFromGfxPartitionIsUsed) { + auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); + RootDeviceIndicesContainer rootDeviceIndices; + rootDeviceIndices.pushUnique(0); + uint32_t rootDeviceIndexReserved = 10; + auto gmmHelper = memoryManager->getGmmHelper(0); + + const auto &gfxPartition = memoryManager->getGfxPartition(rootDeviceIndices[0]); + HeapIndex heap = HeapIndex::heapStandard64KB; + auto heapBase = gfxPartition->getHeapBase(heap); + const uint64_t requiredStartAddress = heapBase; + + auto alignment = 
memoryManager->selectAlignmentAndHeap(requiredStartAddress, MemoryConstants::pageSize, &heap); + EXPECT_EQ(heap, HeapIndex::heapStandard64KB); + EXPECT_EQ(alignment, MemoryConstants::pageSize64k); + auto addressRange = memoryManager->reserveGpuAddressOnHeap(requiredStartAddress, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, heap, alignment); + EXPECT_EQ(0u, rootDeviceIndexReserved); + EXPECT_NE(requiredStartAddress, addressRange.address); + EXPECT_LE(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard64KB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard64KB), gmmHelper->decanonize(addressRange.address)); + + memoryManager->freeGpuAddress(addressRange, 0); + + addressRange = memoryManager->reserveGpuAddressOnHeap(requiredStartAddress, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, heap, alignment); + EXPECT_EQ(0u, rootDeviceIndexReserved); + EXPECT_NE(requiredStartAddress, addressRange.address); + EXPECT_LE(memoryManager->getGfxPartition(0)->getHeapBase(HeapIndex::heapStandard64KB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard64KB), gmmHelper->decanonize(addressRange.address)); +} + TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDebugVariableToDisableAddressAlignmentAndCallToSelectAlignmentAndHeapWithPow2MemoryThenAlignmentIs2Mb) { if (!memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::heapExtended)) { GTEST_SKIP(); @@ -438,15 +468,60 @@ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGp if (gfxPartition->getHeapLimit(HeapIndex::heapExtended) > 0) { EXPECT_EQ(heap, HeapIndex::heapExtended); } else { - EXPECT_EQ(heap, HeapIndex::heapStandard64KB); + EXPECT_EQ(heap, HeapIndex::heapStandard2MB); } - EXPECT_EQ(MemoryConstants::pageSize64k, alignment); + 
// A start-address hint inside heapStandard should be resolved by
// selectAlignmentAndHeap to that same heap, and the subsequent reservation on
// that heap must succeed.
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGpuAddressReservationWithStartAddressIsAttemptedWithTheQuieriedHeapThenSuccessReturned) {
    auto memoryManager = std::make_unique<TestedDrmMemoryManager>(false, true, false, *executionEnvironment);
    RootDeviceIndicesContainer rootDeviceIndices;
    rootDeviceIndices.pushUnique(0);
    uint32_t rootDeviceIndexReserved = 1;

    HeapIndex heap = HeapIndex::heapStandard;
    auto gfxPartition = memoryManager->getGfxPartition(0);
    EXPECT_NE(gfxPartition->getHeapLimit(heap), 0u);
    auto heapBase = gfxPartition->getHeapBase(heap);
    // Hint a small offset above the heap base so it is valid but not the base itself.
    const uint64_t requiredStartAddress = heapBase + 64;

    auto alignment = memoryManager->selectAlignmentAndHeap(requiredStartAddress, MemoryConstants::pageSize, &heap);
    EXPECT_EQ(heap, HeapIndex::heapStandard);
    EXPECT_EQ(MemoryConstants::pageSize, alignment);

    auto addressRange = memoryManager->reserveGpuAddressOnHeap(requiredStartAddress, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, heap, alignment);
    EXPECT_NE(addressRange.address, 0u);
    EXPECT_NE(addressRange.size, 0u);
    memoryManager->freeGpuAddress(addressRange, 0);
}
static_cast(HeapIndex::heapInternalDeviceMemory); heapIndex < static_cast(HeapIndex::totalHeaps); ++heapIndex) { + maxHeapLimit = std::max(maxHeapLimit, gfxPartition->getHeapLimit(static_cast(heapIndex))); + } + const uint64_t requiredStartAddress = maxHeapLimit + 64; + + HeapIndex heap = HeapIndex::heapStandard64KB; + auto alignment = memoryManager->selectAlignmentAndHeap(requiredStartAddress, MemoryConstants::pageSize, &heap); + if (gfxPartition->getHeapLimit(HeapIndex::heapExtended) > 0) { + EXPECT_EQ(heap, HeapIndex::heapExtended); + } else { + EXPECT_EQ(heap, HeapIndex::heapStandard2MB); + } + EXPECT_EQ(alignment, MemoryConstants::pageSize2M); +} + TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGpuAddressReservationIsAttemptedWithSizeEqualToHalfOfHeapLimitThenSuccessReturned) { auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); auto gfxPartition = memoryManager->getGfxPartition(0); @@ -512,7 +587,7 @@ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGp RootDeviceIndicesContainer rootDeviceIndices; rootDeviceIndices.pushUnique(0); uint32_t rootDeviceIndexReserved = 1; - auto addressRange = memoryManager->reserveGpuAddressOnHeap(0x1234, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard, MemoryConstants::pageSize64k); + auto addressRange = memoryManager->reserveGpuAddressOnHeap(0x1234, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); EXPECT_NE(static_cast(addressRange.address), 0x1234); EXPECT_NE(static_cast(addressRange.size), 0); memoryManager->freeGpuAddress(addressRange, 0); @@ -522,6 +597,28 @@ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGp memoryManager->freeGpuAddress(addressRange, 0); } +TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, 
givenDrmMemoryManagerWhenGpuAddressIsReservedWithInvalidStartAddressOnSpecifiedHeapAndFreedThenSomeOtherAddressIsUsed) { + auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); + RootDeviceIndicesContainer rootDeviceIndices; + rootDeviceIndices.pushUnique(0); + uint32_t rootDeviceIndexReserved = 10; + auto gmmHelper = memoryManager->getGmmHelper(0); + + const auto &gfxPartition = memoryManager->getGfxPartition(rootDeviceIndices[0]); + HeapIndex heap = HeapIndex::heapStandard64KB; + auto heapLimit = gfxPartition->getHeapLimit(heap); + const uint64_t requiredStartAddress = heapLimit + 64; + size_t alignment = MemoryConstants::pageSize64k; + + auto addressRange = memoryManager->reserveGpuAddressOnHeap(requiredStartAddress, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, heap, alignment); + EXPECT_EQ(0u, rootDeviceIndexReserved); + EXPECT_NE(requiredStartAddress, addressRange.address); + EXPECT_LE(memoryManager->getGfxPartition(0)->getHeapBase(heap), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(0)->getHeapLimit(heap), gmmHelper->decanonize(addressRange.address)); + + memoryManager->freeGpuAddress(addressRange, 0); +} + TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGpuAddressReservationIsAttemptedWhichFailsThenNullRangeReturned) { auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); RootDeviceIndicesContainer rootDeviceIndices; @@ -529,27 +626,28 @@ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGp uint32_t rootDeviceIndexReserved = 1; // emulate GPU address space exhaust memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); - memoryManager->getGfxPartition(0)->heapInit(HeapIndex::heapStandard, 0x0, 0x10000); - size_t invalidSize = (size_t)memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard) + MemoryConstants::pageSize; - auto addressRange = 
memoryManager->reserveGpuAddressOnHeap(0ull, invalidSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard, MemoryConstants::pageSize64k); + memoryManager->getGfxPartition(0)->heapInit(HeapIndex::heapStandard2MB, 0x0, 0x10000); + size_t invalidSize = (size_t)memoryManager->getGfxPartition(0)->getHeapLimit(HeapIndex::heapStandard2MB) + MemoryConstants::pageSize; + auto addressRange = memoryManager->reserveGpuAddressOnHeap(0ull, invalidSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); EXPECT_EQ(static_cast(addressRange.address), 0); addressRange = memoryManager->reserveGpuAddress(0ull, invalidSize, rootDeviceIndices, &rootDeviceIndexReserved); EXPECT_EQ(static_cast(addressRange.address), 0); } -TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenHeapAndAlignmentRequestedWithoutAllExtendedHeapsForRootDevicesThenHeapStandardReturned) { +TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenHeapAndAlignmentRequestedWithoutAllExtendedHeapsForRootDevicesThenHeapStandard2MBReturned) { auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); // emulate GPU address space exhaust memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); memoryManager->getGfxPartition(0)->heapInit(HeapIndex::heapExtended, 0x11000, 0x10000); memoryManager->getGfxPartition(1)->heapInit(HeapIndex::heapStandard, 0, 0x10000); + memoryManager->getGfxPartition(1)->heapInit(HeapIndex::heapStandard2MB, 0, 0x10000); memoryManager->getGfxPartition(1)->heapInit(HeapIndex::heapExtended, 0, 0); auto size = MemoryConstants::pageSize64k; HeapIndex heap = HeapIndex::heapStandard; auto alignment = memoryManager->selectAlignmentAndHeap(size, &heap); - EXPECT_EQ(heap, HeapIndex::heapStandard64KB); - EXPECT_EQ(MemoryConstants::pageSize64k, alignment); + EXPECT_EQ(heap, HeapIndex::heapStandard2MB); + EXPECT_EQ(MemoryConstants::pageSize2M, alignment); } 
TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenSmallSizeAndGpuAddressSetWhenGraphicsMemoryIsAllocatedThenAllocationWithSpecifiedGpuAddressInSystemMemoryIsCreated) { @@ -8649,34 +8747,58 @@ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGp rootDeviceIndices.pushUnique(1); uint32_t rootDeviceIndexReserved = 0; auto gmmHelper = memoryManager->getGmmHelper(1); - auto addressRange = memoryManager->reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard, MemoryConstants::pageSize64k); + auto addressRange = memoryManager->reserveGpuAddressOnHeap(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); EXPECT_EQ(rootDeviceIndexReserved, 1u); - EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); uint64_t requiredAddr = addressRange.address; memoryManager->freeGpuAddress(addressRange, 1); - addressRange = memoryManager->reserveGpuAddressOnHeap(requiredAddr, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard, MemoryConstants::pageSize64k); + addressRange = memoryManager->reserveGpuAddressOnHeap(requiredAddr, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); EXPECT_EQ(rootDeviceIndexReserved, 1u); EXPECT_EQ(addressRange.address, requiredAddr); - 
EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager->freeGpuAddress(addressRange, 1); addressRange = memoryManager->reserveGpuAddress(0ull, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved); EXPECT_EQ(rootDeviceIndexReserved, 1u); - EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); requiredAddr = addressRange.address; memoryManager->freeGpuAddress(addressRange, 1); addressRange = memoryManager->reserveGpuAddress(requiredAddr, MemoryConstants::pageSize, rootDeviceIndices, &rootDeviceIndexReserved); EXPECT_EQ(rootDeviceIndexReserved, 1u); EXPECT_EQ(addressRange.address, requiredAddr); - EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); - EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard), gmmHelper->decanonize(addressRange.address)); + EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + 
EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + memoryManager->freeGpuAddress(addressRange, 1); +} + +TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGpuAddressReservationIsAttemptedWithSizeGreaterThanSizeThreshold4MBInAKnownRegionThenRequiredAddressIsFoundAndUsed) { + auto memoryManager = std::make_unique(false, true, false, *executionEnvironment); + RootDeviceIndicesContainer rootDeviceIndices; + rootDeviceIndices.pushUnique(1); + uint32_t rootDeviceIndexReserved = 0; + auto gmmHelper = memoryManager->getGmmHelper(1); + + size_t sizeToAllocate = 128 * MemoryConstants::pageSize64k; // 8MB = 128 * 64KB + auto addressRange = memoryManager->reserveGpuAddressOnHeap(0ull, sizeToAllocate, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); + + EXPECT_EQ(rootDeviceIndexReserved, 1u); + EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + uint64_t requiredAddr = addressRange.address + sizeToAllocate; + memoryManager->freeGpuAddress(addressRange, 1); + addressRange = memoryManager->reserveGpuAddressOnHeap(requiredAddr, sizeToAllocate, rootDeviceIndices, &rootDeviceIndexReserved, NEO::HeapIndex::heapStandard2MB, MemoryConstants::pageSize64k); + + EXPECT_EQ(rootDeviceIndexReserved, 1u); + EXPECT_EQ(addressRange.address, requiredAddr); + EXPECT_LE(memoryManager->getGfxPartition(1)->getHeapBase(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); + EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::heapStandard2MB), gmmHelper->decanonize(addressRange.address)); memoryManager->freeGpuAddress(addressRange, 1); } diff --git 
a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp index 2744d6b121..76a3c6ea1e 100644 --- a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp @@ -2683,6 +2683,48 @@ class WddmMemoryManagerTest : public ::Test { const uint32_t rootDeviceIndex = 0u; }; +TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenSelectAlignmentWithInvalidStartAddressHintThenDefaultHeapAndAlignmentBasedOnSizeReturned) { + const auto &gfxPartition = memoryManager->getGfxPartition(rootDeviceIndex); + uint64_t maxHeapLimit = 0; + for (uint32_t heapIndex = static_cast(HeapIndex::heapInternalDeviceMemory); heapIndex < static_cast(HeapIndex::totalHeaps); ++heapIndex) { + maxHeapLimit = std::max(maxHeapLimit, gfxPartition->getHeapLimit(static_cast(heapIndex))); + } + + uint64_t requiredStartAddress = maxHeapLimit + 64; + + size_t size = 16 * MemoryConstants::megaByte; + HeapIndex heap = HeapIndex::heapStandard; + auto alignment = memoryManager->selectAlignmentAndHeap(requiredStartAddress, size, &heap); + EXPECT_EQ(heap, HeapIndex::heapStandard64KB); + EXPECT_EQ(MemoryConstants::pageSize2M, alignment); + + size = MemoryConstants::pageSize64k; + heap = HeapIndex::heapStandard; + alignment = memoryManager->selectAlignmentAndHeap(requiredStartAddress, size, &heap); + EXPECT_EQ(heap, HeapIndex::heapStandard64KB); + EXPECT_EQ(MemoryConstants::pageSize64k, alignment); + + size = MemoryConstants::pageSize; + heap = HeapIndex::heapStandard; + alignment = memoryManager->selectAlignmentAndHeap(requiredStartAddress, size, &heap); + EXPECT_EQ(heap, HeapIndex::heapStandard64KB); + EXPECT_EQ(MemoryConstants::pageSize64k, alignment); +} + +TEST_F(WddmMemoryManagerTest, givenWddmMemoryManagerWhenSelectAlignmentWithValidStartAddressHintThenCorrectHeapAndAlignmentSizeReturned) { + const auto &gfxPartition = 
memoryManager->getGfxPartition(rootDeviceIndex); + HeapIndex heap = HeapIndex::heapStandard64KB; + auto isInit = gfxPartition->isHeapInitialized(heap); + EXPECT_TRUE(isInit); + + auto heapBase = gfxPartition->getHeapBase(heap); + const uint64_t requiredStartAddress = heapBase; + + auto alignment = memoryManager->selectAlignmentAndHeap(requiredStartAddress, MemoryConstants::pageSize, &heap); + EXPECT_EQ(heap, HeapIndex::heapStandard64KB); + EXPECT_EQ(alignment, MemoryConstants::pageSize64k); +} + TEST_F(WddmMemoryManagerTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhencreateWddmAllocationFailsThenGraphicsAllocationIsNotCreated) { char hostPtr[64]; memoryManager->setDeferredDeleter(nullptr); diff --git a/shared/test/unit_test/utilities/heap_allocator_tests.cpp b/shared/test/unit_test/utilities/heap_allocator_tests.cpp index ae572a6472..e4985a5f79 100644 --- a/shared/test/unit_test/utilities/heap_allocator_tests.cpp +++ b/shared/test/unit_test/utilities/heap_allocator_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -29,18 +29,41 @@ class HeapAllocatorUnderTest : public HeapAllocator { uint64_t getRightBound() const { return this->pRightBound; } uint64_t getavailableSize() const { return this->availableSize; } size_t getThresholdSize() const { return this->sizeThreshold; } + + using HeapAllocator::allocationAlignment; + using HeapAllocator::availableSize; using HeapAllocator::defragment; + using HeapAllocator::pLeftBound; + using HeapAllocator::pRightBound; + + uint64_t getFromFreedChunksWithStartAddressHint(const uint64_t requiredStartAddress, size_t size, std::vector &freedChunks) { + if (failGetFromFreedChunksWithStartAddressHintCall) { + return 0ULL; + } + + if (getFromFreedChunksWithStartAddressHintCall) { + return HeapAllocator::getFromFreedChunksWithStartAddressHint(requiredStartAddress, size, freedChunks); + } + + return 
allocationAlignment; + } uint64_t getFromFreedChunks(size_t size, std::vector &vec, size_t requiredAlignment) { return HeapAllocator::getFromFreedChunks(size, vec, sizeOfFreedChunk, requiredAlignment); } + + void free(uint64_t ptr, size_t size) { + HeapAllocator::free(ptr, size); + } + void storeInFreedChunks(uint64_t ptr, size_t size, std::vector &vec) { return HeapAllocator::storeInFreedChunks(ptr, size, vec); } std::vector &getFreedChunksSmall() { return this->freedChunksSmall; }; std::vector &getFreedChunksBig() { return this->freedChunksBig; }; - using HeapAllocator::allocationAlignment; size_t sizeOfFreedChunk = 0; + bool getFromFreedChunksWithStartAddressHintCall = true; + bool failGetFromFreedChunksWithStartAddressHintCall = false; }; TEST(HeapAllocatorTest, WhenHeapAllocatorIsCreatedWithAlignmentThenAlignmentIsSet) { @@ -223,6 +246,169 @@ TEST(HeapAllocatorTest, GivenMoreThanTwiceBiggerSizeChunksButSmallerThanTwiceAli EXPECT_EQ(0u, freedChunks.size()); } +TEST(HeapAllocatorTest, GivenExactMatchInFreedChunksWhenGettingWithStartAddressHintThenCorrectChunkIsReturned) { + std::vector freedChunks; + uint64_t requiredStartAddress = 0x101000llu; + size_t chunkSize = MemoryConstants::pageSize * 2; + + freedChunks.emplace_back(requiredStartAddress, chunkSize); + + uint64_t ptrBase = 0x100000llu; + size_t size = 1024 * 4096; + + auto heapAllocator = std::make_unique(ptrBase, size, allocationAlignment, sizeThreshold); + uint64_t returnedAddress = heapAllocator->getFromFreedChunksWithStartAddressHint(requiredStartAddress, chunkSize, freedChunks); + + EXPECT_EQ(requiredStartAddress, returnedAddress); + EXPECT_TRUE(freedChunks.empty()); +} + +TEST(HeapAllocatorTest, GivenNoExactMatchInFreedChunksWhenGettingWithStartAddressHintThenZeroIsReturned) { + std::vector freedChunks; + uint64_t requiredStartAddress = 0x101000llu; + size_t chunkSize = MemoryConstants::pageSize * 2; + + freedChunks.emplace_back(0x102000llu, chunkSize); + + uint64_t ptrBase = 0x100000llu; + size_t 
size = 1024 * 4096; + + auto heapAllocator = std::make_unique(ptrBase, size, allocationAlignment, sizeThreshold); + uint64_t returnedAddress = heapAllocator->getFromFreedChunksWithStartAddressHint(requiredStartAddress, chunkSize, freedChunks); + + EXPECT_EQ(0llu, returnedAddress); + EXPECT_EQ(1u, freedChunks.size()); +} + +TEST(HeapAllocatorTest, GivenLargerChunkInFreedChunksWhenGettingWithStartAddressHintThenChunkIsSplitAndCorrectAddressIsReturned) { + std::vector freedChunks; + uint64_t requiredStartAddress = 0x101000llu; + size_t chunkSize = MemoryConstants::pageSize * 2; + + freedChunks.emplace_back(requiredStartAddress, chunkSize * 2); + + uint64_t ptrBase = 0x100000llu; + size_t size = 1024 * 4096; + + auto heapAllocator = std::make_unique(ptrBase, size, allocationAlignment, sizeThreshold); + uint64_t returnedAddress = heapAllocator->getFromFreedChunksWithStartAddressHint(requiredStartAddress, chunkSize, freedChunks); + + EXPECT_EQ(requiredStartAddress, returnedAddress); + EXPECT_EQ(1u, freedChunks.size()); + EXPECT_EQ(requiredStartAddress + chunkSize, freedChunks[0].ptr); + EXPECT_EQ(chunkSize, freedChunks[0].size); +} + +TEST(HeapAllocatorTest, GivenSmallerChunkInFreedChunksWhenGettingWithStartAddressHintThenZeroIsReturned) { + std::vector freedChunks; + uint64_t requiredStartAddress = 0x101000llu; + size_t chunkSize = MemoryConstants::pageSize * 2; + + freedChunks.emplace_back(requiredStartAddress, MemoryConstants::pageSize); + + uint64_t ptrBase = 0x100000llu; + size_t size = 1024 * 4096; + + auto heapAllocator = std::make_unique(ptrBase, size, allocationAlignment, sizeThreshold); + uint64_t returnedAddress = heapAllocator->getFromFreedChunksWithStartAddressHint(requiredStartAddress, chunkSize, freedChunks); + + EXPECT_EQ(0llu, returnedAddress); + EXPECT_EQ(1u, freedChunks.size()); +} + +TEST(HeapAllocatorTest, GivenMultipleChunksInFreedChunksWhenGettingWithStartAddressHintThenCorrectChunkIsReturned) { + std::vector freedChunks; + uint64_t 
requiredStartAddress = 0x103000llu; + size_t chunkSize = MemoryConstants::pageSize * 2; + + freedChunks.emplace_back(0x101000llu, MemoryConstants::pageSize); + freedChunks.emplace_back(requiredStartAddress, chunkSize); + freedChunks.emplace_back(0x105000llu, MemoryConstants::pageSize * 3); + + uint64_t ptrBase = 0x100000llu; + size_t size = 1024 * 4096; + + auto heapAllocator = std::make_unique(ptrBase, size, allocationAlignment, sizeThreshold); + uint64_t returnedAddress = heapAllocator->getFromFreedChunksWithStartAddressHint(requiredStartAddress, chunkSize, freedChunks); + + EXPECT_EQ(requiredStartAddress, returnedAddress); + EXPECT_EQ(2u, freedChunks.size()); + EXPECT_EQ(0x101000llu, freedChunks[0].ptr); + EXPECT_EQ(0x105000llu, freedChunks[1].ptr); +} + +TEST(HeapAllocatorTest, GivenChunkWithSmallTrailingSizeWhenGetFromFreedChunksWithStartAddressHintThenChunkIsSplitAndRemainingSpaceIsStored) { + std::vector freedChunks; + uint64_t chunkStartAddress = 0x101000llu; + size_t requestedSize = MemoryConstants::pageSize; + size_t chunkSize = requestedSize + 4; // Just slightly larger than the requested size + + freedChunks.emplace_back(chunkStartAddress, chunkSize); + + uint64_t ptrBase = 0x100000llu; + size_t size = 1024 * 4096; + + auto heapAllocator = std::make_unique(ptrBase, size, allocationAlignment, sizeThreshold); + + uint64_t requiredStartAddress = chunkStartAddress + 1; + uint64_t returnedAddress = heapAllocator->getFromFreedChunksWithStartAddressHint(requiredStartAddress, requestedSize, freedChunks); + + EXPECT_EQ(requiredStartAddress, returnedAddress); + EXPECT_EQ(2u, freedChunks.size()); + EXPECT_EQ(requiredStartAddress - 1, freedChunks[0].ptr); + EXPECT_EQ(requiredStartAddress + requestedSize, freedChunks[1].ptr); + EXPECT_EQ(1u, freedChunks[0].size); // Leading size is just 1 byte + EXPECT_EQ(3u, freedChunks[1].size); // Trailing size is just 3 bytes +} + +TEST(HeapAllocatorTest, 
GivenChunkWithLeadingSizeAndNoTrailingSizeWhenGetFromFreedChunksWithStartAddressHintThenChunkIsSplitAndNoRemainingSpaceIsStored) { + std::vector freedChunks; + uint64_t chunkStartAddress = 0x101000llu; + size_t requestedSize = MemoryConstants::pageSize; + size_t chunkSize = requestedSize + 1; // Just enough for requested size plus 1 byte leading offset + + freedChunks.emplace_back(chunkStartAddress, chunkSize); + + uint64_t ptrBase = 0x100000llu; + size_t size = 1024 * 4096; + + auto heapAllocator = std::make_unique(ptrBase, size, allocationAlignment, sizeThreshold); + + uint64_t requiredStartAddress = chunkStartAddress + 1; + uint64_t returnedAddress = heapAllocator->getFromFreedChunksWithStartAddressHint(requiredStartAddress, requestedSize, freedChunks); + + EXPECT_EQ(requiredStartAddress, returnedAddress); + EXPECT_EQ(1u, freedChunks.size()); + EXPECT_EQ(requiredStartAddress - 1, freedChunks[0].ptr); + EXPECT_EQ(1u, freedChunks[0].size); // Leading size is just 1 byte +} + +TEST(HeapAllocatorTest, GivenMultipleChunksAndTrailingSizeWhenGetFromFreedChunksWithStartAddressHintThenCorrectChunkIsSplitAndRemainingSpaceIsStored) { + std::vector freedChunks; + uint64_t requiredStartAddress = 0x103000llu; + size_t requestedSize = MemoryConstants::pageSize; + size_t chunkSize = 3 * MemoryConstants::pageSize; + + freedChunks.emplace_back(0x101000llu, MemoryConstants::pageSize); + freedChunks.emplace_back(requiredStartAddress, chunkSize); + freedChunks.emplace_back(0x105000llu, 2 * MemoryConstants::pageSize); + + uint64_t ptrBase = 0x100000llu; + size_t size = 1024 * 4096; + + auto heapAllocator = std::make_unique(ptrBase, size, allocationAlignment, sizeThreshold); + uint64_t returnedAddress = heapAllocator->getFromFreedChunksWithStartAddressHint(requiredStartAddress, requestedSize, freedChunks); + + EXPECT_EQ(requiredStartAddress, returnedAddress); + EXPECT_EQ(3u, freedChunks.size()); + EXPECT_EQ(0x101000llu, freedChunks[0].ptr); + EXPECT_EQ(MemoryConstants::pageSize, 
freedChunks[0].size); + EXPECT_EQ(requiredStartAddress + requestedSize, freedChunks[1].ptr); + EXPECT_EQ(chunkSize - requestedSize, freedChunks[1].size); + EXPECT_EQ(0x105000llu, freedChunks[2].ptr); + EXPECT_EQ(2 * MemoryConstants::pageSize, freedChunks[2].size); +} + TEST(HeapAllocatorTest, GivenStoredChunkAdjacentToLeftBoundaryOfIncomingChunkWhenStoreIsCalledThenChunkIsMerged) { uint64_t ptrBase = 0x100000llu; size_t size = 1024 * 4096; @@ -1438,6 +1624,109 @@ TEST(HeapAllocatorTest, givenZeroAlignmentPassedWhenAllocatingMemoryWithCustomAl EXPECT_EQ(alignUp(heapBase, allocationAlignment), ptr); } +TEST(HeapAllocatorTest, givenAllocateWithCustomAlignmentWithStartAddressHintAndStartAddressNotAvailableThenAddressReservationIsSuccessful) { + const uint64_t heapBase = 0x111111llu; + const size_t heapSize = 1024u * MemoryConstants::megaByte; + const size_t sizeThreshold = 4 * MemoryConstants::megaByte; + HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, sizeThreshold); + EXPECT_EQ(heapBase, heapAllocator.getLeftBound()); + + heapAllocator.pLeftBound += 10u * sizeThreshold; // Move left bound to the right. + heapAllocator.pLeftBound = alignUp(heapAllocator.pLeftBound, allocationAlignment); // Align left bound. + + heapAllocator.pRightBound -= 10u * allocationAlignment; // Move right bound to the left. + heapAllocator.pRightBound = alignUp(heapAllocator.pRightBound, allocationAlignment); // Align right bound. 
+ + uint64_t requiredStartAddress = alignUp(heapAllocator.getBaseAddress(), allocationAlignment); + size_t ptrSize = 32u; + uint64_t ptr = heapAllocator.allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, ptrSize, 0u); + EXPECT_NE(ptr, 0u); + + requiredStartAddress = alignUp(heapAllocator.pRightBound + allocationAlignment, allocationAlignment); + ptrSize = 32u; + heapAllocator.getFromFreedChunksWithStartAddressHintCall = false; + ptr = heapAllocator.allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, ptrSize, 0u); + EXPECT_EQ(ptr, allocationAlignment); +} + +TEST(HeapAllocatorTest, givenLargeAllocationWhenAllocateWithCustomAlignmentWithStartAddressHintThenMisalignmentStoredInFreeChunksAndAddressReservationIsSuccessful) { + const uint64_t heapBase = 0x111111llu; + const size_t heapSize = 1024u * MemoryConstants::megaByte; + const size_t sizeThreshold = 4 * MemoryConstants::megaByte; + HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, sizeThreshold); + EXPECT_EQ(heapBase, heapAllocator.getLeftBound()); + + heapAllocator.pLeftBound += 10u * sizeThreshold; // Move left bound to the right. + heapAllocator.pLeftBound = alignUp(heapAllocator.pLeftBound, allocationAlignment); // Align left bound. + + heapAllocator.pRightBound -= 10u * allocationAlignment; // Move right bound to the left. + heapAllocator.pRightBound = alignUp(heapAllocator.pRightBound, allocationAlignment); // Align right bound. 
+ + uint64_t requiredStartAddress = heapAllocator.pLeftBound + allocationAlignment; + size_t ptrSize = (2 * sizeThreshold); + uint64_t ptr = heapAllocator.allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, ptrSize, 0u); + EXPECT_EQ(ptr, requiredStartAddress); + + std::vector &freedChunksBig = heapAllocator.getFreedChunksBig(); + EXPECT_EQ(1u, freedChunksBig.size()); + EXPECT_EQ(requiredStartAddress - allocationAlignment, freedChunksBig[0].ptr); +} + +TEST(HeapAllocatorTest, givenLargeAllocationWhenAllocateWithCustomAlignmentWithStartAddressHintThenNoMisalignmentStoredInFreeChunksAndAddressReservationIsSuccessful) { + const uint64_t heapBase = 0x111111llu; + const size_t heapSize = 1024u * MemoryConstants::megaByte; + const size_t sizeThreshold = 4 * MemoryConstants::megaByte; + HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, sizeThreshold); + EXPECT_EQ(heapBase, heapAllocator.getLeftBound()); + + heapAllocator.pLeftBound += 10u * sizeThreshold; // Move left bound to the right. + heapAllocator.pLeftBound = alignUp(heapAllocator.pLeftBound, allocationAlignment); // Align left bound. + + heapAllocator.pRightBound -= 10u * allocationAlignment; // Move right bound to the left. + heapAllocator.pRightBound = alignUp(heapAllocator.pRightBound, allocationAlignment); // Align right bound. 
+ + uint64_t requiredStartAddress = heapAllocator.pLeftBound; + size_t ptrSize = (2 * sizeThreshold); + uint64_t ptr = heapAllocator.allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, ptrSize, 0u); + EXPECT_EQ(ptr, requiredStartAddress); + + std::vector &freedChunksBig = heapAllocator.getFreedChunksBig(); + EXPECT_EQ(0u, freedChunksBig.size()); +} + +TEST(HeapAllocatorTest, givenLargeAllocationAndNotEnoughSpaceAtRequiredStartAddressWhenAllocateWithCustomAlignmentWithStartAddressHintThenSomeOtherAddressReserved) { + const uint64_t heapBase = 0x111111llu; + const size_t heapSize = 1024u * MemoryConstants::megaByte; + const size_t sizeThreshold = 4 * MemoryConstants::megaByte; + HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, sizeThreshold); + EXPECT_EQ(heapBase, heapAllocator.getLeftBound()); + + heapAllocator.pRightBound -= 10u * allocationAlignment; // Move right bound to the left. + heapAllocator.pRightBound = alignUp(heapAllocator.pRightBound, allocationAlignment); // Align right bound. + + heapAllocator.pLeftBound = heapAllocator.pRightBound - sizeThreshold; // Move left bound near the right bound. + heapAllocator.pLeftBound = alignUp(heapAllocator.pLeftBound, allocationAlignment); // Align left bound. 
+ + uint64_t requiredStartAddress = heapAllocator.pLeftBound + allocationAlignment; + size_t ptrSize = (2 * sizeThreshold); + uint64_t ptr = heapAllocator.allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, ptrSize, 0u); + EXPECT_NE(ptr, requiredStartAddress); +} + +TEST(HeapAllocatorTest, givenLargeAllocationAndNotEnoughSpaceWhenAllocateWithCustomAlignmentWithStartAddressHintThenErrorReturned) { + const uint64_t heapBase = 0x111111llu; + const size_t heapSize = 1024u * MemoryConstants::megaByte; + const size_t sizeThreshold = 4 * MemoryConstants::megaByte; + HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, sizeThreshold); + EXPECT_EQ(heapBase, heapAllocator.getLeftBound()); + + uint64_t requiredStartAddress = heapAllocator.pLeftBound; + size_t ptrSize = (2 * sizeThreshold); + heapAllocator.availableSize = 0; + uint64_t ptr = heapAllocator.allocateWithCustomAlignmentWithStartAddressHint(requiredStartAddress, ptrSize, 0u); + EXPECT_EQ(ptr, 0u); +} + TEST(HeapAllocatorTest, whenGetBaseAddressIsCalledThenReturnInitialBaseAddress) { const uint64_t heapBase = 0x100000llu; const size_t heapSize = 16 * MemoryConstants::megaByte; @@ -1453,4 +1742,4 @@ TEST(HeapAllocatorTest, whenGetBaseAddressIsCalledThenReturnInitialBaseAddress) size_t smallChunk = 4096; EXPECT_NE(0u, heapAllocator.allocate(smallChunk)); EXPECT_EQ(heapBase, heapAllocator.getBaseAddress()); -} \ No newline at end of file +}