From 4de792cee0ae5d896c667d2b025653aaeccc3acd Mon Sep 17 00:00:00 2001 From: "Lu, Wenbin" Date: Fri, 16 Jun 2023 17:55:01 +0000 Subject: [PATCH] fix: support alignments in host and shared UnifiedMemoryAllocation Related-To: LOCI-4334 Signed-off-by: Lu, Wenbin --- .../unit_tests/sources/memory/test_memory.cpp | 63 ++++++++++++++++++- .../os_agnostic_memory_manager.cpp | 3 +- .../memory_manager/unified_memory_manager.cpp | 15 ++--- .../os_interface/linux/drm_memory_manager.cpp | 5 +- shared/source/utilities/heap_allocator.cpp | 2 +- .../unified_memory_manager_tests.cpp | 58 ++++++++++++++++- .../linux/drm_memory_manager_tests.cpp | 24 +++++++ 7 files changed, 157 insertions(+), 13 deletions(-) diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index 2039b5be56..581a9f9778 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -1174,7 +1174,7 @@ TEST_F(ZexHostPointerTests, whenAllocatingSharedMemoryWithUseHostPtrFlagThenCrea EXPECT_EQ(ZE_RESULT_SUCCESS, result); } -TEST_F(MemoryTest, whenAllocatingDeviceMemoryThenAlignmentIsPassedCorrectly) { +TEST_F(MemoryTest, whenAllocatingDeviceMemoryThenAlignmentIsPassedCorrectlyAndMemoryIsAligned) { const size_t size = 1; ze_device_mem_alloc_desc_t deviceDesc = {}; @@ -1193,6 +1193,67 @@ TEST_F(MemoryTest, whenAllocatingDeviceMemoryThenAlignmentIsPassedCorrectly) { ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); + if (alignment != 0) { + EXPECT_EQ(reinterpret_cast(ptr) & (~(alignment - 1)), reinterpret_cast(ptr)); + } + result = context->freeMem(ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + } while (alignment != 0); +} + +TEST_F(MemoryTest, whenAllocatingHostMemoryThenAlignmentIsPassedCorrectlyAndMemoryIsAligned) { + const size_t size = 1; + + ze_host_mem_alloc_desc_t hostDesc = {}; + hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; + hostDesc.pNext = nullptr; + + auto memoryManager = static_cast(neoDevice->getMemoryManager()); + + size_t alignment = 8 * MemoryConstants::megaByte; + do { + alignment >>= 1; + memoryManager->validateAllocateProperties = [alignment](const AllocationProperties &properties) { + EXPECT_EQ(properties.alignment, alignUp(alignment, MemoryConstants::pageSize)); + }; + void *ptr = nullptr; + ze_result_t result = context->allocHostMem(&hostDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + if (alignment != 0) { + EXPECT_EQ(reinterpret_cast(ptr) & (~(alignment - 1)), reinterpret_cast(ptr)); + } + result = context->freeMem(ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + } while (alignment != 0); +} + +TEST_F(MemoryTest, whenAllocatingSharedMemoryThenAlignmentIsPassedCorrectlyAndMemoryIsAligned) { + const size_t size = 1; + + ze_device_mem_alloc_desc_t deviceDesc = {}; + deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; + deviceDesc.pNext = nullptr; + + ze_host_mem_alloc_desc_t hostDesc = {}; + hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; + hostDesc.pNext = nullptr; + + auto memoryManager = static_cast(neoDevice->getMemoryManager()); + + size_t alignment = 8 * MemoryConstants::megaByte; + do { + alignment >>= 1; + memoryManager->validateAllocateProperties = [alignment](const AllocationProperties &properties) { + EXPECT_EQ(properties.alignment, alignUp(alignment, MemoryConstants::pageSize64k)); + }; + void *ptr = nullptr; + ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + if (alignment != 0) { + EXPECT_EQ(reinterpret_cast(ptr) & (~(alignment - 1)), reinterpret_cast(ptr)); + } result = context->freeMem(ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } while (alignment != 0); diff --git a/shared/source/memory_manager/os_agnostic_memory_manager.cpp b/shared/source/memory_manager/os_agnostic_memory_manager.cpp index 919eb71e18..1047374843 100644 --- a/shared/source/memory_manager/os_agnostic_memory_manager.cpp +++ b/shared/source/memory_manager/os_agnostic_memory_manager.cpp @@ -457,7 +457,8 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateMemoryByKMD(const Allocatio GraphicsAllocation *alloc = nullptr; - auto ptr = allocateSystemMemory(alignUp(allocationData.size, MemoryConstants::pageSize), MemoryConstants::pageSize); + const size_t alignment = std::max(allocationData.alignment, MemoryConstants::pageSize); + auto ptr = allocateSystemMemory(alignUp(allocationData.size, alignment), alignment); if (ptr != nullptr) { alloc = createMemoryAllocation(allocationData.type, ptr, ptr, reinterpret_cast(ptr), allocationData.size, counter, MemoryPool::SystemCpuInaccessible, allocationData.rootDeviceIndex, allocationData.flags.uncacheable, allocationData.flags.flushL3, false); diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index a96d9ef57e..ac08fb7920 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -195,8 +195,8 @@ void *SVMAllocsManager::createSVMAlloc(size_t size, const SvmAllocationPropertie void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &memoryProperties) { - size_t pageSizeForAlignment = MemoryConstants::pageSize; - size_t alignedSize = alignUp(size, pageSizeForAlignment); + size_t pageSizeForAlignment = alignUp(memoryProperties.alignment, MemoryConstants::pageSize); + size_t alignedSize = alignUp(size, MemoryConstants::pageSize); bool compressionEnabled = false; AllocationType allocationType = getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, compressionEnabled); @@ -213,6 +213,7 @@ void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size, false, (deviceBitfield.count() > 1) && multiOsContextSupport, deviceBitfield}; + unifiedMemoryProperties.alignment = pageSizeForAlignment; unifiedMemoryProperties.flags.preferCompressed = compressionEnabled; unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable; unifiedMemoryProperties.flags.isUSMHostAllocation = true; @@ -375,8 +376,8 @@ void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(size_t size, const Sv auto rootDeviceIndex = unifiedMemoryProperties.getRootDeviceIndex(); auto &deviceBitfield = unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex); - size_t pageSizeForAlignment = 2 * MemoryConstants::megaByte; - size_t alignedSize = alignUp(size, pageSizeForAlignment); + size_t pageSizeForAlignment = std::max(alignUp(unifiedMemoryProperties.alignment, MemoryConstants::pageSize2M), MemoryConstants::pageSize2M); + size_t alignedSize = alignUp(size, MemoryConstants::pageSize2M); AllocationProperties gpuProperties{rootDeviceIndex, true, alignedSize, @@ -577,15 +578,15 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, co auto rootDeviceIndex = unifiedMemoryProperties.getRootDeviceIndex(); auto externalPtr = reinterpret_cast(unifiedMemoryProperties.allocationFlags.hostptr); bool useExternalHostPtrForCpu = externalPtr != nullptr; - constexpr auto pageSizeForAlignment = MemoryConstants::pageSize64k; - size_t alignedSize = alignUp(size, pageSizeForAlignment); + const auto pageSizeForAlignment = alignUp(unifiedMemoryProperties.alignment, MemoryConstants::pageSize64k); + size_t alignedSize = alignUp(size, MemoryConstants::pageSize64k); DeviceBitfield subDevices = unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex); AllocationProperties cpuProperties{rootDeviceIndex, !useExternalHostPtrForCpu, // allocateMemory alignedSize, AllocationType::SVM_CPU, false, // isMultiStorageAllocation subDevices}; - cpuProperties.alignment = memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getProductHelper().getSvmCpuAlignment(); + cpuProperties.alignment = std::max(pageSizeForAlignment, memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getProductHelper().getSvmCpuAlignment()); cpuProperties.flags.isUSMHostAllocation = useExternalHostPtrForCpu; cpuProperties.forceKMDAllocation = true; cpuProperties.makeGPUVaDifferentThanCPUPtr = true; diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index d289e621f9..f75479d7cc 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -619,7 +619,7 @@ GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData & auto gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmHelper(), allocationData.hostPtr, allocationData.size, 0u, CacheSettingsHelper::getGmmUsageType(allocationData.type, allocationData.flags.uncacheable, productHelper), false, systemMemoryStorageInfo, true); size_t bufferSize = allocationData.size; - uint64_t gpuRange = acquireGpuRange(bufferSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD64KB); + uint64_t gpuRange = acquireGpuRangeWithCustomAlignment(bufferSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD64KB, allocationData.alignment); GemCreate create{}; create.size = bufferSize; @@ -1602,6 +1602,9 @@ AllocationStatus getGpuAddress(const AlignmentSelector &alignmentSelector, HeapA alignment.heap = HeapIndex::HEAP_EXTENDED; } + if (alignment.alignment < allocationData.alignment) { + alignment.alignment = allocationData.alignment; + } gpuAddress = gmmHelper->canonize(gfxPartition->heapAllocateWithCustomAlignment(alignment.heap, sizeAllocated, alignment.alignment)); break; } diff --git a/shared/source/utilities/heap_allocator.cpp b/shared/source/utilities/heap_allocator.cpp index b27114889a..2c1a7cd129 100644 --- a/shared/source/utilities/heap_allocator.cpp +++ b/shared/source/utilities/heap_allocator.cpp @@ -19,7 +19,7 @@ bool operator<(const HeapChunk &hc1, const HeapChunk &hc2) { } uint64_t HeapAllocator::allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment) { - if (alignment == 0) { + if (alignment < this->allocationAlignment) { alignment = this->allocationAlignment; } diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp index 65ae88c1e6..d2145d1b7d 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp @@ -267,7 +267,7 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenForceMemoryPrefetchForKmdMigratedShared svmManager->freeSVMAlloc(ptr); } -TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenSharedUnifiedMemoryAllocationsAreAlignedCorrectly) { +TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenUnifiedMemoryAllocationsAreAlignedCorrectly) { std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); auto device = deviceFactory->rootDevices[0]; auto memoryManager = static_cast(device->getMemoryManager()); @@ -281,7 +281,7 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenSharedUnifiedMemoryAllocat memoryManager->validateAllocateProperties = [alignment](const AllocationProperties &properties) { EXPECT_EQ(properties.alignment, alignUp(alignment, MemoryConstants::pageSize64k)); }; - SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, alignment, rootDeviceIndices, deviceBitfields); + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, alignment, rootDeviceIndices, deviceBitfields); unifiedMemoryProperties.device = device; auto ptr = svmManager->createUnifiedMemoryAllocation(1, unifiedMemoryProperties); EXPECT_NE(nullptr, ptr); @@ -291,3 +291,57 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenSharedUnifiedMemoryAllocat svmManager->freeSVMAlloc(ptr); } while (alignment != 0); } + +TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenHostUnifiedMemoryAllocationsAreAlignedCorrectly) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); + auto device = deviceFactory->rootDevices[0]; + auto memoryManager = static_cast(device->getMemoryManager()); + auto svmManager = std::make_unique(memoryManager, false); + auto csr = std::make_unique(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); + csr->setupContext(*device->getDefaultEngine().osContext); + + size_t alignment = 8 * MemoryConstants::megaByte; + do { + alignment >>= 1; + memoryManager->validateAllocateProperties = [alignment](const AllocationProperties &properties) { + EXPECT_EQ(properties.alignment, alignUp(alignment, MemoryConstants::pageSize)); + }; + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, alignment, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + auto ptr = svmManager->createHostUnifiedMemoryAllocation(1, unifiedMemoryProperties); + EXPECT_NE(nullptr, ptr); + if (alignment != 0) { + EXPECT_EQ(reinterpret_cast(ptr) & (~(alignment - 1)), reinterpret_cast(ptr)); + } + svmManager->freeSVMAlloc(ptr); + } while (alignment != 0); +} + +TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenSharedUnifiedMemoryAllocationsAreAlignedCorrectly) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); + auto device = deviceFactory->rootDevices[0]; + auto memoryManager = static_cast(device->getMemoryManager()); + auto svmManager = std::make_unique(memoryManager, false); + auto csr = std::make_unique(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); + csr->setupContext(*device->getDefaultEngine().osContext); + + void *cmdQ = reinterpret_cast(0x12345); + auto mockPageFaultManager = new MockPageFaultManager(); + memoryManager->pageFaultManager.reset(mockPageFaultManager); + + size_t alignment = 8 * MemoryConstants::megaByte; + do { + alignment >>= 1; + memoryManager->validateAllocateProperties = [alignment](const AllocationProperties &properties) { + EXPECT_EQ(properties.alignment, alignUp(alignment, MemoryConstants::pageSize64k)); + }; + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, alignment, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + auto ptr = svmManager->createSharedUnifiedMemoryAllocation(1, unifiedMemoryProperties, cmdQ); + EXPECT_NE(nullptr, ptr); + if (alignment != 0) { + EXPECT_EQ(reinterpret_cast(ptr) & (~(alignment - 1)), reinterpret_cast(ptr)); + } + svmManager->freeSVMAlloc(ptr); + } while (alignment != 0); +} diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index 9fee3203bd..d95d82de89 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -1624,6 +1624,30 @@ TEST_F(DrmMemoryManagerTest, GivenShareableEnabledWhenAskedToCreateGraphicsAlloc memoryManager->freeGraphicsMemory(allocation); } +TEST_F(DrmMemoryManagerTest, GivenSizeAndAlignmentWhenAskedToCreateGraphicsAllocationThenValidAllocationIsReturnedAndMemoryIsAligned) { + allocationData.size = 1; + int ioctlCnt = 0; + size_t alignment = 8 * MemoryConstants::megaByte; + + do { + alignment >>= 1; + allocationData.alignment = alignment; + auto allocation = memoryManager->allocateMemoryByKMD(allocationData); + EXPECT_NE(nullptr, allocation); + auto gpuAddr = allocation->getGpuAddress(); + EXPECT_NE(0u, gpuAddr); + if (alignment != 0) { + EXPECT_EQ(gpuAddr & (~(alignment - 1)), gpuAddr); + } + memoryManager->freeGraphicsMemory(allocation); + ioctlCnt += 1; + } while (alignment != 0); + + mock->ioctlExpected.gemCreate = ioctlCnt; + mock->ioctlExpected.gemWait = ioctlCnt; + mock->ioctlExpected.gemClose = ioctlCnt; +} + TEST_F(DrmMemoryManagerTest, GivenMisalignedHostPtrAndMultiplePagesSizeWhenAskedForGraphicsAllocationThenItContainsAllFragmentsWithProperGpuAdrresses) { mock->ioctlExpected.gemUserptr = 3; mock->ioctlExpected.gemWait = 3;