From fe82425fa94273829ff94fc77a6af1a849ca01bb Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Mon, 4 Nov 2024 14:41:41 +0000 Subject: [PATCH] performance: Prefer to assign new gpu va rather than reuse Resolves: NEO-10734 Signed-off-by: Lukasz Jobczyk --- shared/source/utilities/heap_allocator.cpp | 48 +++++++------- .../linux/drm_memory_manager_tests.cpp | 64 ------------------- .../utilities/heap_allocator_tests.cpp | 58 +++++++---------- 3 files changed, 49 insertions(+), 121 deletions(-) diff --git a/shared/source/utilities/heap_allocator.cpp b/shared/source/utilities/heap_allocator.cpp index 0b06c66e2d..37ca47f424 100644 --- a/shared/source/utilities/heap_allocator.cpp +++ b/shared/source/utilities/heap_allocator.cpp @@ -36,32 +36,34 @@ uint64_t HeapAllocator::allocateWithCustomAlignment(size_t &sizeToAllocate, size uint32_t defragmentCount = 0; for (;;) { - size_t sizeOfFreedChunk = 0; - uint64_t ptrReturn = getFromFreedChunks(sizeToAllocate, freedChunks, sizeOfFreedChunk, alignment); + uint64_t ptrReturn = 0llu; - if (ptrReturn == 0llu) { - if (sizeToAllocate > sizeThreshold) { - const uint64_t misalignment = alignUp(pLeftBound, alignment) - pLeftBound; - if (pLeftBound + misalignment + sizeToAllocate <= pRightBound) { - if (misalignment) { - storeInFreedChunks(pLeftBound, static_cast(misalignment), freedChunks); - pLeftBound += misalignment; - } - ptrReturn = pLeftBound; - pLeftBound += sizeToAllocate; - } - } else { - const uint64_t pStart = pRightBound - sizeToAllocate; - const uint64_t misalignment = pStart - alignDown(pStart, alignment); - if (pLeftBound + sizeToAllocate + misalignment <= pRightBound) { - if (misalignment) { - pRightBound -= misalignment; - storeInFreedChunks(pRightBound, static_cast(misalignment), freedChunks); - } - pRightBound -= sizeToAllocate; - ptrReturn = pRightBound; + if (sizeToAllocate > sizeThreshold) { + const uint64_t misalignment = alignUp(pLeftBound, alignment) - pLeftBound; + if (pLeftBound + misalignment + 
sizeToAllocate <= pRightBound) { + if (misalignment) { + storeInFreedChunks(pLeftBound, static_cast(misalignment), freedChunks); + pLeftBound += misalignment; } + ptrReturn = pLeftBound; + pLeftBound += sizeToAllocate; } + } else { + const uint64_t pStart = pRightBound - sizeToAllocate; + const uint64_t misalignment = pStart - alignDown(pStart, alignment); + if (pLeftBound + sizeToAllocate + misalignment <= pRightBound) { + if (misalignment) { + pRightBound -= misalignment; + storeInFreedChunks(pRightBound, static_cast(misalignment), freedChunks); + } + pRightBound -= sizeToAllocate; + ptrReturn = pRightBound; + } + } + + size_t sizeOfFreedChunk = 0; + if (ptrReturn == 0llu) { + ptrReturn = getFromFreedChunks(sizeToAllocate, freedChunks, sizeOfFreedChunk, alignment); } if (ptrReturn != 0llu) { diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index b0fefef82f..f509f99807 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -2655,37 +2655,6 @@ TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndUnifiedAuxCapableAllocation memoryManager->freeGraphicsMemory(allocation); } -TEST_F(DrmMemoryManagerTest, given32BitAllocatorWithHeapAllocatorWhenLargerFragmentIsReusedThenOnlyUnmapSizeIsLargerWhileSizeStaysTheSame) { - mock->ioctlExpected.gemUserptr = 1; - mock->ioctlExpected.gemWait = 1; - mock->ioctlExpected.gemClose = 1; - - DebugManagerStateRestore dbgFlagsKeeper; - memoryManager->setForce32BitAllocations(true); - - size_t allocationSize = 4 * MemoryConstants::pageSize; - auto ptr = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::heapExternal, allocationSize); - size_t smallAllocationSize = MemoryConstants::pageSize; - memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(HeapIndex::heapExternal, smallAllocationSize); - - // 
now free first allocation , this will move it to chunks - memoryManager->getGfxPartition(rootDeviceIndex)->heapFree(HeapIndex::heapExternal, ptr, allocationSize); - - // now ask for 3 pages, this will give ptr from chunks - size_t pages3size = 3 * MemoryConstants::pageSize; - - void *hostPtr = reinterpret_cast(0x1000); - DrmAllocation *graphicsAlloaction = memoryManager->allocate32BitGraphicsMemory(rootDeviceIndex, pages3size, hostPtr, AllocationType::buffer); - - auto bo = graphicsAlloaction->getBO(); - EXPECT_EQ(pages3size, bo->peekSize()); - - auto gmmHelper = device->getGmmHelper(); - EXPECT_EQ(gmmHelper->canonize(ptr), graphicsAlloaction->getGpuAddress()); - - memoryManager->freeGraphicsMemory(graphicsAlloaction); -} - TEST_F(DrmMemoryManagerTest, givenSharedAllocationWithSmallerThenRealSizeWhenCreateIsCalledThenRealSizeIsUsed) { unsigned int realSize = 64 * 1024; VariableBackup lseekBackup(&SysCalls::lseekReturn, realSize); @@ -7236,39 +7205,6 @@ TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenUnsuppor } } -TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenOversizedAllocationWhenGraphicsAllocationInDevicePoolIsAllocatedThenAllocationAndBufferObjectHaveRequestedSize) { - auto heap = HeapIndex::heapStandard64KB; - if (memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::heapExtended)) { - heap = HeapIndex::heapExtended; - } - auto largerSize = 6 * MemoryConstants::megaByte; - - auto gpuAddress0 = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocateWithCustomAlignment(heap, largerSize, MemoryConstants::pageSize2M); - EXPECT_NE(0u, gpuAddress0); - EXPECT_EQ(6 * MemoryConstants::megaByte, largerSize); - auto gpuAddress1 = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(heap, largerSize); - EXPECT_NE(0u, gpuAddress1); - EXPECT_EQ(6 * MemoryConstants::megaByte, largerSize); - auto gpuAddress2 = memoryManager->getGfxPartition(rootDeviceIndex)->heapAllocate(heap, largerSize); - 
EXPECT_NE(0u, gpuAddress2); - EXPECT_EQ(6 * MemoryConstants::megaByte, largerSize); - memoryManager->getGfxPartition(rootDeviceIndex)->heapFree(heap, gpuAddress1, largerSize); - - auto status = MemoryManager::AllocationStatus::Error; - AllocationData allocData; - allocData.size = 5 * MemoryConstants::megaByte; - allocData.type = AllocationType::buffer; - allocData.rootDeviceIndex = rootDeviceIndex; - auto allocation = memoryManager->allocateGraphicsMemoryInDevicePool(allocData, status); - memoryManager->getGfxPartition(rootDeviceIndex)->heapFree(heap, gpuAddress2, largerSize); - EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); - ASSERT_NE(nullptr, allocation); - EXPECT_EQ(largerSize, allocation->getReservedAddressSize()); - EXPECT_EQ(allocData.size, allocation->getUnderlyingBufferSize()); - EXPECT_EQ(allocData.size, static_cast(allocation)->getBO()->peekSize()); - memoryManager->freeGraphicsMemory(allocation); -} - TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenAllocationsThatAreAlignedToPowerOf2InSizeAndAreGreaterThen8GBThenTheyAreAlignedToPreviousPowerOfTwoForGpuVirtualAddress) { if (!memoryManager->getGfxPartition(rootDeviceIndex)->getHeapLimit(HeapIndex::heapExtended)) { GTEST_SKIP(); diff --git a/shared/test/unit_test/utilities/heap_allocator_tests.cpp b/shared/test/unit_test/utilities/heap_allocator_tests.cpp index 397e3e33da..8dd20f6f18 100644 --- a/shared/test/unit_test/utilities/heap_allocator_tests.cpp +++ b/shared/test/unit_test/utilities/heap_allocator_tests.cpp @@ -598,12 +598,12 @@ TEST(HeapAllocatorTest, WhenFreeingThenMemoryAvailableForAllocation) { uint64_t ptr3 = heapAllocator->allocate(ptrSize3); EXPECT_NE(0llu, ptr3); - EXPECT_EQ(0u, heapAllocator->getFreedChunksSmall().size()); + EXPECT_EQ(1u, heapAllocator->getFreedChunksSmall().size()); EXPECT_EQ(0u, heapAllocator->getFreedChunksBig().size()); heapAllocator->free(ptr2, ptrSize2); - EXPECT_EQ(0u, heapAllocator->getFreedChunksSmall().size()); + EXPECT_EQ(1u, 
heapAllocator->getFreedChunksSmall().size()); EXPECT_EQ(0u, heapAllocator->getFreedChunksBig().size()); heapAllocator->free(ptr3, ptrSize3); @@ -632,25 +632,25 @@ TEST(HeapAllocatorTest, WhenFreeingChunkThenMemoryAvailableForAllocation) { EXPECT_NE(0llu, ptr); EXPECT_LE(ptrBase, ptr); - sizeAllocated += 8192; + sizeAllocated += ptrSize; size_t ptrSize1 = 4 * 4096; uint64_t ptr1 = heapAllocator->allocate(ptrSize1); EXPECT_NE(0llu, ptr1); EXPECT_LE(ptrBase, ptr1); - sizeAllocated += 4 * 4096; + sizeAllocated += ptrSize1; size_t ptrSize2 = 8192; uint64_t ptr2 = heapAllocator->allocate(ptrSize2); EXPECT_NE(0llu, ptr2); - sizeAllocated += 8192; + sizeAllocated += ptrSize2; EXPECT_EQ(size - sizeAllocated, heapAllocator->getavailableSize()); heapAllocator->free(ptr1, ptrSize1); - sizeAllocated -= 4 * 4096; + sizeAllocated -= ptrSize1; EXPECT_EQ(size - sizeAllocated, heapAllocator->getavailableSize()); EXPECT_EQ(1u, heapAllocator->getFreedChunksSmall().size()); @@ -660,15 +660,15 @@ TEST(HeapAllocatorTest, WhenFreeingChunkThenMemoryAvailableForAllocation) { uint64_t ptr3 = heapAllocator->allocate(ptrSize3); EXPECT_NE(0llu, ptr3); - EXPECT_EQ(0u, heapAllocator->getFreedChunksSmall().size()); + EXPECT_EQ(1u, heapAllocator->getFreedChunksSmall().size()); EXPECT_EQ(0u, heapAllocator->getFreedChunksBig().size()); - sizeAllocated += 4 * 4096; // 4*4096 because this was chunk that was stored on free list + sizeAllocated += ptrSize3; // only the requested size is consumed; the freed chunk is no longer reused and stays on the free list EXPECT_EQ(size - sizeAllocated, heapAllocator->getavailableSize()); heapAllocator->free(ptr2, ptrSize2); - EXPECT_EQ(0u, heapAllocator->getFreedChunksSmall().size()); + EXPECT_EQ(1u, heapAllocator->getFreedChunksSmall().size()); EXPECT_EQ(0u, heapAllocator->getFreedChunksBig().size()); heapAllocator->free(ptr3, ptrSize3); @@ -950,11 +950,9 @@ TEST(HeapAllocatorTest, Given10SmallAllocationsWhenFreedInTheSameOrderThenLastCh TEST(HeapAllocatorTest, 
Given10SmallAllocationsWhenMergedToBigAllocatedAsSmallSplittedAndReleasedThenItDoesNotGoToFreedBigChunksList) { uint64_t ptrBase = 0llu; - uintptr_t basePtr = 0; // Size for 10 small allocs plus one single 2 page plus some space size_t size = (10 + 2 + 1) * 4096; - uintptr_t upperLimitPtr = basePtr + size; size_t threshold = 4 * 4096; @@ -983,11 +981,8 @@ TEST(HeapAllocatorTest, Given10SmallAllocationsWhenMergedToBigAllocatedAsSmallSp heapAllocator->free(ptrs[i], sizes[i]); } - // Allocate small chunk, should be taken from freed list smallAlloc = heapAllocator->allocate(sizeOfSmallAlloc); - EXPECT_NE(0llu, smallAlloc); - EXPECT_LE(upperLimitPtr - (8 * 4096), smallAlloc); EXPECT_EQ(1u, freedChunksSmall.size()); @@ -1011,11 +1006,9 @@ TEST(HeapAllocatorTest, Given10SmallAllocationsWhenMergedToBigAllocatedAsSmallSp TEST(HeapAllocatorTest, Given10SmallAllocationsWhenMergedToBigAllocatedAsSmallNotSplittedAndReleasedThenItDoesNotGoToFreedBigChunksList) { uint64_t ptrBase = 0llu; - uintptr_t basePtr = 0; // Size for 10 small allocs plus one single 3 page plus some space size_t size = (10 + 3 + 1) * 4096; - uint64_t upperLimitPtr = basePtr + size; size_t threshold = 4 * 4096; @@ -1044,13 +1037,10 @@ TEST(HeapAllocatorTest, Given10SmallAllocationsWhenMergedToBigAllocatedAsSmallNo heapAllocator->free(ptrs[i], sizes[i]); } - // Allocate small chunk, should be taken from freed list smallAlloc = heapAllocator->allocate(sizeOfSmallAlloc); EXPECT_NE(0llu, smallAlloc); - EXPECT_LE(upperLimitPtr - (5 * 4096), smallAlloc); - - EXPECT_EQ(0u, freedChunksSmall.size()); + EXPECT_EQ(1u, freedChunksSmall.size()); heapAllocator->free(smallAlloc, sizeOfSmallAlloc); @@ -1150,11 +1140,14 @@ TEST(HeapAllocatorTest, givenAlignedBoundWhenAllocatingMemoryWithCustomAlignment TEST(HeapAllocatorTest, givenUnalignedBoundWhenAllocatingWithCustomAlignmentFromLeftThenAlignBoundBeforeAllocation) { const uint64_t heapBase = 0x100000llu; - const size_t heapSize = 1024u * 4096u; + const size_t 
customAlignment = 8 * MemoryConstants::pageSize; + const size_t alignedAllocationSize = 16 * MemoryConstants::pageSize; + const size_t misaligningAllocationSize = 2 * MemoryConstants::pageSize; + const size_t additionalAllocationSize = customAlignment - misaligningAllocationSize; + const size_t heapSize = customAlignment + alignedAllocationSize + misaligningAllocationSize; HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, 0); // Misalign the left bound - const size_t misaligningAllocationSize = 2 * MemoryConstants::pageSize; size_t ptrSize = misaligningAllocationSize; uint64_t ptr = heapAllocator.allocate(ptrSize); EXPECT_EQ(heapBase, ptr); @@ -1164,8 +1157,6 @@ TEST(HeapAllocatorTest, givenUnalignedBoundWhenAllocatingWithCustomAlignmentFrom EXPECT_EQ(0u, heapAllocator.getFreedChunksBig().size()); // Allocate with alignment - const size_t customAlignment = 8 * MemoryConstants::pageSize; - const size_t alignedAllocationSize = 16 * MemoryConstants::pageSize; ptrSize = alignedAllocationSize; ptr = heapAllocator.allocateWithCustomAlignment(ptrSize, customAlignment); EXPECT_EQ(alignedAllocationSize, ptrSize); @@ -1175,7 +1166,6 @@ TEST(HeapAllocatorTest, givenUnalignedBoundWhenAllocatingWithCustomAlignmentFrom EXPECT_EQ(1u, heapAllocator.getFreedChunksBig().size()); // Try to use w hole, we just created by aligning - const size_t additionalAllocationSize = customAlignment - misaligningAllocationSize; ptrSize = additionalAllocationSize; ptr = heapAllocator.allocate(ptrSize); EXPECT_EQ(heapBase + misaligningAllocationSize, ptr); @@ -1187,11 +1177,14 @@ TEST(HeapAllocatorTest, givenUnalignedBoundWhenAllocatingWithCustomAlignmentFrom TEST(HeapAllocatorTest, givenUnalignedBoundWhenAllocatingWithCustomAlignmentFromRightThenAlignBoundBeforeAllocation) { const uint64_t heapBase = 0x100000llu; - const size_t heapSize = 1024u * 4096u; + const size_t misaligningAllocationSize = 2 * MemoryConstants::pageSize; + const size_t customAlignment = 8 * 
MemoryConstants::pageSize; + const size_t alignedAllocationSize = 16 * MemoryConstants::pageSize; + const size_t additionalAllocationSize = customAlignment - misaligningAllocationSize; + const size_t heapSize = alignedAllocationSize + customAlignment; HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, std::numeric_limits::max()); // Misalign the right bound - const size_t misaligningAllocationSize = 2 * MemoryConstants::pageSize; size_t ptrSize = misaligningAllocationSize; uint64_t ptr = heapAllocator.allocate(ptrSize); EXPECT_EQ(misaligningAllocationSize, ptrSize); @@ -1201,8 +1194,6 @@ TEST(HeapAllocatorTest, givenUnalignedBoundWhenAllocatingWithCustomAlignmentFrom EXPECT_EQ(0u, heapAllocator.getFreedChunksSmall().size()); // Allocate with alignment - const size_t customAlignment = 8 * MemoryConstants::pageSize; - const size_t alignedAllocationSize = 16 * MemoryConstants::pageSize; ptrSize = alignedAllocationSize; ptr = heapAllocator.allocateWithCustomAlignment(ptrSize, customAlignment); EXPECT_EQ(alignedAllocationSize, ptrSize); @@ -1212,7 +1203,6 @@ TEST(HeapAllocatorTest, givenUnalignedBoundWhenAllocatingWithCustomAlignmentFrom EXPECT_EQ(1u, heapAllocator.getFreedChunksSmall().size()); // Try to use w hole, we just created by aligning - const size_t additionalAllocationSize = customAlignment - misaligningAllocationSize; ptrSize = additionalAllocationSize; ptr = heapAllocator.allocate(ptrSize); EXPECT_EQ(heapBase + heapSize - customAlignment, ptr); @@ -1357,7 +1347,7 @@ TEST(HeapAllocatorTest, givenSizeNotAlignedToBaseAllocatorAlignmentWhenAllocatin TEST(HeapAllocatorTest, givenAlignedFreedChunkAvailableWhenAllocatingMemoryWithCustomAlignmentFromLeftThenReturnUseFreedChunk) { const uint64_t heapBase = 0x100000llu; - const size_t heapSize = 1024u * 4096u; + const size_t heapSize = 64u * 4096u; HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, sizeThreshold); // First create an aligned freed chunk @@ -1381,7 
+1371,7 @@ TEST(HeapAllocatorTest, givenAlignedFreedChunkAvailableWhenAllocatingMemoryWithC TEST(HeapAllocatorTest, givenAlignedFreedChunkSlightlyBiggerThanAllocationeWhenAllocatingMemoryWithCustomAlignmentFromLeftThenUseEntireFreedChunk) { const uint64_t heapBase = 0x100000llu; - const size_t heapSize = 1024u * 4096u; + const size_t heapSize = 96u * 4096u; HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, sizeThreshold); // First create an aligned freed chunk @@ -1406,7 +1396,7 @@ TEST(HeapAllocatorTest, givenAlignedFreedChunkSlightlyBiggerThanAllocationeWhenA TEST(HeapAllocatorTest, givenAlignedFreedChunkTwoTimesBiggerThanAllocationeWhenAllocatingMemoryWithCustomAlignmentFromRightThenUseAPortionOfTheFreedChunk) { const uint64_t heapBase = 0x100000llu; - const size_t heapSize = 1024u * 4096u; + const size_t heapSize = 128u * 4096u; HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, sizeThreshold); // First create an aligned freed chunk @@ -1431,7 +1421,7 @@ TEST(HeapAllocatorTest, givenAlignedFreedChunkTwoTimesBiggerThanAllocationeWhenA TEST(HeapAllocatorTest, givenUnalignedFreedChunkAvailableWhenAllocatingMemoryWithCustomAlignmentFromLeftThenDoNotUseFreedChunk) { const uint64_t heapBase = 0x100000llu; - const size_t heapSize = 1024u * 4096u; + const size_t heapSize = 128u * 4096u; HeapAllocatorUnderTest heapAllocator(heapBase, heapSize, allocationAlignment, 1); // First create an unaligned freed chunk