From 0424f30782ec6ecea236194919a6f8379668bd5b Mon Sep 17 00:00:00 2001
From: Filip Hazubski
Date: Mon, 24 Jan 2022 16:44:38 +0000
Subject: [PATCH] Improve mmap logic in createAllocWithAlignment

Group small allocations and reuse mapped memory in order to keep
map count small.

Related-To: NEO-6417

Signed-off-by: Filip Hazubski
---
Reviewer notes (below the "---" marker, so not part of the commit message):
- The line structure of this patch and the template arguments that text
  extraction had dropped were restored by inference. Indentation, blank
  context lines and the restored template arguments should be confirmed
  against the tree before applying (or apply with --ignore-whitespace).
- mapCpuPointerOrReuse re-aligns the next free address for every request.
  Previously only the first request of a buffer honoured its alignment, so a
  64KB-aligned request followed by a 2MB-aligned one got a misaligned pointer.
- mmap failure (MAP_FAILED) is propagated by mapCpuPointer and
  mapCpuPointerOrReuse and turned into a nullptr allocation by
  createAllocWithAlignment instead of being aligned and unmapped as if valid.
- Requests larger than the (possibly overridden) reuse buffer bypass the
  buffer, which avoids an underflow of remainingMapBufferSize.
- Hunk line counts and the diffstat were updated accordingly; the "index"
  blob ids still describe the originally submitted change.

 .../drm_memory_manager_localmem_tests.cpp     |   9 +-
 .../linux/drm_memory_manager_tests.cpp        | 112 ++++++++++++++++++
 .../test/unit_test/test_files/igdrcl.config   |   4 +-
 .../debug_settings/debug_variables_base.inl   |   2 +
 .../os_interface/linux/drm_memory_manager.cpp |  75 ++++++++++++
 .../os_interface/linux/drm_memory_manager.h   |   5 +
 .../linux/drm_memory_manager_local_memory.cpp |  24 ++--
 .../mocks/linux/mock_drm_memory_manager.h     |   2 +
 8 files changed, 213 insertions(+), 20 deletions(-)

diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_localmem_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_localmem_tests.cpp
index bb7408bd4b..a4d2bbd47a 100644
--- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_localmem_tests.cpp
+++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_localmem_tests.cpp
@@ -961,12 +961,12 @@ HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenAlignmentAndSizeWhenMmapReturnsU
     };
 
     munmapCalledCount = 0;
-    auto allocation = memoryManager->createAllocWithAlignment(allocationData, MemoryConstants::pageSize, MemoryConstants::pageSize64k, MemoryConstants::pageSize64k, 0u);
+    auto allocation = memoryManager->createAllocWithAlignment(allocationData, 5 * MemoryConstants::megaByte,
+                                                              MemoryConstants::pageSize64k, 5 * MemoryConstants::megaByte, 0u);
 
     EXPECT_EQ(alignUp(reinterpret_cast<void *>(0x12345678), MemoryConstants::pageSize64k), allocation->getMmapPtr());
-    EXPECT_EQ(1u, munmapCalledCount);
+    EXPECT_EQ(2u, munmapCalledCount);
     memoryManager->freeGraphicsMemory(allocation);
-    EXPECT_EQ(3u, munmapCalledCount);
     munmapCalledCount = 0u;
 }
 
@@ -999,7 +999,8 @@ HWTEST2_F(DrmMemoryManagerLocalMemoryTest, givenAlignmentAndSizeWhenMmapReturnsA
     };
 
     munmapCalledCount = 0u;
-    auto allocation = memoryManager->createAllocWithAlignment(allocationData, MemoryConstants::pageSize, 4u, MemoryConstants::pageSize64k, 0u);
+    auto allocation = memoryManager->createAllocWithAlignment(allocationData, 5 * MemoryConstants::megaByte,
+                                                              4u, 5 * MemoryConstants::megaByte, 0u);
 
     EXPECT_EQ(reinterpret_cast<void *>(0x12345678), allocation->getMmapPtr());
     EXPECT_EQ(1u, munmapCalledCount);
diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp
index 2598c1f87d..dcee67fc1d 100644
--- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp
+++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp
@@ -5915,4 +5915,116 @@ HWTEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletion
     memoryManager->freeGraphicsMemory(allocation);
 }
 
+static uint32_t mmapCallCount = 0u;
+static uint32_t munmapCallCount = 0u;
+
+HWTEST_F(DrmMemoryManagerTest, WhenMappingMemoryForMultipleSharedAllocationsThenMapMemoryOnce) {
+    auto mockMmap = [](void *addr, size_t len, int prot,
+                       int flags, int fd, off_t offset) throw() {
+        mmapCallCount++;
+        return reinterpret_cast<void *>(0x1000000);
+    };
+    VariableBackup<decltype(memoryManager->mmapFunction)> mmapBackup{&memoryManager->mmapFunction, mockMmap};
+
+    mmapCallCount = 0;
+    for (int i = 0; i < 10; i++) {
+        memoryManager->mapCpuPointerOrReuse(MemoryConstants::pageSize2Mb, MemoryConstants::pageSize2Mb);
+    }
+    EXPECT_EQ(1u, mmapCallCount);
+}
+
+HWTEST_F(DrmMemoryManagerTest, givenMapReuseBufferUsedUpWhenMappingMemoryForMultipleSharedAllocationsThenReleaseLeftoverMemoryAndMapNewMapReuseBuffer) {
+    auto mockMmap = [](void *addr, size_t len, int prot,
+                       int flags, int fd, off_t offset) throw() {
+        mmapCallCount++;
+        return reinterpret_cast<void *>(0x1000000);
+    };
+    auto mockMunmap = [](void *addr, size_t len) throw() {
+        munmapCallCount++;
+        return 0;
+    };
+    VariableBackup<decltype(memoryManager->mmapFunction)> mmapBackup{&memoryManager->mmapFunction, mockMmap};
+    VariableBackup<decltype(memoryManager->munmapFunction)> munmapBackup{&memoryManager->munmapFunction, mockMunmap};
+
+    mmapCallCount = 0;
+    munmapCallCount = 0;
+
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::pageSize, MemoryConstants::pageSize);
+    EXPECT_EQ(1u, mmapCallCount);
+    EXPECT_EQ(1u, munmapCallCount);
+
+    memoryManager->remainingMapBufferSize = 0;
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::pageSize, MemoryConstants::pageSize);
+    EXPECT_EQ(2u, mmapCallCount);
+    EXPECT_EQ(2u, munmapCallCount);
+
+    memoryManager->remainingMapBufferSize = MemoryConstants::pageSize - 1;
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::pageSize, MemoryConstants::pageSize);
+    EXPECT_EQ(3u, mmapCallCount);
+    EXPECT_EQ(4u, munmapCallCount);
+}
+
+HWTEST_F(DrmMemoryManagerTest, givenMapReuseBufferAllocatedWhenMemoryManagerIsDeletedThenReleaseLeftoverMapping) {
+    auto mockMunmap = [](void *addr, size_t len) throw() {
+        munmapCallCount++;
+        return 0;
+    };
+
+    munmapCallCount = 0;
+
+    auto pMemoryManager = new TestedDrmMemoryManager(false, false, false, *executionEnvironment);
+    pMemoryManager->munmapFunction = mockMunmap;
+    delete pMemoryManager;
+    EXPECT_EQ(0u, munmapCallCount);
+
+    pMemoryManager = new TestedDrmMemoryManager(false, false, false, *executionEnvironment);
+    pMemoryManager->munmapFunction = mockMunmap;
+    pMemoryManager->remainingMapBufferSize = 1;
+    delete pMemoryManager;
+    EXPECT_EQ(1u, munmapCallCount);
+}
+
+HWTEST_F(DrmMemoryManagerTest, givenMapReuseBufferRelatedDebugValuesChangedWhenMappingMemoryForSharedAllocationsThenCorrectlyUseMapReuseBuffer) {
+    DebugManagerStateRestore restorer;
+    auto mockMmap = [](void *addr, size_t len, int prot,
+                       int flags, int fd, off_t offset) throw() {
+        mmapCallCount++;
+        return reinterpret_cast<void *>(0x1000000);
+    };
+    VariableBackup<decltype(memoryManager->mmapFunction)> mmapBackup{&memoryManager->mmapFunction, mockMmap};
+
+    DebugManager.flags.OverrideMaxAllocationSizeForMapReuseBufferInMb.set(1);
+    DebugManager.flags.OverrideMapReuseBufferSizeInMb.set(2);
+    mmapCallCount = 0;
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::megaByte, MemoryConstants::megaByte);
+    EXPECT_EQ(1u, mmapCallCount);
+    EXPECT_EQ(1u * MemoryConstants::megaByte, memoryManager->remainingMapBufferSize);
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::megaByte, MemoryConstants::megaByte);
+    EXPECT_EQ(1u, mmapCallCount);
+    EXPECT_EQ(0u * MemoryConstants::megaByte, memoryManager->remainingMapBufferSize);
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::megaByte, MemoryConstants::megaByte);
+    EXPECT_EQ(2u, mmapCallCount);
+    EXPECT_EQ(1u * MemoryConstants::megaByte, memoryManager->remainingMapBufferSize);
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::megaByte, MemoryConstants::megaByte);
+    EXPECT_EQ(2u, mmapCallCount);
+    EXPECT_EQ(0u * MemoryConstants::megaByte, memoryManager->remainingMapBufferSize);
+
+    DebugManager.flags.OverrideMaxAllocationSizeForMapReuseBufferInMb.set(3);
+    DebugManager.flags.OverrideMapReuseBufferSizeInMb.set(10);
+    mmapCallCount = 0;
+    memoryManager->remainingMapBufferSize = 0;
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::megaByte, 3 * MemoryConstants::megaByte);
+    EXPECT_EQ(1u, mmapCallCount);
+    EXPECT_EQ(7u * MemoryConstants::megaByte, memoryManager->remainingMapBufferSize);
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::megaByte, 3 * MemoryConstants::megaByte);
+    EXPECT_EQ(1u, mmapCallCount);
+    EXPECT_EQ(4u * MemoryConstants::megaByte, memoryManager->remainingMapBufferSize);
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::megaByte, 3 * MemoryConstants::megaByte);
+    EXPECT_EQ(1u, mmapCallCount);
+    EXPECT_EQ(1u * MemoryConstants::megaByte, memoryManager->remainingMapBufferSize);
+    memoryManager->mapCpuPointerOrReuse(MemoryConstants::megaByte, 3 * MemoryConstants::megaByte);
+    EXPECT_EQ(2u, mmapCallCount);
+    EXPECT_EQ(7u * MemoryConstants::megaByte, memoryManager->remainingMapBufferSize);
+}
+
 } // namespace NEO
diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config
index 7d47b0b137..361197c320 100644
--- a/opencl/test/unit_test/test_files/igdrcl.config
+++ b/opencl/test/unit_test/test_files/igdrcl.config
@@ -368,4 +368,6 @@ EnableDrmCompletionFence = -1
 UseDrmCompletionFenceForAllAllocations = -1
 ExperimentalEnableSourceLevelDebugger = 0
 Force2dImageAsArray = -1
-ForceExtendedBufferSize = -1
\ No newline at end of file
+ForceExtendedBufferSize = -1
+OverrideMaxAllocationSizeForMapReuseBufferInMb = -1
+OverrideMapReuseBufferSizeInMb = -1
\ No newline at end of file
diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl
index 58e002464e..ea6ecff782 100644
--- a/shared/source/debug_settings/debug_variables_base.inl
+++ b/shared/source/debug_settings/debug_variables_base.inl
@@ -179,6 +179,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, EngineUsageHint, -1, "-1: default, >=0: engine u
 DECLARE_DEBUG_VARIABLE(int32_t, ForceBcsEngineIndex, -1, "-1: default, >=0 Copy Engine index")
 DECLARE_DEBUG_VARIABLE(int32_t, Force2dImageAsArray, -1, "-1: default, 0: WA Disabled, 1: Forces surface state of 2dImage to array")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceExtendedBufferSize, -1, "-1: default, 0: disabled, >=1: Forces extended buffer size by specify pageSize number in clCreateBuffer, clCreateBufferWithProperties and clCreateBufferWithPropertiesINTEL calls")
+DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxAllocationSizeForMapReuseBufferInMb, -1, "-1: default, >=0: Forces maximal allocation size eligible to reuse map buffer")
+DECLARE_DEBUG_VARIABLE(int32_t, OverrideMapReuseBufferSizeInMb, -1, "-1: default, >=0: Forces size of the map reuse buffer")
 
 /*LOGGING FLAGS*/
 DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp
index f98cbd54e6..52cea11773 100644
--- a/shared/source/os_interface/linux/drm_memory_manager.cpp
+++ b/shared/source/os_interface/linux/drm_memory_manager.cpp
@@ -125,6 +125,10 @@ DrmMemoryManager::~DrmMemoryManager() {
             MemoryManager::alignedFreeWrapper(memoryForPinBB);
         }
     }
+    if (remainingMapBufferSize > 0) {
+        [[maybe_unused]] int retCode = this->munmapFunction(mapBufferAddress, remainingMapBufferSize);
+        DEBUG_BREAK_IF(retCode != 0);
+    }
 }
 
 void DrmMemoryManager::releaseDeviceSpecificMemResources(uint32_t rootDeviceIndex) {
@@ -342,6 +346,77 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignmentFromUserptr(const Alloc
     return allocation.release();
 }
 
+// Reserves alignedSize bytes of address space aligned to alignment.
+// Returns MAP_FAILED when the underlying mmap call fails.
+void *DrmMemoryManager::mapCpuPointer(size_t alignment, size_t alignedSize) {
+    auto totalSizeToAlloc = alignedSize + alignment;
+    auto cpuBasePointer = this->mmapFunction(0, totalSizeToAlloc, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+    if (cpuBasePointer == MAP_FAILED) {
+        return MAP_FAILED;
+    }
+
+    auto *cpuPointer = alignUp(cpuBasePointer, alignment);
+    auto pointerDiff = ptrDiff(cpuPointer, cpuBasePointer);
+
+    if (pointerDiff != 0) {
+        [[maybe_unused]] int retCode = this->munmapFunction(cpuBasePointer, pointerDiff);
+        DEBUG_BREAK_IF(retCode != 0);
+    }
+    [[maybe_unused]] int retCode = this->munmapFunction(ptrOffset(cpuPointer, alignedSize), alignment - pointerDiff);
+    DEBUG_BREAK_IF(retCode != 0);
+    return cpuPointer;
+}
+
+// Serves small requests from a shared map buffer to keep the map count low;
+// larger requests get a dedicated mapping. Returns MAP_FAILED on failure.
+void *DrmMemoryManager::mapCpuPointerOrReuse(size_t alignment, size_t alignedSize) {
+    size_t maxSizeForReuse = 4 * MemoryConstants::megaByte;
+    size_t mapBufferSize = 64 * MemoryConstants::megaByte;
+
+    if (DebugManager.flags.OverrideMaxAllocationSizeForMapReuseBufferInMb.get() != -1) {
+        maxSizeForReuse = DebugManager.flags.OverrideMaxAllocationSizeForMapReuseBufferInMb.get() * MemoryConstants::megaByte;
+    }
+    if (DebugManager.flags.OverrideMapReuseBufferSizeInMb.get() != -1) {
+        mapBufferSize = DebugManager.flags.OverrideMapReuseBufferSizeInMb.get() * MemoryConstants::megaByte;
+    }
+
+    if (alignedSize > maxSizeForReuse || alignedSize > mapBufferSize) {
+        return mapCpuPointer(alignment, alignedSize);
+    }
+
+    std::lock_guard<std::mutex> guard(mapBufferMutex);
+    // Requests sharing the buffer may ask for different alignments, so the
+    // next free address has to be re-aligned for every request.
+    auto cpuPointerToReturn = alignUp(mapBufferAddress, alignment);
+    auto padding = ptrDiff(cpuPointerToReturn, mapBufferAddress);
+
+    if (remainingMapBufferSize < padding + alignedSize) {
+        if (remainingMapBufferSize > 0) {
+            [[maybe_unused]] int retCode = this->munmapFunction(mapBufferAddress, remainingMapBufferSize);
+            DEBUG_BREAK_IF(retCode != 0);
+        }
+        remainingMapBufferSize = 0;
+        mapBufferAddress = mapCpuPointer(alignment, mapBufferSize);
+        if (mapBufferAddress == MAP_FAILED) {
+            mapBufferAddress = nullptr;
+            return MAP_FAILED;
+        }
+        remainingMapBufferSize = mapBufferSize;
+        cpuPointerToReturn = mapBufferAddress;
+        padding = 0;
+    }
+    if (padding != 0) {
+        // Give back the bytes skipped to reach the requested alignment.
+        [[maybe_unused]] int retCode = this->munmapFunction(mapBufferAddress, padding);
+        DEBUG_BREAK_IF(retCode != 0);
+    }
+
+    mapBufferAddress = ptrOffset(cpuPointerToReturn, alignedSize);
+    remainingMapBufferSize -= padding + alignedSize;
+
+    return cpuPointerToReturn;
+}
+
 void DrmMemoryManager::obtainGpuAddress(const AllocationData &allocationData, BufferObject *bo, uint64_t gpuAddress) {
     if ((isLimitedRange(allocationData.rootDeviceIndex) || allocationData.type == GraphicsAllocation::AllocationType::SVM_CPU) &&
         !allocationData.flags.isUSMHostAllocation) {
diff --git a/shared/source/os_interface/linux/drm_memory_manager.h b/shared/source/os_interface/linux/drm_memory_manager.h
index e00160ad70..828d931525 100644
--- a/shared/source/os_interface/linux/drm_memory_manager.h
+++ b/shared/source/os_interface/linux/drm_memory_manager.h
@@ -98,6 +98,8 @@ class DrmMemoryManager : public MemoryManager {
     DrmAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override;
     DrmAllocation *allocateGraphicsMemoryWithAlignmentImpl(const AllocationData &allocationData);
     DrmAllocation *createAllocWithAlignmentFromUserptr(const AllocationData &allocationData, size_t size, size_t alignment, size_t alignedSVMSize, uint64_t gpuAddress);
+    void *mapCpuPointer(size_t alignment, size_t alignedSize);
+    void *mapCpuPointerOrReuse(size_t alignment, size_t alignedSize);
     DrmAllocation *createAllocWithAlignment(const AllocationData &allocationData, size_t size, size_t alignment, size_t alignedSize, uint64_t gpuAddress);
     DrmAllocation *createMultiHostAllocation(const AllocationData &allocationData);
     void obtainGpuAddress(const AllocationData &allocationData, BufferObject *bo, uint64_t gpuAddress);
@@ -137,7 +139,10 @@ class DrmMemoryManager : public MemoryManager {
     decltype(&munmap) munmapFunction = munmap;
     decltype(&lseek) lseekFunction = lseek;
     decltype(&close) closeFunction = close;
+    void *mapBufferAddress = nullptr;
+    size_t remainingMapBufferSize = 0;
     std::vector<BufferObject *> sharingBufferObjects;
+    std::mutex mapBufferMutex;
     std::mutex mtx;
 
     std::vector<std::vector<GraphicsAllocation *>> localMemAllocs;
diff --git a/shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp b/shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp
index daef3a8a21..e254bf31ca 100644
--- a/shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp
+++ b/shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp
@@ -38,23 +38,22 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
     }
 
     if (useBooMmap) {
-        auto totalSizeToAlloc = alignedSize + alignment;
-        auto cpuPointer = this->mmapFunction(0, totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
-
-        auto cpuBasePointer = cpuPointer;
-        cpuPointer = alignUp(cpuPointer, alignment);
-
-        auto pointerDiff = ptrDiff(cpuPointer, cpuBasePointer);
-        std::unique_ptr<BufferObject, BufferObject::Deleter> bo(this->createBufferObjectInMemoryRegion(&this->getDrm(allocationData.rootDeviceIndex), reinterpret_cast<uintptr_t>(cpuPointer), alignedSize, 0u, maxOsContextCount));
+        void *cpuPointer = mapCpuPointerOrReuse(alignment, alignedSize);
+        if (cpuPointer == MAP_FAILED) {
+            return nullptr;
+        }
+        auto drm = &this->getDrm(allocationData.rootDeviceIndex);
+        std::unique_ptr<BufferObject, BufferObject::Deleter> bo(
+            this->createBufferObjectInMemoryRegion(drm, reinterpret_cast<uintptr_t>(cpuPointer), alignedSize, 0u, maxOsContextCount));
 
         if (!bo) {
-            this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
+            this->munmapFunction(cpuPointer, alignedSize);
             return nullptr;
         }
 
         uint64_t offset = 0;
         if (!retrieveMmapOffsetForBufferObject(allocationData.rootDeviceIndex, *bo, I915_MMAP_OFFSET_WB, offset)) {
-            this->munmapFunction(cpuPointer, size);
+            this->munmapFunction(cpuPointer, alignedSize);
             return nullptr;
         }
 
@@ -67,11 +66,6 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
         auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), cpuPointer, bo->peekAddress(), alignedSize, MemoryPool::System4KBPages);
         allocation->setMmapPtr(cpuPointer);
         allocation->setMmapSize(alignedSize);
-        if (pointerDiff != 0) {
-            allocation->registerMemoryToUnmap(cpuBasePointer, pointerDiff, this->munmapFunction);
-        }
-        [[maybe_unused]] int retCode = this->munmapFunction(ptrOffset(cpuPointer, alignedSize), alignment - pointerDiff);
-        DEBUG_BREAK_IF(retCode != 0);
         allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), alignedSize);
 
         bo.release();
diff --git a/shared/test/common/mocks/linux/mock_drm_memory_manager.h b/shared/test/common/mocks/linux/mock_drm_memory_manager.h
index 828835245d..0125929678 100644
--- a/shared/test/common/mocks/linux/mock_drm_memory_manager.h
+++ b/shared/test/common/mocks/linux/mock_drm_memory_manager.h
@@ -83,6 +83,7 @@ class TestedDrmMemoryManager : public MemoryManagerCreate<DrmMemoryManager> {
     using DrmMemoryManager::gfxPartitions;
     using DrmMemoryManager::handleFenceCompletion;
     using DrmMemoryManager::lockResourceInLocalMemoryImpl;
+    using DrmMemoryManager::mapCpuPointerOrReuse;
     using DrmMemoryManager::memoryForPinBBs;
     using DrmMemoryManager::mmapFunction;
     using DrmMemoryManager::munmapFunction;
@@ -91,6 +92,7 @@ class TestedDrmMemoryManager : public MemoryManagerCreate<DrmMemoryManager> {
     using DrmMemoryManager::pushSharedBufferObject;
     using DrmMemoryManager::registerAllocationInOs;
     using DrmMemoryManager::releaseGpuRange;
+    using DrmMemoryManager::remainingMapBufferSize;
     using DrmMemoryManager::retrieveMmapOffsetForBufferObject;
     using DrmMemoryManager::setDomainCpu;
     using DrmMemoryManager::sharingBufferObjects;