From 91940d2a790854420c5eff4f8aeade9dd9af45c7 Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Wed, 21 May 2025 11:35:07 +0000 Subject: [PATCH] fix: initial support for single temporary allocations list Related-To: NEO-14641 Signed-off-by: Bartosz Dunajski --- .../cmdlist/test_in_order_cmdlist_2.cpp | 69 +++++-- .../unit_test/mem_obj/buffer_bcs_tests.cpp | 59 ++++-- .../debug_settings/debug_variables_base.inl | 1 + .../memory_manager/allocations_list.cpp | 54 +++-- .../memory_manager/graphics_allocation.h | 1 + .../internal_allocation_storage.cpp | 36 +++- .../internal_allocation_storage.h | 4 +- .../source/memory_manager/memory_manager.cpp | 76 ++++++- shared/source/memory_manager/memory_manager.h | 11 +- .../mocks/mock_internal_allocation_storage.h | 3 +- .../test/common/mocks/mock_memory_manager.h | 12 +- shared/test/common/test_files/igdrcl.config | 1 + .../memory_manager/host_ptr_manager_tests.cpp | 195 +++++++++++++++++- .../internal_allocation_storage_tests.cpp | 117 ++++++++++- 14 files changed, 562 insertions(+), 77 deletions(-) diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index 82b6ee157f..bf05b85db0 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -1242,6 +1242,16 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHan HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHandleCompletionAndTempAllocations, IsAtLeastXeHpCore) { auto immCmdList = createImmCmdListWithOffload(); + auto memoryManager = static_cast(device->getNEODevice()->getMemoryManager()); + memoryManager->callBaseAllocInUse = true; + + NEO::AllocationsList *memoryManagerTempAllocsList = nullptr; + + auto singleTempAllocationsList = memoryManager->isSingleTemporaryAllocationsListEnabled(); + 
if (singleTempAllocationsList) { + memoryManagerTempAllocsList = &memoryManager->getTemporaryAllocationsList(); + } + auto mainQueueCsr = static_cast *>(immCmdList->getCsr(false)); auto offloadCsr = static_cast *>(immCmdList->getCsr(true)); @@ -1273,44 +1283,71 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHan // only main is completed immCmdList->hostSynchronize(0, true); - EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + if (singleTempAllocationsList) { + EXPECT_TRUE(memoryManagerTempAllocsList->peekIsEmpty()); + } else { + EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + } immCmdList->appendMemoryCopy(&copyData1, &copyData2, 1, nullptr, 0, nullptr, copyParams); - - EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); // temp allocation created on offload csr + if (singleTempAllocationsList) { + EXPECT_FALSE(memoryManagerTempAllocsList->peekIsEmpty()); + } else { + EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); // temp allocation created on offload csr + } mainInternalStorage->storeAllocationWithTaskCount(std::move(std::make_unique()), NEO::AllocationUsage::TEMPORARY_ALLOCATION, 1); // both completed immCmdList->hostSynchronize(0, true); - EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + if (singleTempAllocationsList) { + EXPECT_TRUE(memoryManagerTempAllocsList->peekIsEmpty()); + } else { + EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); + 
EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + } immCmdList->appendMemoryCopy(&copyData1, &copyData2, 1, nullptr, 0, nullptr, copyParams); - EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + if (singleTempAllocationsList) { + EXPECT_FALSE(memoryManagerTempAllocsList->peekIsEmpty()); + } else { + EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + } auto mockAlloc = new MockGraphicsAllocation(); mainInternalStorage->storeAllocationWithTaskCount(std::move(std::unique_ptr(mockAlloc)), NEO::AllocationUsage::TEMPORARY_ALLOCATION, 123); // only copy completed immCmdList->hostSynchronize(0, true); - EXPECT_FALSE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + if (singleTempAllocationsList) { + EXPECT_FALSE(memoryManagerTempAllocsList->peekIsEmpty()); + } else { + EXPECT_FALSE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + } mockAlloc->updateTaskCount(1, mainQueueCsr->getOsContext().getContextId()); - immCmdList->hostSynchronize(0, true); - EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + + if (singleTempAllocationsList) { + EXPECT_TRUE(memoryManagerTempAllocsList->peekIsEmpty()); + } else { + EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + } // stored only in copy storage offloadInternalStorage->storeAllocationWithTaskCount(std::move(std::make_unique()), NEO::AllocationUsage::TEMPORARY_ALLOCATION, 1); immCmdList->hostSynchronize(0, 
true); - EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + if (singleTempAllocationsList) { + EXPECT_TRUE(memoryManagerTempAllocsList->peekIsEmpty()); + } else { + EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); + } *hostAddress = std::numeric_limits::max(); } diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index 115d755f57..f3e7bd1d57 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -1071,8 +1071,13 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + + if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) { + EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty()); + } else { + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + } bool tempAllocationFound = false; auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); @@ -1118,8 +1123,13 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - 
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + + if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) { + EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty()); + } else { + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + } bool tempAllocationFound = false; auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); @@ -1167,8 +1177,13 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + + if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) { + EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty()); + } else { + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + } bool tempAllocationFound = false; auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); @@ -1210,8 +1225,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa cmdQ->enqueueReadBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) { + EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty()); + } else { + 
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + } bool tempAllocationFound = false; auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); @@ -1249,8 +1268,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCa cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) { + EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty()); + } else { + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + } auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); @@ -1284,8 +1307,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenSrcHostPtrBlockingEnqueueSVMMemcpyAndEnq cmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) { + EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty()); + } else { + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + } auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); @@ -1319,8 +1346,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSV cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr, 
nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) { + EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty()); + } else { + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + } auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 0f3f5b0c48..1fefaa1189 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -315,6 +315,7 @@ DECLARE_DEBUG_VARIABLE(bool, ForceUseOnlyGlobalTimestamps, 0, "0- default disabl DECLARE_DEBUG_VARIABLE(int32_t, GetSipBinaryFromExternalLib, -1, "-1: default, 0: disabled, 1: enabled. If enabled, then retrieve Sip from external library") DECLARE_DEBUG_VARIABLE(int32_t, EnablePidFdOrSocketsForIpc, -1, "-1: default, 0: disabled (default), 1: enabled. If enabled, L0 IPC handles are opaque and pidfd or sockets are used for IPC exchange") DECLARE_DEBUG_VARIABLE(int32_t, OverrideCopyOffloadMode, -1, "-1: default, 0: disabled, >=1: if enabled, override to any value from CopyOffloadModes enum") +DECLARE_DEBUG_VARIABLE(int32_t, UseSingleListForTemporaryAllocations, -1, "-1: default, 0: disabled, 1: enabled. 
If enabled, use single list, instead of per CSR for tracking temporary allocations") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level") diff --git a/shared/source/memory_manager/allocations_list.cpp b/shared/source/memory_manager/allocations_list.cpp index d2fc4a5c8b..8822e84e4c 100644 --- a/shared/source/memory_manager/allocations_list.cpp +++ b/shared/source/memory_manager/allocations_list.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -14,20 +14,34 @@ namespace { struct ReusableAllocationRequirements { - const void *requiredPtr; - size_t requiredMinimalSize; - volatile TagAddressType *csrTagAddress; - NEO::AllocationType allocationType; - uint32_t contextId; - uint32_t activeTileCount; - uint32_t tagOffset; - bool forceSystemMemoryFlag; + ReusableAllocationRequirements() = delete; + ReusableAllocationRequirements(NEO::CommandStreamReceiver *csr, const void *requiredPtr, size_t requiredMinimalSize, NEO::AllocationType allocationType, bool forceSystemMemoryFlag) + : requiredPtr(requiredPtr), requiredMinimalSize(requiredMinimalSize), allocationType(allocationType), forceSystemMemoryFlag(forceSystemMemoryFlag) { + + if (csr) { + csrTagAddress = csr->getTagAddress(); + contextId = csr->getOsContext().getContextId(); + rootDeviceIndex = csr->getRootDeviceIndex(); + deviceBitfield = csr->getOsContext().getDeviceBitfield(); + tagOffset = csr->getImmWritePostSyncWriteOffset(); + } + } + + const void *requiredPtr = nullptr; + size_t requiredMinimalSize = 0; + volatile TagAddressType *csrTagAddress = nullptr; + NEO::AllocationType allocationType = NEO::AllocationType::unknown; + NEO::DeviceBitfield deviceBitfield = 1; + uint32_t contextId = std::numeric_limits::max(); + uint32_t rootDeviceIndex = 0; + uint32_t tagOffset = 0; + bool forceSystemMemoryFlag 
= false; }; bool checkTagAddressReady(ReusableAllocationRequirements *requirements, NEO::GraphicsAllocation *gfxAllocation) { auto tagAddress = requirements->csrTagAddress; auto taskCount = gfxAllocation->getTaskCount(requirements->contextId); - for (uint32_t count = 0; count < requirements->activeTileCount; count++) { + for (uint32_t count = 0; count < requirements->deviceBitfield.count(); count++) { if (*tagAddress < taskCount) { return false; } @@ -47,15 +61,8 @@ std::unique_ptr AllocationsList::detachAllocation(size_t req } std::unique_ptr AllocationsList::detachAllocation(size_t requiredMinimalSize, const void *requiredPtr, bool forceSystemMemoryFlag, CommandStreamReceiver *commandStreamReceiver, AllocationType allocationType) { - ReusableAllocationRequirements req; - req.requiredMinimalSize = requiredMinimalSize; - req.csrTagAddress = (commandStreamReceiver == nullptr) ? nullptr : commandStreamReceiver->getTagAddress(); - req.allocationType = allocationType; - req.contextId = (commandStreamReceiver == nullptr) ? UINT32_MAX : commandStreamReceiver->getOsContext().getContextId(); - req.requiredPtr = requiredPtr; - req.activeTileCount = (commandStreamReceiver == nullptr) ? 1u : commandStreamReceiver->getActivePartitions(); - req.tagOffset = (commandStreamReceiver == nullptr) ? 
0u : commandStreamReceiver->getImmWritePostSyncWriteOffset(); - req.forceSystemMemoryFlag = forceSystemMemoryFlag; + ReusableAllocationRequirements req(commandStreamReceiver, requiredPtr, requiredMinimalSize, allocationType, forceSystemMemoryFlag); + GraphicsAllocation *a = nullptr; GraphicsAllocation *retAlloc = processLocked(a, static_cast(&req)); return std::unique_ptr(retAlloc); @@ -71,8 +78,13 @@ GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, if (req->csrTagAddress == nullptr) { return removeOneImpl(curr, nullptr); } - if ((this->allocationUsage == TEMPORARY_ALLOCATION || checkTagAddressReady(req, curr)) && - (req->requiredPtr == nullptr || req->requiredPtr == curr->getUnderlyingBuffer())) { + + bool usageMatch = (this->allocationUsage == TEMPORARY_ALLOCATION || checkTagAddressReady(req, curr)); + bool ptrMatch = (req->requiredPtr == nullptr || req->requiredPtr == curr->getUnderlyingBuffer()); + bool tileMatch = (req->deviceBitfield == curr->storageInfo.subDeviceBitfield) || (curr->storageInfo.subDeviceBitfield == 0); + bool placementMatch = (req->rootDeviceIndex == curr->getRootDeviceIndex()) && tileMatch; + + if (usageMatch && ptrMatch && placementMatch) { if (this->allocationUsage == TEMPORARY_ALLOCATION) { // We may not have proper task count yet, so set notReady to avoid releasing in a different thread curr->updateTaskCount(CompletionStamp::notReady, req->contextId); diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h index 2be17fa703..46256031bd 100644 --- a/shared/source/memory_manager/graphics_allocation.h +++ b/shared/source/memory_manager/graphics_allocation.h @@ -159,6 +159,7 @@ class GraphicsAllocation : public IDNode, NEO::NonCopyableAn bool isUsed() const { return registeredContextsNum > 0; } bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; } bool isUsedByOsContext(uint32_t contextId) const { return objectNotUsed != 
getTaskCount(contextId); } + uint32_t getNumRegisteredContexts() const { return registeredContextsNum.load(); } MOCKABLE_VIRTUAL void updateTaskCount(TaskCountType newTaskCount, uint32_t contextId); MOCKABLE_VIRTUAL TaskCountType getTaskCount(uint32_t contextId) const { if (contextId >= usageInfos.size()) { diff --git a/shared/source/memory_manager/internal_allocation_storage.cpp b/shared/source/memory_manager/internal_allocation_storage.cpp index 33dac03c0b..0a9cf3d13f 100644 --- a/shared/source/memory_manager/internal_allocation_storage.cpp +++ b/shared/source/memory_manager/internal_allocation_storage.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -27,14 +27,22 @@ void InternalAllocationStorage::storeAllocation(std::unique_ptr &&gfxAllocation, uint32_t allocationUsage, TaskCountType taskCount) { + auto memoryManager = commandStreamReceiver.getMemoryManager(); + auto osContextId = commandStreamReceiver.getOsContext().getContextId(); + + if (allocationUsage == TEMPORARY_ALLOCATION && memoryManager->isSingleTemporaryAllocationsListEnabled()) { + memoryManager->storeTemporaryAllocation(std::move(gfxAllocation), osContextId, taskCount); + return; + } + if (allocationUsage == REUSABLE_ALLOCATION) { if (debugManager.flags.DisableResourceRecycling.get()) { - commandStreamReceiver.getMemoryManager()->freeGraphicsMemory(gfxAllocation.release()); + memoryManager->freeGraphicsMemory(gfxAllocation.release()); return; } } auto &allocationsList = allocationLists[allocationUsage]; - gfxAllocation->updateTaskCount(taskCount, commandStreamReceiver.getOsContext().getContextId()); + gfxAllocation->updateTaskCount(taskCount, osContextId); allocationsList.pushTailOne(*gfxAllocation.release()); } @@ -44,6 +52,12 @@ void InternalAllocationStorage::cleanAllocationList(TaskCountType waitTaskCount, void InternalAllocationStorage::freeAllocationsList(TaskCountType waitTaskCount, 
AllocationsList &allocationsList) { auto memoryManager = commandStreamReceiver.getMemoryManager(); + + if (&allocationsList == &allocationLists[TEMPORARY_ALLOCATION] && memoryManager->isSingleTemporaryAllocationsListEnabled()) { + memoryManager->cleanTemporaryAllocations(commandStreamReceiver, waitTaskCount); + return; + } + auto lock = memoryManager->getHostPtrManager()->obtainOwnership(); GraphicsAllocation *curr = allocationsList.detachNodes(); @@ -70,10 +84,26 @@ std::unique_ptr InternalAllocationStorage::obtainReusableAll } std::unique_ptr InternalAllocationStorage::obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, AllocationType allocationType) { + auto memoryManager = commandStreamReceiver.getMemoryManager(); + + if (memoryManager->isSingleTemporaryAllocationsListEnabled()) { + return memoryManager->obtainTemporaryAllocationWithPtr(&commandStreamReceiver, requiredSize, requiredPtr, allocationType); + } + auto allocation = allocationLists[TEMPORARY_ALLOCATION].detachAllocation(requiredSize, requiredPtr, &commandStreamReceiver, allocationType); return allocation; } +AllocationsList &InternalAllocationStorage::getTemporaryAllocations() { + auto memoryManager = commandStreamReceiver.getMemoryManager(); + + if (memoryManager->isSingleTemporaryAllocationsListEnabled()) { + return memoryManager->getTemporaryAllocationsList(); + } + + return allocationLists[TEMPORARY_ALLOCATION]; +} + DeviceBitfield InternalAllocationStorage::getDeviceBitfield() const { return commandStreamReceiver.getOsContext().getDeviceBitfield(); } diff --git a/shared/source/memory_manager/internal_allocation_storage.h b/shared/source/memory_manager/internal_allocation_storage.h index a2ea4dbf3e..5ed30ab50f 100644 --- a/shared/source/memory_manager/internal_allocation_storage.h +++ b/shared/source/memory_manager/internal_allocation_storage.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * 
SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ class InternalAllocationStorage { void storeAllocationWithTaskCount(std::unique_ptr &&gfxAllocation, uint32_t allocationUsage, TaskCountType taskCount); std::unique_ptr obtainReusableAllocation(size_t requiredSize, AllocationType allocationType); std::unique_ptr obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, AllocationType allocationType); - AllocationsList &getTemporaryAllocations() { return allocationLists[TEMPORARY_ALLOCATION]; } + AllocationsList &getTemporaryAllocations(); AllocationsList &getAllocationsForReuse() { return allocationLists[REUSABLE_ALLOCATION]; } AllocationsList &getDeferredAllocations() { return allocationLists[DEFERRED_DEALLOCATION]; } DeviceBitfield getDeviceBitfield() const; diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 10b95d4061..3cdd731cba 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -28,6 +28,7 @@ #include "shared/source/helpers/string_helpers.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/memory_manager/allocation_properties.h" +#include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/compression_selector.h" #include "shared/source/memory_manager/deferrable_allocation_deletion.h" #include "shared/source/memory_manager/deferred_deleter.h" @@ -90,6 +91,56 @@ MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : execu if (debugManager.flags.EnableMultiStorageResources.get() != -1) { supportsMultiStorageResources = !!debugManager.flags.EnableMultiStorageResources.get(); } + + if (debugManager.flags.UseSingleListForTemporaryAllocations.get() == 1) { + singleTemporaryAllocationsList = true; + temporaryAllocations = std::make_unique(AllocationUsage::TEMPORARY_ALLOCATION); + } +} + +void 
MemoryManager::storeTemporaryAllocation(std::unique_ptr &&gfxAllocation, uint32_t osContextId, TaskCountType taskCount) { + gfxAllocation->updateTaskCount(taskCount, osContextId); + temporaryAllocations->pushTailOne(*gfxAllocation.release()); +} + +void MemoryManager::cleanTemporaryAllocations(const CommandStreamReceiver &csr, TaskCountType waitedTaskCount) { + auto lock = getHostPtrManager()->obtainOwnership(); + + GraphicsAllocation *currentAlloc = temporaryAllocations->detachNodes(); + + IDList allocationsLeft; + + while (currentAlloc != nullptr) { + const auto waitedOsContextId = csr.getOsContext().getContextId(); + auto *nextAlloc = currentAlloc->next; + bool freeAllocation = false; + + if (currentAlloc->isUsedByOsContext(waitedOsContextId)) { + if (currentAlloc->hostPtrTaskCountAssignment == 0 && currentAlloc->getTaskCount(waitedOsContextId) <= waitedTaskCount) { + if (!currentAlloc->isUsedByManyOsContexts() || !allocInUse(*currentAlloc)) { + freeAllocation = true; + } + } + } else if (!allocInUse(*currentAlloc)) { + freeAllocation = true; + } + + if (freeAllocation) { + freeGraphicsMemory(currentAlloc); + } else { + allocationsLeft.pushTailOne(*currentAlloc); + } + + currentAlloc = nextAlloc; + } + + if (!allocationsLeft.peekIsEmpty()) { + temporaryAllocations->splice(*allocationsLeft.detachNodes()); + } +} + +std::unique_ptr MemoryManager::obtainTemporaryAllocationWithPtr(CommandStreamReceiver *csr, size_t requiredSize, const void *requiredPtr, AllocationType allocationType) { + return temporaryAllocations->detachAllocation(requiredSize, requiredPtr, csr, allocationType); } MemoryManager::~MemoryManager() { @@ -976,14 +1027,24 @@ void MemoryManager::waitForEnginesCompletion(GraphicsAllocation &graphicsAllocat } } -bool MemoryManager::allocInUse(GraphicsAllocation &graphicsAllocation) { +bool MemoryManager::allocInUse(GraphicsAllocation &graphicsAllocation) const { + uint32_t numEnginesChecked = 0; + const uint32_t numContextsToCheck = 
graphicsAllocation.getNumRegisteredContexts(); + for (auto &engine : getRegisteredEngines(graphicsAllocation.getRootDeviceIndex())) { auto osContextId = engine.osContext->getContextId(); auto allocationTaskCount = graphicsAllocation.getTaskCount(osContextId); - if (graphicsAllocation.isUsedByOsContext(osContextId) && - engine.commandStreamReceiver->getTagAllocation() != nullptr && - allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) { - return true; + + if (graphicsAllocation.isUsedByOsContext(osContextId)) { + numEnginesChecked++; + + if (engine.commandStreamReceiver->getTagAddress() && (allocationTaskCount > *engine.commandStreamReceiver->getTagAddress())) { + return true; + } + } + + if (numEnginesChecked == numContextsToCheck) { + return false; } } return false; @@ -993,10 +1054,15 @@ void MemoryManager::cleanTemporaryAllocationListOnAllEngines(bool waitForComplet for (auto &engineContainer : allRegisteredEngines) { for (auto &engine : engineContainer) { auto csr = engine.commandStreamReceiver; + if (waitForCompletion) { csr->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, csr->peekLatestSentTaskCount()); } csr->getInternalAllocationStorage()->cleanAllocationList(*csr->getTagAddress(), AllocationUsage::TEMPORARY_ALLOCATION); + + if (isSingleTemporaryAllocationsListEnabled() && (temporaryAllocations->peekIsEmpty() || !waitForCompletion)) { + return; + } } } } diff --git a/shared/source/memory_manager/memory_manager.h b/shared/source/memory_manager/memory_manager.h index c0155d567a..d2709d7697 100644 --- a/shared/source/memory_manager/memory_manager.h +++ b/shared/source/memory_manager/memory_manager.h @@ -30,6 +30,7 @@ namespace NEO { using SubDeviceIdsVec = StackVec; +class AllocationsList; class MultiGraphicsAllocation; class CpuPageFaultManager; class GfxPartition; @@ -205,7 +206,7 @@ class MemoryManager { void waitForDeletions(); MOCKABLE_VIRTUAL void waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation); - 
MOCKABLE_VIRTUAL bool allocInUse(GraphicsAllocation &graphicsAllocation); + MOCKABLE_VIRTUAL bool allocInUse(GraphicsAllocation &graphicsAllocation) const; void cleanTemporaryAllocationListOnAllEngines(bool waitForCompletion); bool isAsyncDeleterEnabled() const; @@ -355,6 +356,12 @@ class MemoryManager { std::optional> getCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool); void removeCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool); + void storeTemporaryAllocation(std::unique_ptr &&gfxAllocation, uint32_t osContextId, TaskCountType taskCount); + void cleanTemporaryAllocations(const CommandStreamReceiver &csr, TaskCountType waitTaskCount); + std::unique_ptr obtainTemporaryAllocationWithPtr(CommandStreamReceiver *csr, size_t requiredSize, const void *requiredPtr, AllocationType allocationType); + bool isSingleTemporaryAllocationsListEnabled() const { return singleTemporaryAllocationsList; } + AllocationsList &getTemporaryAllocationsList() const { return *temporaryAllocations; } + protected: bool getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo); static void overrideAllocationData(AllocationData &allocationData, const AllocationProperties &properties); @@ -396,6 +403,7 @@ class MemoryManager { bool initialized = false; bool forceNonSvmForExternalHostPtr = false; bool force32bitAllocations = false; + bool singleTemporaryAllocationsList = false; std::unique_ptr deferredDeleter; bool asyncDeleterEnabled = false; std::vector enable64kbpages; @@ -406,6 +414,7 @@ class MemoryManager { MultiDeviceEngineControlContainer allRegisteredEngines; MultiDeviceEngineControlContainer secondaryEngines; std::unique_ptr hostPtrManager; + std::unique_ptr temporaryAllocations; uint32_t latestContextId = std::numeric_limits::max(); std::map rootDeviceIndexToContextId; // This map will contain initial value of latestContextId for each 
rootDeviceIndex std::unique_ptr multiContextResourceDestructor; diff --git a/shared/test/common/mocks/mock_internal_allocation_storage.h b/shared/test/common/mocks/mock_internal_allocation_storage.h index d20619bfe7..7db5e94dcf 100644 --- a/shared/test/common/mocks/mock_internal_allocation_storage.h +++ b/shared/test/common/mocks/mock_internal_allocation_storage.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,6 +12,7 @@ namespace NEO { class MockInternalAllocationStorage : public InternalAllocationStorage { public: + using InternalAllocationStorage::allocationLists; using InternalAllocationStorage::InternalAllocationStorage; void cleanAllocationList(TaskCountType waitTaskCount, uint32_t allocationUsage) override { cleanAllocationsCalled++; diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h index 7f3d31cccd..12302fef9f 100644 --- a/shared/test/common/mocks/mock_memory_manager.h +++ b/shared/test/common/mocks/mock_memory_manager.h @@ -47,7 +47,9 @@ class MockMemoryManager : public MemoryManagerCreate { using MemoryManager::overrideAllocationData; using MemoryManager::pageFaultManager; using MemoryManager::prefetchManager; + using MemoryManager::singleTemporaryAllocationsList; using MemoryManager::supportsMultiStorageResources; + using MemoryManager::temporaryAllocations; using MemoryManager::unMapPhysicalDeviceMemoryFromVirtualMemory; using MemoryManager::unMapPhysicalHostMemoryFromVirtualMemory; using MemoryManager::useNonSvmHostPtrAlloc; @@ -153,8 +155,13 @@ class MockMemoryManager : public MemoryManagerCreate { OsAgnosticMemoryManager::unlockResourceImpl(gfxAllocation); } - bool allocInUse(GraphicsAllocation &graphicsAllocation) override { + bool allocInUse(GraphicsAllocation &graphicsAllocation) const override { allocInUseCalled++; + + if (callBaseAllocInUse) { + return 
OsAgnosticMemoryManager::allocInUse(graphicsAllocation); + } + if (deferAllocInUse) { return true; } @@ -317,7 +324,7 @@ class MockMemoryManager : public MemoryManagerCreate { uint32_t unlockResourceCalled = 0u; uint32_t lockResourceCalled = 0u; uint32_t createGraphicsAllocationFromExistingStorageCalled = 0u; - uint32_t allocInUseCalled = 0u; + mutable uint32_t allocInUseCalled = 0u; uint32_t registerIpcExportedAllocationCalled = 0; int32_t overrideAllocateAsPackReturn = -1; std::vector allocationsFromExistingStorage{}; @@ -358,6 +365,7 @@ class MockMemoryManager : public MemoryManagerCreate { bool singleFailureInAllocationWithHostPointer = false; bool isMockHostMemoryManager = false; bool deferAllocInUse = false; + bool callBaseAllocInUse = false; bool isMockEventPoolCreateMemoryManager = false; bool limitedGPU = false; bool returnFakeAllocation = false; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index c9c890bc9e..548e210523 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -664,4 +664,5 @@ UseIgcAsFcl = 0 EnablePidFdOrSocketsForIpc = -1 ExposeSingleDevice=-1 OverrideCopyOffloadMode = -1 +UseSingleListForTemporaryAllocations = -1 # Please don't edit below this line diff --git a/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp b/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp index 4bb6c7a080..a2e29f6d37 100644 --- a/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/host_ptr_manager_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,6 +11,7 @@ #include "shared/source/helpers/ptr_math.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/fixtures/memory_manager_fixture.h" +#include 
"shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_csr.h" @@ -944,14 +945,202 @@ HWTEST_F(HostPtrAllocationTest, givenOverlappingFragmentsWhenCheckIsCalledThenWa requirements.allocationFragments[0].fragmentPosition = FragmentPosition::none; requirements.rootDeviceIndex = csr0->getRootDeviceIndex(); + memoryManager->deferAllocInUse = true; hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); EXPECT_EQ(1u, csr0->waitForCompletionWithTimeoutCalled); EXPECT_EQ(1u, csr1->waitForCompletionWithTimeoutCalled); EXPECT_EQ(2u, storage0->cleanAllocationsCalled); EXPECT_EQ(2u, storage0->lastCleanAllocationsTaskCount); - EXPECT_EQ(2u, storage1->cleanAllocationsCalled); - EXPECT_EQ(2u, storage1->lastCleanAllocationsTaskCount); + + if (memoryManager->isSingleTemporaryAllocationsListEnabled()) { + EXPECT_EQ(1u, storage1->cleanAllocationsCalled); + EXPECT_EQ(1u, storage1->lastCleanAllocationsTaskCount); + } else { + EXPECT_EQ(2u, storage1->cleanAllocationsCalled); + EXPECT_EQ(2u, storage1->lastCleanAllocationsTaskCount); + } +} + +HWTEST_F(HostPtrAllocationTest, givenOverlappingFragmentsAndSingleTempAllocationsListWhenCheckIsCalledThenWaitAndCleanOnAllEngines) { + TaskCountType taskCountReady = 2; + TaskCountType taskCountNotReady = 1; + + memoryManager->singleTemporaryAllocationsList = true; + memoryManager->temporaryAllocations = std::make_unique(AllocationUsage::TEMPORARY_ALLOCATION); + + auto &engines = memoryManager->getRegisteredEngines(mockRootDeviceIndex); + EXPECT_EQ(1u, engines.size()); + + auto csr0 = static_cast(engines[0].commandStreamReceiver); + auto csr1 = std::make_unique(executionEnvironment, 0, 1); + TaskCountType csr0GpuTag = taskCountNotReady; + TaskCountType csr1GpuTag = taskCountNotReady; + csr0->tagAddress = &csr0GpuTag; + csr1->tagAddress = &csr1GpuTag; + 
auto osContext = memoryManager->createAndRegisterOsContext(csr1.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_RCS, EngineUsage::lowPriority})); + csr1->setupContext(*osContext); + + void *cpuPtr = reinterpret_cast(0x100004); + + auto hostPtrManager = static_cast(memoryManager->getHostPtrManager()); + auto graphicsAllocation0 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr); + auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr); + + auto storage0 = new MockInternalAllocationStorage(*csr0); + auto storage1 = new MockInternalAllocationStorage(*csr1); + csr0->internalAllocationStorage.reset(storage0); + storage0->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation0), TEMPORARY_ALLOCATION, taskCountReady); + storage0->updateCompletionAfterCleaningList(taskCountReady); + csr1->internalAllocationStorage.reset(storage1); + storage1->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); + storage1->updateCompletionAfterCleaningList(taskCountReady); + + csr0->setLatestSentTaskCount(taskCountNotReady); + csr1->setLatestSentTaskCount(taskCountNotReady); + + AllocationRequirements requirements; + + requirements.requiredFragmentsCount = 1; + requirements.totalRequiredSize = MemoryConstants::pageSize * 10; + + requirements.allocationFragments[0].allocationPtr = alignDown(cpuPtr, MemoryConstants::pageSize); + requirements.allocationFragments[0].allocationSize = MemoryConstants::pageSize * 10; + requirements.allocationFragments[0].fragmentPosition = FragmentPosition::none; + requirements.rootDeviceIndex = csr0->getRootDeviceIndex(); + + memoryManager->deferAllocInUse = true; + 
EXPECT_TRUE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation0)); + EXPECT_TRUE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation1)); + + // first CSR tag updated + hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); + + EXPECT_FALSE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation0)); + EXPECT_TRUE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation1)); + + // second CSR tag updated + hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements); + + EXPECT_TRUE(memoryManager->temporaryAllocations->peekIsEmpty()); +} + +HWTEST_F(HostPtrAllocationTest, givenSingleTempAllocationsListWhenAddingToStorageThenCleanCorrectly) { + TaskCountType taskCountReady = 2; + TaskCountType taskCountNotReady = 1; + + memoryManager->singleTemporaryAllocationsList = true; + memoryManager->temporaryAllocations = std::make_unique(AllocationUsage::TEMPORARY_ALLOCATION); + memoryManager->callBaseAllocInUse = true; + + auto &engines = memoryManager->getRegisteredEngines(mockRootDeviceIndex); + EXPECT_EQ(1u, engines.size()); + + auto csr0 = static_cast(engines[0].commandStreamReceiver); + auto csr1 = std::make_unique(executionEnvironment, 0, 1); + TaskCountType csr0GpuTag = taskCountNotReady; + TaskCountType csr1GpuTag = taskCountNotReady; + csr0->tagAddress = &csr0GpuTag; + csr1->tagAddress = &csr1GpuTag; + auto osContext = memoryManager->createAndRegisterOsContext(csr1.get(), EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_RCS, EngineUsage::lowPriority})); + csr1->setupContext(*osContext); + + void *cpuPtr = reinterpret_cast(0x100004); + + auto graphicsAllocation0 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr); + auto graphicsAllocation1 = 
memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr); + + auto storage0 = new MockInternalAllocationStorage(*csr0); + auto storage1 = new MockInternalAllocationStorage(*csr1); + csr0->internalAllocationStorage.reset(storage0); + csr1->internalAllocationStorage.reset(storage1); + + EXPECT_EQ(memoryManager->temporaryAllocations.get(), &csr0->getTemporaryAllocations()); + EXPECT_EQ(memoryManager->temporaryAllocations.get(), &csr1->getTemporaryAllocations()); + + storage0->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation0), TEMPORARY_ALLOCATION, taskCountReady); + EXPECT_TRUE(storage0->allocationLists[TEMPORARY_ALLOCATION].peekIsEmpty()); + EXPECT_TRUE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation0)); + EXPECT_EQ(taskCountReady, graphicsAllocation0->getTaskCount(csr0->getOsContext().getContextId())); + + storage1->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation1), TEMPORARY_ALLOCATION, taskCountReady); + EXPECT_TRUE(storage1->allocationLists[TEMPORARY_ALLOCATION].peekIsEmpty()); + EXPECT_TRUE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation1)); + EXPECT_EQ(taskCountReady, graphicsAllocation1->getTaskCount(csr1->getOsContext().getContextId())); + + csr0->setLatestSentTaskCount(taskCountNotReady); + csr1->setLatestSentTaskCount(taskCountNotReady); + + storage0->cleanAllocationList(taskCountNotReady, TEMPORARY_ALLOCATION); + storage1->cleanAllocationList(taskCountNotReady, TEMPORARY_ALLOCATION); + EXPECT_TRUE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation0)); + EXPECT_TRUE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation1)); + + csr1GpuTag = taskCountReady; + + storage0->cleanAllocationList(taskCountNotReady, TEMPORARY_ALLOCATION); + EXPECT_TRUE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation0)); + 
EXPECT_FALSE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation1)); + + storage1->cleanAllocationList(taskCountNotReady, TEMPORARY_ALLOCATION); + EXPECT_TRUE(memoryManager->temporaryAllocations->peekContains(*graphicsAllocation0)); + + csr0GpuTag = taskCountReady; + storage1->cleanAllocationList(taskCountNotReady, TEMPORARY_ALLOCATION); + EXPECT_TRUE(memoryManager->temporaryAllocations->peekIsEmpty()); +} + +HWTEST_F(HostPtrAllocationTest, givenSingleTempAllocationsListWhenAddingToStorageThenObtainCorrectly) { + TaskCountType taskCountReady = 2; + TaskCountType taskCountNotReady = 1; + + memoryManager->singleTemporaryAllocationsList = true; + memoryManager->temporaryAllocations = std::make_unique(AllocationUsage::TEMPORARY_ALLOCATION); + memoryManager->callBaseAllocInUse = true; + + auto &engines = memoryManager->getRegisteredEngines(mockRootDeviceIndex); + auto csr = static_cast(engines[0].commandStreamReceiver); + + TaskCountType csrGpuTag = taskCountNotReady; + csr->tagAddress = &csrGpuTag; + + void *cpuPtr = reinterpret_cast(0x100004); + + auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()}, cpuPtr); + + auto storage = new MockInternalAllocationStorage(*csr); + csr->internalAllocationStorage.reset(storage); + csr->setLatestSentTaskCount(taskCountNotReady); + + storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation), TEMPORARY_ALLOCATION, taskCountReady); + + auto alloc = storage->obtainTemporaryAllocationWithPtr(MemoryConstants::pageSize, cpuPtr, graphicsAllocation->getAllocationType()); + EXPECT_NE(nullptr, alloc.get()); + EXPECT_TRUE(memoryManager->temporaryAllocations->peekIsEmpty()); + alloc.release(); + + EXPECT_EQ(CompletionStamp::notReady, graphicsAllocation->getTaskCount(csr->getOsContext().getContextId())); + + // clean on CSR destruction + 
storage->storeAllocationWithTaskCount(std::unique_ptr(graphicsAllocation), TEMPORARY_ALLOCATION, taskCountReady); + csr->tagAddress = nullptr; +} + +TEST_F(HostPtrAllocationTest, givenDebugFlagSetWhenCreatingMemoryManagerThenEnableSingleTempAllocationsList) { + DebugManagerStateRestore debugRestorer; + + { + auto memoryManager = std::make_unique(executionEnvironment); + EXPECT_FALSE(memoryManager->isSingleTemporaryAllocationsListEnabled()); + EXPECT_EQ(nullptr, memoryManager->temporaryAllocations.get()); + } + + debugManager.flags.UseSingleListForTemporaryAllocations.set(1); + { + auto memoryManager = std::make_unique(executionEnvironment); + EXPECT_TRUE(memoryManager->isSingleTemporaryAllocationsListEnabled()); + EXPECT_NE(nullptr, memoryManager->temporaryAllocations.get()); + } } TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredFragmentCannotBeDestroyedThenCheckForOverlappingReturnsError) { diff --git a/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp b/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp index d09196b207..0c456c85b1 100644 --- a/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp +++ b/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,6 +12,7 @@ #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" +#include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" #include "shared/test/unit_test/utilities/containers_tests_helpers.h" @@ -284,11 +285,17 @@ TEST_F(InternalAllocationStorageTest, givenAllocationListWhenTwoThreadsCleanConc } HWTEST_F(InternalAllocationStorageTest, 
givenMultipleActivePartitionsWhenDetachingReusableAllocationThenCheckTaskCountFinishedOnAllTiles) { - auto ultCsr = reinterpret_cast *>(csr); - csr->setActivePartitions(2u); + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); + + auto memoryManager = deviceFactory->rootDevices[0]->getMemoryManager(); + + auto ultCsr = reinterpret_cast *>(deviceFactory->rootDevices[0]->getDefaultEngine().commandStreamReceiver); + ultCsr->setActivePartitions(2); ultCsr->immWritePostSyncWriteOffset = 32; - auto tagAddress = csr->getTagAddress(); + auto storage = ultCsr->getInternalAllocationStorage(); + + auto tagAddress = ultCsr->getTagAddress(); *tagAddress = 0xFF; tagAddress = ptrOffset(tagAddress, 32); *tagAddress = 0x0; @@ -296,19 +303,111 @@ HWTEST_F(InternalAllocationStorageTest, givenMultipleActivePartitionsWhenDetachi auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); storage->storeAllocation(std::unique_ptr(allocation), REUSABLE_ALLOCATION); - EXPECT_EQ(allocation, csr->getAllocationsForReuse().peekHead()); - EXPECT_FALSE(csr->getAllocationsForReuse().peekIsEmpty()); - allocation->updateTaskCount(1u, csr->getOsContext().getContextId()); + EXPECT_EQ(allocation, ultCsr->getAllocationsForReuse().peekHead()); + EXPECT_FALSE(ultCsr->getAllocationsForReuse().peekIsEmpty()); + allocation->updateTaskCount(1u, ultCsr->getOsContext().getContextId()); - std::unique_ptr allocationReusable = csr->getAllocationsForReuse().detachAllocation(0, nullptr, csr, AllocationType::internalHostMemory); + std::unique_ptr allocationReusable = ultCsr->getAllocationsForReuse().detachAllocation(0, nullptr, ultCsr, AllocationType::internalHostMemory); EXPECT_EQ(nullptr, allocationReusable.get()); *tagAddress = 0x1; - allocationReusable = csr->getAllocationsForReuse().detachAllocation(0, nullptr, csr, AllocationType::internalHostMemory); + allocationReusable = 
ultCsr->getAllocationsForReuse().detachAllocation(0, nullptr, ultCsr, AllocationType::internalHostMemory); EXPECT_EQ(allocation, allocationReusable.get()); memoryManager->freeGraphicsMemory(allocationReusable.release()); } + +HWTEST_F(InternalAllocationStorageTest, givenSingleTempAllocationsListWhenStoringFromDifferentRootDeviceThenSelectCorrectly) { + DebugManagerStateRestore restore; + debugManager.flags.UseSingleListForTemporaryAllocations.set(1); + + std::unique_ptr deviceFactory(new UltDeviceFactory(2, 1)); + + auto memoryManager = deviceFactory->rootDevices[0]->getMemoryManager(); + + auto rootCsr0 = reinterpret_cast *>(deviceFactory->rootDevices[0]->getDefaultEngine().commandStreamReceiver); + auto rootCsr1 = reinterpret_cast *>(deviceFactory->rootDevices[1]->getDefaultEngine().commandStreamReceiver); + + auto allocation0 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootCsr0->getRootDeviceIndex(), MemoryConstants::pageSize}); + auto allocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootCsr1->getRootDeviceIndex(), MemoryConstants::pageSize}); + + memoryManager->storeTemporaryAllocation(std::unique_ptr(allocation0), rootCsr0->getOsContext().getContextId(), 0); + memoryManager->storeTemporaryAllocation(std::unique_ptr(allocation1), rootCsr1->getOsContext().getContextId(), 0); + + std::unique_ptr allocationReusable = memoryManager->obtainTemporaryAllocationWithPtr(rootCsr1, MemoryConstants::pageSize, allocation0->getUnderlyingBuffer(), allocation0->getAllocationType()); + EXPECT_EQ(nullptr, allocationReusable.get()); + + allocationReusable = memoryManager->obtainTemporaryAllocationWithPtr(rootCsr0, MemoryConstants::pageSize, allocation0->getUnderlyingBuffer(), allocation0->getAllocationType()); + EXPECT_NE(nullptr, allocationReusable.get()); + + memoryManager->freeGraphicsMemory(allocationReusable.release()); + + allocationReusable = 
memoryManager->obtainTemporaryAllocationWithPtr(rootCsr0, MemoryConstants::pageSize, allocation1->getUnderlyingBuffer(), allocation1->getAllocationType()); + EXPECT_EQ(nullptr, allocationReusable.get()); + + allocationReusable = memoryManager->obtainTemporaryAllocationWithPtr(rootCsr1, MemoryConstants::pageSize, allocation1->getUnderlyingBuffer(), allocation1->getAllocationType()); + EXPECT_NE(nullptr, allocationReusable.get()); + + memoryManager->freeGraphicsMemory(allocationReusable.release()); +} + +HWTEST_F(InternalAllocationStorageTest, givenSingleTempAllocationsListWhenStoringFromDifferentTileThenSelectCorrectly) { + DebugManagerStateRestore restore; + debugManager.flags.UseSingleListForTemporaryAllocations.set(1); + + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); + + auto memoryManager = deviceFactory->rootDevices[0]->getMemoryManager(); + + auto csr0 = reinterpret_cast *>(deviceFactory->rootDevices[0]->getDefaultEngine().commandStreamReceiver); + auto csr1 = reinterpret_cast *>(deviceFactory->subDevices[0]->getDefaultEngine().commandStreamReceiver); + + auto allocation0 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr0->getRootDeviceIndex(), MemoryConstants::pageSize}); + allocation0->storageInfo.subDeviceBitfield = csr0->deviceBitfield; + auto allocation1 = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr1->getRootDeviceIndex(), MemoryConstants::pageSize}); + allocation1->storageInfo.subDeviceBitfield = csr1->deviceBitfield; + + memoryManager->storeTemporaryAllocation(std::unique_ptr(allocation0), csr0->getOsContext().getContextId(), 0); + memoryManager->storeTemporaryAllocation(std::unique_ptr(allocation1), csr1->getOsContext().getContextId(), 0); + + std::unique_ptr allocationReusable = memoryManager->obtainTemporaryAllocationWithPtr(csr1, MemoryConstants::pageSize, allocation0->getUnderlyingBuffer(), allocation0->getAllocationType()); + EXPECT_EQ(nullptr, 
allocationReusable.get()); + + allocationReusable = memoryManager->obtainTemporaryAllocationWithPtr(csr0, MemoryConstants::pageSize, allocation0->getUnderlyingBuffer(), allocation0->getAllocationType()); + EXPECT_NE(nullptr, allocationReusable.get()); + + memoryManager->freeGraphicsMemory(allocationReusable.release()); + + allocationReusable = memoryManager->obtainTemporaryAllocationWithPtr(csr0, MemoryConstants::pageSize, allocation1->getUnderlyingBuffer(), allocation1->getAllocationType()); + EXPECT_EQ(nullptr, allocationReusable.get()); + + allocationReusable = memoryManager->obtainTemporaryAllocationWithPtr(csr1, MemoryConstants::pageSize, allocation1->getUnderlyingBuffer(), allocation1->getAllocationType()); + EXPECT_NE(nullptr, allocationReusable.get()); + + memoryManager->freeGraphicsMemory(allocationReusable.release()); +} + +HWTEST_F(InternalAllocationStorageTest, givenSingleTempAllocationsListWhenStoringSysMemThenObtainCorrectly) { + DebugManagerStateRestore restore; + debugManager.flags.UseSingleListForTemporaryAllocations.set(1); + + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); + + auto memoryManager = deviceFactory->rootDevices[0]->getMemoryManager(); + + auto csr = reinterpret_cast *>(deviceFactory->subDevices[0]->getDefaultEngine().commandStreamReceiver); + + auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); + allocation->storageInfo.subDeviceBitfield = 0; + + memoryManager->storeTemporaryAllocation(std::unique_ptr(allocation), csr->getOsContext().getContextId(), 0); + + std::unique_ptr allocationReusable = memoryManager->obtainTemporaryAllocationWithPtr(csr, MemoryConstants::pageSize, allocation->getUnderlyingBuffer(), allocation->getAllocationType()); + EXPECT_NE(nullptr, allocationReusable.get()); + + memoryManager->freeGraphicsMemory(allocationReusable.release()); +} + TEST_F(InternalAllocationStorageTest, 
givenInternalAllocationWhenTaskCountMetsExpectationAndItHasBeenAssignedThenAllocIsRemoved) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize}); uint32_t expectedTaskCount = 10u;