fix: initial support for single temporary allocations list

Related-To: NEO-14641

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski 2025-05-21 11:35:07 +00:00 committed by Compute-Runtime-Automation
parent f25fa0ad27
commit 91940d2a79
14 changed files with 562 additions and 77 deletions

View File

@ -1242,6 +1242,16 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHan
HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHandleCompletionAndTempAllocations, IsAtLeastXeHpCore) {
auto immCmdList = createImmCmdListWithOffload<FamilyType::gfxCoreFamily>();
auto memoryManager = static_cast<MockMemoryManager *>(device->getNEODevice()->getMemoryManager());
memoryManager->callBaseAllocInUse = true;
NEO::AllocationsList *memoryManagerTempAllocsList = nullptr;
auto singleTempAllocationsList = memoryManager->isSingleTemporaryAllocationsListEnabled();
if (singleTempAllocationsList) {
memoryManagerTempAllocsList = &memoryManager->getTemporaryAllocationsList();
}
auto mainQueueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(immCmdList->getCsr(false));
auto offloadCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(immCmdList->getCsr(true));
@ -1273,44 +1283,71 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonInOrderModeWaitWhenCallingSyncThenHan
// only main is completed
immCmdList->hostSynchronize(0, true);
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
if (singleTempAllocationsList) {
EXPECT_TRUE(memoryManagerTempAllocsList->peekIsEmpty());
} else {
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
}
immCmdList->appendMemoryCopy(&copyData1, &copyData2, 1, nullptr, 0, nullptr, copyParams);
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); // temp allocation created on offload csr
if (singleTempAllocationsList) {
EXPECT_FALSE(memoryManagerTempAllocsList->peekIsEmpty());
} else {
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty()); // temp allocation created on offload csr
}
mainInternalStorage->storeAllocationWithTaskCount(std::move(std::make_unique<MockGraphicsAllocation>()), NEO::AllocationUsage::TEMPORARY_ALLOCATION, 1);
// both completed
immCmdList->hostSynchronize(0, true);
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
if (singleTempAllocationsList) {
EXPECT_TRUE(memoryManagerTempAllocsList->peekIsEmpty());
} else {
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
}
immCmdList->appendMemoryCopy(&copyData1, &copyData2, 1, nullptr, 0, nullptr, copyParams);
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
if (singleTempAllocationsList) {
EXPECT_FALSE(memoryManagerTempAllocsList->peekIsEmpty());
} else {
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
}
auto mockAlloc = new MockGraphicsAllocation();
mainInternalStorage->storeAllocationWithTaskCount(std::move(std::unique_ptr<MockGraphicsAllocation>(mockAlloc)), NEO::AllocationUsage::TEMPORARY_ALLOCATION, 123);
// only copy completed
immCmdList->hostSynchronize(0, true);
EXPECT_FALSE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
if (singleTempAllocationsList) {
EXPECT_FALSE(memoryManagerTempAllocsList->peekIsEmpty());
} else {
EXPECT_FALSE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
}
mockAlloc->updateTaskCount(1, mainQueueCsr->getOsContext().getContextId());
immCmdList->hostSynchronize(0, true);
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
if (singleTempAllocationsList) {
EXPECT_TRUE(memoryManagerTempAllocsList->peekIsEmpty());
} else {
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
}
// stored only in copy storage
offloadInternalStorage->storeAllocationWithTaskCount(std::move(std::make_unique<MockGraphicsAllocation>()), NEO::AllocationUsage::TEMPORARY_ALLOCATION, 1);
immCmdList->hostSynchronize(0, true);
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
if (singleTempAllocationsList) {
EXPECT_TRUE(memoryManagerTempAllocsList->peekIsEmpty());
} else {
EXPECT_TRUE(mainInternalStorage->getTemporaryAllocations().peekIsEmpty());
EXPECT_TRUE(offloadInternalStorage->getTemporaryAllocations().peekIsEmpty());
}
*hostAddress = std::numeric_limits<uint64_t>::max();
}

View File

@ -1071,8 +1071,13 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW
cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) {
EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty());
} else {
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
}
bool tempAllocationFound = false;
auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
@ -1118,8 +1123,13 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) {
EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty());
} else {
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
}
bool tempAllocationFound = false;
auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
@ -1167,8 +1177,13 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) {
EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty());
} else {
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
}
bool tempAllocationFound = false;
auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
@ -1210,8 +1225,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa
cmdQ->enqueueReadBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) {
EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty());
} else {
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
}
bool tempAllocationFound = false;
auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
@ -1249,8 +1268,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCa
cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr, nullptr);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) {
EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty());
} else {
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
}
auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead();
@ -1284,8 +1307,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenSrcHostPtrBlockingEnqueueSVMMemcpyAndEnq
cmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr, nullptr);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) {
EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty());
} else {
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
}
auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead();
@ -1319,8 +1346,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSV
cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr, nullptr);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
if (device->getMemoryManager()->isSingleTemporaryAllocationsListEnabled()) {
EXPECT_FALSE(device->getMemoryManager()->getTemporaryAllocationsList().peekIsEmpty());
} else {
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
}
auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead();

View File

@ -315,6 +315,7 @@ DECLARE_DEBUG_VARIABLE(bool, ForceUseOnlyGlobalTimestamps, 0, "0- default disabl
DECLARE_DEBUG_VARIABLE(int32_t, GetSipBinaryFromExternalLib, -1, "-1: default, 0: disabled, 1: enabled. If enabled, then retrieve Sip from external library")
DECLARE_DEBUG_VARIABLE(int32_t, EnablePidFdOrSocketsForIpc, -1, "-1: default, 0: disabled (default), 1: enabled. If enabled, L0 IPC handles are opaque and pidfd or sockets are used for IPC exchange")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideCopyOffloadMode, -1, "-1: default, 0: disabled, >=1: if enabled, override to any value from CopyOffloadModes enum")
/* The MemoryManager constructor turns the single list on only when this flag equals 1 */
DECLARE_DEBUG_VARIABLE(int32_t, UseSingleListForTemporaryAllocations, -1, "-1: default, 0: disabled, 1: enabled. If enabled, use single list, instead of per CSR for tracking temporary allocations")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -14,20 +14,34 @@
namespace {
struct ReusableAllocationRequirements {
const void *requiredPtr;
size_t requiredMinimalSize;
volatile TagAddressType *csrTagAddress;
NEO::AllocationType allocationType;
uint32_t contextId;
uint32_t activeTileCount;
uint32_t tagOffset;
bool forceSystemMemoryFlag;
ReusableAllocationRequirements() = delete;
ReusableAllocationRequirements(NEO::CommandStreamReceiver *csr, const void *requiredPtr, size_t requiredMinimalSize, NEO::AllocationType allocationType, bool forceSystemMemoryFlag)
: requiredPtr(requiredPtr), requiredMinimalSize(requiredMinimalSize), allocationType(allocationType), forceSystemMemoryFlag(forceSystemMemoryFlag) {
if (csr) {
csrTagAddress = csr->getTagAddress();
contextId = csr->getOsContext().getContextId();
rootDeviceIndex = csr->getRootDeviceIndex();
deviceBitfield = csr->getOsContext().getDeviceBitfield();
tagOffset = csr->getImmWritePostSyncWriteOffset();
}
}
const void *requiredPtr = nullptr;
size_t requiredMinimalSize = 0;
volatile TagAddressType *csrTagAddress = nullptr;
NEO::AllocationType allocationType = NEO::AllocationType::unknown;
NEO::DeviceBitfield deviceBitfield = 1;
uint32_t contextId = std::numeric_limits<uint32_t>::max();
uint32_t rootDeviceIndex = 0;
uint32_t tagOffset = 0;
bool forceSystemMemoryFlag = false;
};
bool checkTagAddressReady(ReusableAllocationRequirements *requirements, NEO::GraphicsAllocation *gfxAllocation) {
auto tagAddress = requirements->csrTagAddress;
auto taskCount = gfxAllocation->getTaskCount(requirements->contextId);
for (uint32_t count = 0; count < requirements->activeTileCount; count++) {
for (uint32_t count = 0; count < requirements->deviceBitfield.count(); count++) {
if (*tagAddress < taskCount) {
return false;
}
@ -47,15 +61,8 @@ std::unique_ptr<GraphicsAllocation> AllocationsList::detachAllocation(size_t req
}
std::unique_ptr<GraphicsAllocation> AllocationsList::detachAllocation(size_t requiredMinimalSize, const void *requiredPtr, bool forceSystemMemoryFlag, CommandStreamReceiver *commandStreamReceiver, AllocationType allocationType) {
ReusableAllocationRequirements req;
req.requiredMinimalSize = requiredMinimalSize;
req.csrTagAddress = (commandStreamReceiver == nullptr) ? nullptr : commandStreamReceiver->getTagAddress();
req.allocationType = allocationType;
req.contextId = (commandStreamReceiver == nullptr) ? UINT32_MAX : commandStreamReceiver->getOsContext().getContextId();
req.requiredPtr = requiredPtr;
req.activeTileCount = (commandStreamReceiver == nullptr) ? 1u : commandStreamReceiver->getActivePartitions();
req.tagOffset = (commandStreamReceiver == nullptr) ? 0u : commandStreamReceiver->getImmWritePostSyncWriteOffset();
req.forceSystemMemoryFlag = forceSystemMemoryFlag;
ReusableAllocationRequirements req(commandStreamReceiver, requiredPtr, requiredMinimalSize, allocationType, forceSystemMemoryFlag);
GraphicsAllocation *a = nullptr;
GraphicsAllocation *retAlloc = processLocked<AllocationsList, &AllocationsList::detachAllocationImpl>(a, static_cast<void *>(&req));
return std::unique_ptr<GraphicsAllocation>(retAlloc);
@ -71,8 +78,13 @@ GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *,
if (req->csrTagAddress == nullptr) {
return removeOneImpl(curr, nullptr);
}
if ((this->allocationUsage == TEMPORARY_ALLOCATION || checkTagAddressReady(req, curr)) &&
(req->requiredPtr == nullptr || req->requiredPtr == curr->getUnderlyingBuffer())) {
bool usageMatch = (this->allocationUsage == TEMPORARY_ALLOCATION || checkTagAddressReady(req, curr));
bool ptrMatch = (req->requiredPtr == nullptr || req->requiredPtr == curr->getUnderlyingBuffer());
bool tileMatch = (req->deviceBitfield == curr->storageInfo.subDeviceBitfield) || (curr->storageInfo.subDeviceBitfield == 0);
bool placementMatch = (req->rootDeviceIndex == curr->getRootDeviceIndex()) && tileMatch;
if (usageMatch && ptrMatch && placementMatch) {
if (this->allocationUsage == TEMPORARY_ALLOCATION) {
// We may not have proper task count yet, so set notReady to avoid releasing in a different thread
curr->updateTaskCount(CompletionStamp::notReady, req->contextId);

View File

@ -159,6 +159,7 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation>, NEO::NonCopyableAn
bool isUsed() const { return registeredContextsNum > 0; }
bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; }
bool isUsedByOsContext(uint32_t contextId) const { return objectNotUsed != getTaskCount(contextId); }
uint32_t getNumRegisteredContexts() const { return registeredContextsNum.load(); }
MOCKABLE_VIRTUAL void updateTaskCount(TaskCountType newTaskCount, uint32_t contextId);
MOCKABLE_VIRTUAL TaskCountType getTaskCount(uint32_t contextId) const {
if (contextId >= usageInfos.size()) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -27,14 +27,22 @@ void InternalAllocationStorage::storeAllocation(std::unique_ptr<GraphicsAllocati
storeAllocationWithTaskCount(std::move(gfxAllocation), allocationUsage, taskCount);
}
void InternalAllocationStorage::storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation> &&gfxAllocation, uint32_t allocationUsage, TaskCountType taskCount) {
auto memoryManager = commandStreamReceiver.getMemoryManager();
auto osContextId = commandStreamReceiver.getOsContext().getContextId();
if (allocationUsage == TEMPORARY_ALLOCATION && memoryManager->isSingleTemporaryAllocationsListEnabled()) {
memoryManager->storeTemporaryAllocation(std::move(gfxAllocation), osContextId, taskCount);
return;
}
if (allocationUsage == REUSABLE_ALLOCATION) {
if (debugManager.flags.DisableResourceRecycling.get()) {
commandStreamReceiver.getMemoryManager()->freeGraphicsMemory(gfxAllocation.release());
memoryManager->freeGraphicsMemory(gfxAllocation.release());
return;
}
}
auto &allocationsList = allocationLists[allocationUsage];
gfxAllocation->updateTaskCount(taskCount, commandStreamReceiver.getOsContext().getContextId());
gfxAllocation->updateTaskCount(taskCount, osContextId);
allocationsList.pushTailOne(*gfxAllocation.release());
}
@ -44,6 +52,12 @@ void InternalAllocationStorage::cleanAllocationList(TaskCountType waitTaskCount,
void InternalAllocationStorage::freeAllocationsList(TaskCountType waitTaskCount, AllocationsList &allocationsList) {
auto memoryManager = commandStreamReceiver.getMemoryManager();
if (&allocationsList == &allocationLists[TEMPORARY_ALLOCATION] && memoryManager->isSingleTemporaryAllocationsListEnabled()) {
memoryManager->cleanTemporaryAllocations(commandStreamReceiver, waitTaskCount);
return;
}
auto lock = memoryManager->getHostPtrManager()->obtainOwnership();
GraphicsAllocation *curr = allocationsList.detachNodes();
@ -70,10 +84,26 @@ std::unique_ptr<GraphicsAllocation> InternalAllocationStorage::obtainReusableAll
}
std::unique_ptr<GraphicsAllocation> InternalAllocationStorage::obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, AllocationType allocationType) {
auto memoryManager = commandStreamReceiver.getMemoryManager();
if (memoryManager->isSingleTemporaryAllocationsListEnabled()) {
return memoryManager->obtainTemporaryAllocationWithPtr(&commandStreamReceiver, requiredSize, requiredPtr, allocationType);
}
auto allocation = allocationLists[TEMPORARY_ALLOCATION].detachAllocation(requiredSize, requiredPtr, &commandStreamReceiver, allocationType);
return allocation;
}
// Returns the list that currently tracks temporary allocations: the memory manager's
// list when the single-list mode is enabled, this storage's own list otherwise.
AllocationsList &InternalAllocationStorage::getTemporaryAllocations() {
    auto manager = commandStreamReceiver.getMemoryManager();
    const bool useSharedList = manager->isSingleTemporaryAllocationsListEnabled();
    return useSharedList ? manager->getTemporaryAllocationsList()
                         : allocationLists[TEMPORARY_ALLOCATION];
}
// Returns the device bitfield of the OS context bound to the owning command stream receiver.
DeviceBitfield InternalAllocationStorage::getDeviceBitfield() const {
    const auto &osContext = commandStreamReceiver.getOsContext();
    return osContext.getDeviceBitfield();
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -21,7 +21,7 @@ class InternalAllocationStorage {
void storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation> &&gfxAllocation, uint32_t allocationUsage, TaskCountType taskCount);
std::unique_ptr<GraphicsAllocation> obtainReusableAllocation(size_t requiredSize, AllocationType allocationType);
std::unique_ptr<GraphicsAllocation> obtainTemporaryAllocationWithPtr(size_t requiredSize, const void *requiredPtr, AllocationType allocationType);
AllocationsList &getTemporaryAllocations() { return allocationLists[TEMPORARY_ALLOCATION]; }
AllocationsList &getTemporaryAllocations();
AllocationsList &getAllocationsForReuse() { return allocationLists[REUSABLE_ALLOCATION]; }
AllocationsList &getDeferredAllocations() { return allocationLists[DEFERRED_DEALLOCATION]; }
DeviceBitfield getDeviceBitfield() const;

View File

@ -28,6 +28,7 @@
#include "shared/source/helpers/string_helpers.h"
#include "shared/source/helpers/surface_format_info.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/allocations_list.h"
#include "shared/source/memory_manager/compression_selector.h"
#include "shared/source/memory_manager/deferrable_allocation_deletion.h"
#include "shared/source/memory_manager/deferred_deleter.h"
@ -90,6 +91,56 @@ MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : execu
if (debugManager.flags.EnableMultiStorageResources.get() != -1) {
supportsMultiStorageResources = !!debugManager.flags.EnableMultiStorageResources.get();
}
if (debugManager.flags.UseSingleListForTemporaryAllocations.get() == 1) {
singleTemporaryAllocationsList = true;
temporaryAllocations = std::make_unique<AllocationsList>(AllocationUsage::TEMPORARY_ALLOCATION);
}
}
void MemoryManager::storeTemporaryAllocation(std::unique_ptr<GraphicsAllocation> &&gfxAllocation, uint32_t osContextId, TaskCountType taskCount) {
gfxAllocation->updateTaskCount(taskCount, osContextId);
temporaryAllocations->pushTailOne(*gfxAllocation.release());
}
void MemoryManager::cleanTemporaryAllocations(const CommandStreamReceiver &csr, TaskCountType waitedTaskCount) {
auto lock = getHostPtrManager()->obtainOwnership();
GraphicsAllocation *currentAlloc = temporaryAllocations->detachNodes();
IDList<GraphicsAllocation, false, true> allocationsLeft;
while (currentAlloc != nullptr) {
const auto waitedOsContextId = csr.getOsContext().getContextId();
auto *nextAlloc = currentAlloc->next;
bool freeAllocation = false;
if (currentAlloc->isUsedByOsContext(waitedOsContextId)) {
if (currentAlloc->hostPtrTaskCountAssignment == 0 && currentAlloc->getTaskCount(waitedOsContextId) <= waitedTaskCount) {
if (!currentAlloc->isUsedByManyOsContexts() || !allocInUse(*currentAlloc)) {
freeAllocation = true;
}
}
} else if (!allocInUse(*currentAlloc)) {
freeAllocation = true;
}
if (freeAllocation) {
freeGraphicsMemory(currentAlloc);
} else {
allocationsLeft.pushTailOne(*currentAlloc);
}
currentAlloc = nextAlloc;
}
if (!allocationsLeft.peekIsEmpty()) {
temporaryAllocations->splice(*allocationsLeft.detachNodes());
}
}
// Detaches an allocation matching the given size, pointer and type from the memory
// manager's temporary allocations list, using csr for the lookup.
std::unique_ptr<GraphicsAllocation> MemoryManager::obtainTemporaryAllocationWithPtr(CommandStreamReceiver *csr, size_t requiredSize, const void *requiredPtr, AllocationType allocationType) {
    auto &sharedTemporaryList = *temporaryAllocations;
    return sharedTemporaryList.detachAllocation(requiredSize, requiredPtr, csr, allocationType);
}
MemoryManager::~MemoryManager() {
@ -976,14 +1027,24 @@ void MemoryManager::waitForEnginesCompletion(GraphicsAllocation &graphicsAllocat
}
}
bool MemoryManager::allocInUse(GraphicsAllocation &graphicsAllocation) {
bool MemoryManager::allocInUse(GraphicsAllocation &graphicsAllocation) const {
uint32_t numEnginesChecked = 0;
const uint32_t numContextsToCheck = graphicsAllocation.getNumRegisteredContexts();
for (auto &engine : getRegisteredEngines(graphicsAllocation.getRootDeviceIndex())) {
auto osContextId = engine.osContext->getContextId();
auto allocationTaskCount = graphicsAllocation.getTaskCount(osContextId);
if (graphicsAllocation.isUsedByOsContext(osContextId) &&
engine.commandStreamReceiver->getTagAllocation() != nullptr &&
allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) {
return true;
if (graphicsAllocation.isUsedByOsContext(osContextId)) {
numEnginesChecked++;
if (engine.commandStreamReceiver->getTagAddress() && (allocationTaskCount > *engine.commandStreamReceiver->getTagAddress())) {
return true;
}
}
if (numEnginesChecked == numContextsToCheck) {
return false;
}
}
return false;
@ -993,10 +1054,15 @@ void MemoryManager::cleanTemporaryAllocationListOnAllEngines(bool waitForComplet
for (auto &engineContainer : allRegisteredEngines) {
for (auto &engine : engineContainer) {
auto csr = engine.commandStreamReceiver;
if (waitForCompletion) {
csr->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, csr->peekLatestSentTaskCount());
}
csr->getInternalAllocationStorage()->cleanAllocationList(*csr->getTagAddress(), AllocationUsage::TEMPORARY_ALLOCATION);
if (isSingleTemporaryAllocationsListEnabled() && (temporaryAllocations->peekIsEmpty() || !waitForCompletion)) {
return;
}
}
}
}

View File

@ -30,6 +30,7 @@ namespace NEO {
using SubDeviceIdsVec = StackVec<uint32_t, 4>;
class AllocationsList;
class MultiGraphicsAllocation;
class CpuPageFaultManager;
class GfxPartition;
@ -205,7 +206,7 @@ class MemoryManager {
void waitForDeletions();
MOCKABLE_VIRTUAL void waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation);
MOCKABLE_VIRTUAL bool allocInUse(GraphicsAllocation &graphicsAllocation);
MOCKABLE_VIRTUAL bool allocInUse(GraphicsAllocation &graphicsAllocation) const;
void cleanTemporaryAllocationListOnAllEngines(bool waitForCompletion);
bool isAsyncDeleterEnabled() const;
@ -355,6 +356,12 @@ class MemoryManager {
std::optional<std::reference_wrapper<CustomHeapAllocatorConfig>> getCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool);
void removeCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool);
// Updates the allocation's task count for osContextId and appends it to the
// memory manager's temporary allocations list.
void storeTemporaryAllocation(std::unique_ptr<GraphicsAllocation> &&gfxAllocation, uint32_t osContextId, TaskCountType taskCount);
// Frees entries of the temporary allocations list that are no longer needed with respect
// to csr and waitTaskCount; entries still in use are kept on the list.
void cleanTemporaryAllocations(const CommandStreamReceiver &csr, TaskCountType waitTaskCount);
// Detaches a matching allocation from the temporary allocations list (forwards to
// AllocationsList::detachAllocation).
std::unique_ptr<GraphicsAllocation> obtainTemporaryAllocationWithPtr(CommandStreamReceiver *csr, size_t requiredSize, const void *requiredPtr, AllocationType allocationType);
// True when temporary allocations are tracked in one list owned by the memory manager.
bool isSingleTemporaryAllocationsListEnabled() const { return singleTemporaryAllocationsList; }
// Dereferences temporaryAllocations without a null check. The constructor creates the
// list only when UseSingleListForTemporaryAllocations is 1, so callers should check
// isSingleTemporaryAllocationsListEnabled() first.
AllocationsList &getTemporaryAllocationsList() const { return *temporaryAllocations; }
protected:
bool getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo);
static void overrideAllocationData(AllocationData &allocationData, const AllocationProperties &properties);
@ -396,6 +403,7 @@ class MemoryManager {
bool initialized = false;
bool forceNonSvmForExternalHostPtr = false;
bool force32bitAllocations = false;
bool singleTemporaryAllocationsList = false;
std::unique_ptr<DeferredDeleter> deferredDeleter;
bool asyncDeleterEnabled = false;
std::vector<bool> enable64kbpages;
@ -406,6 +414,7 @@ class MemoryManager {
MultiDeviceEngineControlContainer allRegisteredEngines;
MultiDeviceEngineControlContainer secondaryEngines;
std::unique_ptr<HostPtrManager> hostPtrManager;
std::unique_ptr<AllocationsList> temporaryAllocations;
uint32_t latestContextId = std::numeric_limits<uint32_t>::max();
std::map<uint32_t, uint32_t> rootDeviceIndexToContextId; // This map will contain initial value of latestContextId for each rootDeviceIndex
std::unique_ptr<DeferredDeleter> multiContextResourceDestructor;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -12,6 +12,7 @@
namespace NEO {
class MockInternalAllocationStorage : public InternalAllocationStorage {
public:
using InternalAllocationStorage::allocationLists;
using InternalAllocationStorage::InternalAllocationStorage;
void cleanAllocationList(TaskCountType waitTaskCount, uint32_t allocationUsage) override {
cleanAllocationsCalled++;

View File

@ -47,7 +47,9 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
using MemoryManager::overrideAllocationData;
using MemoryManager::pageFaultManager;
using MemoryManager::prefetchManager;
using MemoryManager::singleTemporaryAllocationsList;
using MemoryManager::supportsMultiStorageResources;
using MemoryManager::temporaryAllocations;
using MemoryManager::unMapPhysicalDeviceMemoryFromVirtualMemory;
using MemoryManager::unMapPhysicalHostMemoryFromVirtualMemory;
using MemoryManager::useNonSvmHostPtrAlloc;
@ -153,8 +155,13 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
OsAgnosticMemoryManager::unlockResourceImpl(gfxAllocation);
}
bool allocInUse(GraphicsAllocation &graphicsAllocation) override {
bool allocInUse(GraphicsAllocation &graphicsAllocation) const override {
allocInUseCalled++;
if (callBaseAllocInUse) {
return OsAgnosticMemoryManager::allocInUse(graphicsAllocation);
}
if (deferAllocInUse) {
return true;
}
@ -317,7 +324,7 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
uint32_t unlockResourceCalled = 0u;
uint32_t lockResourceCalled = 0u;
uint32_t createGraphicsAllocationFromExistingStorageCalled = 0u;
uint32_t allocInUseCalled = 0u;
mutable uint32_t allocInUseCalled = 0u;
uint32_t registerIpcExportedAllocationCalled = 0;
int32_t overrideAllocateAsPackReturn = -1;
std::vector<GraphicsAllocation *> allocationsFromExistingStorage{};
@ -358,6 +365,7 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
bool singleFailureInAllocationWithHostPointer = false;
bool isMockHostMemoryManager = false;
bool deferAllocInUse = false;
bool callBaseAllocInUse = false;
bool isMockEventPoolCreateMemoryManager = false;
bool limitedGPU = false;
bool returnFakeAllocation = false;

View File

@ -664,4 +664,5 @@ UseIgcAsFcl = 0
EnablePidFdOrSocketsForIpc = -1
ExposeSingleDevice=-1
OverrideCopyOffloadMode = -1
UseSingleListForTemporaryAllocations = -1
# Please don't edit below this line

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -11,6 +11,7 @@
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/test/common/fixtures/memory_manager_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_csr.h"
@ -944,14 +945,202 @@ HWTEST_F(HostPtrAllocationTest, givenOverlappingFragmentsWhenCheckIsCalledThenWa
requirements.allocationFragments[0].fragmentPosition = FragmentPosition::none;
requirements.rootDeviceIndex = csr0->getRootDeviceIndex();
memoryManager->deferAllocInUse = true;
hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements);
EXPECT_EQ(1u, csr0->waitForCompletionWithTimeoutCalled);
EXPECT_EQ(1u, csr1->waitForCompletionWithTimeoutCalled);
EXPECT_EQ(2u, storage0->cleanAllocationsCalled);
EXPECT_EQ(2u, storage0->lastCleanAllocationsTaskCount);
EXPECT_EQ(2u, storage1->cleanAllocationsCalled);
EXPECT_EQ(2u, storage1->lastCleanAllocationsTaskCount);
if (memoryManager->isSingleTemporaryAllocationsListEnabled()) {
EXPECT_EQ(1u, storage1->cleanAllocationsCalled);
EXPECT_EQ(1u, storage1->lastCleanAllocationsTaskCount);
} else {
EXPECT_EQ(2u, storage1->cleanAllocationsCalled);
EXPECT_EQ(2u, storage1->lastCleanAllocationsTaskCount);
}
}
// Two host-pointer allocations are parked as temporary allocations through two different
// engines while the memory manager uses one shared temporary allocations list. Every call
// to checkAllocationsForOverlapping() is expected to remove exactly one of them from that list.
HWTEST_F(HostPtrAllocationTest, givenOverlappingFragmentsAndSingleTempAllocationsListWhenCheckIsCalledThenWaitAndCleanOnAllEngines) {
    TaskCountType completedTaskCount = 2;
    TaskCountType pendingTaskCount = 1;

    // Switch the mock to the shared-list mode by hand.
    memoryManager->singleTemporaryAllocationsList = true;
    memoryManager->temporaryAllocations = std::make_unique<AllocationsList>(AllocationUsage::TEMPORARY_ALLOCATION);

    auto &registeredEngines = memoryManager->getRegisteredEngines(mockRootDeviceIndex);
    EXPECT_EQ(1u, registeredEngines.size());

    auto csr0 = static_cast<MockCommandStreamReceiver *>(registeredEngines[0].commandStreamReceiver);
    auto csr1 = std::make_unique<MockCommandStreamReceiver>(executionEnvironment, 0, 1);

    // Both GPU tags start below the task count the allocations will be stored with.
    TaskCountType gpuTag0 = pendingTaskCount;
    TaskCountType gpuTag1 = pendingTaskCount;
    csr0->tagAddress = &gpuTag0;
    csr1->tagAddress = &gpuTag1;

    // The second engine needs its own registered OS context.
    auto lowPriorityContext = memoryManager->createAndRegisterOsContext(
        csr1.get(),
        EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_RCS, EngineUsage::lowPriority}));
    csr1->setupContext(*lowPriorityContext);

    void *cpuPtr = reinterpret_cast<void *>(0x100004);
    auto hostPtrManager = static_cast<MockHostPtrManager *>(memoryManager->getHostPtrManager());

    // Both allocations wrap the very same host pointer.
    MockAllocationProperties hostPtrProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()};
    auto graphicsAllocation0 = memoryManager->allocateGraphicsMemoryWithProperties(hostPtrProperties, cpuPtr);
    auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(hostPtrProperties, cpuPtr);

    auto storage0 = new MockInternalAllocationStorage(*csr0);
    auto storage1 = new MockInternalAllocationStorage(*csr1);

    csr0->internalAllocationStorage.reset(storage0);
    storage0->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation0), TEMPORARY_ALLOCATION, completedTaskCount);
    storage0->updateCompletionAfterCleaningList(completedTaskCount);

    csr1->internalAllocationStorage.reset(storage1);
    storage1->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation1), TEMPORARY_ALLOCATION, completedTaskCount);
    storage1->updateCompletionAfterCleaningList(completedTaskCount);

    csr0->setLatestSentTaskCount(pendingTaskCount);
    csr1->setLatestSentTaskCount(pendingTaskCount);

    // Request a single ten-page fragment starting at the page that contains cpuPtr.
    AllocationRequirements requirements;
    const auto requestedSize = MemoryConstants::pageSize * 10;
    requirements.requiredFragmentsCount = 1;
    requirements.totalRequiredSize = requestedSize;
    auto &fragment = requirements.allocationFragments[0];
    fragment.allocationPtr = alignDown(cpuPtr, MemoryConstants::pageSize);
    fragment.allocationSize = requestedSize;
    fragment.fragmentPosition = FragmentPosition::none;
    requirements.rootDeviceIndex = csr0->getRootDeviceIndex();

    memoryManager->deferAllocInUse = true;

    auto &sharedTempList = *memoryManager->temporaryAllocations;
    EXPECT_TRUE(sharedTempList.peekContains(*graphicsAllocation0));
    EXPECT_TRUE(sharedTempList.peekContains(*graphicsAllocation1));

    // First check: only the allocation stored through csr0 is expected to be released.
    hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements);
    EXPECT_FALSE(sharedTempList.peekContains(*graphicsAllocation0));
    EXPECT_TRUE(sharedTempList.peekContains(*graphicsAllocation1));

    // Second check: the allocation stored through csr1 goes away as well.
    hostPtrManager->checkAllocationsForOverlapping(*memoryManager, &requirements);
    EXPECT_TRUE(sharedTempList.peekIsEmpty());
}
// With the shared temporary allocations list enabled, allocations stored through either
// engine's storage land in the memory manager's list, and cleaning through any storage
// releases an allocation only after the GPU tag of the engine it was stored with is bumped.
HWTEST_F(HostPtrAllocationTest, givenSingleTempAllocationsListWhenAddingToStorageThenCleanCorrectly) {
    TaskCountType completedTaskCount = 2;
    TaskCountType pendingTaskCount = 1;

    memoryManager->singleTemporaryAllocationsList = true;
    memoryManager->temporaryAllocations = std::make_unique<AllocationsList>(AllocationUsage::TEMPORARY_ALLOCATION);
    memoryManager->callBaseAllocInUse = true;

    auto &registeredEngines = memoryManager->getRegisteredEngines(mockRootDeviceIndex);
    EXPECT_EQ(1u, registeredEngines.size());

    auto csr0 = static_cast<MockCommandStreamReceiver *>(registeredEngines[0].commandStreamReceiver);
    auto csr1 = std::make_unique<MockCommandStreamReceiver>(executionEnvironment, 0, 1);

    // GPU tags are test-controlled; both start below the stored task count.
    TaskCountType gpuTag0 = pendingTaskCount;
    TaskCountType gpuTag1 = pendingTaskCount;
    csr0->tagAddress = &gpuTag0;
    csr1->tagAddress = &gpuTag1;

    auto lowPriorityContext = memoryManager->createAndRegisterOsContext(
        csr1.get(),
        EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_RCS, EngineUsage::lowPriority}));
    csr1->setupContext(*lowPriorityContext);

    void *cpuPtr = reinterpret_cast<void *>(0x100004);

    MockAllocationProperties hostPtrProperties{csr->getRootDeviceIndex(), false, MemoryConstants::pageSize, csr->getOsContext().getDeviceBitfield()};
    auto graphicsAllocation0 = memoryManager->allocateGraphicsMemoryWithProperties(hostPtrProperties, cpuPtr);
    auto graphicsAllocation1 = memoryManager->allocateGraphicsMemoryWithProperties(hostPtrProperties, cpuPtr);

    auto storage0 = new MockInternalAllocationStorage(*csr0);
    auto storage1 = new MockInternalAllocationStorage(*csr1);
    csr0->internalAllocationStorage.reset(storage0);
    csr1->internalAllocationStorage.reset(storage1);

    // Both engines must hand out the memory manager's list as their temporary list.
    auto *sharedTempList = memoryManager->temporaryAllocations.get();
    EXPECT_EQ(sharedTempList, &csr0->getTemporaryAllocations());
    EXPECT_EQ(sharedTempList, &csr1->getTemporaryAllocations());

    // Storing must bypass the per-storage list and stamp the engine's context task count.
    storage0->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation0), TEMPORARY_ALLOCATION, completedTaskCount);
    EXPECT_TRUE(storage0->allocationLists[TEMPORARY_ALLOCATION].peekIsEmpty());
    EXPECT_TRUE(sharedTempList->peekContains(*graphicsAllocation0));
    EXPECT_EQ(completedTaskCount, graphicsAllocation0->getTaskCount(csr0->getOsContext().getContextId()));

    storage1->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation1), TEMPORARY_ALLOCATION, completedTaskCount);
    EXPECT_TRUE(storage1->allocationLists[TEMPORARY_ALLOCATION].peekIsEmpty());
    EXPECT_TRUE(sharedTempList->peekContains(*graphicsAllocation1));
    EXPECT_EQ(completedTaskCount, graphicsAllocation1->getTaskCount(csr1->getOsContext().getContextId()));

    csr0->setLatestSentTaskCount(pendingTaskCount);
    csr1->setLatestSentTaskCount(pendingTaskCount);

    // Neither tag has advanced yet: cleaning through either storage keeps both entries.
    storage0->cleanAllocationList(pendingTaskCount, TEMPORARY_ALLOCATION);
    storage1->cleanAllocationList(pendingTaskCount, TEMPORARY_ALLOCATION);
    EXPECT_TRUE(sharedTempList->peekContains(*graphicsAllocation0));
    EXPECT_TRUE(sharedTempList->peekContains(*graphicsAllocation1));

    // csr1's tag advances: cleaning via storage0 releases only the csr1 allocation.
    gpuTag1 = completedTaskCount;
    storage0->cleanAllocationList(pendingTaskCount, TEMPORARY_ALLOCATION);
    EXPECT_TRUE(sharedTempList->peekContains(*graphicsAllocation0));
    EXPECT_FALSE(sharedTempList->peekContains(*graphicsAllocation1));

    // Cleaning via storage1 still must not touch the csr0 allocation.
    storage1->cleanAllocationList(pendingTaskCount, TEMPORARY_ALLOCATION);
    EXPECT_TRUE(sharedTempList->peekContains(*graphicsAllocation0));

    // csr0's tag advances: cleaning via storage1 now empties the shared list.
    gpuTag0 = completedTaskCount;
    storage1->cleanAllocationList(pendingTaskCount, TEMPORARY_ALLOCATION);
    EXPECT_TRUE(sharedTempList->peekIsEmpty());
}
// With the shared temporary allocations list enabled, an allocation stored through the
// engine's storage can be obtained back by host pointer; doing so empties the shared list
// and leaves the allocation's task count for that context at CompletionStamp::notReady.
HWTEST_F(HostPtrAllocationTest, givenSingleTempAllocationsListWhenAddingToStorageThenObtainCorrectly) {
    TaskCountType completedTaskCount = 2;
    TaskCountType pendingTaskCount = 1;

    memoryManager->singleTemporaryAllocationsList = true;
    memoryManager->temporaryAllocations = std::make_unique<AllocationsList>(AllocationUsage::TEMPORARY_ALLOCATION);
    memoryManager->callBaseAllocInUse = true;

    auto &registeredEngines = memoryManager->getRegisteredEngines(mockRootDeviceIndex);
    auto mockCsr = static_cast<MockCommandStreamReceiver *>(registeredEngines[0].commandStreamReceiver);

    TaskCountType gpuTag = pendingTaskCount;
    mockCsr->tagAddress = &gpuTag;

    void *cpuPtr = reinterpret_cast<void *>(0x100004);
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(
        MockAllocationProperties{mockCsr->getRootDeviceIndex(), false, MemoryConstants::pageSize, mockCsr->getOsContext().getDeviceBitfield()},
        cpuPtr);

    auto mockStorage = new MockInternalAllocationStorage(*mockCsr);
    mockCsr->internalAllocationStorage.reset(mockStorage);
    mockCsr->setLatestSentTaskCount(pendingTaskCount);

    mockStorage->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation), TEMPORARY_ALLOCATION, completedTaskCount);

    auto obtained = mockStorage->obtainTemporaryAllocationWithPtr(MemoryConstants::pageSize, cpuPtr, graphicsAllocation->getAllocationType());
    EXPECT_NE(nullptr, obtained.get());
    EXPECT_TRUE(memoryManager->temporaryAllocations->peekIsEmpty());

    // Give up ownership without freeing; the raw pointer is stored again below.
    obtained.release();

    EXPECT_EQ(CompletionStamp::notReady, graphicsAllocation->getTaskCount(mockCsr->getOsContext().getContextId()));

    // Re-store the allocation so that it gets cleaned on CSR destruction.
    mockStorage->storeAllocationWithTaskCount(std::unique_ptr<GraphicsAllocation>(graphicsAllocation), TEMPORARY_ALLOCATION, completedTaskCount);

    // NOTE(review): presumably cleared because gpuTag is a local that will not outlive this body - confirm.
    mockCsr->tagAddress = nullptr;
}
// A memory manager created with the UseSingleListForTemporaryAllocations flag at its default
// has no shared temporary list; one created after the flag is set to 1 has it enabled and allocated.
TEST_F(HostPtrAllocationTest, givenDebugFlagSetWhenCreatingMemoryManagerThenEnableSingleTempAllocationsList) {
    DebugManagerStateRestore flagsRestorer;

    {
        // Flag untouched: feature off, no list object.
        auto defaultManager = std::make_unique<MockMemoryManager>(executionEnvironment);

        EXPECT_FALSE(defaultManager->isSingleTemporaryAllocationsListEnabled());
        EXPECT_EQ(nullptr, defaultManager->temporaryAllocations.get());
    }

    debugManager.flags.UseSingleListForTemporaryAllocations.set(1);

    {
        // Flag forced on before construction: feature on, list object present.
        auto singleListManager = std::make_unique<MockMemoryManager>(executionEnvironment);

        EXPECT_TRUE(singleListManager->isSingleTemporaryAllocationsListEnabled());
        EXPECT_NE(nullptr, singleListManager->temporaryAllocations.get());
    }
}
TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredFragmentCannotBeDestroyedThenCheckForOverlappingReturnsError) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -12,6 +12,7 @@
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/unit_test/utilities/containers_tests_helpers.h"
@ -284,11 +285,17 @@ TEST_F(InternalAllocationStorageTest, givenAllocationListWhenTwoThreadsCleanConc
}
HWTEST_F(InternalAllocationStorageTest, givenMultipleActivePartitionsWhenDetachingReusableAllocationThenCheckTaskCountFinishedOnAllTiles) {
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(csr);
csr->setActivePartitions(2u);
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
auto memoryManager = deviceFactory->rootDevices[0]->getMemoryManager();
auto ultCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(deviceFactory->rootDevices[0]->getDefaultEngine().commandStreamReceiver);
ultCsr->setActivePartitions(2);
ultCsr->immWritePostSyncWriteOffset = 32;
auto tagAddress = csr->getTagAddress();
auto storage = ultCsr->getInternalAllocationStorage();
auto tagAddress = ultCsr->getTagAddress();
*tagAddress = 0xFF;
tagAddress = ptrOffset(tagAddress, 32);
*tagAddress = 0x0;
@ -296,19 +303,111 @@ HWTEST_F(InternalAllocationStorageTest, givenMultipleActivePartitionsWhenDetachi
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
storage->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), REUSABLE_ALLOCATION);
EXPECT_EQ(allocation, csr->getAllocationsForReuse().peekHead());
EXPECT_FALSE(csr->getAllocationsForReuse().peekIsEmpty());
allocation->updateTaskCount(1u, csr->getOsContext().getContextId());
EXPECT_EQ(allocation, ultCsr->getAllocationsForReuse().peekHead());
EXPECT_FALSE(ultCsr->getAllocationsForReuse().peekIsEmpty());
allocation->updateTaskCount(1u, ultCsr->getOsContext().getContextId());
std::unique_ptr<GraphicsAllocation> allocationReusable = csr->getAllocationsForReuse().detachAllocation(0, nullptr, csr, AllocationType::internalHostMemory);
std::unique_ptr<GraphicsAllocation> allocationReusable = ultCsr->getAllocationsForReuse().detachAllocation(0, nullptr, ultCsr, AllocationType::internalHostMemory);
EXPECT_EQ(nullptr, allocationReusable.get());
*tagAddress = 0x1;
allocationReusable = csr->getAllocationsForReuse().detachAllocation(0, nullptr, csr, AllocationType::internalHostMemory);
allocationReusable = ultCsr->getAllocationsForReuse().detachAllocation(0, nullptr, ultCsr, AllocationType::internalHostMemory);
EXPECT_EQ(allocation, allocationReusable.get());
memoryManager->freeGraphicsMemory(allocationReusable.release());
}
// With the shared temporary list enabled via debug flag and two root devices, an allocation
// stored for one root device must only be obtainable through that root device's CSR.
HWTEST_F(InternalAllocationStorageTest, givenSingleTempAllocationsListWhenStoringFromDifferentRootDeviceThenSelectCorrectly) {
    DebugManagerStateRestore flagsRestorer;
    debugManager.flags.UseSingleListForTemporaryAllocations.set(1);

    // Two root devices, one sub-device each.
    auto deviceFactory = std::make_unique<UltDeviceFactory>(2, 1);
    auto sharedMemoryManager = deviceFactory->rootDevices[0]->getMemoryManager();

    auto rootCsr0 = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(deviceFactory->rootDevices[0]->getDefaultEngine().commandStreamReceiver);
    auto rootCsr1 = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(deviceFactory->rootDevices[1]->getDefaultEngine().commandStreamReceiver);

    auto allocation0 = sharedMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootCsr0->getRootDeviceIndex(), MemoryConstants::pageSize});
    auto allocation1 = sharedMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootCsr1->getRootDeviceIndex(), MemoryConstants::pageSize});

    sharedMemoryManager->storeTemporaryAllocation(std::unique_ptr<GraphicsAllocation>(allocation0), rootCsr0->getOsContext().getContextId(), 0);
    sharedMemoryManager->storeTemporaryAllocation(std::unique_ptr<GraphicsAllocation>(allocation1), rootCsr1->getOsContext().getContextId(), 0);

    // Looks up a stored allocation by its own buffer and type on behalf of the given CSR.
    auto obtainFor = [&](auto *requestingCsr, GraphicsAllocation *wanted) -> std::unique_ptr<GraphicsAllocation> {
        return sharedMemoryManager->obtainTemporaryAllocationWithPtr(requestingCsr, MemoryConstants::pageSize, wanted->getUnderlyingBuffer(), wanted->getAllocationType());
    };

    // allocation0: rejected for root device 1, returned for root device 0.
    std::unique_ptr<GraphicsAllocation> obtained = obtainFor(rootCsr1, allocation0);
    EXPECT_EQ(nullptr, obtained.get());

    obtained = obtainFor(rootCsr0, allocation0);
    EXPECT_NE(nullptr, obtained.get());
    sharedMemoryManager->freeGraphicsMemory(obtained.release());

    // allocation1: rejected for root device 0, returned for root device 1.
    obtained = obtainFor(rootCsr0, allocation1);
    EXPECT_EQ(nullptr, obtained.get());

    obtained = obtainFor(rootCsr1, allocation1);
    EXPECT_NE(nullptr, obtained.get());
    sharedMemoryManager->freeGraphicsMemory(obtained.release());
}
// With the shared temporary list enabled via debug flag on one root device with two
// sub-devices, an allocation whose subDeviceBitfield is set to a CSR's deviceBitfield must
// only be obtainable through that CSR (root-device CSR vs. first sub-device CSR).
HWTEST_F(InternalAllocationStorageTest, givenSingleTempAllocationsListWhenStoringFromDifferentTileThenSelectCorrectly) {
    DebugManagerStateRestore flagsRestorer;
    debugManager.flags.UseSingleListForTemporaryAllocations.set(1);

    // One root device split into two sub-devices.
    auto deviceFactory = std::make_unique<UltDeviceFactory>(1, 2);
    auto sharedMemoryManager = deviceFactory->rootDevices[0]->getMemoryManager();

    auto rootCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(deviceFactory->rootDevices[0]->getDefaultEngine().commandStreamReceiver);
    auto tileCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(deviceFactory->subDevices[0]->getDefaultEngine().commandStreamReceiver);

    // Tag each allocation with the device bitfield of the CSR it is stored for.
    auto rootAllocation = sharedMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootCsr->getRootDeviceIndex(), MemoryConstants::pageSize});
    rootAllocation->storageInfo.subDeviceBitfield = rootCsr->deviceBitfield;

    auto tileAllocation = sharedMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{tileCsr->getRootDeviceIndex(), MemoryConstants::pageSize});
    tileAllocation->storageInfo.subDeviceBitfield = tileCsr->deviceBitfield;

    sharedMemoryManager->storeTemporaryAllocation(std::unique_ptr<GraphicsAllocation>(rootAllocation), rootCsr->getOsContext().getContextId(), 0);
    sharedMemoryManager->storeTemporaryAllocation(std::unique_ptr<GraphicsAllocation>(tileAllocation), tileCsr->getOsContext().getContextId(), 0);

    // Looks up a stored allocation by its own buffer and type on behalf of the given CSR.
    auto obtainFor = [&](auto *requestingCsr, GraphicsAllocation *wanted) -> std::unique_ptr<GraphicsAllocation> {
        return sharedMemoryManager->obtainTemporaryAllocationWithPtr(requestingCsr, MemoryConstants::pageSize, wanted->getUnderlyingBuffer(), wanted->getAllocationType());
    };

    // rootAllocation: rejected for the sub-device CSR, returned for the root CSR.
    std::unique_ptr<GraphicsAllocation> obtained = obtainFor(tileCsr, rootAllocation);
    EXPECT_EQ(nullptr, obtained.get());

    obtained = obtainFor(rootCsr, rootAllocation);
    EXPECT_NE(nullptr, obtained.get());
    sharedMemoryManager->freeGraphicsMemory(obtained.release());

    // tileAllocation: rejected for the root CSR, returned for the sub-device CSR.
    obtained = obtainFor(rootCsr, tileAllocation);
    EXPECT_EQ(nullptr, obtained.get());

    obtained = obtainFor(tileCsr, tileAllocation);
    EXPECT_NE(nullptr, obtained.get());
    sharedMemoryManager->freeGraphicsMemory(obtained.release());
}
// With the shared temporary list enabled via debug flag, an allocation whose
// subDeviceBitfield is cleared to 0 must still be obtainable through a sub-device CSR.
HWTEST_F(InternalAllocationStorageTest, givenSingleTempAllocationsListWhenStoringSysMemThenObtainCorrectly) {
    DebugManagerStateRestore flagsRestorer;
    debugManager.flags.UseSingleListForTemporaryAllocations.set(1);

    // One root device split into two sub-devices; the test works on the first sub-device's CSR.
    auto deviceFactory = std::make_unique<UltDeviceFactory>(1, 2);
    auto sharedMemoryManager = deviceFactory->rootDevices[0]->getMemoryManager();
    auto tileCsr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(deviceFactory->subDevices[0]->getDefaultEngine().commandStreamReceiver);

    auto sysMemAllocation = sharedMemoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{tileCsr->getRootDeviceIndex(), MemoryConstants::pageSize});
    // No sub-device bits set on the allocation.
    sysMemAllocation->storageInfo.subDeviceBitfield = 0;

    sharedMemoryManager->storeTemporaryAllocation(std::unique_ptr<GraphicsAllocation>(sysMemAllocation), tileCsr->getOsContext().getContextId(), 0);

    std::unique_ptr<GraphicsAllocation> obtained = sharedMemoryManager->obtainTemporaryAllocationWithPtr(
        tileCsr,
        MemoryConstants::pageSize,
        sysMemAllocation->getUnderlyingBuffer(),
        sysMemAllocation->getAllocationType());
    EXPECT_NE(nullptr, obtained.get());

    sharedMemoryManager->freeGraphicsMemory(obtained.release());
}
TEST_F(InternalAllocationStorageTest, givenInternalAllocationWhenTaskCountMetsExpectationAndItHasBeenAssignedThenAllocIsRemoved) {
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
uint32_t expectedTaskCount = 10u;