performance: Don't wait for taskCount for indirect allocs

Related-To: GSD-9385

For indirect allocations, we don't really know their task count,
because we can't track their true usage on the GPU.
In the case of a non-blocking free, don't wait for
latestSentTaskCount.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek 2024-07-10 13:22:44 +00:00 committed by Compute-Runtime-Automation
parent 19b6f5a258
commit 35cbbfe43a
4 changed files with 20 additions and 6 deletions

View File

@ -3969,7 +3969,7 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
auto allocationData1 = svmManager->getSVMAlloc(ptr1);
TaskCountType prevPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId());
svmManager->prepareIndirectAllocationForDestruction(allocationData1);
svmManager->prepareIndirectAllocationForDestruction(allocationData1, false);
TaskCountType postPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId());
EXPECT_EQ(postPeekTaskCount1, prevPeekTaskCount1);

View File

@ -722,9 +722,16 @@ TEST(UnifiedMemoryTest, givenInternalAllocationsWhenTheyArePreparedForFreeingThe
auto allocationData = unifiedMemoryManager->getSVMAlloc(ptr);
unifiedMemoryManager->prepareIndirectAllocationForDestruction(allocationData);
unifiedMemoryManager->prepareIndirectAllocationForDestruction(allocationData, false);
EXPECT_EQ(124u, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getTaskCount(commandStreamReceiver.getOsContext().getContextId()));
EXPECT_EQ(124u, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(commandStreamReceiver.getOsContext().getContextId()));
graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateTaskCount(1u, commandStreamReceiver.getOsContext().getContextId());
graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateResidencyTaskCount(GraphicsAllocation::objectAlwaysResident, commandStreamReceiver.getOsContext().getContextId());
unifiedMemoryManager->prepareIndirectAllocationForDestruction(allocationData, true);
EXPECT_EQ(GraphicsAllocation::objectNotUsed, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getTaskCount(commandStreamReceiver.getOsContext().getContextId()));
EXPECT_EQ(GraphicsAllocation::objectNotResident, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(commandStreamReceiver.getOsContext().getContextId()));
unifiedMemoryManager->freeSVMAlloc(ptr);
}

View File

@ -496,7 +496,8 @@ bool SVMAllocsManager::freeSVMAllocDefer(void *ptr) {
}
void SVMAllocsManager::freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData) {
this->prepareIndirectAllocationForDestruction(svmData);
auto allowNonBlockingFree = policy == FreePolicyType::none;
this->prepareIndirectAllocationForDestruction(svmData, allowNonBlockingFree);
if (policy == FreePolicyType::blocking) {
if (svmData->cpuAllocation) {
@ -769,7 +770,7 @@ void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &co
}
}
void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData) {
void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData, bool isNonBlockingFree) {
std::unique_lock<std::shared_mutex> lock(mtx);
if (this->indirectAllocationsResidency.size() > 0u) {
for (auto &internalAllocationsHandling : this->indirectAllocationsResidency) {
@ -778,7 +779,13 @@ void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData
if (gpuAllocation == nullptr) {
continue;
}
auto desiredTaskCount = std::max(internalAllocationsHandling.second.latestSentTaskCount, gpuAllocation->getTaskCount(commandStreamReceiver->getOsContext().getContextId()));
// Marking the gpuAllocation task count as objectNotUsed means we will not wait for GPU completion.
// However, if this is a blocking free, we must select the "safest" task count to wait for.
TaskCountType desiredTaskCount = std::max(internalAllocationsHandling.second.latestSentTaskCount, gpuAllocation->getTaskCount(commandStreamReceiver->getOsContext().getContextId()));
if (isNonBlockingFree) {
desiredTaskCount = GraphicsAllocation::objectNotUsed;
}
if (gpuAllocation->isAlwaysResident(commandStreamReceiver->getOsContext().getContextId())) {
gpuAllocation->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, commandStreamReceiver->getOsContext().getContextId());
gpuAllocation->updateResidencyTaskCount(desiredTaskCount, commandStreamReceiver->getOsContext().getContextId());

View File

@ -229,7 +229,7 @@ class SVMAllocsManager {
bool hasHostAllocations();
std::atomic<uint32_t> allocationsCounter = 0;
MOCKABLE_VIRTUAL void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, TaskCountType taskCount);
void prepareIndirectAllocationForDestruction(SvmAllocationData *);
void prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData, bool isNonBlockingFree);
MOCKABLE_VIRTUAL void prefetchMemory(Device &device, CommandStreamReceiver &commandStreamReceiver, SvmAllocationData &svmData);
void prefetchSVMAllocs(Device &device, CommandStreamReceiver &commandStreamReceiver);
std::unique_lock<std::mutex> obtainOwnership();