performance: Don't wait for taskCount for indirect allocs

In the case of indirect allocations, we don't really know their task count
because we can't track their true usage on the GPU. For a non-blocking free,
don't wait for latestSentTaskCount.

Related-To: GSD-9385
Signed-off-by: Szymon Morek <szymon.morek@intel.com>
commit 35cbbfe43a
parent 19b6f5a258
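The gist of the change, as a minimal self-contained sketch rather than the actual NEO implementation: TaskCountType, objectNotUsed, and the sentinel value used here are simplified stand-ins for the types and constants that appear in the diff below. For an indirect allocation the runtime cannot observe the true last GPU use, so a blocking free waits for the "safest" (highest) known task count, while a non-blocking free marks the allocation as not used and skips the wait.

// Minimal sketch (not the NEO code): simplified stand-ins for TaskCountType
// and GraphicsAllocation::objectNotUsed from the diff below.
#include <algorithm>
#include <cstdint>
#include <iostream>

using TaskCountType = uint64_t;
// Sentinel meaning "do not wait for GPU completion"; the real constant is
// GraphicsAllocation::objectNotUsed, its concrete value is assumed here.
constexpr TaskCountType objectNotUsed = ~TaskCountType(0u);

// For indirect allocations the true last GPU use is unknown, so a blocking
// free waits for the highest task count we know about; a non-blocking free
// returns the sentinel and skips the wait entirely.
TaskCountType selectWaitTaskCount(TaskCountType latestSentTaskCount,
                                  TaskCountType allocationTaskCount,
                                  bool isNonBlockingFree) {
    if (isNonBlockingFree) {
        return objectNotUsed;
    }
    return std::max(latestSentTaskCount, allocationTaskCount);
}

int main() {
    std::cout << selectWaitTaskCount(124u, 100u, false) << "\n"; // blocking free: waits for 124
    std::cout << (selectWaitTaskCount(124u, 100u, true) == objectNotUsed) << "\n"; // non-blocking free: no wait
    return 0;
}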
@@ -3969,7 +3969,7 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
     auto allocationData1 = svmManager->getSVMAlloc(ptr1);
     TaskCountType prevPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId());
-    svmManager->prepareIndirectAllocationForDestruction(allocationData1);
+    svmManager->prepareIndirectAllocationForDestruction(allocationData1, false);
     TaskCountType postPeekTaskCount1 = allocationData1->gpuAllocations.getGraphicsAllocation(1u)->getTaskCount(csr0->getOsContext().getContextId());

     EXPECT_EQ(postPeekTaskCount1, prevPeekTaskCount1);
@@ -722,9 +722,16 @@ TEST(UnifiedMemoryTest, givenInternalAllocationsWhenTheyArePreparedForFreeingThe
     auto allocationData = unifiedMemoryManager->getSVMAlloc(ptr);

-    unifiedMemoryManager->prepareIndirectAllocationForDestruction(allocationData);
+    unifiedMemoryManager->prepareIndirectAllocationForDestruction(allocationData, false);
     EXPECT_EQ(124u, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getTaskCount(commandStreamReceiver.getOsContext().getContextId()));
     EXPECT_EQ(124u, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(commandStreamReceiver.getOsContext().getContextId()));

+    graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateTaskCount(1u, commandStreamReceiver.getOsContext().getContextId());
+    graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->updateResidencyTaskCount(GraphicsAllocation::objectAlwaysResident, commandStreamReceiver.getOsContext().getContextId());
+    unifiedMemoryManager->prepareIndirectAllocationForDestruction(allocationData, true);
+    EXPECT_EQ(GraphicsAllocation::objectNotUsed, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getTaskCount(commandStreamReceiver.getOsContext().getContextId()));
+    EXPECT_EQ(GraphicsAllocation::objectNotResident, graphicsAllocation->gpuAllocations.getDefaultGraphicsAllocation()->getResidencyTaskCount(commandStreamReceiver.getOsContext().getContextId()));
+
     unifiedMemoryManager->freeSVMAlloc(ptr);
 }

@@ -496,7 +496,8 @@ bool SVMAllocsManager::freeSVMAllocDefer(void *ptr) {
 }

 void SVMAllocsManager::freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData) {
-    this->prepareIndirectAllocationForDestruction(svmData);
+    auto allowNonBlockingFree = policy == FreePolicyType::none;
+    this->prepareIndirectAllocationForDestruction(svmData, allowNonBlockingFree);

     if (policy == FreePolicyType::blocking) {
         if (svmData->cpuAllocation) {
@@ -769,7 +770,7 @@ void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &co
     }
 }

-void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData) {
+void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData, bool isNonBlockingFree) {
     std::unique_lock<std::shared_mutex> lock(mtx);
     if (this->indirectAllocationsResidency.size() > 0u) {
         for (auto &internalAllocationsHandling : this->indirectAllocationsResidency) {
@@ -778,7 +779,13 @@ void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData
             if (gpuAllocation == nullptr) {
                 continue;
             }
-            auto desiredTaskCount = std::max(internalAllocationsHandling.second.latestSentTaskCount, gpuAllocation->getTaskCount(commandStreamReceiver->getOsContext().getContextId()));
+
+            // Marking gpuAllocation task count as objectNotUsed means we will not wait for GPU completion.
+            // However, if this is blocking free, we must select "safest" task count to wait for.
+            TaskCountType desiredTaskCount = std::max(internalAllocationsHandling.second.latestSentTaskCount, gpuAllocation->getTaskCount(commandStreamReceiver->getOsContext().getContextId()));
+            if (isNonBlockingFree) {
+                desiredTaskCount = GraphicsAllocation::objectNotUsed;
+            }
             if (gpuAllocation->isAlwaysResident(commandStreamReceiver->getOsContext().getContextId())) {
                 gpuAllocation->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, commandStreamReceiver->getOsContext().getContextId());
                 gpuAllocation->updateResidencyTaskCount(desiredTaskCount, commandStreamReceiver->getOsContext().getContextId());
@@ -229,7 +229,7 @@ class SVMAllocsManager {
     bool hasHostAllocations();
     std::atomic<uint32_t> allocationsCounter = 0;
     MOCKABLE_VIRTUAL void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, TaskCountType taskCount);
-    void prepareIndirectAllocationForDestruction(SvmAllocationData *);
+    void prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData, bool isNonBlockingFree);
     MOCKABLE_VIRTUAL void prefetchMemory(Device &device, CommandStreamReceiver &commandStreamReceiver, SvmAllocationData &svmData);
     void prefetchSVMAllocs(Device &device, CommandStreamReceiver &commandStreamReceiver);
     std::unique_lock<std::mutex> obtainOwnership();