fix: re-enable staging buffer copy when ccs is busy

Related-To: NEO-11501

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2024-07-22 17:25:57 +00:00
committed by Compute-Runtime-Automation
parent 720108a12e
commit a7fbc90ebd
2 changed files with 0 additions and 58 deletions

View File

@@ -1619,13 +1619,6 @@ bool CommandQueue::isValidForStagingBufferCopy(Device &device, void *dstPtr, con
// Direct transfer from mapped allocation is faster than staging buffer
return false;
}
auto rootDeviceIndex = device.getRootDeviceIndex();
auto isLocalMem = device.getMemoryManager()->isLocalMemorySupported(rootDeviceIndex);
if (isOOQEnabled() && getGpgpuCommandStreamReceiver().isBusy() && !isLocalMem) {
// It's not beneficial to make copy through staging buffers if it's OOQ,
// compute engine is busy and device is iGPU.
return false;
}
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, nullptr};
csrSelectionArgs.direction = TransferDirection::hostToLocal;
auto csr = &selectCsrForBuiltinOperation(csrSelectionArgs);

View File

@@ -2608,54 +2608,3 @@ HWTEST_F(StagingBufferTest, givenIsValidForStagingBufferCopyWhenSrcIsMappedThenR
auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
EXPECT_FALSE(myCmdQ.isValidForStagingBufferCopy(pClDevice->getDevice(), dstPtr, mappedPtr, buffer->getSize(), false));
}
// Verifies that isValidForStagingBufferCopy() rejects the staging-buffer path only when
// all three conditions hold together: the queue is out-of-order, the compute CSR has
// outstanding work, and the device has no local memory.
HWTEST_F(StagingBufferTest, givenIsValidForStagingBufferCopyWhenIsNotLocalMemoryAndOOQAndGpuBusyThenReturnFalse) {
    DebugManagerStateRestore restore{};
    debugManager.flags.EnableCopyWithStagingBuffers.set(1);
    // Pin the "no local memory" precondition explicitly instead of relying on whatever
    // defaultHwInfo reports; otherwise the final EXPECT_FALSE is platform dependent.
    // NOTE(review): assumes the flag is consulted when the device's memory manager is
    // created below - confirm.
    debugManager.flags.EnableLocalMemory.set(0);

    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    auto mockContext = std::make_unique<MockContext>(mockDevice.get());
    MockCommandQueueHw<FamilyType> myCmdQ(mockContext.get(), mockDevice.get(), 0);

    // Destination is a device unified-memory allocation owned by this test.
    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, mockContext->getRootDeviceIndices(), mockContext->getDeviceBitfields());
    unifiedMemoryProperties.device = &mockDevice->getDevice();
    auto dstPtr = mockContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(copySize, unifiedMemoryProperties);

    auto ccsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(&myCmdQ.getGpgpuCommandStreamReceiver());

    // Tag equals task count: presumably the CSR reports idle, so staging copy is allowed.
    *ccsCsr->tagAddress = 0u;
    ccsCsr->taskCount = 0u;
    EXPECT_TRUE(myCmdQ.isValidForStagingBufferCopy(mockDevice->getDevice(), dstPtr, srcPtr, 1024ul, false));

    // Task count ahead of the tag (presumably busy), but the queue is still in-order:
    // staging copy remains allowed.
    *ccsCsr->tagAddress = 0u;
    ccsCsr->taskCount = 1u;
    EXPECT_TRUE(myCmdQ.isValidForStagingBufferCopy(mockDevice->getDevice(), dstPtr, srcPtr, 1024ul, false));

    // Same CSR state with an out-of-order queue: staging copy must now be rejected.
    myCmdQ.setOoqEnabled();
    EXPECT_FALSE(myCmdQ.isValidForStagingBufferCopy(mockDevice->getDevice(), dstPtr, srcPtr, 1024ul, false));

    mockContext->getSVMAllocsManager()->freeSVMAlloc(dstPtr);
}
// With local memory enabled, the staging-buffer path is expected to stay valid even
// for an out-of-order queue whose compute CSR still has outstanding work.
HWTEST_F(StagingBufferTest, givenIsValidForStagingBufferCopyWhenIsLocalMemoryAndOOQAndGpuBusyThenReturnTrue) {
    DebugManagerStateRestore restore{};
    debugManager.flags.EnableCopyWithStagingBuffers.set(1);
    debugManager.flags.EnableLocalMemory.set(1);

    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    auto context = std::make_unique<MockContext>(clDevice.get());
    MockCommandQueueHw<FamilyType> queue(context.get(), clDevice.get(), 0);
    auto &neoDevice = clDevice->getDevice();

    // Allocate the device unified-memory destination used by both checks below.
    SVMAllocsManager::UnifiedMemoryProperties usmProperties(InternalMemoryType::deviceUnifiedMemory, 1, context->getRootDeviceIndices(), context->getDeviceBitfields());
    usmProperties.device = &neoDevice;
    auto *svmManager = context->getSVMAllocsManager();
    auto dstPtr = svmManager->createUnifiedMemoryAllocation(copySize, usmProperties);

    auto computeCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(&queue.getGpgpuCommandStreamReceiver());

    // Baseline: task count matches the tag and the queue is in-order.
    computeCsr->taskCount = 0u;
    *computeCsr->tagAddress = 0u;
    EXPECT_TRUE(queue.isValidForStagingBufferCopy(neoDevice, dstPtr, srcPtr, 1024ul, false));

    // Task count ahead of the tag plus out-of-order queue: still valid with local memory.
    computeCsr->taskCount = 1u;
    *computeCsr->tagAddress = 0u;
    queue.setOoqEnabled();
    EXPECT_TRUE(queue.isValidForStagingBufferCopy(neoDevice, dstPtr, srcPtr, 1024ul, false));

    svmManager->freeSVMAlloc(dstPtr);
}