mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
fix: re-enable staging buffer copy when ccs is busy
Related-To: NEO-11501
Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by Compute-Runtime-Automation
parent 720108a12e
commit a7fbc90ebd
@@ -1619,13 +1619,6 @@ bool CommandQueue::isValidForStagingBufferCopy(Device &device, void *dstPtr, con
         // Direct transfer from mapped allocation is faster than staging buffer
         return false;
     }
-    auto rootDeviceIndex = device.getRootDeviceIndex();
-    auto isLocalMem = device.getMemoryManager()->isLocalMemorySupported(rootDeviceIndex);
-    if (isOOQEnabled() && getGpgpuCommandStreamReceiver().isBusy() && !isLocalMem) {
-        // It's not beneficial to make copy through staging buffers if it's OOQ,
-        // compute engine is busy and device is iGPU.
-        return false;
-    }
     CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, nullptr};
     csrSelectionArgs.direction = TransferDirection::hostToLocal;
     auto csr = &selectCsrForBuiltinOperation(csrSelectionArgs);
@@ -2608,54 +2608,3 @@ HWTEST_F(StagingBufferTest, givenIsValidForStagingBufferCopyWhenSrcIsMappedThenR
     auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
     EXPECT_FALSE(myCmdQ.isValidForStagingBufferCopy(pClDevice->getDevice(), dstPtr, mappedPtr, buffer->getSize(), false));
 }
-
-HWTEST_F(StagingBufferTest, givenIsValidForStagingBufferCopyWhenIsNotLocalMemoryAndOOQAndGpuBusyThenReturnFalse) {
-    DebugManagerStateRestore restore{};
-    debugManager.flags.EnableCopyWithStagingBuffers.set(1);
-
-    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
-    auto mockContext = std::make_unique<MockContext>(mockDevice.get());
-    MockCommandQueueHw<FamilyType> myCmdQ(mockContext.get(), mockDevice.get(), 0);
-
-    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, mockContext->getRootDeviceIndices(), mockContext->getDeviceBitfields());
-    unifiedMemoryProperties.device = &mockDevice->getDevice();
-    auto dstPtr = mockContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(copySize, unifiedMemoryProperties);
-    auto ccsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(&myCmdQ.getGpgpuCommandStreamReceiver());
-
-    *ccsCsr->tagAddress = 0u;
-    ccsCsr->taskCount = 0u;
-    EXPECT_TRUE(myCmdQ.isValidForStagingBufferCopy(mockDevice->getDevice(), dstPtr, srcPtr, 1024ul, false));
-
-    *ccsCsr->tagAddress = 0u;
-    ccsCsr->taskCount = 1u;
-    EXPECT_TRUE(myCmdQ.isValidForStagingBufferCopy(mockDevice->getDevice(), dstPtr, srcPtr, 1024ul, false));
-
-    myCmdQ.setOoqEnabled();
-    EXPECT_FALSE(myCmdQ.isValidForStagingBufferCopy(mockDevice->getDevice(), dstPtr, srcPtr, 1024ul, false));
-    mockContext->getSVMAllocsManager()->freeSVMAlloc(dstPtr);
-}
-
-HWTEST_F(StagingBufferTest, givenIsValidForStagingBufferCopyWhenIsLocalMemoryAndOOQAndGpuBusyThenReturnTrue) {
-    DebugManagerStateRestore restore{};
-    debugManager.flags.EnableCopyWithStagingBuffers.set(1);
-    debugManager.flags.EnableLocalMemory.set(1);
-
-    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
-    auto mockContext = std::make_unique<MockContext>(mockDevice.get());
-    MockCommandQueueHw<FamilyType> myCmdQ(mockContext.get(), mockDevice.get(), 0);
-
-    SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, mockContext->getRootDeviceIndices(), mockContext->getDeviceBitfields());
-    unifiedMemoryProperties.device = &mockDevice->getDevice();
-    auto dstPtr = mockContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(copySize, unifiedMemoryProperties);
-    auto ccsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(&myCmdQ.getGpgpuCommandStreamReceiver());
-
-    *ccsCsr->tagAddress = 0u;
-    ccsCsr->taskCount = 0u;
-    EXPECT_TRUE(myCmdQ.isValidForStagingBufferCopy(mockDevice->getDevice(), dstPtr, srcPtr, 1024ul, false));
-
-    *ccsCsr->tagAddress = 0u;
-    ccsCsr->taskCount = 1u;
-    myCmdQ.setOoqEnabled();
-    EXPECT_TRUE(myCmdQ.isValidForStagingBufferCopy(mockDevice->getDevice(), dstPtr, srcPtr, 1024ul, false));
-    mockContext->getSVMAllocsManager()->freeSVMAlloc(dstPtr);
-}
Reference in New Issue
Block a user