performance: adjust staging buffer usage

Related-To: NEO-11928

Don't copy through a staging buffer if the destination USM allocation
has not been used before and the transfer would be split.
Also, don't use staging buffers for mapped OpenCL buffers.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2024-07-09 15:55:03 +00:00
committed by Compute-Runtime-Automation
parent 99e8cb7b70
commit dbd96372be
7 changed files with 62 additions and 17 deletions

View File

@@ -1568,4 +1568,20 @@ cl_int CommandQueue::enqueueStagingBufferMemcpy(cl_bool blockingCopy, void *dstP
return ret;
}
// Reports whether a copy from srcPtr to dstPtr may be routed through a staging buffer.
// Returns false right away when the context yields an existing map allocation for the
// source range; otherwise the decision is delegated to the context's staging buffer
// manager, keyed by the OS context id of the CSR selected for a host-to-local SVM memcpy.
bool CommandQueue::isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies) {
    // Stays nullptr unless the lookup below fills it in.
    GraphicsAllocation *mappedAllocation = nullptr;
    context->tryGetExistingMapAllocation(srcPtr, size, mappedAllocation);
    if (mappedAllocation) {
        // Direct transfer from mapped allocation is faster than staging buffer
        return false;
    }

    // Pick the CSR that a host-to-local SVM memcpy would use; only its OS context id is needed.
    CsrSelectionArgs selectionArgs{CL_COMMAND_SVM_MEMCPY, nullptr};
    selectionArgs.direction = TransferDirection::hostToLocal;
    auto &selectedCsr = selectCsrForBuiltinOperation(selectionArgs);
    const auto contextId = selectedCsr.getOsContext().getContextId();

    // A missing staging buffer manager is treated as an unrecoverable condition.
    auto manager = context->getStagingBufferManager();
    UNRECOVERABLE_IF(manager == nullptr);

    return manager->isValidForCopy(device, dstPtr, srcPtr, size, hasDependencies, contextId);
}
} // namespace NEO