performance: adjust staging buffer usage

Related-To: NEO-11928

Don't copy through a staging buffer if the dst USM allocation
has not been used before and the transfer would be split.
Also, don't use staging buffers for mapped OCL buffers.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2024-07-09 15:55:03 +00:00
committed by Compute-Runtime-Automation
parent 99e8cb7b70
commit dbd96372be
7 changed files with 62 additions and 17 deletions

View File

@@ -109,15 +109,19 @@ void StagingBufferManager::storeBuffer(void *stagingBuffer, uint64_t taskCount)
stagingBuffers.push_back({svmData, taskCount});
}
// Decides whether a copy from srcPtr to dstPtr may be routed through a staging buffer.
// NOTE(review): this text appears to be a diff view with the +/- markers stripped, so a
// replaced line and its replacement sit back to back (two signatures, then the
// hostToUsmDeviceCopy/return pair followed by the hostToUsmCopy logic) - confirm
// against the raw patch before treating this as compilable code.
//
// device          - supplies the product helper default and the root device index.
// dstPtr, srcPtr  - copy destination and source; both are looked up in svmAllocsManager.
// size            - transfer size, compared against chunkSize (declared outside this view).
// hasDependencies - when true, the staging path is rejected.
// osContextId     - OS context checked against the destination's GPU allocation.
// Returns true only when every condition in the final return expression holds.
bool StagingBufferManager::isValidForCopy(Device &device, void *dstPtr, const void *srcPtr, bool hasDependencies) const {
bool StagingBufferManager::isValidForCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies, uint32_t osContextId) const {
// Start from the product helper's setting; the debug flag overrides it unless it is -1.
auto stagingCopyEnabled = device.getProductHelper().isStagingBuffersEnabled();
if (debugManager.flags.EnableCopyWithStagingBuffers.get() != -1) {
stagingCopyEnabled = debugManager.flags.EnableCopyWithStagingBuffers.get();
}
// Look both pointers up in the SVM allocations manager; nullptr means no SVM data was found.
auto usmDstData = svmAllocsManager->getSVMAlloc(dstPtr);
auto usmSrcData = svmAllocsManager->getSVMAlloc(srcPtr);
bool hostToUsmDeviceCopy = usmSrcData == nullptr && usmDstData != nullptr;
return stagingCopyEnabled && hostToUsmDeviceCopy && !hasDependencies;
// Only the "source unknown to the SVM manager, destination known" direction qualifies.
bool hostToUsmCopy = usmSrcData == nullptr && usmDstData != nullptr;
// Ask the destination's GPU allocation for this root device whether osContextId already uses it.
// NOTE(review): the result of getGraphicsAllocation() is dereferenced without a null check -
// presumably it always exists for this root device index; verify against the allocator.
bool isUsedByOsContext = false;
if (usmDstData) {
isUsedByOsContext = usmDstData->gpuAllocations.getGraphicsAllocation(device.getRootDeviceIndex())->isUsedByOsContext(osContextId);
}
// A transfer larger than chunkSize is accepted only if the destination was already used by
// this OS context; per the commit message, an unused destination whose transfer would be
// split must not go through staging.
return stagingCopyEnabled && hostToUsmCopy && !hasDependencies && (isUsedByOsContext || size <= chunkSize);
}
} // namespace NEO

View File

@@ -32,7 +32,7 @@ class StagingBufferManager {
StagingBufferManager(SVMAllocsManager *svmAllocsManager, const RootDeviceIndicesContainer &rootDeviceIndices, const std::map<uint32_t, DeviceBitfield> &deviceBitfields);
~StagingBufferManager();
bool isValidForCopy(Device &device, void *dstPtr, const void *srcPtr, bool hasDependencies) const;
bool isValidForCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies, uint32_t osContextId) const;
int32_t performCopy(void *dstPtr, const void *srcPtr, size_t size, ChunkCopyFunction chunkCopyFunc, CommandStreamReceiver *csr);
private: