mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 09:03:14 +08:00
performance: adjust staging buffer usage
Related-To: NEO-11928 Don't copy through a staging buffer if the destination USM allocation has not been used before and the transfer would be split into chunks. Also, don't use staging buffers for mapped OCL buffers. Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
99e8cb7b70
commit
dbd96372be
@@ -109,15 +109,19 @@ void StagingBufferManager::storeBuffer(void *stagingBuffer, uint64_t taskCount)
|
||||
stagingBuffers.push_back({svmData, taskCount});
|
||||
}
|
||||
|
||||
bool StagingBufferManager::isValidForCopy(Device &device, void *dstPtr, const void *srcPtr, bool hasDependencies) const {
|
||||
bool StagingBufferManager::isValidForCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies, uint32_t osContextId) const {
|
||||
auto stagingCopyEnabled = device.getProductHelper().isStagingBuffersEnabled();
|
||||
if (debugManager.flags.EnableCopyWithStagingBuffers.get() != -1) {
|
||||
stagingCopyEnabled = debugManager.flags.EnableCopyWithStagingBuffers.get();
|
||||
}
|
||||
auto usmDstData = svmAllocsManager->getSVMAlloc(dstPtr);
|
||||
auto usmSrcData = svmAllocsManager->getSVMAlloc(srcPtr);
|
||||
bool hostToUsmDeviceCopy = usmSrcData == nullptr && usmDstData != nullptr;
|
||||
return stagingCopyEnabled && hostToUsmDeviceCopy && !hasDependencies;
|
||||
bool hostToUsmCopy = usmSrcData == nullptr && usmDstData != nullptr;
|
||||
bool isUsedByOsContext = false;
|
||||
if (usmDstData) {
|
||||
isUsedByOsContext = usmDstData->gpuAllocations.getGraphicsAllocation(device.getRootDeviceIndex())->isUsedByOsContext(osContextId);
|
||||
}
|
||||
return stagingCopyEnabled && hostToUsmCopy && !hasDependencies && (isUsedByOsContext || size <= chunkSize);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -32,7 +32,7 @@ class StagingBufferManager {
|
||||
StagingBufferManager(SVMAllocsManager *svmAllocsManager, const RootDeviceIndicesContainer &rootDeviceIndices, const std::map<uint32_t, DeviceBitfield> &deviceBitfields);
|
||||
~StagingBufferManager();
|
||||
|
||||
bool isValidForCopy(Device &device, void *dstPtr, const void *srcPtr, bool hasDependencies) const;
|
||||
bool isValidForCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies, uint32_t osContextId) const;
|
||||
int32_t performCopy(void *dstPtr, const void *srcPtr, size_t size, ChunkCopyFunction chunkCopyFunc, CommandStreamReceiver *csr);
|
||||
|
||||
private:
|
||||
|
||||
Reference in New Issue
Block a user