fix: disable staging when cpu copy is allowed

Related-To: NEO-14027

If buffer CPU copy is allowed, the whole
transfer can be done with a CPU memcpy.
In that case, disable staging, as it is less performant.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-02-14 11:10:09 +00:00
committed by Compute-Runtime-Automation
parent 3a8f748d55
commit d477efeb40
6 changed files with 37 additions and 22 deletions

View File

@@ -2542,7 +2542,7 @@ cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue commandQueue,
return retVal;
}
if (pCommandQueue->isValidForStagingTransfer(pBuffer, ptr, numEventsInWaitList > 0)) {
if (pCommandQueue->isValidForStagingTransfer(pBuffer, ptr, cb, CL_COMMAND_WRITE_BUFFER, blockingWrite, numEventsInWaitList > 0)) {
retVal = pCommandQueue->enqueueStagingWriteBuffer(
pBuffer,
blockingWrite,
@@ -2899,7 +2899,7 @@ cl_int CL_API_CALL clEnqueueReadImage(cl_command_queue commandQueue,
return retVal;
}
if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, numEventsInWaitList > 0)) {
if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, pImage->getSize(), CL_COMMAND_READ_IMAGE, blockingRead, numEventsInWaitList > 0)) {
retVal = pCommandQueue->enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, pImage, blockingRead, origin, region, rowPitch, slicePitch, ptr, event);
} else {
retVal = pCommandQueue->enqueueReadImage(
@@ -2975,7 +2975,7 @@ cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue commandQueue,
TRACING_EXIT(ClEnqueueWriteImage, &retVal);
return retVal;
}
if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, numEventsInWaitList > 0)) {
if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, pImage->getSize(), CL_COMMAND_WRITE_IMAGE, blockingWrite, numEventsInWaitList > 0)) {
retVal = pCommandQueue->enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, pImage, blockingWrite, origin, region, inputRowPitch, inputSlicePitch, ptr, event);
} else {
retVal = pCommandQueue->enqueueWriteImage(

View File

@@ -408,7 +408,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_int enqueueStagingWriteBuffer(Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event);
bool isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies);
bool isValidForStagingTransfer(MemObj *memObj, const void *ptr, bool hasDependencies);
bool isValidForStagingTransfer(MemObj *memObj, const void *ptr, size_t size, cl_command_type commandType, bool isBlocking, bool hasDependencies);
protected:
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);

View File

@@ -15,6 +15,7 @@
#include "opencl/source/context/context.h"
#include "opencl/source/event/user_event.h"
#include "opencl/source/helpers/base_object.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"
#include "CL/cl_ext.h"
@@ -167,9 +168,9 @@ bool CommandQueue::isValidForStagingBufferCopy(Device &device, void *dstPtr, con
return stagingBufferManager->isValidForCopy(device, dstPtr, srcPtr, size, hasDependencies, osContextId);
}
bool CommandQueue::isValidForStagingTransfer(MemObj *memObj, const void *ptr, bool hasDependencies) {
bool CommandQueue::isValidForStagingTransfer(MemObj *memObj, const void *ptr, size_t size, cl_command_type commandType, bool isBlocking, bool hasDependencies) {
GraphicsAllocation *allocation = nullptr;
context->tryGetExistingMapAllocation(ptr, memObj->getSize(), allocation);
context->tryGetExistingMapAllocation(ptr, size, allocation);
if (allocation != nullptr) {
// Direct transfer from mapped allocation is faster than staging buffer
return false;
@@ -178,11 +179,13 @@ bool CommandQueue::isValidForStagingTransfer(MemObj *memObj, const void *ptr, bo
if (!stagingBufferManager) {
return false;
}
auto isValidForStaging = stagingBufferManager->isValidForStagingTransfer(this->getDevice(), ptr, size, hasDependencies);
switch (memObj->peekClMemObjType()) {
case CL_MEM_OBJECT_BUFFER:
return isValidForStaging && !this->bufferCpuCopyAllowed(castToObject<Buffer>(memObj), commandType, isBlocking, size, const_cast<void *>(ptr), 0, nullptr);
case CL_MEM_OBJECT_IMAGE1D:
case CL_MEM_OBJECT_IMAGE2D:
case CL_MEM_OBJECT_BUFFER:
return stagingBufferManager->isValidForStagingTransfer(this->getDevice(), ptr, memObj->getSize(), hasDependencies);
return isValidForStaging;
default:
return false;
}