mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
fix: disable staging when cpu copy is allowed
Related-To: NEO-14027. If CPU copy is allowed for the buffer, the whole transfer can be done with a CPU memcpy; in that case, disable staging, as it is less performant. Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
3a8f748d55
commit
d477efeb40
@@ -2542,7 +2542,7 @@ cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue commandQueue,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
if (pCommandQueue->isValidForStagingTransfer(pBuffer, ptr, numEventsInWaitList > 0)) {
|
||||
if (pCommandQueue->isValidForStagingTransfer(pBuffer, ptr, cb, CL_COMMAND_WRITE_BUFFER, blockingWrite, numEventsInWaitList > 0)) {
|
||||
retVal = pCommandQueue->enqueueStagingWriteBuffer(
|
||||
pBuffer,
|
||||
blockingWrite,
|
||||
@@ -2899,7 +2899,7 @@ cl_int CL_API_CALL clEnqueueReadImage(cl_command_queue commandQueue,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, numEventsInWaitList > 0)) {
|
||||
if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, pImage->getSize(), CL_COMMAND_READ_IMAGE, blockingRead, numEventsInWaitList > 0)) {
|
||||
retVal = pCommandQueue->enqueueStagingImageTransfer(CL_COMMAND_READ_IMAGE, pImage, blockingRead, origin, region, rowPitch, slicePitch, ptr, event);
|
||||
} else {
|
||||
retVal = pCommandQueue->enqueueReadImage(
|
||||
@@ -2975,7 +2975,7 @@ cl_int CL_API_CALL clEnqueueWriteImage(cl_command_queue commandQueue,
|
||||
TRACING_EXIT(ClEnqueueWriteImage, &retVal);
|
||||
return retVal;
|
||||
}
|
||||
if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, numEventsInWaitList > 0)) {
|
||||
if (pCommandQueue->isValidForStagingTransfer(pImage, ptr, pImage->getSize(), CL_COMMAND_WRITE_IMAGE, blockingWrite, numEventsInWaitList > 0)) {
|
||||
retVal = pCommandQueue->enqueueStagingImageTransfer(CL_COMMAND_WRITE_IMAGE, pImage, blockingWrite, origin, region, inputRowPitch, inputSlicePitch, ptr, event);
|
||||
} else {
|
||||
retVal = pCommandQueue->enqueueWriteImage(
|
||||
|
||||
@@ -408,7 +408,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
cl_int enqueueStagingWriteBuffer(Buffer *buffer, cl_bool blockingCopy, size_t offset, size_t size, const void *ptr, cl_event *event);
|
||||
|
||||
bool isValidForStagingBufferCopy(Device &device, void *dstPtr, const void *srcPtr, size_t size, bool hasDependencies);
|
||||
bool isValidForStagingTransfer(MemObj *memObj, const void *ptr, bool hasDependencies);
|
||||
bool isValidForStagingTransfer(MemObj *memObj, const void *ptr, size_t size, cl_command_type commandType, bool isBlocking, bool hasDependencies);
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/event/user_event.h"
|
||||
#include "opencl/source/helpers/base_object.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
||||
#include "CL/cl_ext.h"
|
||||
@@ -167,9 +168,9 @@ bool CommandQueue::isValidForStagingBufferCopy(Device &device, void *dstPtr, con
|
||||
return stagingBufferManager->isValidForCopy(device, dstPtr, srcPtr, size, hasDependencies, osContextId);
|
||||
}
|
||||
|
||||
bool CommandQueue::isValidForStagingTransfer(MemObj *memObj, const void *ptr, bool hasDependencies) {
|
||||
bool CommandQueue::isValidForStagingTransfer(MemObj *memObj, const void *ptr, size_t size, cl_command_type commandType, bool isBlocking, bool hasDependencies) {
|
||||
GraphicsAllocation *allocation = nullptr;
|
||||
context->tryGetExistingMapAllocation(ptr, memObj->getSize(), allocation);
|
||||
context->tryGetExistingMapAllocation(ptr, size, allocation);
|
||||
if (allocation != nullptr) {
|
||||
// Direct transfer from mapped allocation is faster than staging buffer
|
||||
return false;
|
||||
@@ -178,11 +179,13 @@ bool CommandQueue::isValidForStagingTransfer(MemObj *memObj, const void *ptr, bo
|
||||
if (!stagingBufferManager) {
|
||||
return false;
|
||||
}
|
||||
auto isValidForStaging = stagingBufferManager->isValidForStagingTransfer(this->getDevice(), ptr, size, hasDependencies);
|
||||
switch (memObj->peekClMemObjType()) {
|
||||
case CL_MEM_OBJECT_BUFFER:
|
||||
return isValidForStaging && !this->bufferCpuCopyAllowed(castToObject<Buffer>(memObj), commandType, isBlocking, size, const_cast<void *>(ptr), 0, nullptr);
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
case CL_MEM_OBJECT_BUFFER:
|
||||
return stagingBufferManager->isValidForStagingTransfer(this->getDevice(), ptr, memObj->getSize(), hasDependencies);
|
||||
return isValidForStaging;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user