Simplify read/write enqueue operations on CPU

Change-Id: I7f59b04d484be2699e325d10e16298016231faf2
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2019-05-30 14:36:12 +02:00
parent 9ecaaa731a
commit f6bf2c5d0b
15 changed files with 215 additions and 179 deletions

View File

@@ -572,4 +572,15 @@ size_t CommandQueue::estimateTimestampPacketNodesCount(const MultiDispatchInfo &
}
return nodesCount;
}
bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr,
cl_uint numEventsInWaitList, const cl_event *eventWaitList) {
// Requested by debug variable or allowed by Buffer
bool debugVariableSet = (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get()) ||
(CL_COMMAND_WRITE_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnWriteBuffer.get());
return (debugVariableSet && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList) &&
buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) ||
buffer->isReadWriteOnCpuAllowed(blocking, numEventsInWaitList, ptr, size);
}
} // namespace NEO

View File

@@ -446,6 +446,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies);
void processProperties(const cl_queue_properties *properties);
bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr,
cl_uint numEventsInWaitList, const cl_event *eventWaitList);
Context *context = nullptr;
Device *device = nullptr;

View File

@@ -367,6 +367,15 @@ class CommandQueueHw : public CommandQueue {
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){};
size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image);
cl_int enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer,
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event);
cl_int enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer,
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event);
cl_int enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event);
private:
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType) override;

View File

@@ -42,4 +42,61 @@ void CommandQueueHw<Family>::notifyEnqueueReadImage(Image *image, bool blockingR
}
}
template <typename Family>
cl_int CommandQueueHw<Family>::enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer,
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event) {
cl_int retVal = CL_SUCCESS;
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
TransferProperties transferProperties(buffer, commandType, 0, true, &offset, &size, ptr, true);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
return retVal;
}
template <typename Family>
cl_int CommandQueueHw<Family>::enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(cl_command_type commandType, Buffer *buffer,
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event) {
cl_int retVal = CL_SUCCESS;
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, ptr, false);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(commandType);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast<cl_mem>(buffer), ptr);
}
return retVal;
}
template <typename Family>
cl_int CommandQueueHw<Family>::enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event) {
MultiDispatchInfo multiDispatchInfo;
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blocking == CL_TRUE,
multiDispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(commandType);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHintForMemoryTransfer(commandType, false, static_cast<cl_mem>(memObj), ptr);
}
return CL_SUCCESS;
}
} // namespace NEO

View File

@@ -164,31 +164,23 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
void CommandQueue::providePerformanceHint(TransferProperties &transferProperties) {
switch (transferProperties.cmdType) {
case CL_COMMAND_MAP_BUFFER:
if (!transferProperties.memObj->isMemObjZeroCopy()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
break;
}
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
break;
case CL_COMMAND_MAP_IMAGE:
if (!transferProperties.memObj->isMemObjZeroCopy()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
break;
}
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj));
context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, !transferProperties.memObj->isMemObjZeroCopy(),
static_cast<cl_mem>(transferProperties.memObj));
break;
case CL_COMMAND_UNMAP_MEM_OBJECT:
if (!transferProperties.memObj->isMemObjZeroCopy()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, transferProperties.ptr, static_cast<cl_mem>(transferProperties.memObj));
context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, true,
transferProperties.ptr, static_cast<cl_mem>(transferProperties.memObj));
break;
}
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA, transferProperties.ptr);
context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, false, transferProperties.ptr);
break;
case CL_COMMAND_READ_BUFFER:
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj), transferProperties.ptr);
break;
case CL_COMMAND_WRITE_BUFFER:
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(transferProperties.memObj), transferProperties.ptr);
context->providePerformanceHintForMemoryTransfer(transferProperties.cmdType, true,
static_cast<cl_mem>(transferProperties.memObj), transferProperties.ptr);
break;
}
}
} // namespace NEO

View File

@@ -37,55 +37,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
notifyEnqueueReadBuffer(buffer, !!blockingRead);
}
cl_int retVal = CL_SUCCESS;
bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true;
if ((DebugManager.flags.DoCpuCopyOnReadBuffer.get() && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList) &&
buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) ||
buffer->isReadWriteOnCpuAllowed(blockingRead, numEventsInWaitList, ptr, size)) {
if (!isMemTransferNeeded) {
TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, ptr, false);
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_READ_BUFFER);
}
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_READ_BUFFER, blockingRead, size, ptr,
numEventsInWaitList, eventWaitList);
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return retVal;
if (isCpuCopyAllowed) {
if (isMemTransferNeeded) {
return enqueueReadWriteBufferOnCpuWithMemoryTransfer(CL_COMMAND_READ_BUFFER, buffer, offset, size, ptr,
numEventsInWaitList, eventWaitList, event);
} else {
return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(CL_COMMAND_READ_BUFFER, buffer, offset, size, ptr,
numEventsInWaitList, eventWaitList, event);
}
TransferProperties transferProperties(buffer, CL_COMMAND_READ_BUFFER, 0, true, &offset, &size, ptr, true);
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
return retVal;
} else if (!isMemTransferNeeded) {
return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER, blockingRead,
numEventsInWaitList, eventWaitList, event);
}
MultiDispatchInfo dispatchInfo;
if (!isMemTransferNeeded) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blockingRead == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_READ_BUFFER);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return CL_SUCCESS;
}
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
this->getContext(), this->getDevice());
BuiltInOwnershipWrapper builtInLock(builder, this->context);
MultiDispatchInfo dispatchInfo;
void *dstPtr = ptr;
MemObjSurface bufferSurf(buffer);
@@ -121,7 +94,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
builder.buildDispatchInfos(dispatchInfo, dc);
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER, true, static_cast<cl_mem>(buffer), ptr);
if (!isL3Capable(ptr, size)) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
}

View File

@@ -43,25 +43,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_READ_BUFFER_RECT);
}
if (!isMemTransferNeeded) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blockingRead == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_READ_BUFFER_RECT);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return CL_SUCCESS;
return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER_RECT, blockingRead,
numEventsInWaitList, eventWaitList, event);
}
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
this->getContext(), this->getDevice());
@@ -109,7 +92,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
event);
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast<cl_mem>(buffer), ptr);
if (!isL3Capable(ptr, hostPtrSize)) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, hostPtrSize, MemoryConstants::pageSize, MemoryConstants::pageSize);
}

View File

@@ -53,25 +53,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
isMemTransferNeeded = srcImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, CL_COMMAND_READ_IMAGE);
}
if (!isMemTransferNeeded) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blockingRead == CL_TRUE,
di,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_READ_IMAGE);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA, static_cast<cl_mem>(srcImage));
}
return CL_SUCCESS;
return enqueueMarkerForReadWriteOperation(srcImage, ptr, CL_COMMAND_READ_IMAGE, blockingRead,
numEventsInWaitList, eventWaitList, event);
}
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyImage3dToBuffer,

View File

@@ -32,58 +32,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
const cl_event *eventWaitList,
cl_event *event) {
cl_int retVal = CL_SUCCESS;
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true;
if ((DebugManager.flags.DoCpuCopyOnWriteBuffer.get() && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList) &&
buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) ||
buffer->isReadWriteOnCpuAllowed(blockingWrite, numEventsInWaitList, const_cast<void *>(ptr), size)) {
if (!isMemTransferNeeded) {
TransferProperties transferProperties(buffer, CL_COMMAND_MARKER, 0, true, &offset, &size, const_cast<void *>(ptr), false);
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_WRITE_BUFFER) : true;
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_WRITE_BUFFER, blockingWrite, size, const_cast<void *>(ptr),
numEventsInWaitList, eventWaitList);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return retVal;
if (isCpuCopyAllowed) {
if (isMemTransferNeeded) {
return enqueueReadWriteBufferOnCpuWithMemoryTransfer(CL_COMMAND_WRITE_BUFFER, buffer, offset, size, const_cast<void *>(ptr),
numEventsInWaitList, eventWaitList, event);
} else {
return enqueueReadWriteBufferOnCpuWithoutMemoryTransfer(CL_COMMAND_WRITE_BUFFER, buffer, offset, size, const_cast<void *>(ptr),
numEventsInWaitList, eventWaitList, event);
}
TransferProperties transferProperties(buffer, CL_COMMAND_WRITE_BUFFER, 0, true, &offset, &size, const_cast<void *>(ptr), true);
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
cpuDataTransferHandler(transferProperties, eventsRequest, retVal);
return retVal;
} else if (!isMemTransferNeeded) {
return enqueueMarkerForReadWriteOperation(buffer, const_cast<void *>(ptr), CL_COMMAND_WRITE_BUFFER, blockingWrite,
numEventsInWaitList, eventWaitList, event);
}
MultiDispatchInfo dispatchInfo;
if (!isMemTransferNeeded) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blockingWrite == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return CL_SUCCESS;
}
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
this->getContext(), this->getDevice());
BuiltInOwnershipWrapper builtInLock(builder, this->context);
MultiDispatchInfo dispatchInfo;
void *srcPtr = const_cast<void *>(ptr);

View File

@@ -42,25 +42,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_WRITE_BUFFER_RECT);
}
if (!isMemTransferNeeded) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blockingWrite == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_WRITE_BUFFER_RECT);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA, static_cast<cl_mem>(buffer), ptr);
}
return CL_SUCCESS;
return enqueueMarkerForReadWriteOperation(buffer, const_cast<void *>(ptr), CL_COMMAND_WRITE_BUFFER_RECT, blockingWrite,
numEventsInWaitList, eventWaitList, event);
}
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferRect,
this->getContext(), this->getDevice());

View File

@@ -45,25 +45,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
isMemTransferNeeded = dstImage->checkIfMemoryTransferIsRequired(hostOffset, 0, ptr, CL_COMMAND_WRITE_IMAGE);
}
if (!isMemTransferNeeded) {
NullSurface s;
Surface *surfaces[] = {&s};
enqueueHandler<CL_COMMAND_MARKER>(
surfaces,
blockingWrite == CL_TRUE,
di,
numEventsInWaitList,
eventWaitList,
event);
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_WRITE_IMAGE);
}
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage));
}
return CL_SUCCESS;
return enqueueMarkerForReadWriteOperation(dstImage, const_cast<void *>(ptr), CL_COMMAND_WRITE_IMAGE, blockingWrite,
numEventsInWaitList, eventWaitList, event);
}
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToImage3d,
this->getContext(), this->getDevice());

View File

@@ -107,6 +107,15 @@ class Context : public BaseObject<_cl_context> {
}
}
template <typename... Args>
void providePerformanceHintForMemoryTransfer(cl_command_type commandType, bool transferRequired, Args &&... args) {
cl_diagnostics_verbose_level verboseLevel = transferRequired ? CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL
: CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL;
PerformanceHints hint = driverDiagnostics->obtainHintForTransferOperation(commandType, transferRequired);
providePerformanceHint(verboseLevel, hint, args...);
}
cl_bool isProvidingPerformanceHints() const {
return driverDiagnostics != nullptr;
}

View File

@@ -7,6 +7,8 @@
#include "driver_diagnostics.h"
#include "runtime/helpers/debug_helpers.h"
namespace NEO {
DriverDiagnostics::DriverDiagnostics(cl_diagnostics_verbose_level level) {
@@ -56,4 +58,41 @@ const char *DriverDiagnostics::hintFormat[] = {
"Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY
"Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"" //KERNEL_ARGUMENT_AUX_TRANSLATION
};
PerformanceHints DriverDiagnostics::obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired) {
PerformanceHints hint;
switch (commandType) {
case CL_COMMAND_MAP_BUFFER:
hint = transferRequired ? CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA;
break;
case CL_COMMAND_MAP_IMAGE:
hint = transferRequired ? CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA : CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA;
break;
case CL_COMMAND_UNMAP_MEM_OBJECT:
hint = transferRequired ? CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA : CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA;
break;
case CL_COMMAND_WRITE_BUFFER:
hint = transferRequired ? CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA;
break;
case CL_COMMAND_READ_BUFFER:
hint = transferRequired ? CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA : CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA;
break;
case CL_COMMAND_WRITE_BUFFER_RECT:
hint = transferRequired ? CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA;
break;
case CL_COMMAND_READ_BUFFER_RECT:
hint = transferRequired ? CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA : CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA;
break;
case CL_COMMAND_WRITE_IMAGE:
hint = transferRequired ? CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA : CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA;
break;
case CL_COMMAND_READ_IMAGE:
UNRECOVERABLE_IF(transferRequired)
hint = CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA;
break;
default:
UNRECOVERABLE_IF(true);
}
return hint;
}
} // namespace NEO

View File

@@ -57,6 +57,7 @@ class DriverDiagnostics {
~DriverDiagnostics() = default;
static const char *hintFormat[];
static const cl_int maxHintStringSize = 1024;
PerformanceHints obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired);
protected:
cl_diagnostics_verbose_level verboseLevel;

View File

@@ -9,6 +9,8 @@
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include <tuple>
using namespace NEO;
bool containsHint(const char *providedHint, char *userData) {
@@ -521,3 +523,42 @@ INSTANTIATE_TEST_CASE_P(
TEST(PerformanceHintsDebugVariables, givenDefaultDebugManagerWhenPrintDriverDiagnosticsIsCalledThenMinusOneIsReturned) {
EXPECT_EQ(-1, DebugManager.flags.PrintDriverDiagnostics.get());
}
TEST(PerformanceHintsTransferTest, givenCommandTypeAndMemoryTransferRequiredWhenAskingForHintThenReturnCorrectValue) {
DriverDiagnostics driverDiagnostics(0);
const uint32_t numHints = 8;
std::tuple<uint32_t, PerformanceHints, PerformanceHints> commandHints[numHints] = {
// commandType, transfer required, transfer not required
std::make_tuple(CL_COMMAND_MAP_BUFFER, CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_BUFFER_DOESNT_REQUIRE_COPY_DATA),
std::make_tuple(CL_COMMAND_MAP_IMAGE, CL_ENQUEUE_MAP_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_MAP_IMAGE_DOESNT_REQUIRE_COPY_DATA),
std::make_tuple(CL_COMMAND_UNMAP_MEM_OBJECT, CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA, CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA),
std::make_tuple(CL_COMMAND_WRITE_BUFFER, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_DOESNT_REQUIRE_COPY_DATA),
std::make_tuple(CL_COMMAND_READ_BUFFER, CL_ENQUEUE_READ_BUFFER_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_DOESNT_REQUIRE_COPY_DATA),
std::make_tuple(CL_COMMAND_WRITE_BUFFER_RECT, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_BUFFER_RECT_DOESNT_REQUIRE_COPY_DATA),
std::make_tuple(CL_COMMAND_READ_BUFFER_RECT, CL_ENQUEUE_READ_BUFFER_RECT_REQUIRES_COPY_DATA, CL_ENQUEUE_READ_BUFFER_RECT_DOESNT_REQUIRES_COPY_DATA),
std::make_tuple(CL_COMMAND_WRITE_IMAGE, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, CL_ENQUEUE_WRITE_IMAGE_DOESNT_REQUIRES_COPY_DATA),
};
for (uint32_t i = 0; i < numHints; i++) {
auto hintWithTransferRequired = driverDiagnostics.obtainHintForTransferOperation(std::get<0>(commandHints[i]), true);
auto hintWithoutTransferRequired = driverDiagnostics.obtainHintForTransferOperation(std::get<0>(commandHints[i]), false);
EXPECT_EQ(std::get<1>(commandHints[i]), hintWithTransferRequired);
EXPECT_EQ(std::get<2>(commandHints[i]), hintWithoutTransferRequired);
}
EXPECT_THROW(driverDiagnostics.obtainHintForTransferOperation(CL_COMMAND_READ_IMAGE, true), std::exception); // no hint for this scenario
EXPECT_EQ(CL_ENQUEUE_READ_IMAGE_DOESNT_REQUIRES_COPY_DATA,
driverDiagnostics.obtainHintForTransferOperation(CL_COMMAND_READ_IMAGE, false));
}
TEST_F(DriverDiagnosticsTest, givenInvalidCommandTypeWhenAskingForZeroCopyOperatonThenAbort) {
cl_device_id deviceId = devices[0];
cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0};
auto context = std::unique_ptr<MockContext>(Context::create<MockContext>(validProperties, DeviceVector(&deviceId, 1),
callbackFunction, (void *)userData, retVal));
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
auto address = reinterpret_cast<void *>(0x12345);
EXPECT_THROW(context->providePerformanceHintForMemoryTransfer(CL_COMMAND_BARRIER, true, buffer.get(), address), std::exception);
}