Revert "Create single point for selecting engine for builtin ops"

This reverts commit 6513bd371a69cb821ea45c7c1964ea1f51dd75b2.

Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
Maciej Dziuban
2021-09-02 10:14:12 +00:00
committed by Compute-Runtime-Automation
parent ea33fa5707
commit 3e6b3cd780
28 changed files with 180 additions and 267 deletions

View File

@@ -144,17 +144,13 @@ CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const {
return nullptr;
}
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const {
const bool blitAllowed = blitEnqueueAllowed(cmdType, dispatchInfo.peekBuiltinOpParams());
const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams());
const bool blitRequired = isCopyOnly;
const bool blit = blitAllowed && (blitPreferred || blitRequired);
if (blit) {
return *bcsEngine->commandStreamReceiver;
} else {
return getGpgpuCommandStreamReceiver();
CommandStreamReceiver &CommandQueue::getCommandStreamReceiver(bool blitAllowed) const {
if (blitAllowed) {
auto csr = getBcsCommandStreamReceiver();
UNRECOVERABLE_IF(!csr);
return *csr;
}
return getGpgpuCommandStreamReceiver();
}
Device &CommandQueue::getDevice() const noexcept {
@@ -725,17 +721,12 @@ bool CommandQueue::queueDependenciesClearRequired() const {
return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get();
}
bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const {
if (bcsEngine == nullptr) {
return false;
}
bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
auto blitterSupported = bcsEngine != nullptr;
bool allowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly;
bool blitEnqueueAllowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly;
if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) {
allowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
}
if (!allowed) {
return false;
blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
}
switch (cmdType) {
@@ -746,14 +737,10 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpPa
case CL_COMMAND_WRITE_BUFFER_RECT:
case CL_COMMAND_COPY_BUFFER_RECT:
case CL_COMMAND_SVM_MEMCPY:
return true;
case CL_COMMAND_READ_IMAGE:
return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast<Image *>(params.srcMemObj));
case CL_COMMAND_WRITE_IMAGE:
return blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast<Image *>(params.dstMemObj));
case CL_COMMAND_COPY_IMAGE:
return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast<Image *>(params.srcMemObj)) &&
blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast<Image *>(params.dstMemObj));
return blitterSupported && blitEnqueueAllowed;
default:
return false;
}
@@ -784,7 +771,7 @@ bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOp
return true;
}
bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const {
bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) {
const auto &hwInfo = device->getHardwareInfo();
const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto blitEnqueueImageAllowed = hwHelper.isBlitterForImagesSupported(hwInfo);

View File

@@ -224,7 +224,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
CommandStreamReceiver *getBcsCommandStreamReceiver() const;
CommandStreamReceiver *getBcsForAuxTranslation() const;
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const;
MOCKABLE_VIRTUAL CommandStreamReceiver &getCommandStreamReceiver(bool blitAllowed) const;
Device &getDevice() const noexcept;
ClDevice &getClDevice() const { return *device; }
Context &getContext() const { return *context; }
@@ -353,9 +353,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
cl_uint numEventsInWaitList, const cl_event *eventWaitList);
void providePerformanceHint(TransferProperties &transferProperties);
bool queueDependenciesClearRequired() const;
bool blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const;
bool blitEnqueueAllowed(cl_command_type cmdType) const;
bool blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const;
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image);
void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;
void waitForLatestTaskCount();

View File

@@ -365,13 +365,10 @@ class CommandQueueHw : public CommandQueue {
cl_event *event);
template <uint32_t cmdType, size_t surfaceCount>
void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount],
EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event,
bool blocking, CommandStreamReceiver &csr);
void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed);
template <uint32_t cmdType>
void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList,
cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr);
void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking);
template <uint32_t commandType>
CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,

View File

@@ -1162,9 +1162,9 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
template <typename GfxFamily>
template <uint32_t cmdType>
void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) {
void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking) {
auto commandStreamRecieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
auto &bcsCsr = *getBcsCommandStreamReceiver();
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
EventBuilder eventBuilder;
@@ -1251,11 +1251,13 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
template <typename GfxFamily>
template <uint32_t cmdType, size_t surfaceCount>
void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation,
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {
const bool blit = EngineHelpers::isBcs(csr.getOsContext().getEngineType());
void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed) {
const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams());
const bool blitRequired = isCopyOnly;
const bool blit = blitAllowed && (blitPreferred || blitRequired);
if (blit) {
enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking);
} else {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInOperation,
this->getClDevice());

View File

@@ -47,9 +47,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
MemObjSurface s1(srcBuffer);
MemObjSurface s2(dstBuffer);
Surface *surfaces[] = {&s1, &s2};
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
return CL_SUCCESS;
}

View File

@@ -54,8 +54,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
dc.dstSlicePitch = dstSlicePitch;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER_RECT, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
return CL_SUCCESS;
}

View File

@@ -50,8 +50,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
}
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_IMAGE, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr);
cl_command_type cmdType = CL_COMMAND_COPY_IMAGE;
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(srcOrigin, region, *srcImage) && blitEnqueueImageAllowed(dstOrigin, region, *dstImage);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
return CL_SUCCESS;
}

View File

@@ -36,6 +36,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
cl_event *event) {
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER;
auto blitAllowed = blitEnqueueAllowed(cmdType);
auto &csr = getCommandStreamReceiver(blitAllowed);
if (nullptr == mapAllocation) {
notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
}
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
@@ -61,9 +67,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
}
if (isCpuCopyAllowed) {
if (nullptr == mapAllocation) {
notifyEnqueueReadBuffer(buffer, !!blockingRead, false);
}
if (isMemTransferNeeded) {
return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, ptr,
numEventsInWaitList, eventWaitList, event);
@@ -99,7 +102,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
} else {
surfaces[1] = &hostPtrSurf;
if (size != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@@ -125,12 +128,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
}
}
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
if (nullptr == mapAllocation) {
notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
}
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed);
return CL_SUCCESS;
}

View File

@@ -59,11 +59,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
MemObjSurface bufferSurf(buffer);
HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize);
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@@ -87,8 +89,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
dc.dstSlicePitch = hostSlicePitch;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER_RECT, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed);
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast<cl_mem>(buffer), ptr);

View File

@@ -41,6 +41,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
const cl_event *eventWaitList,
cl_event *event) {
cl_command_type cmdType = CL_COMMAND_READ_IMAGE;
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *srcImage);
auto &csr = getCommandStreamReceiver(blitAllowed);
if (nullptr == mapAllocation) {
notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
}
auto isMemTransferNeeded = true;
if (srcImage->isMemObjZeroCopy()) {
@@ -72,7 +78,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@@ -99,11 +105,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_IMAGE, dispatchInfo);
if (nullptr == mapAllocation) {
notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
}
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, blitAllowed);
if (context->isProvidingPerformanceHints()) {
if (!isL3Capable(ptr, hostPtrSize)) {

View File

@@ -126,8 +126,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, blitAllowed);
if (event) {
castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_MAP);
@@ -210,8 +210,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
if (event) {
castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_UNMAP);
@@ -330,29 +330,32 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
if (copyType == SvmToHost) {
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
HostPtrSurface dstHostPtrSurf(dstPtr, size);
cmdType = CL_COMMAND_READ_BUFFER;
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (size != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true);
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
}
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), dstPtr, dstHostPtrSurf.getAllocation());
surfaces[0] = &srcSvmSurf;
surfaces[1] = &dstHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
} else if (copyType == HostToSvm) {
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
cmdType = CL_COMMAND_WRITE_BUFFER;
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (size != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false);
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@@ -364,8 +367,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &srcHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
} else if (copyType == SvmToSvm) {
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
@@ -376,15 +378,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &dstSvmSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
} else {
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
HostPtrSurface dstHostPtrSurf(dstPtr, size);
cmdType = CL_COMMAND_WRITE_BUFFER;
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (size != 0) {
auto &csr = getGpgpuCommandStreamReceiver();
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true);
if (!status) {
@@ -398,8 +401,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
surfaces[1] = &dstHostPtrSurf;
dispatchInfo.setBuiltinOpParams(operationParams);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
}
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);

View File

@@ -82,6 +82,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
MemObjSurface bufferSurf(buffer);
GeneralSurface mapSurface;
Surface *surfaces[] = {&bufferSurf, nullptr};
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (mapAllocation) {
surfaces[1] = &mapSurface;
@@ -94,7 +95,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
} else {
surfaces[1] = &hostPtrSurf;
if (size != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@@ -113,8 +116,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed);
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));

View File

@@ -58,11 +58,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
MemObjSurface dstBufferSurf(buffer);
HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
Surface *surfaces[] = {&dstBufferSurf, &hostPtrSurf};
auto blitAllowed = blitEnqueueAllowed(cmdType);
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@@ -86,8 +88,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
dc.dstSlicePitch = bufferSlicePitch;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER_RECT, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed);
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));

View File

@@ -54,6 +54,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
GeneralSurface mapSurface;
Surface *surfaces[] = {&dstImgSurf, nullptr};
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *dstImage);
if (mapAllocation) {
surfaces[1] = &mapSurface;
mapSurface.setGraphicsAllocation(mapAllocation);
@@ -65,7 +66,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
auto &csr = getCommandStreamReceiver(blitAllowed);
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@@ -92,8 +94,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
MultiDispatchInfo dispatchInfo(dc);
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_IMAGE, dispatchInfo);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, blitAllowed);
if (context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage));

View File

@@ -54,11 +54,11 @@ void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &cs
if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) {
auto taskCountPreviousRootDevice = event->peekTaskCount();
auto tagAddressPreviousRootDevice = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagAddress();
auto tagAddressPreviousRootDevice = event->getCommandQueue()->getCommandStreamReceiver(false).getTagAddress();
csrDeps.taskCountContainer.push_back({taskCountPreviousRootDevice, reinterpret_cast<uint64_t>(tagAddressPreviousRootDevice)});
auto graphicsAllocation = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
auto graphicsAllocation = event->getCommandQueue()->getCommandStreamReceiver(false).getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
currentCsr.getResidencyAllocations().push_back(graphicsAllocation);
}
}

View File

@@ -1065,7 +1065,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
workDim,
localWorkSize);
auto isEngineInstanced = commandQueue->getGpgpuCommandStreamReceiver().getOsContext().isEngineInstanced();
auto isEngineInstanced = commandQueue->getCommandStreamReceiver(false).getOsContext().isEngineInstanced();
maxWorkGroupCount = hwHelper.adjustMaxWorkGroupCount(maxWorkGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);
return maxWorkGroupCount;
}