mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 18:37:46 +08:00
Create single point for selecting engine for builtin ops
- selectCsrForBuiltinOperation selects proper CSR
- selected CSR is passed to dispatchBcsOrGpgpuEnqueue
- Notifications such as notifyEnqueueReadBuffer are now made on correct engine
- Temporary allocs for hostptrs are now created on gpgpuCsr

Related-To: NEO-6057
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e96f1afa6f
commit
a03b5f8a95
@@ -144,13 +144,17 @@ CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
CommandStreamReceiver &CommandQueue::getCommandStreamReceiver(bool blitAllowed) const {
|
||||
if (blitAllowed) {
|
||||
auto csr = getBcsCommandStreamReceiver();
|
||||
UNRECOVERABLE_IF(!csr);
|
||||
return *csr;
|
||||
CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const {
|
||||
const bool blitAllowed = blitEnqueueAllowed(cmdType, dispatchInfo.peekBuiltinOpParams());
|
||||
const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams());
|
||||
const bool blitRequired = isCopyOnly;
|
||||
const bool blit = blitAllowed && (blitPreferred || blitRequired);
|
||||
|
||||
if (blit) {
|
||||
return *bcsEngine->commandStreamReceiver;
|
||||
} else {
|
||||
return getGpgpuCommandStreamReceiver();
|
||||
}
|
||||
return getGpgpuCommandStreamReceiver();
|
||||
}
|
||||
|
||||
Device &CommandQueue::getDevice() const noexcept {
|
||||
@@ -721,12 +725,17 @@ bool CommandQueue::queueDependenciesClearRequired() const {
|
||||
return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get();
|
||||
}
|
||||
|
||||
bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
|
||||
auto blitterSupported = bcsEngine != nullptr;
|
||||
bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const {
|
||||
if (bcsEngine == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool blitEnqueueAllowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly;
|
||||
bool allowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly;
|
||||
if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) {
|
||||
blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
|
||||
allowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
|
||||
}
|
||||
if (!allowed) {
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (cmdType) {
|
||||
@@ -737,10 +746,14 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
|
||||
case CL_COMMAND_WRITE_BUFFER_RECT:
|
||||
case CL_COMMAND_COPY_BUFFER_RECT:
|
||||
case CL_COMMAND_SVM_MEMCPY:
|
||||
return true;
|
||||
case CL_COMMAND_READ_IMAGE:
|
||||
return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast<Image *>(params.srcMemObj));
|
||||
case CL_COMMAND_WRITE_IMAGE:
|
||||
return blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast<Image *>(params.dstMemObj));
|
||||
case CL_COMMAND_COPY_IMAGE:
|
||||
return blitterSupported && blitEnqueueAllowed;
|
||||
return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast<Image *>(params.srcMemObj)) &&
|
||||
blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast<Image *>(params.dstMemObj));
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@@ -771,7 +784,7 @@ bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOp
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) {
|
||||
bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const {
|
||||
const auto &hwInfo = device->getHardwareInfo();
|
||||
const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
auto blitEnqueueImageAllowed = hwHelper.isBlitterForImagesSupported(hwInfo);
|
||||
|
||||
@@ -224,7 +224,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
|
||||
CommandStreamReceiver *getBcsCommandStreamReceiver() const;
|
||||
CommandStreamReceiver *getBcsForAuxTranslation() const;
|
||||
MOCKABLE_VIRTUAL CommandStreamReceiver &getCommandStreamReceiver(bool blitAllowed) const;
|
||||
MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const;
|
||||
Device &getDevice() const noexcept;
|
||||
ClDevice &getClDevice() const { return *device; }
|
||||
Context &getContext() const { return *context; }
|
||||
@@ -353,9 +353,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList);
|
||||
void providePerformanceHint(TransferProperties &transferProperties);
|
||||
bool queueDependenciesClearRequired() const;
|
||||
bool blitEnqueueAllowed(cl_command_type cmdType) const;
|
||||
bool blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const;
|
||||
bool blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const;
|
||||
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image);
|
||||
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
|
||||
void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
|
||||
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;
|
||||
void waitForLatestTaskCount();
|
||||
|
||||
@@ -365,10 +365,13 @@ class CommandQueueHw : public CommandQueue {
|
||||
cl_event *event);
|
||||
|
||||
template <uint32_t cmdType, size_t surfaceCount>
|
||||
void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed);
|
||||
void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount],
|
||||
EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event,
|
||||
bool blocking, CommandStreamReceiver &csr);
|
||||
|
||||
template <uint32_t cmdType>
|
||||
void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking);
|
||||
void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList,
|
||||
cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr);
|
||||
|
||||
template <uint32_t commandType>
|
||||
CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
|
||||
|
||||
@@ -1159,9 +1159,9 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <uint32_t cmdType>
|
||||
void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking) {
|
||||
void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) {
|
||||
auto commandStreamRecieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
|
||||
auto bcsCsr = getBcsCommandStreamReceiver();
|
||||
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
EventBuilder eventBuilder;
|
||||
@@ -1187,8 +1187,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||
BlitPropertiesContainer blitPropertiesContainer;
|
||||
CsrDependencies csrDeps;
|
||||
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *bcsCsr, CsrDependencies::DependenciesType::All);
|
||||
auto allocator = bcsCsr->getTimestampPacketAllocator();
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All);
|
||||
auto allocator = bcsCsr.getTimestampPacketAllocator();
|
||||
|
||||
if (isCacheFlushForBcsRequired() && isGpgpuSubmissionForBcsRequired(blockQueue)) {
|
||||
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
||||
@@ -1198,7 +1198,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||
timestampPacketDependencies.barrierNodes.add(allocator->getTag());
|
||||
}
|
||||
|
||||
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, *bcsCsr);
|
||||
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
|
||||
csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
|
||||
|
||||
LinearStream *gpgpuCommandStream = {};
|
||||
@@ -1212,7 +1212,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
}
|
||||
|
||||
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(*bcsCsr, multiDispatchInfo, timestampPacketDependencies,
|
||||
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
|
||||
eventsRequest, gpgpuCommandStream, cmdType, blockQueue));
|
||||
|
||||
CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0};
|
||||
@@ -1222,7 +1222,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||
if (!blockQueue) {
|
||||
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking,
|
||||
enqueueProperties, timestampPacketDependencies, eventsRequest,
|
||||
eventBuilder, taskLevel, csrDeps, bcsCsr);
|
||||
eventBuilder, taskLevel, csrDeps, &bcsCsr);
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
|
||||
@@ -1248,13 +1248,11 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <uint32_t cmdType, size_t surfaceCount>
|
||||
void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed) {
|
||||
const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams());
|
||||
const bool blitRequired = isCopyOnly;
|
||||
const bool blit = blitAllowed && (blitPreferred || blitRequired);
|
||||
|
||||
void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation,
|
||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {
|
||||
const bool blit = EngineHelpers::isBcs(csr.getOsContext().getEngineType());
|
||||
if (blit) {
|
||||
enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking);
|
||||
enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
||||
} else {
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInOperation,
|
||||
this->getClDevice());
|
||||
|
||||
@@ -47,8 +47,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||
MemObjSurface s1(srcBuffer);
|
||||
MemObjSurface s2(dstBuffer);
|
||||
Surface *surfaces[] = {&s1, &s2};
|
||||
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
|
||||
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -54,8 +54,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
|
||||
dc.dstSlicePitch = dstSlicePitch;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER_RECT, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -50,10 +50,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
|
||||
}
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
cl_command_type cmdType = CL_COMMAND_COPY_IMAGE;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(srcOrigin, region, *srcImage) && blitEnqueueImageAllowed(dstOrigin, region, *dstImage);
|
||||
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_IMAGE, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -36,12 +36,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
cl_event *event) {
|
||||
|
||||
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
}
|
||||
|
||||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||
bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
|
||||
@@ -67,6 +61,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
}
|
||||
|
||||
if (isCpuCopyAllowed) {
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadBuffer(buffer, !!blockingRead, false);
|
||||
}
|
||||
if (isMemTransferNeeded) {
|
||||
return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, ptr,
|
||||
numEventsInWaitList, eventWaitList, event);
|
||||
@@ -102,7 +99,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
} else {
|
||||
surfaces[1] = &hostPtrSurf;
|
||||
if (size != 0) {
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -128,7 +125,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
|
||||
}
|
||||
}
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed);
|
||||
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
}
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -59,13 +59,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
MemObjSurface bufferSurf(buffer);
|
||||
HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize);
|
||||
Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -89,7 +87,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
|
||||
dc.dstSlicePitch = hostSlicePitch;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER_RECT, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast<cl_mem>(buffer), ptr);
|
||||
|
||||
@@ -41,12 +41,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
const cl_event *eventWaitList,
|
||||
cl_event *event) {
|
||||
cl_command_type cmdType = CL_COMMAND_READ_IMAGE;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *srcImage);
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
}
|
||||
|
||||
auto isMemTransferNeeded = true;
|
||||
if (srcImage->isMemObjZeroCopy()) {
|
||||
@@ -78,7 +72,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -105,7 +99,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
|
||||
auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer;
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_IMAGE, dispatchInfo);
|
||||
if (nullptr == mapAllocation) {
|
||||
notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
}
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
if (!isL3Capable(ptr, hostPtrSize)) {
|
||||
|
||||
@@ -126,8 +126,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
|
||||
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
||||
|
||||
if (event) {
|
||||
castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_MAP);
|
||||
@@ -210,8 +210,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
|
||||
dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
|
||||
|
||||
if (event) {
|
||||
castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_UNMAP);
|
||||
@@ -330,32 +330,29 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
if (copyType == SvmToHost) {
|
||||
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
HostPtrSurface dstHostPtrSurf(dstPtr, size);
|
||||
cmdType = CL_COMMAND_READ_BUFFER;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
if (size != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
|
||||
notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
}
|
||||
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), dstPtr, dstHostPtrSurf.getAllocation());
|
||||
surfaces[0] = &srcSvmSurf;
|
||||
surfaces[1] = &dstHostPtrSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
|
||||
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
|
||||
notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
|
||||
} else if (copyType == HostToSvm) {
|
||||
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
|
||||
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
cmdType = CL_COMMAND_WRITE_BUFFER;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
if (size != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -367,7 +364,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &srcHostPtrSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
|
||||
} else if (copyType == SvmToSvm) {
|
||||
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
||||
@@ -378,16 +376,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &dstSvmSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
|
||||
} else {
|
||||
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
|
||||
HostPtrSurface dstHostPtrSurf(dstPtr, size);
|
||||
cmdType = CL_COMMAND_WRITE_BUFFER;
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
if (size != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
auto &csr = getGpgpuCommandStreamReceiver();
|
||||
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||
status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true);
|
||||
if (!status) {
|
||||
@@ -401,7 +398,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||
surfaces[1] = &dstHostPtrSurf;
|
||||
|
||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||
}
|
||||
if (event) {
|
||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||
|
||||
@@ -82,7 +82,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
MemObjSurface bufferSurf(buffer);
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&bufferSurf, nullptr};
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
@@ -95,9 +94,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
} else {
|
||||
surfaces[1] = &hostPtrSurf;
|
||||
if (size != 0) {
|
||||
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -116,7 +113,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
||||
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));
|
||||
|
||||
@@ -58,13 +58,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
MemObjSurface dstBufferSurf(buffer);
|
||||
HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
|
||||
Surface *surfaces[] = {&dstBufferSurf, &hostPtrSurf};
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType);
|
||||
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -88,7 +86,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
|
||||
dc.dstSlicePitch = bufferSlicePitch;
|
||||
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER_RECT, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));
|
||||
|
||||
@@ -54,7 +54,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
|
||||
GeneralSurface mapSurface;
|
||||
Surface *surfaces[] = {&dstImgSurf, nullptr};
|
||||
auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *dstImage);
|
||||
if (mapAllocation) {
|
||||
surfaces[1] = &mapSurface;
|
||||
mapSurface.setGraphicsAllocation(mapAllocation);
|
||||
@@ -66,8 +65,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
if (region[0] != 0 &&
|
||||
region[1] != 0 &&
|
||||
region[2] != 0) {
|
||||
auto &csr = getCommandStreamReceiver(blitAllowed);
|
||||
bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
|
||||
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
|
||||
if (!status) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -94,7 +92,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
|
||||
auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
|
||||
MultiDispatchInfo dispatchInfo(dc);
|
||||
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, blitAllowed);
|
||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_IMAGE, dispatchInfo);
|
||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
|
||||
|
||||
if (context->isProvidingPerformanceHints()) {
|
||||
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage));
|
||||
|
||||
Reference in New Issue
Block a user