From a03b5f8a95599bab76db70beaf1c0f0f7574efea Mon Sep 17 00:00:00 2001 From: Maciej Dziuban Date: Thu, 26 Aug 2021 11:53:25 +0000 Subject: [PATCH] Create single point for selecting engine for builtin ops - selectCsrForBuiltinOperation selects proper CSR - selected CSR is passed to dispatchBcsOrGpgpuEnqueue - Notifications such as notifyEnqueueReadBuffer are now made on correct engine - Temporary allocs for hostptrs are now created on gpgpuCsr Related-To: NEO-6057 Signed-off-by: Maciej Dziuban --- opencl/source/command_queue/command_queue.cpp | 37 ++++-- opencl/source/command_queue/command_queue.h | 6 +- .../source/command_queue/command_queue_hw.h | 7 +- opencl/source/command_queue/enqueue_common.h | 24 ++-- .../command_queue/enqueue_copy_buffer.h | 5 +- .../command_queue/enqueue_copy_buffer_rect.h | 4 +- .../source/command_queue/enqueue_copy_image.h | 6 +- .../command_queue/enqueue_read_buffer.h | 18 +-- .../command_queue/enqueue_read_buffer_rect.h | 7 +- .../source/command_queue/enqueue_read_image.h | 14 +-- opencl/source/command_queue/enqueue_svm.h | 36 +++--- .../command_queue/enqueue_write_buffer.h | 8 +- .../command_queue/enqueue_write_buffer_rect.h | 7 +- .../command_queue/enqueue_write_image.h | 7 +- opencl/source/helpers/properties_helper.cpp | 4 +- opencl/source/kernel/kernel.cpp | 2 +- .../command_queue/blit_enqueue_tests.cpp | 1 + .../command_queue/command_queue_hw_tests.cpp | 2 +- .../command_queue/command_queue_tests.cpp | 113 ++++++++++++++---- .../enqueue_copy_image_tests.cpp | 6 +- .../enqueue_read_image_tests.cpp | 6 +- .../enqueue_svm_mem_copy_tests.cpp | 5 - .../enqueue_write_image_tests.cpp | 6 +- ...and_stream_receiver_flush_task_4_tests.cpp | 26 ++-- .../libult/ult_command_stream_receiver.h | 7 ++ .../unit_test/mem_obj/buffer_bcs_tests.cpp | 87 +++++++------- .../test/unit_test/mocks/mock_command_queue.h | 4 +- shared/source/helpers/vec.h | 2 +- 28 files changed, 272 insertions(+), 185 deletions(-) diff --git 
a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 5cb01818e7..bff1a8d698 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -144,13 +144,17 @@ CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const { return nullptr; } -CommandStreamReceiver &CommandQueue::getCommandStreamReceiver(bool blitAllowed) const { - if (blitAllowed) { - auto csr = getBcsCommandStreamReceiver(); - UNRECOVERABLE_IF(!csr); - return *csr; +CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const { + const bool blitAllowed = blitEnqueueAllowed(cmdType, dispatchInfo.peekBuiltinOpParams()); + const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams()); + const bool blitRequired = isCopyOnly; + const bool blit = blitAllowed && (blitPreferred || blitRequired); + + if (blit) { + return *bcsEngine->commandStreamReceiver; + } else { + return getGpgpuCommandStreamReceiver(); } - return getGpgpuCommandStreamReceiver(); } Device &CommandQueue::getDevice() const noexcept { @@ -721,12 +725,17 @@ bool CommandQueue::queueDependenciesClearRequired() const { return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get(); } -bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const { - auto blitterSupported = bcsEngine != nullptr; +bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const { + if (bcsEngine == nullptr) { + return false; + } - bool blitEnqueueAllowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly; + bool allowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly; if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) { - blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get(); + 
allowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get(); + } + if (!allowed) { + return false; } switch (cmdType) { @@ -737,10 +746,14 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const { case CL_COMMAND_WRITE_BUFFER_RECT: case CL_COMMAND_COPY_BUFFER_RECT: case CL_COMMAND_SVM_MEMCPY: + return true; case CL_COMMAND_READ_IMAGE: + return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast(params.srcMemObj)); case CL_COMMAND_WRITE_IMAGE: + return blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast(params.dstMemObj)); case CL_COMMAND_COPY_IMAGE: - return blitterSupported && blitEnqueueAllowed; + return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast(params.srcMemObj)) && + blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast(params.dstMemObj)); default: return false; } @@ -771,7 +784,7 @@ bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOp return true; } -bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) { +bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const { const auto &hwInfo = device->getHardwareInfo(); const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto blitEnqueueImageAllowed = hwHelper.isBlitterForImagesSupported(hwInfo); diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 59e86ab098..da79c5cef3 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -224,7 +224,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const; CommandStreamReceiver *getBcsCommandStreamReceiver() const; CommandStreamReceiver *getBcsForAuxTranslation() const; - MOCKABLE_VIRTUAL CommandStreamReceiver 
&getCommandStreamReceiver(bool blitAllowed) const; + MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const; Device &getDevice() const noexcept; ClDevice &getClDevice() const { return *device; } Context &getContext() const { return *context; } @@ -353,9 +353,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> { cl_uint numEventsInWaitList, const cl_event *eventWaitList); void providePerformanceHint(TransferProperties &transferProperties); bool queueDependenciesClearRequired() const; - bool blitEnqueueAllowed(cl_command_type cmdType) const; + bool blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const; bool blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const; - MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image); + MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const; void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo); virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0; void waitForLatestTaskCount(); diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index b57384a963..7ac442a11e 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -365,10 +365,13 @@ class CommandQueueHw : public CommandQueue { cl_event *event); template - void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed); + void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], + EBuiltInOps::Type 
builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, + bool blocking, CommandStreamReceiver &csr); template - void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking); + void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, + cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr); template CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency, diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 5a7f415b84..4d668008b0 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -1159,9 +1159,9 @@ size_t CommandQueueHw::calculateHostPtrSizeForImage(const size_t *reg template template -void CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking) { +void CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, + const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) { auto commandStreamRecieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); - auto bcsCsr = getBcsCommandStreamReceiver(); EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event); EventBuilder eventBuilder; @@ -1187,8 +1187,8 @@ void CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispat BlitPropertiesContainer blitPropertiesContainer; CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *bcsCsr, CsrDependencies::DependenciesType::All); - auto allocator = bcsCsr->getTimestampPacketAllocator(); + eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, 
CsrDependencies::DependenciesType::All); + auto allocator = bcsCsr.getTimestampPacketAllocator(); if (isCacheFlushForBcsRequired() && isGpgpuSubmissionForBcsRequired(blockQueue)) { timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag()); @@ -1198,7 +1198,7 @@ void CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispat timestampPacketDependencies.barrierNodes.add(allocator->getTag()); } - obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, *bcsCsr); + obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr); csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes); LinearStream *gpgpuCommandStream = {}; @@ -1212,7 +1212,7 @@ void CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispat eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer); } - blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(*bcsCsr, multiDispatchInfo, timestampPacketDependencies, + blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, eventsRequest, gpgpuCommandStream, cmdType, blockQueue)); CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0}; @@ -1222,7 +1222,7 @@ void CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispat if (!blockQueue) { completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking, enqueueProperties, timestampPacketDependencies, eventsRequest, - eventBuilder, taskLevel, csrDeps, bcsCsr); + eventBuilder, taskLevel, csrDeps, &bcsCsr); if (eventBuilder.getEvent()) { eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference()); @@ -1248,13 +1248,11 @@ void CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispat template template -void 
CommandQueueHw::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed) { - const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams()); - const bool blitRequired = isCopyOnly; - const bool blit = blitAllowed && (blitPreferred || blitRequired); - +void CommandQueueHw::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, + cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) { + const bool blit = EngineHelpers::isBcs(csr.getOsContext().getEngineType()); if (blit) { - enqueueBlit(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking); + enqueueBlit(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr); } else { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInOperation, this->getClDevice()); diff --git a/opencl/source/command_queue/enqueue_copy_buffer.h b/opencl/source/command_queue/enqueue_copy_buffer.h index a42424da1e..a152605978 100644 --- a/opencl/source/command_queue/enqueue_copy_buffer.h +++ b/opencl/source/command_queue/enqueue_copy_buffer.h @@ -47,8 +47,9 @@ cl_int CommandQueueHw::enqueueCopyBuffer( MemObjSurface s1(srcBuffer); MemObjSurface s2(dstBuffer); Surface *surfaces[] = {&s1, &s2}; - auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, blitAllowed); + + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr); return CL_SUCCESS; } diff --git 
a/opencl/source/command_queue/enqueue_copy_buffer_rect.h b/opencl/source/command_queue/enqueue_copy_buffer_rect.h index 794c237c98..45f4e2cf6f 100644 --- a/opencl/source/command_queue/enqueue_copy_buffer_rect.h +++ b/opencl/source/command_queue/enqueue_copy_buffer_rect.h @@ -54,8 +54,8 @@ cl_int CommandQueueHw::enqueueCopyBufferRect( dc.dstSlicePitch = dstSlicePitch; MultiDispatchInfo dispatchInfo(dc); - auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER_RECT, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr); return CL_SUCCESS; } diff --git a/opencl/source/command_queue/enqueue_copy_image.h b/opencl/source/command_queue/enqueue_copy_image.h index e99aaecf29..e06014b988 100644 --- a/opencl/source/command_queue/enqueue_copy_image.h +++ b/opencl/source/command_queue/enqueue_copy_image.h @@ -50,10 +50,8 @@ cl_int CommandQueueHw::enqueueCopyImage( } MultiDispatchInfo dispatchInfo(dc); - cl_command_type cmdType = CL_COMMAND_COPY_IMAGE; - auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(srcOrigin, region, *srcImage) && blitEnqueueImageAllowed(dstOrigin, region, *dstImage); - - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_IMAGE, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr); return CL_SUCCESS; } diff --git a/opencl/source/command_queue/enqueue_read_buffer.h b/opencl/source/command_queue/enqueue_read_buffer.h index de13138549..f440be0e03 100644 --- 
a/opencl/source/command_queue/enqueue_read_buffer.h +++ b/opencl/source/command_queue/enqueue_read_buffer.h @@ -36,12 +36,6 @@ cl_int CommandQueueHw::enqueueReadBuffer( cl_event *event) { const cl_command_type cmdType = CL_COMMAND_READ_BUFFER; - auto blitAllowed = blitEnqueueAllowed(cmdType); - auto &csr = getCommandStreamReceiver(blitAllowed); - - if (nullptr == mapAllocation) { - notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType())); - } auto rootDeviceIndex = getDevice().getRootDeviceIndex(); bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true; @@ -67,6 +61,9 @@ cl_int CommandQueueHw::enqueueReadBuffer( } if (isCpuCopyAllowed) { + if (nullptr == mapAllocation) { + notifyEnqueueReadBuffer(buffer, !!blockingRead, false); + } if (isMemTransferNeeded) { return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, ptr, numEventsInWaitList, eventWaitList, event); @@ -102,7 +99,7 @@ cl_int CommandQueueHw::enqueueReadBuffer( } else { surfaces[1] = &hostPtrSurf; if (size != 0) { - bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); + bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -128,7 +125,12 @@ cl_int CommandQueueHw::enqueueReadBuffer( context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize); } } - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed); + + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo); + if (nullptr == mapAllocation) { + notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType())); + 
} + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr); return CL_SUCCESS; } diff --git a/opencl/source/command_queue/enqueue_read_buffer_rect.h b/opencl/source/command_queue/enqueue_read_buffer_rect.h index 82bf4e4bc8..f6921b2428 100644 --- a/opencl/source/command_queue/enqueue_read_buffer_rect.h +++ b/opencl/source/command_queue/enqueue_read_buffer_rect.h @@ -59,13 +59,11 @@ cl_int CommandQueueHw::enqueueReadBufferRect( MemObjSurface bufferSurf(buffer); HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize); Surface *surfaces[] = {&bufferSurf, &hostPtrSurf}; - auto blitAllowed = blitEnqueueAllowed(cmdType); if (region[0] != 0 && region[1] != 0 && region[2] != 0) { - auto &csr = getCommandStreamReceiver(blitAllowed); - bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); + bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -89,7 +87,8 @@ cl_int CommandQueueHw::enqueueReadBufferRect( dc.dstSlicePitch = hostSlicePitch; MultiDispatchInfo dispatchInfo(dc); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER_RECT, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr); if (context->isProvidingPerformanceHints()) { context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast(buffer), ptr); diff --git a/opencl/source/command_queue/enqueue_read_image.h b/opencl/source/command_queue/enqueue_read_image.h index d57b2697d9..c33894f486 100644 --- a/opencl/source/command_queue/enqueue_read_image.h +++ b/opencl/source/command_queue/enqueue_read_image.h @@ -41,12 +41,6 @@ cl_int CommandQueueHw::enqueueReadImage( const 
cl_event *eventWaitList, cl_event *event) { cl_command_type cmdType = CL_COMMAND_READ_IMAGE; - auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *srcImage); - auto &csr = getCommandStreamReceiver(blitAllowed); - - if (nullptr == mapAllocation) { - notifyEnqueueReadImage(srcImage, static_cast(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType())); - } auto isMemTransferNeeded = true; if (srcImage->isMemObjZeroCopy()) { @@ -78,7 +72,7 @@ cl_int CommandQueueHw::enqueueReadImage( if (region[0] != 0 && region[1] != 0 && region[2] != 0) { - bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); + bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -105,7 +99,11 @@ cl_int CommandQueueHw::enqueueReadImage( auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer; MultiDispatchInfo dispatchInfo(dc); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_IMAGE, dispatchInfo); + if (nullptr == mapAllocation) { + notifyEnqueueReadImage(srcImage, static_cast(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType())); + } + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, csr); if (context->isProvidingPerformanceHints()) { if (!isL3Capable(ptr, hostPtrSize)) { diff --git a/opencl/source/command_queue/enqueue_svm.h b/opencl/source/command_queue/enqueue_svm.h index 2ad4f3df8a..d7e934d401 100644 --- a/opencl/source/command_queue/enqueue_svm.h +++ b/opencl/source/command_queue/enqueue_svm.h @@ -126,8 +126,8 @@ cl_int CommandQueueHw::enqueueSVMMap(cl_bool blockingMap, dc.unifiedMemoryArgsRequireMemSync = externalAppCall; MultiDispatchInfo dispatchInfo(dc); - auto 
blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr); if (event) { castToObjectOrAbort(*event)->setCmdType(CL_COMMAND_SVM_MAP); @@ -210,8 +210,8 @@ cl_int CommandQueueHw::enqueueSVMUnmap(void *svmPtr, dc.unifiedMemoryArgsRequireMemSync = externalAppCall; MultiDispatchInfo dispatchInfo(dc); - auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr); if (event) { castToObjectOrAbort(*event)->setCmdType(CL_COMMAND_SVM_UNMAP); @@ -330,32 +330,29 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, if (copyType == SvmToHost) { GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); HostPtrSurface dstHostPtrSurf(dstPtr, size); - cmdType = CL_COMMAND_READ_BUFFER; - auto blitAllowed = blitEnqueueAllowed(cmdType); if (size != 0) { - auto &csr = getCommandStreamReceiver(blitAllowed); - bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true); + bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } dstPtr = reinterpret_cast(dstHostPtrSurf.getAllocation()->getGpuAddress()); - 
notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType())); } setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), dstPtr, dstHostPtrSurf.getAllocation()); surfaces[0] = &srcSvmSurf; surfaces[1] = &dstHostPtrSurf; dispatchInfo.setBuiltinOpParams(operationParams); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed); + + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo); + notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType())); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } else if (copyType == HostToSvm) { HostPtrSurface srcHostPtrSurf(const_cast(srcPtr), size); GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); cmdType = CL_COMMAND_WRITE_BUFFER; - auto blitAllowed = blitEnqueueAllowed(cmdType); if (size != 0) { - auto &csr = getCommandStreamReceiver(blitAllowed); - bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false); + bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -367,7 +364,8 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, surfaces[1] = &srcHostPtrSurf; dispatchInfo.setBuiltinOpParams(operationParams); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, 
numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } else if (copyType == SvmToSvm) { GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); @@ -378,16 +376,15 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, surfaces[1] = &dstSvmSurf; dispatchInfo.setBuiltinOpParams(operationParams); - auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } else { HostPtrSurface srcHostPtrSurf(const_cast(srcPtr), size); HostPtrSurface dstHostPtrSurf(dstPtr, size); cmdType = CL_COMMAND_WRITE_BUFFER; - auto blitAllowed = blitEnqueueAllowed(cmdType); if (size != 0) { - auto &csr = getCommandStreamReceiver(blitAllowed); + auto &csr = getGpgpuCommandStreamReceiver(); bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false); status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true); if (!status) { @@ -401,7 +398,8 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, surfaces[1] = &dstHostPtrSurf; dispatchInfo.setBuiltinOpParams(operationParams); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } if (event) { auto pEvent = castToObjectOrAbort(*event); diff --git a/opencl/source/command_queue/enqueue_write_buffer.h b/opencl/source/command_queue/enqueue_write_buffer.h index 44595fdf73..d516ee302e 100644 --- 
a/opencl/source/command_queue/enqueue_write_buffer.h +++ b/opencl/source/command_queue/enqueue_write_buffer.h @@ -82,7 +82,6 @@ cl_int CommandQueueHw::enqueueWriteBuffer( MemObjSurface bufferSurf(buffer); GeneralSurface mapSurface; Surface *surfaces[] = {&bufferSurf, nullptr}; - auto blitAllowed = blitEnqueueAllowed(cmdType); if (mapAllocation) { surfaces[1] = &mapSurface; @@ -95,9 +94,7 @@ cl_int CommandQueueHw::enqueueWriteBuffer( } else { surfaces[1] = &hostPtrSurf; if (size != 0) { - - auto &csr = getCommandStreamReceiver(blitAllowed); - bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); + bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -116,7 +113,8 @@ cl_int CommandQueueHw::enqueueWriteBuffer( dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation(); MultiDispatchInfo dispatchInfo(dc); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr); if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast(buffer)); diff --git a/opencl/source/command_queue/enqueue_write_buffer_rect.h b/opencl/source/command_queue/enqueue_write_buffer_rect.h index 0ee8cfb618..af1577469b 100644 --- a/opencl/source/command_queue/enqueue_write_buffer_rect.h +++ b/opencl/source/command_queue/enqueue_write_buffer_rect.h @@ -58,13 +58,11 @@ cl_int CommandQueueHw::enqueueWriteBufferRect( MemObjSurface dstBufferSurf(buffer); HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true); Surface *surfaces[] = {&dstBufferSurf, 
&hostPtrSurf}; - auto blitAllowed = blitEnqueueAllowed(cmdType); if (region[0] != 0 && region[1] != 0 && region[2] != 0) { - auto &csr = getCommandStreamReceiver(blitAllowed); - bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); + bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -88,7 +86,8 @@ cl_int CommandQueueHw::enqueueWriteBufferRect( dc.dstSlicePitch = bufferSlicePitch; MultiDispatchInfo dispatchInfo(dc); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER_RECT, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr); if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast(buffer)); diff --git a/opencl/source/command_queue/enqueue_write_image.h b/opencl/source/command_queue/enqueue_write_image.h index 1700c28cbc..859d467ab0 100644 --- a/opencl/source/command_queue/enqueue_write_image.h +++ b/opencl/source/command_queue/enqueue_write_image.h @@ -54,7 +54,6 @@ cl_int CommandQueueHw::enqueueWriteImage( HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true); GeneralSurface mapSurface; Surface *surfaces[] = {&dstImgSurf, nullptr}; - auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *dstImage); if (mapAllocation) { surfaces[1] = &mapSurface; mapSurface.setGraphicsAllocation(mapAllocation); @@ -66,8 +65,7 @@ cl_int CommandQueueHw::enqueueWriteImage( if (region[0] != 0 && region[1] != 0 && region[2] != 0) { - auto &csr = getCommandStreamReceiver(blitAllowed); - bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); + bool 
status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -94,7 +92,8 @@ cl_int CommandQueueHw::enqueueWriteImage( auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d; MultiDispatchInfo dispatchInfo(dc); - dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, blitAllowed); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_IMAGE, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr); if (context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast(dstImage)); diff --git a/opencl/source/helpers/properties_helper.cpp b/opencl/source/helpers/properties_helper.cpp index 1cf01e8f02..c2c8be3e5f 100644 --- a/opencl/source/helpers/properties_helper.cpp +++ b/opencl/source/helpers/properties_helper.cpp @@ -54,11 +54,11 @@ void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &cs if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) { auto taskCountPreviousRootDevice = event->peekTaskCount(); - auto tagAddressPreviousRootDevice = event->getCommandQueue()->getCommandStreamReceiver(false).getTagAddress(); + auto tagAddressPreviousRootDevice = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagAddress(); csrDeps.taskCountContainer.push_back({taskCountPreviousRootDevice, reinterpret_cast(tagAddressPreviousRootDevice)}); - auto graphicsAllocation = event->getCommandQueue()->getCommandStreamReceiver(false).getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex()); + auto graphicsAllocation = 
event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex()); currentCsr.getResidencyAllocations().push_back(graphicsAllocation); } } diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 8b706ed978..350fb29305 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1065,7 +1065,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local hwHelper.getBarriersCountFromHasBarriers(barrierCount), workDim, localWorkSize); - auto isEngineInstanced = commandQueue->getCommandStreamReceiver(false).getOsContext().isEngineInstanced(); + auto isEngineInstanced = commandQueue->getGpgpuCommandStreamReceiver().getOsContext().isEngineInstanced(); maxWorkGroupCount = hwHelper.adjustMaxWorkGroupCount(maxWorkGroupCount, engineGroupType, hardwareInfo, isEngineInstanced); return maxWorkGroupCount; } diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp index 95a63c032b..dd95806611 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp @@ -87,6 +87,7 @@ struct BlitEnqueueTests : public ::testing::Test { auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; bool createBcsEngine = !capabilityTable.blitterOperationsSupported; capabilityTable.blitterOperationsSupported = true; + REQUIRE_BLITTER_OR_SKIP(&device->getHardwareInfo()); if (createBcsEngine) { auto &engine = device->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority); diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp index 2350f28717..63d78b27bd 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp +++ 
b/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp @@ -197,7 +197,7 @@ HWTEST_F(CommandQueueHwTest, GivenCommandQueueWhenProcessDispatchForMarkerCalled MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield()); auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr, false); myCmdQ->csr = &csr; - csr.osContext = &pCmdQ->getCommandStreamReceiver(false).getOsContext(); + csr.osContext = &pCmdQ->getGpgpuCommandStreamReceiver().getOsContext(); std::unique_ptr event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); ASSERT_NE(nullptr, event); diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 5f7864ed48..d2ee6e1f65 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -228,7 +228,46 @@ TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngi EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver()); } -struct CommandQueueWithBlitOperationsTests : public ::testing::TestWithParam {}; +struct BuiltinOpParamsFixture { + BuiltinOpParams createParams(cl_command_type cmdType) { + BuiltinOpParams params{}; + switch (cmdType) { + case CL_COMMAND_READ_IMAGE: + params.srcMemObj = &correctImage; + params.srcOffset = correctOrigin; + params.size = correctRegion; + break; + case CL_COMMAND_WRITE_IMAGE: + params.dstMemObj = &correctImage; + params.dstOffset = correctOrigin; + params.size = correctRegion; + break; + case CL_COMMAND_COPY_IMAGE: + params.srcMemObj = &correctImage; + params.dstMemObj = &correctImage; + params.srcOffset = correctOrigin; + params.dstOffset = correctOrigin; + params.size = correctRegion; + break; + case CL_COMMAND_COPY_BUFFER: + params.srcMemObj = &correctBuffer; + params.dstMemObj = &correctBuffer; + break; + case CL_COMMAND_SVM_MEMCPY: + 
params.srcSvmAlloc = correctBuffer.getGraphicsAllocation(0); + params.dstSvmAlloc = correctBuffer.getGraphicsAllocation(0); + break; + } + return params; + } + + size_t correctRegion[3] = {10u, 10u, 0}; + size_t correctOrigin[3] = {1u, 1u, 0}; + MockImageBase correctImage = {}; + MockBuffer correctBuffer = {}; +}; + +struct CommandQueueWithBlitOperationsTests : public ::testing::TestWithParam, BuiltinOpParamsFixture {}; TEST_P(CommandQueueWithBlitOperationsTests, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterBcsCsr) { HardwareInfo hwInfo = *defaultHwInfo; @@ -242,8 +281,9 @@ TEST_P(CommandQueueWithBlitOperationsTests, givenDeviceNotSupportingBlitOperatio auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver; EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver()); - auto blitAllowed = cmdQ.blitEnqueueAllowed(cmdType); - EXPECT_EQ(defaultCsr, &cmdQ.getCommandStreamReceiver(blitAllowed)); + BuiltinOpParams params = createParams(cmdType); + MultiDispatchInfo dispatchInfo{params}; + EXPECT_EQ(defaultCsr, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo)); } HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsCreatedThenBcsIsTakenFromFirstSubDevice) { @@ -264,12 +304,13 @@ HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportin MockCommandQueue cmdQ(nullptr, device.get(), 0, false); auto cmdType = GetParam(); - auto blitAllowed = cmdQ.blitEnqueueAllowed(cmdType); + BuiltinOpParams params = createParams(cmdType); + MultiDispatchInfo dispatchInfo{params}; EXPECT_NE(nullptr, cmdQ.getBcsCommandStreamReceiver()); EXPECT_EQ(bcsEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver()); - EXPECT_EQ(bcsEngine.commandStreamReceiver, &cmdQ.getCommandStreamReceiver(blitAllowed)); - EXPECT_EQ(bcsEngine.osContext, &cmdQ.getCommandStreamReceiver(blitAllowed).getOsContext()); + EXPECT_EQ(bcsEngine.commandStreamReceiver, 
&cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo)); + EXPECT_EQ(bcsEngine.osContext, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo).getOsContext()); } INSTANTIATE_TEST_CASE_P(uint32_t, @@ -1165,13 +1206,18 @@ TEST(CommandQueue, givenCopyOnlyQueueWhenCallingBlitEnqueueAllowedThenReturnTrue queue.isCopyOnly = false; EXPECT_EQ(queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(), - queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER)); + queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {})); queue.isCopyOnly = true; - EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER)); + EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {})); } -TEST(CommandQueue, givenClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) { +struct CommandQueueBuiltinTest : BuiltinOpParamsFixture, ::testing::Test {}; + +TEST_F(CommandQueueBuiltinTest, givenClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) { + DebugManagerStateRestore restore{}; + DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1); + MockContext context{}; MockCommandQueue queue(&context, context.getDevice(0), 0, false); @@ -1181,17 +1227,44 @@ TEST(CommandQueue, givenClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectV bool supported = queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER)); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER)); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER)); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER_RECT)); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER_RECT)); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT)); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY)); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE)); - EXPECT_EQ(supported, 
queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE)); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE)); - EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE_TO_BUFFER)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {})); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER, {})); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER, {})); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER_RECT, {})); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER_RECT, {})); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT, {})); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY, {})); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE, createParams(CL_COMMAND_READ_IMAGE))); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE, createParams(CL_COMMAND_WRITE_IMAGE))); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, createParams(CL_COMMAND_COPY_IMAGE))); + EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE_TO_BUFFER, {})); +} + +TEST_F(CommandQueueBuiltinTest, givenCopyImageCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) { + DebugManagerStateRestore restore{}; + DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1); + + MockContext context{}; + + MockCommandQueue queue(&context, context.getDevice(0), 0, false); + if (!queue.bcsEngine) { + queue.bcsEngine = &context.getDevice(0)->getDefaultEngine(); + } + if (!queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + GTEST_SKIP(); + } + + auto builtinOpParams = createParams(CL_COMMAND_COPY_IMAGE); + EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams)); + + builtinOpParams.srcOffset[0] = 0x9999; + EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams)); + + builtinOpParams.dstOffset[0] = 0x9999; + 
EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams)); + + builtinOpParams.srcOffset = correctOrigin; + EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams)); } TEST(CommandQueue, givenRegularClCommandWhenCallingBlitEnqueuePreferredThenReturnCorrectValue) { diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp index 99ee975b7e..2ce477ea24 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp @@ -210,13 +210,15 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenNumberOfPipelineSelectsIsOne) } HWTEST_F(EnqueueCopyImageTest, givenDeviceWithBlitterSupportWhenEnqueueCopyImageThenBlitEnqueueImageAllowedReturnsCorrectResult) { + auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); + REQUIRE_BLITTER_OR_SKIP(hwInfo); + DebugManagerStateRestore restorer; DebugManager.flags.OverrideInvalidEngineWithDefault.set(1); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1); - auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); - auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); hwInfo->capabilityTable.blitterOperationsSupported = true; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp index 018a97ca99..49e2df69ce 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp @@ -761,14 +761,16 @@ HWTEST_F(EnqueueReadImageTest, GivenImage1DThatIsZeroCopyWhenReadImageWithTheSam } 
HWTEST_F(EnqueueReadImageTest, givenDeviceWithBlitterSupportWhenEnqueueReadImageThenBlitEnqueueImageAllowedReturnsCorrectResult) { + auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); + hwInfo->capabilityTable.blitterOperationsSupported = true; + REQUIRE_BLITTER_OR_SKIP(hwInfo); + DebugManagerStateRestore restorer; DebugManager.flags.OverrideInvalidEngineWithDefault.set(1); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1); - auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); - hwInfo->capabilityTable.blitterOperationsSupported = true; size_t origin[] = {0, 0, 0}; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr image(Image2dHelper<>::create(context)); diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp index f52079bb28..b7be2a1df3 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp @@ -353,11 +353,6 @@ HWTEST_F(EnqueueSvmMemCopyTest, givenCommandQueueWhenEnqueueSVMMemcpyIsCalledThe EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_TRUE(mockCmdQ->notifyEnqueueSVMMemcpyCalled); - auto blitAllowed = mockCmdQ->blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY); - - auto &csr = mockCmdQ->getCommandStreamReceiver(blitAllowed); - EXPECT_EQ(EngineHelpers::isBcs(csr.getOsContext().getEngineType()), mockCmdQ->useBcsCsrOnNotifyEnabled); - alignedFree(dstHostPtr); } diff --git a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp index 9dfca5704c..079dd6a1df 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp +++ 
b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp @@ -209,14 +209,16 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenMediaVfeS } HWTEST_F(EnqueueWriteImageTest, givenDeviceWithBlitterSupportWhenEnqueueWriteImageThenBlitEnqueueImageAllowedReturnsCorrectResult) { + auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); + hwInfo->capabilityTable.blitterOperationsSupported = true; + REQUIRE_BLITTER_OR_SKIP(hwInfo); + DebugManagerStateRestore restorer; DebugManager.flags.OverrideInvalidEngineWithDefault.set(1); DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1); - auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); - hwInfo->capabilityTable.blitterOperationsSupported = true; size_t origin[] = {0, 0, 0}; auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); std::unique_ptr image(Image2dHelper<>::create(context)); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index 091e3f5d3e..280f086de4 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -84,11 +84,11 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); 
EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } { @@ -112,11 +112,11 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } alignedFree(svmPtr); } @@ -187,15 +187,15 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); EXPECT_EQ(21u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(pCmdQ3->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); + 
EXPECT_EQ(reinterpret_cast(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); auto semaphoreCmd2 = genCmdCast(*(semaphores[2])); EXPECT_EQ(7u, semaphoreCmd2->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress()); } { @@ -212,15 +212,15 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); auto semaphoreCmd2 = genCmdCast(*(semaphores[2])); EXPECT_EQ(21u, semaphoreCmd2->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(pCmdQ3->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress()); } { @@ -246,7 +246,7 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); - 
EXPECT_EQ(reinterpret_cast(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); } } @@ -310,7 +310,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ1->getCommandStreamReceiver(false)); + eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver()); EXPECT_EQ(0u, csrDeps.taskCountContainer.size()); EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps)); @@ -336,7 +336,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ2->getCommandStreamReceiver(false)); + eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver()); EXPECT_EQ(3u, csrDeps.taskCountContainer.size()); EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps)); diff --git a/opencl/test/unit_test/libult/ult_command_stream_receiver.h b/opencl/test/unit_test/libult/ult_command_stream_receiver.h index beb764296b..5127c0acc7 100644 --- a/opencl/test/unit_test/libult/ult_command_stream_receiver.h +++ b/opencl/test/unit_test/libult/ult_command_stream_receiver.h @@ -272,6 +272,12 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ ensureCommandBufferAllocationCalled++; BaseClass::ensureCommandBufferAllocation(commandStream, minimumRequiredSize, additionalAllocationSize); } + + void 
waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override { + BaseClass::waitForTaskCountAndCleanTemporaryAllocationList(requiredTaskCount); + waitForTaskCountAndCleanAllocationListCalled++; + } + std::vector aubCommentMessages; BatchBuffer latestFlushedBatchBuffer = {}; @@ -281,6 +287,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ LinearStream *lastFlushedCommandStream = nullptr; + uint32_t waitForTaskCountAndCleanAllocationListCalled = 0; uint32_t makeSurfacePackNonResidentCalled = false; uint32_t latestSentTaskCountValueDuringFlush = 0; uint32_t blitBufferCalled = 0; diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index f11fb5c406..ec81051594 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -80,10 +80,9 @@ struct BcsBufferTests : public ::testing::Test { void waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override { EXPECT_EQ(1u, waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_EQ(this->latestFlushedTaskCount, requiredTaskCount); - waitForTaskCountAndCleanAllocationListCalled++; + UltCommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(requiredTaskCount); } - uint32_t waitForTaskCountAndCleanAllocationListCalled = 0; uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0; CommandStreamReceiver *gpgpuCsr = nullptr; }; @@ -867,7 +866,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; - auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); + auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; @@ -876,12 +875,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, 
givenBlockingWriteBufferWhenUsingBcsThenCallW void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; - auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); + auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; @@ -892,7 +891,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW EXPECT_TRUE(tempAllocationFound); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCallWait) { @@ -906,7 +905,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; - auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); + auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; @@ -921,12 +920,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa cmdQ->enqueueReadBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, 
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); - EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; - auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); + auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; @@ -939,7 +938,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa cmdQ->enqueueReadBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); - EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenCallWait) { @@ -953,7 +952,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; - auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); + auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; @@ -968,12 +967,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC cmdQ->enqueueWriteBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, 
region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); - EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; - auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); + auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; @@ -986,7 +985,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC cmdQ->enqueueWriteBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); - EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWait) { @@ -1000,7 +999,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; - auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); + auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; @@ -1009,12 +1008,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa void *hostPtr = 
reinterpret_cast(0x12340000); cmdQ->enqueueReadBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; - auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); + auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; @@ -1025,7 +1024,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa EXPECT_TRUE(tempAllocationFound); cmdQ->enqueueReadBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCalledWhenUsingBcsThenCallWait) { @@ -1039,7 +1038,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCa auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; - auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); + auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; auto pDstSVM = std::make_unique(256); @@ -1047,16 +1046,16 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCa cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - 
EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); - auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); + auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead(); EXPECT_EQ(0u, tempAlloc->countSuccessors()); EXPECT_EQ(pDstSVM.get(), reinterpret_cast(tempAlloc->getGpuAddress())); cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr); - EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); } @@ -1072,24 +1071,24 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenSrcHostPtrBlockingEnqueueSVMMemcpyAndEnq auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; - auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); + auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; auto pSrcSVM = std::make_unique(256); auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields()); cmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr); - EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); - auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); + auto tempAlloc = 
gpgpuCsr.getTemporaryAllocations().peekHead(); EXPECT_EQ(0u, tempAlloc->countSuccessors()); EXPECT_EQ(pSrcSVM.get(), reinterpret_cast(tempAlloc->getGpuAddress())); cmdQ->enqueueSVMMemcpy(true, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr); - EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pDstSVM); } @@ -1105,7 +1104,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSV auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; - auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); + auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; auto pSrcSVM = std::make_unique(256); @@ -1113,17 +1112,17 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSV cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); - auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); + auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead(); EXPECT_EQ(1u, tempAlloc->countSuccessors()); EXPECT_EQ(pSrcSVM.get(), reinterpret_cast(tempAlloc->getGpuAddress())); EXPECT_EQ(pDstSVM.get(), reinterpret_cast(tempAlloc->next->getGpuAddress())); cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr); - EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); } 
HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyWhenEnqueueSVMMemcpyThenSvmMemcpyCommandIsCalledAndBcs) { @@ -1159,7 +1158,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyTypeWhenEnqueueNonBlockingSV auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; - auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); + auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields()); @@ -1287,7 +1286,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedEnqueueWhenUsingBcsThenWaitForVal auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); cmdQ->bcsEngine = &bcsEngineControl; - auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); + auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver(); myMockCsr->gpgpuCsr = &gpgpuCsr; cl_int retVal = CL_SUCCESS; diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index a32b46fadd..deeb563111 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -311,7 +311,7 @@ class MockCommandQueueHw : public CommandQueueHw { return BaseClass::isCacheFlushForBcsRequired(); } - bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) override { + bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const override { isBlitEnqueueImageAllowed = BaseClass::blitEnqueueImageAllowed(origin, region, image); return isBlitEnqueueImageAllowed; } @@ -329,7 +329,7 @@ class MockCommandQueueHw : public CommandQueueHw { bool notifyEnqueueSVMMemcpyCalled = false; bool cpuDataTransferHandlerCalled = false; bool useBcsCsrOnNotifyEnabled = false; - bool 
isBlitEnqueueImageAllowed = false; + mutable bool isBlitEnqueueImageAllowed = false; struct OverrideReturnValue { bool enabled = false; bool returnValue = false; diff --git a/shared/source/helpers/vec.h b/shared/source/helpers/vec.h index 8a769e859a..98b5358d12 100644 --- a/shared/source/helpers/vec.h +++ b/shared/source/helpers/vec.h @@ -56,7 +56,7 @@ struct Vec3 { return values[i]; } - T operator[](uint32_t i) const { + const T &operator[](uint32_t i) const { UNRECOVERABLE_IF(i > 2); return values[i]; }