From 69ae9dc9c2c65b9ca0a600954284a9ddfff65fc0 Mon Sep 17 00:00:00 2001 From: Maciej Dziuban Date: Tue, 31 Aug 2021 19:22:58 +0000 Subject: [PATCH] Correct CSR used for hostptr allocations in builtins Signed-off-by: Maciej Dziuban --- opencl/source/command_queue/CMakeLists.txt | 1 + opencl/source/command_queue/command_queue.cpp | 30 +-- opencl/source/command_queue/command_queue.h | 7 +- .../command_queue/enqueue_copy_buffer.h | 5 +- .../command_queue/enqueue_copy_buffer_rect.h | 6 +- .../source/command_queue/enqueue_copy_image.h | 7 +- .../command_queue/enqueue_read_buffer.h | 5 +- .../command_queue/enqueue_read_buffer_rect.h | 6 +- .../source/command_queue/enqueue_read_image.h | 7 +- opencl/source/command_queue/enqueue_svm.h | 25 +- .../command_queue/enqueue_write_buffer.h | 5 +- .../command_queue/enqueue_write_buffer_rect.h | 6 +- .../command_queue/enqueue_write_image.h | 7 +- .../source/command_queue/transfer_direction.h | 53 +++++ .../command_queue/command_queue_tests.cpp | 220 ++++++------------ .../enqueue_copy_image_tests.cpp | 2 +- .../unit_test/mem_obj/buffer_bcs_tests.cpp | 66 +++--- 17 files changed, 229 insertions(+), 229 deletions(-) create mode 100644 opencl/source/command_queue/transfer_direction.h diff --git a/opencl/source/command_queue/CMakeLists.txt b/opencl/source/command_queue/CMakeLists.txt index e22a8ace01..b11e0287b7 100644 --- a/opencl/source/command_queue/CMakeLists.txt +++ b/opencl/source/command_queue/CMakeLists.txt @@ -43,6 +43,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/local_work_size.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.h + ${CMAKE_CURRENT_SOURCE_DIR}/transfer_direction.h ) if(SUPPORT_XEHP_AND_LATER) diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index d4bd3fbc1b..be374668a2 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -144,9 
+144,9 @@ CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const { return nullptr; } -CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const { - const bool blitAllowed = blitEnqueueAllowed(cmdType, dispatchInfo.peekBuiltinOpParams()); - const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams()); +CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(cl_command_type cmdType, TransferDirection transferDirection, bool imagesValidForBlit) const { + const bool blitAllowed = blitEnqueueAllowed(cmdType, imagesValidForBlit); + const bool blitPreferred = blitEnqueuePreferred(transferDirection); const bool blitRequired = isCopyOnly; const bool blit = blitAllowed && (blitPreferred || blitRequired); @@ -725,7 +725,7 @@ bool CommandQueue::queueDependenciesClearRequired() const { return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get(); } -bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const { +bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, bool imagesValidForBlit) const { if (bcsEngine == nullptr) { return false; } @@ -748,32 +748,16 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpPa case CL_COMMAND_SVM_MEMCPY: return true; case CL_COMMAND_READ_IMAGE: - return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast(params.srcMemObj)); case CL_COMMAND_WRITE_IMAGE: - return blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast(params.dstMemObj)); case CL_COMMAND_COPY_IMAGE: - return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast(params.srcMemObj)) && - blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast(params.dstMemObj)); + return imagesValidForBlit; default: return false; } } -bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams
&builtinOpParams) const { - bool isLocalToLocal = false; - - if (cmdType == CL_COMMAND_COPY_BUFFER && - builtinOpParams.srcMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool() && - builtinOpParams.dstMemObj->getGraphicsAllocation(device->getRootDeviceIndex())->isAllocatedInLocalMemoryPool()) { - isLocalToLocal = true; - } - if (cmdType == CL_COMMAND_SVM_MEMCPY && - builtinOpParams.srcSvmAlloc->isAllocatedInLocalMemoryPool() && - builtinOpParams.dstSvmAlloc->isAllocatedInLocalMemoryPool()) { - isLocalToLocal = true; - } - - if (isLocalToLocal) { +bool CommandQueue::blitEnqueuePreferred(TransferDirection transferDirection) const { + if (transferDirection == TransferDirection::LocalToLocal) { if (DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get() != -1) { return static_cast(DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.get()); } diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index da79c5cef3..1335527db2 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/helpers/engine_control.h" +#include "opencl/source/command_queue/transfer_direction.h" #include "opencl/source/event/event.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/dispatch_info.h" @@ -224,7 +225,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const; CommandStreamReceiver *getBcsCommandStreamReceiver() const; CommandStreamReceiver *getBcsForAuxTranslation() const; - MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const; + MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(cl_command_type cmdType, TransferDirection transferDirection, bool imagesValidForBlit) const; 
Device &getDevice() const noexcept; ClDevice &getClDevice() const { return *device; } Context &getContext() const { return *context; } @@ -353,8 +354,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> { cl_uint numEventsInWaitList, const cl_event *eventWaitList); void providePerformanceHint(TransferProperties &transferProperties); bool queueDependenciesClearRequired() const; - bool blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const; - bool blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const; + bool blitEnqueueAllowed(cl_command_type cmdType, bool imagesValidForBlit) const; + bool blitEnqueuePreferred(TransferDirection transferDirection) const; MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const; void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo); virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0; diff --git a/opencl/source/command_queue/enqueue_copy_buffer.h b/opencl/source/command_queue/enqueue_copy_buffer.h index a152605978..d625292ce3 100644 --- a/opencl/source/command_queue/enqueue_copy_buffer.h +++ b/opencl/source/command_queue/enqueue_copy_buffer.h @@ -31,6 +31,10 @@ cl_int CommandQueueHw::enqueueCopyBuffer( cl_event *event) { auto eBuiltInOpsType = EBuiltInOps::CopyBufferToBuffer; + GraphicsAllocation *srcBufferAlloc = srcBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); + GraphicsAllocation *dstBufferAlloc = dstBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcBufferAlloc, *dstBufferAlloc), false); + if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) { eBuiltInOpsType = EBuiltInOps::CopyBufferToBufferStateless; } @@ -48,7 +52,6 @@ cl_int
CommandQueueHw::enqueueCopyBuffer( MemObjSurface s2(dstBuffer); Surface *surfaces[] = {&s1, &s2}; - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER, dispatchInfo); dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr); return CL_SUCCESS; diff --git a/opencl/source/command_queue/enqueue_copy_buffer_rect.h b/opencl/source/command_queue/enqueue_copy_buffer_rect.h index 45f4e2cf6f..0efcc2d1d2 100644 --- a/opencl/source/command_queue/enqueue_copy_buffer_rect.h +++ b/opencl/source/command_queue/enqueue_copy_buffer_rect.h @@ -34,6 +34,10 @@ cl_int CommandQueueHw::enqueueCopyBufferRect( cl_event *event) { auto eBuiltInOps = EBuiltInOps::CopyBufferRect; + GraphicsAllocation *srcBufferAlloc = srcBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); + GraphicsAllocation *dstBufferAlloc = dstBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER_RECT, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcBufferAlloc, *dstBufferAlloc), false); + if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) { eBuiltInOps = EBuiltInOps::CopyBufferRectStateless; } @@ -54,7 +58,7 @@ cl_int CommandQueueHw::enqueueCopyBufferRect( dc.dstSlicePitch = dstSlicePitch; MultiDispatchInfo dispatchInfo(dc); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER_RECT, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr); return CL_SUCCESS; diff --git a/opencl/source/command_queue/enqueue_copy_image.h b/opencl/source/command_queue/enqueue_copy_image.h index e06014b988..4cfc612a14 100644 --- a/opencl/source/command_queue/enqueue_copy_image.h +++ b/opencl/source/command_queue/enqueue_copy_image.h @@ -32,6 +32,11 @@ cl_int CommandQueueHw::enqueueCopyImage( const cl_event *eventWaitList, 
cl_event *event) { + const bool validImages = blitEnqueueImageAllowed(srcOrigin, region, *srcImage) && blitEnqueueImageAllowed(dstOrigin, region, *dstImage); + GraphicsAllocation *srcImageAlloc = srcImage->getGraphicsAllocation(getDevice().getRootDeviceIndex()); + GraphicsAllocation *dstImageAlloc = dstImage->getGraphicsAllocation(getDevice().getRootDeviceIndex()); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_IMAGE, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcImageAlloc, *dstImageAlloc), validImages); + MemObjSurface srcImgSurf(srcImage); MemObjSurface dstImgSurf(dstImage); Surface *surfaces[] = {&srcImgSurf, &dstImgSurf}; @@ -50,7 +55,7 @@ cl_int CommandQueueHw::enqueueCopyImage( } MultiDispatchInfo dispatchInfo(dc); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_IMAGE, dispatchInfo); + dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr); return CL_SUCCESS; diff --git a/opencl/source/command_queue/enqueue_read_buffer.h b/opencl/source/command_queue/enqueue_read_buffer.h index f440be0e03..07f0b43dd6 100644 --- a/opencl/source/command_queue/enqueue_read_buffer.h +++ b/opencl/source/command_queue/enqueue_read_buffer.h @@ -41,6 +41,8 @@ cl_int CommandQueueHw::enqueueReadBuffer( bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? 
buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true; bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingRead, size, ptr, numEventsInWaitList, eventWaitList); + GraphicsAllocation *srcBufferAlloc = buffer->getGraphicsAllocation(rootDeviceIndex); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromGfxAllocToHost(*srcBufferAlloc), false); InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED; //check if we are dealing with SVM pointer here for which we already have an allocation @@ -99,7 +101,7 @@ cl_int CommandQueueHw::enqueueReadBuffer( } else { surfaces[1] = &hostPtrSurf; if (size != 0) { - bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true); + bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -126,7 +128,6 @@ cl_int CommandQueueHw::enqueueReadBuffer( } } - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo); if (nullptr == mapAllocation) { notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType())); } diff --git a/opencl/source/command_queue/enqueue_read_buffer_rect.h b/opencl/source/command_queue/enqueue_read_buffer_rect.h index f6921b2428..2d5b1df70f 100644 --- a/opencl/source/command_queue/enqueue_read_buffer_rect.h +++ b/opencl/source/command_queue/enqueue_read_buffer_rect.h @@ -37,6 +37,9 @@ cl_int CommandQueueHw::enqueueReadBufferRect( const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT; auto isMemTransferNeeded = true; + GraphicsAllocation *srcBufferAlloc = buffer->getGraphicsAllocation(getDevice().getRootDeviceIndex()); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromGfxAllocToHost(*srcBufferAlloc), false); + if (buffer->isMemObjZeroCopy()) { size_t bufferOffset; size_t hostOffset; @@ -63,7 +66,7 @@ cl_int 
CommandQueueHw::enqueueReadBufferRect( if (region[0] != 0 && region[1] != 0 && region[2] != 0) { - bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true); + bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -87,7 +90,6 @@ cl_int CommandQueueHw::enqueueReadBufferRect( dc.dstSlicePitch = hostSlicePitch; MultiDispatchInfo dispatchInfo(dc); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER_RECT, dispatchInfo); dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr); if (context->isProvidingPerformanceHints()) { diff --git a/opencl/source/command_queue/enqueue_read_image.h b/opencl/source/command_queue/enqueue_read_image.h index c33894f486..f443559a60 100644 --- a/opencl/source/command_queue/enqueue_read_image.h +++ b/opencl/source/command_queue/enqueue_read_image.h @@ -42,6 +42,10 @@ cl_int CommandQueueHw::enqueueReadImage( cl_event *event) { cl_command_type cmdType = CL_COMMAND_READ_IMAGE; + const bool validImages = blitEnqueueImageAllowed(origin, region, *srcImage); + GraphicsAllocation *srcImageAlloc = srcImage->getGraphicsAllocation(getDevice().getRootDeviceIndex()); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromGfxAllocToHost(*srcImageAlloc), validImages); + auto isMemTransferNeeded = true; if (srcImage->isMemObjZeroCopy()) { size_t hostOffset; @@ -72,7 +76,7 @@ cl_int CommandQueueHw::enqueueReadImage( if (region[0] != 0 && region[1] != 0 && region[2] != 0) { - bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true); + bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -99,7 +103,6 @@ cl_int CommandQueueHw::enqueueReadImage( auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer; MultiDispatchInfo 
dispatchInfo(dc); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_IMAGE, dispatchInfo); if (nullptr == mapAllocation) { notifyEnqueueReadImage(srcImage, static_cast(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType())); } diff --git a/opencl/source/command_queue/enqueue_svm.h b/opencl/source/command_queue/enqueue_svm.h index d7e934d401..68dce9e754 100644 --- a/opencl/source/command_queue/enqueue_svm.h +++ b/opencl/source/command_queue/enqueue_svm.h @@ -126,7 +126,7 @@ cl_int CommandQueueHw::enqueueSVMMap(cl_bool blockingMap, dc.unifiedMemoryArgsRequireMemSync = externalAppCall; MultiDispatchInfo dispatchInfo(dc); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MAP, TransferDirectionHelper::fromGfxAllocToHost(*gpuAllocation), false); dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr); if (event) { @@ -210,7 +210,7 @@ cl_int CommandQueueHw::enqueueSVMUnmap(void *svmPtr, dc.unifiedMemoryArgsRequireMemSync = externalAppCall; MultiDispatchInfo dispatchInfo(dc); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_UNMAP, TransferDirectionHelper::fromHostToGfxAlloc(*gpuAllocation), false); dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr); if (event) { @@ -328,10 +328,13 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, cl_command_type cmdType; if (copyType == SvmToHost) { + GraphicsAllocation *srcBufferAlloc = srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, 
TransferDirectionHelper::fromGfxAllocToHost(*srcBufferAlloc), false); + GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); HostPtrSurface dstHostPtrSurf(dstPtr, size); if (size != 0) { - bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true); + bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -343,16 +346,18 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, dispatchInfo.setBuiltinOpParams(operationParams); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo); notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType())); dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } else if (copyType == HostToSvm) { + GraphicsAllocation *dstBufferAlloc = dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, TransferDirectionHelper::fromHostToGfxAlloc(*dstBufferAlloc), false); + HostPtrSurface srcHostPtrSurf(const_cast(srcPtr), size); GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); cmdType = CL_COMMAND_WRITE_BUFFER; if (size != 0) { - bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false); + bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -364,10 +369,13 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, surfaces[1] = &srcHostPtrSurf; dispatchInfo.setBuiltinOpParams(operationParams); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo); 
dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } else if (copyType == SvmToSvm) { + GraphicsAllocation *srcBufferAlloc = srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); + GraphicsAllocation *dstBufferAlloc = dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, TransferDirectionHelper::fromGfxAllocToGfxAlloc(*srcBufferAlloc, *dstBufferAlloc), false); + GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)); setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), @@ -376,15 +384,15 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, surfaces[1] = &dstSvmSurf; dispatchInfo.setBuiltinOpParams(operationParams); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, dispatchInfo); dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } else { + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, TransferDirection::HostToHost, false); + HostPtrSurface srcHostPtrSurf(const_cast(srcPtr), size); HostPtrSurface dstHostPtrSurf(dstPtr, size); cmdType = CL_COMMAND_WRITE_BUFFER; if (size != 0) { - auto &csr = getGpgpuCommandStreamReceiver(); bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false); status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true); if (!status) { @@ -398,7 +406,6 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, surfaces[1] = &dstHostPtrSurf; dispatchInfo.setBuiltinOpParams(operationParams); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo); 
dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr); } if (event) { diff --git a/opencl/source/command_queue/enqueue_write_buffer.h b/opencl/source/command_queue/enqueue_write_buffer.h index d516ee302e..1333e7c7f0 100644 --- a/opencl/source/command_queue/enqueue_write_buffer.h +++ b/opencl/source/command_queue/enqueue_write_buffer.h @@ -38,6 +38,8 @@ cl_int CommandQueueHw::enqueueWriteBuffer( auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true; bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast(ptr), numEventsInWaitList, eventWaitList); + GraphicsAllocation *dstBufferAlloc = buffer->getGraphicsAllocation(rootDeviceIndex); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromHostToGfxAlloc(*dstBufferAlloc), false); InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED; //check if we are dealing with SVM pointer here for which we already have an allocation @@ -94,7 +96,7 @@ cl_int CommandQueueHw::enqueueWriteBuffer( } else { surfaces[1] = &hostPtrSurf; if (size != 0) { - bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false); + bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -113,7 +115,6 @@ cl_int CommandQueueHw::enqueueWriteBuffer( dc.transferAllocation = mapAllocation ? 
mapAllocation : hostPtrSurf.getAllocation(); MultiDispatchInfo dispatchInfo(dc); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo); dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr); if (context->isProvidingPerformanceHints()) { diff --git a/opencl/source/command_queue/enqueue_write_buffer_rect.h b/opencl/source/command_queue/enqueue_write_buffer_rect.h index af1577469b..621754e32f 100644 --- a/opencl/source/command_queue/enqueue_write_buffer_rect.h +++ b/opencl/source/command_queue/enqueue_write_buffer_rect.h @@ -36,6 +36,9 @@ cl_int CommandQueueHw::enqueueWriteBufferRect( const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER_RECT; auto isMemTransferNeeded = true; + GraphicsAllocation *dstBufferAlloc = buffer->getGraphicsAllocation(device->getRootDeviceIndex()); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromHostToGfxAlloc(*dstBufferAlloc), false); + if (buffer->isMemObjZeroCopy()) { size_t bufferOffset; size_t hostOffset; @@ -62,7 +65,7 @@ cl_int CommandQueueHw::enqueueWriteBufferRect( if (region[0] != 0 && region[1] != 0 && region[2] != 0) { - bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false); + bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -86,7 +89,6 @@ cl_int CommandQueueHw::enqueueWriteBufferRect( dc.dstSlicePitch = bufferSlicePitch; MultiDispatchInfo dispatchInfo(dc); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER_RECT, dispatchInfo); dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr); if (context->isProvidingPerformanceHints()) { diff --git a/opencl/source/command_queue/enqueue_write_image.h b/opencl/source/command_queue/enqueue_write_image.h index 
859d467ab0..517b077965 100644 --- a/opencl/source/command_queue/enqueue_write_image.h +++ b/opencl/source/command_queue/enqueue_write_image.h @@ -37,6 +37,10 @@ cl_int CommandQueueHw::enqueueWriteImage( auto cmdType = CL_COMMAND_WRITE_IMAGE; auto isMemTransferNeeded = true; + const bool validImages = blitEnqueueImageAllowed(origin, region, *dstImage); + GraphicsAllocation *dstImageAlloc = dstImage->getGraphicsAllocation(getDevice().getRootDeviceIndex()); + CommandStreamReceiver &csr = selectCsrForBuiltinOperation(cmdType, TransferDirectionHelper::fromHostToGfxAlloc(*dstImageAlloc), validImages); + if (dstImage->isMemObjZeroCopy()) { size_t hostOffset; Image::calculateHostPtrOffset(&hostOffset, origin, region, inputRowPitch, inputSlicePitch, dstImage->getImageDesc().image_type, dstImage->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes); @@ -65,7 +69,7 @@ cl_int CommandQueueHw::enqueueWriteImage( if (region[0] != 0 && region[1] != 0 && region[2] != 0) { - bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false); + bool status = csr.createAllocationForHostSurface(hostPtrSurf, false); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -92,7 +96,6 @@ cl_int CommandQueueHw::enqueueWriteImage( auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d; MultiDispatchInfo dispatchInfo(dc); - CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_IMAGE, dispatchInfo); dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr); if (context->isProvidingPerformanceHints()) { diff --git a/opencl/source/command_queue/transfer_direction.h b/opencl/source/command_queue/transfer_direction.h new file mode 100644 index 0000000000..97904a50e9 --- /dev/null +++ b/opencl/source/command_queue/transfer_direction.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include 
"shared/source/memory_manager/graphics_allocation.h" + +namespace NEO { +enum class TransferDirection { + HostToHost, + HostToLocal, + LocalToHost, + LocalToLocal, +}; + +struct TransferDirectionHelper { + static inline TransferDirection fromGfxAllocToHost(const GraphicsAllocation &src) { + const bool srcLocal = src.isAllocatedInLocalMemoryPool(); + return create(srcLocal, false); + } + + static inline TransferDirection fromHostToGfxAlloc(const GraphicsAllocation &dst) { + const bool dstLocal = dst.isAllocatedInLocalMemoryPool(); + return create(false, dstLocal); + } + + static inline TransferDirection fromGfxAllocToGfxAlloc(const GraphicsAllocation &src, const GraphicsAllocation &dst) { + const bool srcLocal = src.isAllocatedInLocalMemoryPool(); + const bool dstLocal = dst.isAllocatedInLocalMemoryPool(); + return create(srcLocal, dstLocal); + } + + static inline TransferDirection create(bool srcLocal, bool dstLocal) { + if (srcLocal) { + if (dstLocal) { + return TransferDirection::LocalToLocal; + } else { + return TransferDirection::LocalToHost; + } + } else { + if (dstLocal) { + return TransferDirection::HostToLocal; + } else { + return TransferDirection::HostToHost; + } + } + } +}; +} // namespace NEO diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index d2ee6e1f65..c9235dbdb1 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -283,7 +283,7 @@ TEST_P(CommandQueueWithBlitOperationsTests, givenDeviceNotSupportingBlitOperatio BuiltinOpParams params = createParams(cmdType); MultiDispatchInfo dispatchInfo{params}; - EXPECT_EQ(defaultCsr, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo)); + EXPECT_EQ(defaultCsr, &cmdQ.selectCsrForBuiltinOperation(cmdType, TransferDirection::LocalToHost, true)); } HWTEST_P(CommandQueueWithBlitOperationsTests, 
givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsCreatedThenBcsIsTakenFromFirstSubDevice) { @@ -304,13 +304,10 @@ HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportin MockCommandQueue cmdQ(nullptr, device.get(), 0, false); auto cmdType = GetParam(); - BuiltinOpParams params = createParams(cmdType); - MultiDispatchInfo dispatchInfo{params}; - EXPECT_NE(nullptr, cmdQ.getBcsCommandStreamReceiver()); - EXPECT_EQ(bcsEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver()); - EXPECT_EQ(bcsEngine.commandStreamReceiver, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo)); - EXPECT_EQ(bcsEngine.osContext, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo).getOsContext()); + auto &csr = cmdQ.selectCsrForBuiltinOperation(cmdType, TransferDirection::LocalToHost, true); + EXPECT_EQ(bcsEngine.commandStreamReceiver, &csr); + EXPECT_EQ(bcsEngine.osContext, &csr.getOsContext()); } INSTANTIATE_TEST_CASE_P(uint32_t, @@ -1206,10 +1203,10 @@ TEST(CommandQueue, givenCopyOnlyQueueWhenCallingBlitEnqueueAllowedThenReturnTrue queue.isCopyOnly = false; EXPECT_EQ(queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(), - queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {})); + queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, false)); queue.isCopyOnly = true; - EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {})); + EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, false)); } struct CommandQueueBuiltinTest : BuiltinOpParamsFixture, ::testing::Test {}; @@ -1227,59 +1224,20 @@ TEST_F(CommandQueueBuiltinTest, givenClCommandWhenCallingBlitEnqueueAllowedThenR bool supported = queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {})); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER, {})); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER, {})); - 
EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER_RECT, {})); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER_RECT, {})); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT, {})); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY, {})); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE, createParams(CL_COMMAND_READ_IMAGE))); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE, createParams(CL_COMMAND_WRITE_IMAGE))); - EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, createParams(CL_COMMAND_COPY_IMAGE))); - EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE_TO_BUFFER, {})); -} - -TEST_F(CommandQueueBuiltinTest, givenCopyImageCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) { - DebugManagerStateRestore restore{}; - DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1); - - MockContext context{}; - - MockCommandQueue queue(&context, context.getDevice(0), 0, false); - if (!queue.bcsEngine) { - queue.bcsEngine = &context.getDevice(0)->getDefaultEngine(); - } - if (!queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { - GTEST_SKIP(); - } - - auto builtinOpParams = createParams(CL_COMMAND_COPY_IMAGE); - EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams)); - - builtinOpParams.srcOffset[0] = 0x9999; - EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams)); - - builtinOpParams.dstOffset[0] = 0x9999; - EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams)); - - builtinOpParams.srcOffset = correctOrigin; - EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams)); -} - -TEST(CommandQueue, givenRegularClCommandWhenCallingBlitEnqueuePreferredThenReturnCorrectValue) { - MockContext context{}; - MockCommandQueue queue{context}; - BuiltinOpParams builtinOpParams{}; - - 
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_BUFFER, builtinOpParams)); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_BUFFER, builtinOpParams)); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_BUFFER_RECT, builtinOpParams)); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_BUFFER_RECT, builtinOpParams)); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER_RECT, builtinOpParams)); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_READ_IMAGE, builtinOpParams)); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_WRITE_IMAGE, builtinOpParams)); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_IMAGE, builtinOpParams)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, false)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER, false)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER, false)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER_RECT, false)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER_RECT, false)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT, false)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY, false)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE, true)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE, true)); + EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, true)); + EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE, false)); + EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE, false)); + EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, false)); + EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE_TO_BUFFER, false)); } TEST(CommandQueue, givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) { @@ -1287,103 +1245,34 @@ TEST(CommandQueue, 
givenLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePrefe DebugManagerStateRestore restore{}; MockContext context{}; MockCommandQueue queue{context}; - BuiltinOpParams builtinOpParams{}; - MockGraphicsAllocation srcGraphicsAllocation{}; - MockGraphicsAllocation dstGraphicsAllocation{}; - MockBuffer srcMemObj{srcGraphicsAllocation}; - MockBuffer dstMemObj{dstGraphicsAllocation}; - builtinOpParams.srcMemObj = &srcMemObj; - builtinOpParams.dstMemObj = &dstMemObj; - srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; - dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); - EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(TransferDirection::LocalToLocal)); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); - EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + EXPECT_FALSE(queue.blitEnqueuePreferred(TransferDirection::LocalToLocal)); DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::LocalToLocal)); } TEST(CommandQueue, givenNotLocalToLocalCopyBufferCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) { DebugManagerStateRestore restore{}; MockContext context{}; MockCommandQueue queue{context}; - BuiltinOpParams builtinOpParams{}; - MockGraphicsAllocation srcGraphicsAllocation{}; - MockGraphicsAllocation dstGraphicsAllocation{}; - MockBuffer srcMemObj{srcGraphicsAllocation}; - MockBuffer dstMemObj{dstGraphicsAllocation}; - builtinOpParams.srcMemObj = &srcMemObj; - builtinOpParams.dstMemObj = &dstMemObj; - srcGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; - dstGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; 
DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToLocal)); + EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::LocalToHost)); + EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToHost)); + DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); + EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToLocal)); + EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::LocalToHost)); + EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToHost)); + DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); - - srcGraphicsAllocation.memoryPool = MemoryPool::LocalMemory; - dstGraphicsAllocation.memoryPool = MemoryPool::System4KBPages; - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_COPY_BUFFER, builtinOpParams)); -} - -TEST(CommandQueue, givenLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnValueBasedOnDebugFlagAndHwPreference) { - const bool preferBlitterHw = ClHwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily).preferBlitterForLocalToLocalTransfers(); - DebugManagerStateRestore restore{}; - MockContext context{}; - MockCommandQueue queue{context}; - BuiltinOpParams builtinOpParams{}; - MockGraphicsAllocation srcSvmAlloc{}; - MockGraphicsAllocation dstSvmAlloc{}; - builtinOpParams.srcSvmAlloc = 
&srcSvmAlloc; - builtinOpParams.dstSvmAlloc = &dstSvmAlloc; - - srcSvmAlloc.memoryPool = MemoryPool::LocalMemory; - dstSvmAlloc.memoryPool = MemoryPool::LocalMemory; - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); - EXPECT_EQ(preferBlitterHw, queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); - EXPECT_FALSE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); -} - -TEST(CommandQueue, givenNotLocalToLocalSvmCopyCommandWhenCallingBlitEnqueuePreferredThenReturnTrueRegardlessOfDebugFlag) { - DebugManagerStateRestore restore{}; - MockContext context{}; - MockCommandQueue queue{context}; - BuiltinOpParams builtinOpParams{}; - MockGraphicsAllocation srcSvmAlloc{}; - MockGraphicsAllocation dstSvmAlloc{}; - builtinOpParams.srcSvmAlloc = &srcSvmAlloc; - builtinOpParams.dstSvmAlloc = &dstSvmAlloc; - - srcSvmAlloc.memoryPool = MemoryPool::System4KBPages; - dstSvmAlloc.memoryPool = MemoryPool::LocalMemory; - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); - - srcSvmAlloc.memoryPool = MemoryPool::LocalMemory; - dstSvmAlloc.memoryPool = MemoryPool::System4KBPages; - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(-1); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(0); - 
EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); - DebugManager.flags.PreferCopyEngineForCopyBufferToBuffer.set(1); - EXPECT_TRUE(queue.blitEnqueuePreferred(CL_COMMAND_SVM_MEMCPY, builtinOpParams)); + EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToLocal)); + EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::LocalToHost)); + EXPECT_TRUE(queue.blitEnqueuePreferred(TransferDirection::HostToHost)); } TEST(CommandQueue, givenCopySizeAndOffsetWhenCallingBlitEnqueueImageAllowedThenReturnCorrectValue) { @@ -1987,3 +1876,44 @@ TEST_F(MultiTileFixture, givenNotDefaultContextWithRootDeviceAndTileIdMaskWhenQu EXPECT_EQ(rootCsr->isMultiOsContextCapable(), queue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable()); EXPECT_EQ(rootCsr, queue.gpgpuEngine->commandStreamReceiver); } + +TEST(TransferDirectionHelperTest, givenAllocationWhenGfxAllocToHostCalledThenReturnCorrectResult) { + MockGraphicsAllocation srcAlloc{}; + + srcAlloc.memoryPool = MemoryPool::System4KBPages; + EXPECT_EQ(TransferDirection::HostToHost, TransferDirectionHelper::fromGfxAllocToHost(srcAlloc)); + + srcAlloc.memoryPool = MemoryPool::LocalMemory; + EXPECT_EQ(TransferDirection::LocalToHost, TransferDirectionHelper::fromGfxAllocToHost(srcAlloc)); +} + +TEST(TransferDirectionHelperTest, givenAllocationWhenHostToGfxAllocCalledThenReturnCorrectResult) { + MockGraphicsAllocation dstAlloc{}; + + dstAlloc.memoryPool = MemoryPool::System4KBPages; + EXPECT_EQ(TransferDirection::HostToHost, TransferDirectionHelper::fromHostToGfxAlloc(dstAlloc)); + + dstAlloc.memoryPool = MemoryPool::LocalMemory; + EXPECT_EQ(TransferDirection::HostToLocal, TransferDirectionHelper::fromHostToGfxAlloc(dstAlloc)); +} + +TEST(TransferDirectionHelperTest, givenAllocationWhenGfxAllocToGfxAllocCalledThenReturnCorrectResult) { + MockGraphicsAllocation srcAlloc{}; + MockGraphicsAllocation dstAlloc{}; + + srcAlloc.memoryPool = MemoryPool::System4KBPages; + dstAlloc.memoryPool = 
MemoryPool::System4KBPages; + EXPECT_EQ(TransferDirection::HostToHost, TransferDirectionHelper::fromGfxAllocToGfxAlloc(srcAlloc, dstAlloc)); + + srcAlloc.memoryPool = MemoryPool::LocalMemory; + dstAlloc.memoryPool = MemoryPool::System4KBPages; + EXPECT_EQ(TransferDirection::LocalToHost, TransferDirectionHelper::fromGfxAllocToGfxAlloc(srcAlloc, dstAlloc)); + + srcAlloc.memoryPool = MemoryPool::System4KBPages; + dstAlloc.memoryPool = MemoryPool::LocalMemory; + EXPECT_EQ(TransferDirection::HostToLocal, TransferDirectionHelper::fromGfxAllocToGfxAlloc(srcAlloc, dstAlloc)); + + srcAlloc.memoryPool = MemoryPool::LocalMemory; + dstAlloc.memoryPool = MemoryPool::LocalMemory; + EXPECT_EQ(TransferDirection::LocalToLocal, TransferDirectionHelper::fromGfxAllocToGfxAlloc(srcAlloc, dstAlloc)); +} diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp index 2ce477ea24..3e41c8d66f 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp @@ -212,6 +212,7 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenNumberOfPipelineSelectsIsOne) HWTEST_F(EnqueueCopyImageTest, givenDeviceWithBlitterSupportWhenEnqueueCopyImageThenBlitEnqueueImageAllowedReturnsCorrectResult) { auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); + hwInfo->capabilityTable.blitterOperationsSupported = true; REQUIRE_BLITTER_OR_SKIP(hwInfo); DebugManagerStateRestore restorer; @@ -219,7 +220,6 @@ HWTEST_F(EnqueueCopyImageTest, givenDeviceWithBlitterSupportWhenEnqueueCopyImage DebugManager.flags.EnableBlitterForEnqueueOperations.set(1); DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1); - hwInfo->capabilityTable.blitterOperationsSupported = true; size_t srcOrigin[] = {0, 0, 0}; size_t dstOrigin[] = {0, 0, 0}; diff --git 
a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index 11fc2782d3..a0f426b727 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -875,12 +875,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); - EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; - auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead(); + auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; @@ -891,7 +891,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW EXPECT_TRUE(tempAllocationFound); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCallWait) { @@ -920,12 +920,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa cmdQ->enqueueReadBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, 
nullptr); - EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); - EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; - auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead(); + auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; @@ -938,7 +938,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa cmdQ->enqueueReadBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); - EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenCallWait) { @@ -967,12 +967,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC cmdQ->enqueueWriteBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); - EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); - EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + 
EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; - auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead(); + auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; @@ -985,7 +985,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC cmdQ->enqueueWriteBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); - EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWait) { @@ -1008,12 +1008,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa void *hostPtr = reinterpret_cast(0x12340000); cmdQ->enqueueReadBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); - EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); bool tempAllocationFound = false; - auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead(); + auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); while (tempAllocation) { if (tempAllocation->getUnderlyingBuffer() == hostPtr) { tempAllocationFound = true; @@ -1024,7 +1024,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa 
EXPECT_TRUE(tempAllocationFound); cmdQ->enqueueReadBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCalledWhenUsingBcsThenCallWait) { @@ -1046,16 +1046,16 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCa cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); - auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead(); + auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); EXPECT_EQ(0u, tempAlloc->countSuccessors()); EXPECT_EQ(pDstSVM.get(), reinterpret_cast(tempAlloc->getGpuAddress())); cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr); - EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM); } @@ -1078,17 +1078,17 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenSrcHostPtrBlockingEnqueueSVMMemcpyAndEnq auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields()); cmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr); - EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); - EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + 
EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); - auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead(); + auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); EXPECT_EQ(0u, tempAlloc->countSuccessors()); EXPECT_EQ(pSrcSVM.get(), reinterpret_cast(tempAlloc->getGpuAddress())); cmdQ->enqueueSVMMemcpy(true, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr); - EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pDstSVM); } @@ -1112,17 +1112,17 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSV cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); - EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); - auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead(); + auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead(); EXPECT_EQ(1u, tempAlloc->countSuccessors()); EXPECT_EQ(pSrcSVM.get(), reinterpret_cast(tempAlloc->getGpuAddress())); EXPECT_EQ(pDstSVM.get(), reinterpret_cast(tempAlloc->next->getGpuAddress())); cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr); - EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled); + EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyWhenEnqueueSVMMemcpyThenSvmMemcpyCommandIsCalledAndBcs) {