From a03b5f8a95599bab76db70beaf1c0f0f7574efea Mon Sep 17 00:00:00 2001
From: Maciej Dziuban <maciej.dziuban@intel.com>
Date: Thu, 26 Aug 2021 11:53:25 +0000
Subject: [PATCH] Create single point for selecting engine for builtin ops

- selectCsrForBuiltinOperation selects the proper CSR
- The selected CSR is passed to dispatchBcsOrGpgpuEnqueue
- Notifications such as notifyEnqueueReadBuffer are now made on the correct engine
- Temporary allocations for host pointers are now created on the GPGPU CSR

Related-To: NEO-6057
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
---
 opencl/source/command_queue/command_queue.cpp |  37 ++++--
 opencl/source/command_queue/command_queue.h   |   6 +-
 .../source/command_queue/command_queue_hw.h   |   7 +-
 opencl/source/command_queue/enqueue_common.h  |  24 ++--
 .../command_queue/enqueue_copy_buffer.h       |   5 +-
 .../command_queue/enqueue_copy_buffer_rect.h  |   4 +-
 .../source/command_queue/enqueue_copy_image.h |   6 +-
 .../command_queue/enqueue_read_buffer.h       |  18 +--
 .../command_queue/enqueue_read_buffer_rect.h  |   7 +-
 .../source/command_queue/enqueue_read_image.h |  14 +--
 opencl/source/command_queue/enqueue_svm.h     |  36 +++---
 .../command_queue/enqueue_write_buffer.h      |   8 +-
 .../command_queue/enqueue_write_buffer_rect.h |   7 +-
 .../command_queue/enqueue_write_image.h       |   7 +-
 opencl/source/helpers/properties_helper.cpp   |   4 +-
 opencl/source/kernel/kernel.cpp               |   2 +-
 .../command_queue/blit_enqueue_tests.cpp      |   1 +
 .../command_queue/command_queue_hw_tests.cpp  |   2 +-
 .../command_queue/command_queue_tests.cpp     | 113 ++++++++++++++----
 .../enqueue_copy_image_tests.cpp              |   6 +-
 .../enqueue_read_image_tests.cpp              |   6 +-
 .../enqueue_svm_mem_copy_tests.cpp            |   5 -
 .../enqueue_write_image_tests.cpp             |   6 +-
 ...and_stream_receiver_flush_task_4_tests.cpp |  26 ++--
 .../libult/ult_command_stream_receiver.h      |   7 ++
 .../unit_test/mem_obj/buffer_bcs_tests.cpp    |  87 +++++++-------
 .../test/unit_test/mocks/mock_command_queue.h |   4 +-
 shared/source/helpers/vec.h                   |   2 +-
 28 files changed, 272 insertions(+), 185 deletions(-)

diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp
index 5cb01818e7..bff1a8d698 100644
--- a/opencl/source/command_queue/command_queue.cpp
+++ b/opencl/source/command_queue/command_queue.cpp
@@ -144,13 +144,17 @@ CommandStreamReceiver *CommandQueue::getBcsForAuxTranslation() const {
     return nullptr;
 }
 
-CommandStreamReceiver &CommandQueue::getCommandStreamReceiver(bool blitAllowed) const {
-    if (blitAllowed) {
-        auto csr = getBcsCommandStreamReceiver();
-        UNRECOVERABLE_IF(!csr);
-        return *csr;
+CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const {
+    const bool blitAllowed = blitEnqueueAllowed(cmdType, dispatchInfo.peekBuiltinOpParams());
+    const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams());
+    const bool blitRequired = isCopyOnly;
+    const bool blit = blitAllowed && (blitPreferred || blitRequired);
+
+    if (blit) {
+        return *bcsEngine->commandStreamReceiver;
+    } else {
+        return getGpgpuCommandStreamReceiver();
     }
-    return getGpgpuCommandStreamReceiver();
 }
 
 Device &CommandQueue::getDevice() const noexcept {
@@ -721,12 +725,17 @@ bool CommandQueue::queueDependenciesClearRequired() const {
     return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get();
 }
 
-bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
-    auto blitterSupported = bcsEngine != nullptr;
+bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const {
+    if (bcsEngine == nullptr) {
+        return false;
+    }
 
-    bool blitEnqueueAllowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly;
+    bool allowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly;
     if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) {
-        blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
+        allowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
+    }
+    if (!allowed) {
+        return false;
     }
 
     switch (cmdType) {
@@ -737,10 +746,14 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
     case CL_COMMAND_WRITE_BUFFER_RECT:
     case CL_COMMAND_COPY_BUFFER_RECT:
     case CL_COMMAND_SVM_MEMCPY:
+        return true;
     case CL_COMMAND_READ_IMAGE:
+        return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast<Image *>(params.srcMemObj));
     case CL_COMMAND_WRITE_IMAGE:
+        return blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast<Image *>(params.dstMemObj));
     case CL_COMMAND_COPY_IMAGE:
-        return blitterSupported && blitEnqueueAllowed;
+        return blitEnqueueImageAllowed(&params.srcOffset[0], &params.size[0], *static_cast<Image *>(params.srcMemObj)) &&
+               blitEnqueueImageAllowed(&params.dstOffset[0], &params.size[0], *static_cast<Image *>(params.dstMemObj));
     default:
         return false;
     }
@@ -771,7 +784,7 @@ bool CommandQueue::blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOp
     return true;
 }
 
-bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) {
+bool CommandQueue::blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const {
     const auto &hwInfo = device->getHardwareInfo();
     const auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
     auto blitEnqueueImageAllowed = hwHelper.isBlitterForImagesSupported(hwInfo);
diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h
index 59e86ab098..da79c5cef3 100644
--- a/opencl/source/command_queue/command_queue.h
+++ b/opencl/source/command_queue/command_queue.h
@@ -224,7 +224,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
     MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const;
     CommandStreamReceiver *getBcsCommandStreamReceiver() const;
     CommandStreamReceiver *getBcsForAuxTranslation() const;
-    MOCKABLE_VIRTUAL CommandStreamReceiver &getCommandStreamReceiver(bool blitAllowed) const;
+    MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(cl_command_type cmdType, const MultiDispatchInfo &dispatchInfo) const;
     Device &getDevice() const noexcept;
     ClDevice &getClDevice() const { return *device; }
     Context &getContext() const { return *context; }
@@ -353,9 +353,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
                               cl_uint numEventsInWaitList, const cl_event *eventWaitList);
     void providePerformanceHint(TransferProperties &transferProperties);
     bool queueDependenciesClearRequired() const;
-    bool blitEnqueueAllowed(cl_command_type cmdType) const;
+    bool blitEnqueueAllowed(cl_command_type cmdType, const BuiltinOpParams &params) const;
     bool blitEnqueuePreferred(cl_command_type cmdType, const BuiltinOpParams &builtinOpParams) const;
-    MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image);
+    MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
     void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
     virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;
     void waitForLatestTaskCount();
diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h
index b57384a963..7ac442a11e 100644
--- a/opencl/source/command_queue/command_queue_hw.h
+++ b/opencl/source/command_queue/command_queue_hw.h
@@ -365,10 +365,13 @@ class CommandQueueHw : public CommandQueue {
                         cl_event *event);
 
     template <uint32_t cmdType, size_t surfaceCount>
-    void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed);
+    void dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount],
+                                   EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event,
+                                   bool blocking, CommandStreamReceiver &csr);
 
     template <uint32_t cmdType>
-    void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking);
+    void enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList,
+                     cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr);
 
     template <uint32_t commandType>
     CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h
index 5a7f415b84..4d668008b0 100644
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@@ -1159,9 +1159,9 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
 
 template <typename GfxFamily>
 template <uint32_t cmdType>
-void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking) {
+void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList,
+                                            const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) {
     auto commandStreamRecieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
-    auto bcsCsr = getBcsCommandStreamReceiver();
 
     EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
     EventBuilder eventBuilder;
@@ -1187,8 +1187,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
     BlitPropertiesContainer blitPropertiesContainer;
     CsrDependencies csrDeps;
 
-    eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *bcsCsr, CsrDependencies::DependenciesType::All);
-    auto allocator = bcsCsr->getTimestampPacketAllocator();
+    eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All);
+    auto allocator = bcsCsr.getTimestampPacketAllocator();
 
     if (isCacheFlushForBcsRequired() && isGpgpuSubmissionForBcsRequired(blockQueue)) {
         timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
@@ -1198,7 +1198,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
         timestampPacketDependencies.barrierNodes.add(allocator->getTag());
     }
 
-    obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, *bcsCsr);
+    obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
     csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
 
     LinearStream *gpgpuCommandStream = {};
@@ -1212,7 +1212,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
         eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
     }
 
-    blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(*bcsCsr, multiDispatchInfo, timestampPacketDependencies,
+    blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
                                                                     eventsRequest, gpgpuCommandStream, cmdType, blockQueue));
 
     CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0};
@@ -1222,7 +1222,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
     if (!blockQueue) {
         completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking,
                                                       enqueueProperties, timestampPacketDependencies, eventsRequest,
-                                                      eventBuilder, taskLevel, csrDeps, bcsCsr);
+                                                      eventBuilder, taskLevel, csrDeps, &bcsCsr);
 
         if (eventBuilder.getEvent()) {
             eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
@@ -1248,13 +1248,11 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
 
 template <typename GfxFamily>
 template <uint32_t cmdType, size_t surfaceCount>
-void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, bool blitAllowed) {
-    const bool blitPreferred = blitEnqueuePreferred(cmdType, dispatchInfo.peekBuiltinOpParams());
-    const bool blitRequired = isCopyOnly;
-    const bool blit = blitAllowed && (blitPreferred || blitRequired);
-
+void CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation,
+                                                          cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr) {
+    const bool blit = EngineHelpers::isBcs(csr.getOsContext().getEngineType());
     if (blit) {
-        enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking);
+        enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
     } else {
         auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(builtInOperation,
                                                                                 this->getClDevice());
diff --git a/opencl/source/command_queue/enqueue_copy_buffer.h b/opencl/source/command_queue/enqueue_copy_buffer.h
index a42424da1e..a152605978 100644
--- a/opencl/source/command_queue/enqueue_copy_buffer.h
+++ b/opencl/source/command_queue/enqueue_copy_buffer.h
@@ -47,8 +47,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
     MemObjSurface s1(srcBuffer);
     MemObjSurface s2(dstBuffer);
     Surface *surfaces[] = {&s1, &s2};
-    auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER);
-    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
+
+    CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER, dispatchInfo);
+    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER>(dispatchInfo, surfaces, eBuiltInOpsType, numEventsInWaitList, eventWaitList, event, false, csr);
 
     return CL_SUCCESS;
 }
diff --git a/opencl/source/command_queue/enqueue_copy_buffer_rect.h b/opencl/source/command_queue/enqueue_copy_buffer_rect.h
index 794c237c98..45f4e2cf6f 100644
--- a/opencl/source/command_queue/enqueue_copy_buffer_rect.h
+++ b/opencl/source/command_queue/enqueue_copy_buffer_rect.h
@@ -54,8 +54,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
     dc.dstSlicePitch = dstSlicePitch;
 
     MultiDispatchInfo dispatchInfo(dc);
-    auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT);
-    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
+    CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_BUFFER_RECT, dispatchInfo);
+    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, false, csr);
 
     return CL_SUCCESS;
 }
diff --git a/opencl/source/command_queue/enqueue_copy_image.h b/opencl/source/command_queue/enqueue_copy_image.h
index e99aaecf29..e06014b988 100644
--- a/opencl/source/command_queue/enqueue_copy_image.h
+++ b/opencl/source/command_queue/enqueue_copy_image.h
@@ -50,10 +50,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyImage(
     }
 
     MultiDispatchInfo dispatchInfo(dc);
-    cl_command_type cmdType = CL_COMMAND_COPY_IMAGE;
-    auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(srcOrigin, region, *srcImage) && blitEnqueueImageAllowed(dstOrigin, region, *dstImage);
-
-    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
+    CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_COPY_IMAGE, dispatchInfo);
+    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_COPY_IMAGE>(dispatchInfo, surfaces, EBuiltInOps::CopyImageToImage3d, numEventsInWaitList, eventWaitList, event, false, csr);
 
     return CL_SUCCESS;
 }
diff --git a/opencl/source/command_queue/enqueue_read_buffer.h b/opencl/source/command_queue/enqueue_read_buffer.h
index de13138549..f440be0e03 100644
--- a/opencl/source/command_queue/enqueue_read_buffer.h
+++ b/opencl/source/command_queue/enqueue_read_buffer.h
@@ -36,12 +36,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
     cl_event *event) {
 
     const cl_command_type cmdType = CL_COMMAND_READ_BUFFER;
-    auto blitAllowed = blitEnqueueAllowed(cmdType);
-    auto &csr = getCommandStreamReceiver(blitAllowed);
-
-    if (nullptr == mapAllocation) {
-        notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
-    }
 
     auto rootDeviceIndex = getDevice().getRootDeviceIndex();
     bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
@@ -67,6 +61,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
     }
 
     if (isCpuCopyAllowed) {
+        if (nullptr == mapAllocation) {
+            notifyEnqueueReadBuffer(buffer, !!blockingRead, false);
+        }
         if (isMemTransferNeeded) {
             return enqueueReadWriteBufferOnCpuWithMemoryTransfer(cmdType, buffer, offset, size, ptr,
                                                                  numEventsInWaitList, eventWaitList, event);
@@ -102,7 +99,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
     } else {
         surfaces[1] = &hostPtrSurf;
         if (size != 0) {
-            bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
+            bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
             if (!status) {
                 return CL_OUT_OF_RESOURCES;
             }
@@ -128,7 +125,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
             context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_ENQUEUE_READ_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, ptr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
         }
     }
-    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed);
+
+    CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
+    if (nullptr == mapAllocation) {
+        notifyEnqueueReadBuffer(buffer, !!blockingRead, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
+    }
+    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
 
     return CL_SUCCESS;
 }
diff --git a/opencl/source/command_queue/enqueue_read_buffer_rect.h b/opencl/source/command_queue/enqueue_read_buffer_rect.h
index 82bf4e4bc8..f6921b2428 100644
--- a/opencl/source/command_queue/enqueue_read_buffer_rect.h
+++ b/opencl/source/command_queue/enqueue_read_buffer_rect.h
@@ -59,13 +59,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
     MemObjSurface bufferSurf(buffer);
     HostPtrSurface hostPtrSurf(dstPtr, hostPtrSize);
     Surface *surfaces[] = {&bufferSurf, &hostPtrSurf};
-    auto blitAllowed = blitEnqueueAllowed(cmdType);
 
     if (region[0] != 0 &&
         region[1] != 0 &&
         region[2] != 0) {
-        auto &csr = getCommandStreamReceiver(blitAllowed);
-        bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
+        bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
         if (!status) {
             return CL_OUT_OF_RESOURCES;
         }
@@ -89,7 +87,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
     dc.dstSlicePitch = hostSlicePitch;
 
     MultiDispatchInfo dispatchInfo(dc);
-    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, blitAllowed);
+    CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER_RECT, dispatchInfo);
+    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead, csr);
 
     if (context->isProvidingPerformanceHints()) {
         context->providePerformanceHintForMemoryTransfer(CL_COMMAND_READ_BUFFER_RECT, true, static_cast<cl_mem>(buffer), ptr);
diff --git a/opencl/source/command_queue/enqueue_read_image.h b/opencl/source/command_queue/enqueue_read_image.h
index d57b2697d9..c33894f486 100644
--- a/opencl/source/command_queue/enqueue_read_image.h
+++ b/opencl/source/command_queue/enqueue_read_image.h
@@ -41,12 +41,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
     const cl_event *eventWaitList,
     cl_event *event) {
     cl_command_type cmdType = CL_COMMAND_READ_IMAGE;
-    auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *srcImage);
-    auto &csr = getCommandStreamReceiver(blitAllowed);
-
-    if (nullptr == mapAllocation) {
-        notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
-    }
 
     auto isMemTransferNeeded = true;
     if (srcImage->isMemObjZeroCopy()) {
@@ -78,7 +72,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
         if (region[0] != 0 &&
             region[1] != 0 &&
             region[2] != 0) {
-            bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
+            bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
             if (!status) {
                 return CL_OUT_OF_RESOURCES;
             }
@@ -105,7 +99,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
     auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer;
     MultiDispatchInfo dispatchInfo(dc);
 
-    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, blitAllowed);
+    CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_IMAGE, dispatchInfo);
+    if (nullptr == mapAllocation) {
+        notifyEnqueueReadImage(srcImage, static_cast<bool>(blockingRead), EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
+    }
+    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingRead == CL_TRUE, csr);
 
     if (context->isProvidingPerformanceHints()) {
         if (!isL3Capable(ptr, hostPtrSize)) {
diff --git a/opencl/source/command_queue/enqueue_svm.h b/opencl/source/command_queue/enqueue_svm.h
index 2ad4f3df8a..d7e934d401 100644
--- a/opencl/source/command_queue/enqueue_svm.h
+++ b/opencl/source/command_queue/enqueue_svm.h
@@ -126,8 +126,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMap(cl_bool blockingMap,
         dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
 
         MultiDispatchInfo dispatchInfo(dc);
-        auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER);
-        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, blitAllowed);
+        CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
+        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, blocking, csr);
 
         if (event) {
             castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_MAP);
@@ -210,8 +210,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMUnmap(void *svmPtr,
         dc.unifiedMemoryArgsRequireMemSync = externalAppCall;
 
         MultiDispatchInfo dispatchInfo(dc);
-        auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_READ_BUFFER);
-        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, blitAllowed);
+        CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
+        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, EBuiltInOps::CopyBufferToBuffer, numEventsInWaitList, eventWaitList, event, false, csr);
 
         if (event) {
             castToObjectOrAbort<Event>(*event)->setCmdType(CL_COMMAND_SVM_UNMAP);
@@ -330,32 +330,29 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
     if (copyType == SvmToHost) {
         GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
         HostPtrSurface dstHostPtrSurf(dstPtr, size);
-        cmdType = CL_COMMAND_READ_BUFFER;
-        auto blitAllowed = blitEnqueueAllowed(cmdType);
         if (size != 0) {
-            auto &csr = getCommandStreamReceiver(blitAllowed);
-            bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
+            bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true);
             if (!status) {
                 return CL_OUT_OF_RESOURCES;
             }
             dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
-            notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
         }
         setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), dstPtr, dstHostPtrSurf.getAllocation());
         surfaces[0] = &srcSvmSurf;
         surfaces[1] = &dstHostPtrSurf;
 
         dispatchInfo.setBuiltinOpParams(operationParams);
-        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
+
+        CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_READ_BUFFER, dispatchInfo);
+        notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
+        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
 
     } else if (copyType == HostToSvm) {
         HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
         GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
         cmdType = CL_COMMAND_WRITE_BUFFER;
-        auto blitAllowed = blitEnqueueAllowed(cmdType);
         if (size != 0) {
-            auto &csr = getCommandStreamReceiver(blitAllowed);
-            bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
+            bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false);
             if (!status) {
                 return CL_OUT_OF_RESOURCES;
             }
@@ -367,7 +364,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
         surfaces[1] = &srcHostPtrSurf;
 
         dispatchInfo.setBuiltinOpParams(operationParams);
-        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
+        CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
+        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
 
     } else if (copyType == SvmToSvm) {
         GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
@@ -378,16 +376,15 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
         surfaces[1] = &dstSvmSurf;
 
         dispatchInfo.setBuiltinOpParams(operationParams);
-        auto blitAllowed = blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY);
-        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
+        CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_SVM_MEMCPY, dispatchInfo);
+        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_SVM_MEMCPY>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
 
     } else {
         HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
         HostPtrSurface dstHostPtrSurf(dstPtr, size);
         cmdType = CL_COMMAND_WRITE_BUFFER;
-        auto blitAllowed = blitEnqueueAllowed(cmdType);
         if (size != 0) {
-            auto &csr = getCommandStreamReceiver(blitAllowed);
+            auto &csr = getGpgpuCommandStreamReceiver();
             bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
             status &= csr.createAllocationForHostSurface(dstHostPtrSurf, true);
             if (!status) {
@@ -401,7 +398,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
         surfaces[1] = &dstHostPtrSurf;
 
         dispatchInfo.setBuiltinOpParams(operationParams);
-        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, blitAllowed);
+        CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
+        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
     }
     if (event) {
         auto pEvent = castToObjectOrAbort<Event>(*event);
diff --git a/opencl/source/command_queue/enqueue_write_buffer.h b/opencl/source/command_queue/enqueue_write_buffer.h
index 44595fdf73..d516ee302e 100644
--- a/opencl/source/command_queue/enqueue_write_buffer.h
+++ b/opencl/source/command_queue/enqueue_write_buffer.h
@@ -82,7 +82,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
     MemObjSurface bufferSurf(buffer);
     GeneralSurface mapSurface;
     Surface *surfaces[] = {&bufferSurf, nullptr};
-    auto blitAllowed = blitEnqueueAllowed(cmdType);
 
     if (mapAllocation) {
         surfaces[1] = &mapSurface;
@@ -95,9 +94,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
     } else {
         surfaces[1] = &hostPtrSurf;
         if (size != 0) {
-
-            auto &csr = getCommandStreamReceiver(blitAllowed);
-            bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
+            bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
             if (!status) {
                 return CL_OUT_OF_RESOURCES;
             }
@@ -116,7 +113,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
     dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
 
     MultiDispatchInfo dispatchInfo(dc);
-    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed);
+    CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER, dispatchInfo);
+    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
 
     if (context->isProvidingPerformanceHints()) {
         context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));
diff --git a/opencl/source/command_queue/enqueue_write_buffer_rect.h b/opencl/source/command_queue/enqueue_write_buffer_rect.h
index 0ee8cfb618..af1577469b 100644
--- a/opencl/source/command_queue/enqueue_write_buffer_rect.h
+++ b/opencl/source/command_queue/enqueue_write_buffer_rect.h
@@ -58,13 +58,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
     MemObjSurface dstBufferSurf(buffer);
     HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
     Surface *surfaces[] = {&dstBufferSurf, &hostPtrSurf};
-    auto blitAllowed = blitEnqueueAllowed(cmdType);
 
     if (region[0] != 0 &&
         region[1] != 0 &&
         region[2] != 0) {
-        auto &csr = getCommandStreamReceiver(blitAllowed);
-        bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
+        bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
         if (!status) {
             return CL_OUT_OF_RESOURCES;
         }
@@ -88,7 +86,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
     dc.dstSlicePitch = bufferSlicePitch;
 
     MultiDispatchInfo dispatchInfo(dc);
-    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, blitAllowed);
+    CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_BUFFER_RECT, dispatchInfo);
+    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER_RECT>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite, csr);
 
     if (context->isProvidingPerformanceHints()) {
         context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_BUFFER_RECT_REQUIRES_COPY_DATA, static_cast<cl_mem>(buffer));
diff --git a/opencl/source/command_queue/enqueue_write_image.h b/opencl/source/command_queue/enqueue_write_image.h
index 1700c28cbc..859d467ab0 100644
--- a/opencl/source/command_queue/enqueue_write_image.h
+++ b/opencl/source/command_queue/enqueue_write_image.h
@@ -54,7 +54,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
     HostPtrSurface hostPtrSurf(srcPtr, hostPtrSize, true);
     GeneralSurface mapSurface;
     Surface *surfaces[] = {&dstImgSurf, nullptr};
-    auto blitAllowed = blitEnqueueAllowed(cmdType) && blitEnqueueImageAllowed(origin, region, *dstImage);
     if (mapAllocation) {
         surfaces[1] = &mapSurface;
         mapSurface.setGraphicsAllocation(mapAllocation);
@@ -66,8 +65,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
         if (region[0] != 0 &&
             region[1] != 0 &&
             region[2] != 0) {
-            auto &csr = getCommandStreamReceiver(blitAllowed);
-            bool status = csr.createAllocationForHostSurface(hostPtrSurf, false);
+            bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, false);
             if (!status) {
                 return CL_OUT_OF_RESOURCES;
             }
@@ -94,7 +92,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteImage(
     auto eBuiltInOps = EBuiltInOps::CopyBufferToImage3d;
     MultiDispatchInfo dispatchInfo(dc);
 
-    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, blitAllowed);
+    CommandStreamReceiver &csr = selectCsrForBuiltinOperation(CL_COMMAND_WRITE_IMAGE, dispatchInfo);
+    dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_IMAGE>(dispatchInfo, surfaces, eBuiltInOps, numEventsInWaitList, eventWaitList, event, blockingWrite == CL_TRUE, csr);
 
     if (context->isProvidingPerformanceHints()) {
         context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, CL_ENQUEUE_WRITE_IMAGE_REQUIRES_COPY_DATA, static_cast<cl_mem>(dstImage));
diff --git a/opencl/source/helpers/properties_helper.cpp b/opencl/source/helpers/properties_helper.cpp
index 1cf01e8f02..c2c8be3e5f 100644
--- a/opencl/source/helpers/properties_helper.cpp
+++ b/opencl/source/helpers/properties_helper.cpp
@@ -54,11 +54,11 @@ void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &cs
 
         if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) {
             auto taskCountPreviousRootDevice = event->peekTaskCount();
-            auto tagAddressPreviousRootDevice = event->getCommandQueue()->getCommandStreamReceiver(false).getTagAddress();
+            auto tagAddressPreviousRootDevice = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagAddress();
 
             csrDeps.taskCountContainer.push_back({taskCountPreviousRootDevice, reinterpret_cast<uint64_t>(tagAddressPreviousRootDevice)});
 
-            auto graphicsAllocation = event->getCommandQueue()->getCommandStreamReceiver(false).getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
+            auto graphicsAllocation = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
             currentCsr.getResidencyAllocations().push_back(graphicsAllocation);
         }
     }
diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp
index 8b706ed978..350fb29305 100644
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@@ -1065,7 +1065,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
                                                                 hwHelper.getBarriersCountFromHasBarriers(barrierCount),
                                                                 workDim,
                                                                 localWorkSize);
-    auto isEngineInstanced = commandQueue->getCommandStreamReceiver(false).getOsContext().isEngineInstanced();
+    auto isEngineInstanced = commandQueue->getGpgpuCommandStreamReceiver().getOsContext().isEngineInstanced();
     maxWorkGroupCount = hwHelper.adjustMaxWorkGroupCount(maxWorkGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);
     return maxWorkGroupCount;
 }
diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp
index 95a63c032b..dd95806611 100644
--- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp
+++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp
@@ -87,6 +87,7 @@ struct BlitEnqueueTests : public ::testing::Test {
         auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable;
         bool createBcsEngine = !capabilityTable.blitterOperationsSupported;
         capabilityTable.blitterOperationsSupported = true;
+        REQUIRE_BLITTER_OR_SKIP(&device->getHardwareInfo());
 
         if (createBcsEngine) {
             auto &engine = device->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority);
diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp
index 2350f28717..63d78b27bd 100644
--- a/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp
+++ b/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp
@@ -197,7 +197,7 @@ HWTEST_F(CommandQueueHwTest, GivenCommandQueueWhenProcessDispatchForMarkerCalled
     MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield());
     auto myCmdQ = std::make_unique<MockCommandQueueHwWithOverwrittenCsr<FamilyType>>(pCmdQ->getContextPtr(), pClDevice, nullptr, false);
     myCmdQ->csr = &csr;
-    csr.osContext = &pCmdQ->getCommandStreamReceiver(false).getOsContext();
+    csr.osContext = &pCmdQ->getGpgpuCommandStreamReceiver().getOsContext();
     std::unique_ptr<Event> event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0));
     ASSERT_NE(nullptr, event);
 
diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp
index 5f7864ed48..d2ee6e1f65 100644
--- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp
+++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp
@@ -228,7 +228,46 @@ TEST(CommandQueue, givenDeviceWhenCreatingCommandQueueThenPickCsrFromDefaultEngi
     EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver());
 }
 
-struct CommandQueueWithBlitOperationsTests : public ::testing::TestWithParam<uint32_t> {};
+struct BuiltinOpParamsFixture { // Test mix-in: owns a mock image and a mock buffer and builds BuiltinOpParams that refer to them.
+    BuiltinOpParams createParams(cl_command_type cmdType) { // Returns params filled in for cmdType; a command type with no case below gets value-initialized params.
+        BuiltinOpParams params{};
+        switch (cmdType) {
+        case CL_COMMAND_READ_IMAGE: // the image is the source
+            params.srcMemObj = &correctImage;
+            params.srcOffset = correctOrigin;
+            params.size = correctRegion;
+            break;
+        case CL_COMMAND_WRITE_IMAGE: // the image is the destination
+            params.dstMemObj = &correctImage;
+            params.dstOffset = correctOrigin;
+            params.size = correctRegion;
+            break;
+        case CL_COMMAND_COPY_IMAGE: // the same image is used as both source and destination
+            params.srcMemObj = &correctImage;
+            params.dstMemObj = &correctImage;
+            params.srcOffset = correctOrigin;
+            params.dstOffset = correctOrigin;
+            params.size = correctRegion;
+            break;
+        case CL_COMMAND_COPY_BUFFER: // the same buffer on both sides; offsets and size keep their value-initialized state
+            params.srcMemObj = &correctBuffer;
+            params.dstMemObj = &correctBuffer;
+            break;
+        case CL_COMMAND_SVM_MEMCPY: // SVM fields are set instead of mem objects; both come from correctBuffer.getGraphicsAllocation(0)
+            params.srcSvmAlloc = correctBuffer.getGraphicsAllocation(0); // NOTE(review): 0 is presumably the root device index - confirm
+            params.dstSvmAlloc = correctBuffer.getGraphicsAllocation(0);
+            break;
+        }
+        return params; // holds the addresses of correctImage/correctBuffer, so the fixture must outlive the returned params
+    }
+
+    size_t correctRegion[3] = {10u, 10u, 0}; // assigned to params.size by the image cases
+    size_t correctOrigin[3] = {1u, 1u, 0}; // assigned to params.srcOffset/dstOffset by the image cases
+    MockImageBase correctImage = {}; // mock image referenced by the READ/WRITE/COPY_IMAGE cases
+    MockBuffer correctBuffer = {}; // mock buffer referenced by the COPY_BUFFER and SVM_MEMCPY cases
+};
+
+struct CommandQueueWithBlitOperationsTests : public ::testing::TestWithParam<uint32_t>, BuiltinOpParamsFixture {}; // uint32_t-parameterized fixture (tests read the param as cmdType); createParams() comes from BuiltinOpParamsFixture
 
 TEST_P(CommandQueueWithBlitOperationsTests, givenDeviceNotSupportingBlitOperationsWhenQueueIsCreatedThenDontRegisterBcsCsr) {
     HardwareInfo hwInfo = *defaultHwInfo;
@@ -242,8 +281,9 @@ TEST_P(CommandQueueWithBlitOperationsTests, givenDeviceNotSupportingBlitOperatio
     auto defaultCsr = mockDevice->getDefaultEngine().commandStreamReceiver;
     EXPECT_EQ(defaultCsr, &cmdQ.getGpgpuCommandStreamReceiver());
 
-    auto blitAllowed = cmdQ.blitEnqueueAllowed(cmdType);
-    EXPECT_EQ(defaultCsr, &cmdQ.getCommandStreamReceiver(blitAllowed));
+    BuiltinOpParams params = createParams(cmdType);
+    MultiDispatchInfo dispatchInfo{params};
+    EXPECT_EQ(defaultCsr, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo));
 }
 
 HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsCreatedThenBcsIsTakenFromFirstSubDevice) {
@@ -264,12 +304,13 @@ HWTEST_P(CommandQueueWithBlitOperationsTests, givenDeviceWithSubDevicesSupportin
 
     MockCommandQueue cmdQ(nullptr, device.get(), 0, false);
     auto cmdType = GetParam();
-    auto blitAllowed = cmdQ.blitEnqueueAllowed(cmdType);
+    BuiltinOpParams params = createParams(cmdType);
+    MultiDispatchInfo dispatchInfo{params};
 
     EXPECT_NE(nullptr, cmdQ.getBcsCommandStreamReceiver());
     EXPECT_EQ(bcsEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver());
-    EXPECT_EQ(bcsEngine.commandStreamReceiver, &cmdQ.getCommandStreamReceiver(blitAllowed));
-    EXPECT_EQ(bcsEngine.osContext, &cmdQ.getCommandStreamReceiver(blitAllowed).getOsContext());
+    EXPECT_EQ(bcsEngine.commandStreamReceiver, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo));
+    EXPECT_EQ(bcsEngine.osContext, &cmdQ.selectCsrForBuiltinOperation(cmdType, dispatchInfo).getOsContext());
 }
 
 INSTANTIATE_TEST_CASE_P(uint32_t,
@@ -1165,13 +1206,18 @@ TEST(CommandQueue, givenCopyOnlyQueueWhenCallingBlitEnqueueAllowedThenReturnTrue
 
     queue.isCopyOnly = false;
     EXPECT_EQ(queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled(),
-              queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER));
+              queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {}));
 
     queue.isCopyOnly = true;
-    EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER));
+    EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {}));
 }
 
-TEST(CommandQueue, givenClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
+struct CommandQueueBuiltinTest : BuiltinOpParamsFixture, ::testing::Test {}; // gtest fixture for TEST_F bodies that need createParams() from BuiltinOpParamsFixture
+
+TEST_F(CommandQueueBuiltinTest, givenClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) {
+    DebugManagerStateRestore restore{};
+    DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);
+
     MockContext context{};
 
     MockCommandQueue queue(&context, context.getDevice(0), 0, false);
@@ -1181,17 +1227,44 @@ TEST(CommandQueue, givenClCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectV
 
     bool supported = queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled();
 
-    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER));
-    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER));
-    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER));
-    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER_RECT));
-    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER_RECT));
-    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT));
-    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY));
-    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE));
-    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE));
-    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE));
-    EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE_TO_BUFFER));
+    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER, {}));
+    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER, {}));
+    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER, {}));
+    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_BUFFER_RECT, {}));
+    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_BUFFER_RECT, {}));
+    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_BUFFER_RECT, {}));
+    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY, {}));
+    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_READ_IMAGE, createParams(CL_COMMAND_READ_IMAGE)));
+    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_WRITE_IMAGE, createParams(CL_COMMAND_WRITE_IMAGE)));
+    EXPECT_EQ(supported, queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, createParams(CL_COMMAND_COPY_IMAGE)));
+    EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE_TO_BUFFER, {}));
+}
+
+TEST_F(CommandQueueBuiltinTest, givenCopyImageCommandWhenCallingBlitEnqueueAllowedThenReturnCorrectValue) { // blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, ...) must be true for the fixture params and false once either offset[0] is 0x9999
+    DebugManagerStateRestore restore{}; // NOTE(review): presumably restores debug flags when the test scope ends - confirm
+    DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);
+
+    MockContext context{};
+
+    MockCommandQueue queue(&context, context.getDevice(0), 0, false);
+    if (!queue.bcsEngine) { // queue has no BCS engine: point bcsEngine at the device's default engine so the check below can run
+        queue.bcsEngine = &context.getDevice(0)->getDefaultEngine();
+    }
+    if (!queue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { // expectations below are only checked when timestamp packet write is enabled on the gpgpu CSR
+        GTEST_SKIP();
+    }
+
+    auto builtinOpParams = createParams(CL_COMMAND_COPY_IMAGE); // baseline: fixture image, origin and region on both sides
+    EXPECT_TRUE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams));
+
+    builtinOpParams.srcOffset[0] = 0x9999; // only the source offset is changed
+    EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams));
+
+    builtinOpParams.dstOffset[0] = 0x9999; // now both offsets are changed
+    EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams));
+
+    builtinOpParams.srcOffset = correctOrigin; // source reset to the fixture origin; only the destination offset remains changed
+    EXPECT_FALSE(queue.blitEnqueueAllowed(CL_COMMAND_COPY_IMAGE, builtinOpParams));
 }
 
 TEST(CommandQueue, givenRegularClCommandWhenCallingBlitEnqueuePreferredThenReturnCorrectValue) {
diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp
index 99ee975b7e..2ce477ea24 100644
--- a/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_copy_image_tests.cpp
@@ -210,13 +210,15 @@ HWTEST_F(EnqueueCopyImageTest, WhenCopyingImageThenNumberOfPipelineSelectsIsOne)
 }
 
 HWTEST_F(EnqueueCopyImageTest, givenDeviceWithBlitterSupportWhenEnqueueCopyImageThenBlitEnqueueImageAllowedReturnsCorrectResult) {
+    auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
+    auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
+    REQUIRE_BLITTER_OR_SKIP(hwInfo);
+
     DebugManagerStateRestore restorer;
     DebugManager.flags.OverrideInvalidEngineWithDefault.set(1);
     DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
     DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);
 
-    auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
-    auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
     hwInfo->capabilityTable.blitterOperationsSupported = true;
     size_t srcOrigin[] = {0, 0, 0};
     size_t dstOrigin[] = {0, 0, 0};
diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp
index 018a97ca99..49e2df69ce 100644
--- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp
@@ -761,14 +761,16 @@ HWTEST_F(EnqueueReadImageTest, GivenImage1DThatIsZeroCopyWhenReadImageWithTheSam
 }
 
 HWTEST_F(EnqueueReadImageTest, givenDeviceWithBlitterSupportWhenEnqueueReadImageThenBlitEnqueueImageAllowedReturnsCorrectResult) {
+    auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
+    hwInfo->capabilityTable.blitterOperationsSupported = true;
+    REQUIRE_BLITTER_OR_SKIP(hwInfo);
+
     DebugManagerStateRestore restorer;
     DebugManager.flags.OverrideInvalidEngineWithDefault.set(1);
     DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
     DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);
 
-    auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
     auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
-    hwInfo->capabilityTable.blitterOperationsSupported = true;
     size_t origin[] = {0, 0, 0};
     auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
     std::unique_ptr<Image> image(Image2dHelper<>::create(context));
diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp
index f52079bb28..b7be2a1df3 100644
--- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp
@@ -353,11 +353,6 @@ HWTEST_F(EnqueueSvmMemCopyTest, givenCommandQueueWhenEnqueueSVMMemcpyIsCalledThe
     EXPECT_EQ(CL_SUCCESS, retVal);
     EXPECT_TRUE(mockCmdQ->notifyEnqueueSVMMemcpyCalled);
 
-    auto blitAllowed = mockCmdQ->blitEnqueueAllowed(CL_COMMAND_SVM_MEMCPY);
-
-    auto &csr = mockCmdQ->getCommandStreamReceiver(blitAllowed);
-    EXPECT_EQ(EngineHelpers::isBcs(csr.getOsContext().getEngineType()), mockCmdQ->useBcsCsrOnNotifyEnabled);
-
     alignedFree(dstHostPtr);
 }
 
diff --git a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp
index 9dfca5704c..079dd6a1df 100644
--- a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp
@@ -209,14 +209,16 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueWriteImageTest, WhenWritingImageThenMediaVfeS
 }
 
 HWTEST_F(EnqueueWriteImageTest, givenDeviceWithBlitterSupportWhenEnqueueWriteImageThenBlitEnqueueImageAllowedReturnsCorrectResult) {
+    auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
+    hwInfo->capabilityTable.blitterOperationsSupported = true;
+    REQUIRE_BLITTER_OR_SKIP(hwInfo);
+
     DebugManagerStateRestore restorer;
     DebugManager.flags.OverrideInvalidEngineWithDefault.set(1);
     DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
     DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);
 
-    auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
     auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
-    hwInfo->capabilityTable.blitterOperationsSupported = true;
     size_t origin[] = {0, 0, 0};
     auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
     std::unique_ptr<Image> image(Image2dHelper<>::create(context));
diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp
index 091e3f5d3e..280f086de4 100644
--- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp
+++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp
@@ -84,11 +84,11 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu
 
         auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
         EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
 
         auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
         EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
     }
 
     {
@@ -112,11 +112,11 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu
 
         auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
         EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
 
         auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
         EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
     }
     alignedFree(svmPtr);
 }
@@ -187,15 +187,15 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
 
         auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
         EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
 
         auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
         EXPECT_EQ(21u, semaphoreCmd1->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ3->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
 
         auto semaphoreCmd2 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[2]));
         EXPECT_EQ(7u, semaphoreCmd2->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress());
     }
 
     {
@@ -212,15 +212,15 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
 
         auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
         EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
 
         auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
         EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
 
         auto semaphoreCmd2 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[2]));
         EXPECT_EQ(21u, semaphoreCmd2->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ3->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress());
     }
 
     {
@@ -246,7 +246,7 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
 
         auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
         EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
-        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getCommandStreamReceiver(false).getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
+        EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
     }
 }
 
@@ -310,7 +310,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro
 
         EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr);
         CsrDependencies csrDeps;
-        eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ1->getCommandStreamReceiver(false));
+        eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver());
 
         EXPECT_EQ(0u, csrDeps.taskCountContainer.size());
         EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<FamilyType>(csrDeps));
@@ -336,7 +336,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro
 
         EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr);
         CsrDependencies csrDeps;
-        eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ2->getCommandStreamReceiver(false));
+        eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver());
 
         EXPECT_EQ(3u, csrDeps.taskCountContainer.size());
         EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<FamilyType>(csrDeps));
diff --git a/opencl/test/unit_test/libult/ult_command_stream_receiver.h b/opencl/test/unit_test/libult/ult_command_stream_receiver.h
index beb764296b..5127c0acc7 100644
--- a/opencl/test/unit_test/libult/ult_command_stream_receiver.h
+++ b/opencl/test/unit_test/libult/ult_command_stream_receiver.h
@@ -272,6 +272,12 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
         ensureCommandBufferAllocationCalled++;
         BaseClass::ensureCommandBufferAllocation(commandStream, minimumRequiredSize, additionalAllocationSize);
     }
+
+    void waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override { // Forwards to BaseClass, then records the call so tests can assert on the counter.
+        BaseClass::waitForTaskCountAndCleanTemporaryAllocationList(requiredTaskCount);
+        waitForTaskCountAndCleanAllocationListCalled++; // bumped only after the base implementation has returned
+    }
+
     std::vector<std::string> aubCommentMessages;
 
     BatchBuffer latestFlushedBatchBuffer = {};
@@ -281,6 +287,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
 
     LinearStream *lastFlushedCommandStream = nullptr;
 
+    uint32_t waitForTaskCountAndCleanAllocationListCalled = 0;
     uint32_t makeSurfacePackNonResidentCalled = false;
     uint32_t latestSentTaskCountValueDuringFlush = 0;
     uint32_t blitBufferCalled = 0;
diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp
index f11fb5c406..ec81051594 100644
--- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp
+++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp
@@ -80,10 +80,9 @@ struct BcsBufferTests : public ::testing::Test {
         void waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override {
             EXPECT_EQ(1u, waitForTaskCountWithKmdNotifyFallbackCalled);
             EXPECT_EQ(this->latestFlushedTaskCount, requiredTaskCount);
-            waitForTaskCountAndCleanAllocationListCalled++;
+            UltCommandStreamReceiver<FamilyType>::waitForTaskCountAndCleanTemporaryAllocationList(requiredTaskCount);
         }
 
-        uint32_t waitForTaskCountAndCleanAllocationListCalled = 0;
         uint32_t waitForTaskCountWithKmdNotifyFallbackCalled = 0;
         CommandStreamReceiver *gpgpuCsr = nullptr;
     };
@@ -867,7 +866,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW
 
     auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
     cmdQ->bcsEngine = &bcsEngineControl;
-    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
+    auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver();
     myMockCsr->gpgpuCsr = &gpgpuCsr;
 
     cl_int retVal = CL_SUCCESS;
@@ -876,12 +875,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW
     void *hostPtr = reinterpret_cast<void *>(0x12340000);
 
     cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
-    EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
-    EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
-    EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
+    EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
+    EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
 
     bool tempAllocationFound = false;
-    auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
+    auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead();
     while (tempAllocation) {
         if (tempAllocation->getUnderlyingBuffer() == hostPtr) {
             tempAllocationFound = true;
@@ -892,7 +891,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW
     EXPECT_TRUE(tempAllocationFound);
 
     cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
-    EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
 }
 
 HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCallWait) {
@@ -906,7 +905,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa
 
     auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
     cmdQ->bcsEngine = &bcsEngineControl;
-    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
+    auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver();
     myMockCsr->gpgpuCsr = &gpgpuCsr;
 
     cl_int retVal = CL_SUCCESS;
@@ -921,12 +920,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa
     cmdQ->enqueueReadBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region,
                                 MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
                                 MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr);
-    EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
-    EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
-    EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
+    EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
+    EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
 
     bool tempAllocationFound = false;
-    auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
+    auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead();
     while (tempAllocation) {
         if (tempAllocation->getUnderlyingBuffer() == hostPtr) {
             tempAllocationFound = true;
@@ -939,7 +938,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCa
     cmdQ->enqueueReadBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region,
                                 MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
                                 MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr);
-    EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
 }
 
 HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenCallWait) {
@@ -953,7 +952,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC
 
     auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
     cmdQ->bcsEngine = &bcsEngineControl;
-    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
+    auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver();
     myMockCsr->gpgpuCsr = &gpgpuCsr;
 
     cl_int retVal = CL_SUCCESS;
@@ -968,12 +967,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC
     cmdQ->enqueueWriteBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region,
                                  MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
                                  MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr);
-    EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
-    EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
-    EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
+    EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
+    EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
 
     bool tempAllocationFound = false;
-    auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
+    auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead();
     while (tempAllocation) {
         if (tempAllocation->getUnderlyingBuffer() == hostPtr) {
             tempAllocationFound = true;
@@ -986,7 +985,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferRectWhenUsingBcsThenC
     cmdQ->enqueueWriteBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region,
                                  MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
                                  MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr);
-    EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
 }
 
 HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWait) {
@@ -1000,7 +999,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa
 
     auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
     cmdQ->bcsEngine = &bcsEngineControl;
-    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
+    auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver();
     myMockCsr->gpgpuCsr = &gpgpuCsr;
 
     cl_int retVal = CL_SUCCESS;
@@ -1009,12 +1008,12 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa
     void *hostPtr = reinterpret_cast<void *>(0x12340000);
 
     cmdQ->enqueueReadBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
-    EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
-    EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
-    EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
+    EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
+    EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
 
     bool tempAllocationFound = false;
-    auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead();
+    auto tempAllocation = gpgpuCsr.getTemporaryAllocations().peekHead();
     while (tempAllocation) {
         if (tempAllocation->getUnderlyingBuffer() == hostPtr) {
             tempAllocationFound = true;
@@ -1025,7 +1024,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWa
     EXPECT_TRUE(tempAllocationFound);
 
     cmdQ->enqueueReadBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);
-    EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
 }
 
 HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCalledWhenUsingBcsThenCallWait) {
@@ -1039,7 +1038,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCa
 
     auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
     cmdQ->bcsEngine = &bcsEngineControl;
-    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
+    auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver();
     myMockCsr->gpgpuCsr = &gpgpuCsr;
 
     auto pDstSVM = std::make_unique<char[]>(256);
@@ -1047,16 +1046,16 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingSVMMemcpyAndEnqueuReadBufferIsCa
 
     cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr);
-    EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
-    EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
-    EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
+    EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
+    EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
 
-    auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead();
+    auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead();
 
     EXPECT_EQ(0u, tempAlloc->countSuccessors());
     EXPECT_EQ(pDstSVM.get(), reinterpret_cast<void *>(tempAlloc->getGpuAddress()));
 
     cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM, 256, 0, nullptr, nullptr);
-    EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
 
     bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
 }
@@ -1072,24 +1071,24 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenSrcHostPtrBlockingEnqueueSVMMemcpyAndEnq
 
     auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
     cmdQ->bcsEngine = &bcsEngineControl;
-    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
+    auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver();
     myMockCsr->gpgpuCsr = &gpgpuCsr;
 
     auto pSrcSVM = std::make_unique<char[]>(256);
     auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields());
 
     cmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr);
-    EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
-    EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
-    EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
+    EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
+    EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
 
-    auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead();
+    auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead();
 
     EXPECT_EQ(0u, tempAlloc->countSuccessors());
     EXPECT_EQ(pSrcSVM.get(), reinterpret_cast<void *>(tempAlloc->getGpuAddress()));
 
     cmdQ->enqueueSVMMemcpy(true, pDstSVM, pSrcSVM.get(), 256, 0, nullptr, nullptr);
-    EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
 
     bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pDstSVM);
 }
@@ -1105,7 +1104,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSV
 
     auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
     cmdQ->bcsEngine = &bcsEngineControl;
-    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
+    auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver();
     myMockCsr->gpgpuCsr = &gpgpuCsr;
 
     auto pSrcSVM = std::make_unique<char[]>(256);
@@ -1113,17 +1112,17 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSV
 
     cmdQ->enqueueSVMMemcpy(false, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr);
-    EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
-    EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
-    EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
+    EXPECT_EQ(0u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_FALSE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
+    EXPECT_TRUE(myMockCsr->getTemporaryAllocations().peekIsEmpty());
 
-    auto tempAlloc = myMockCsr->getTemporaryAllocations().peekHead();
+    auto tempAlloc = gpgpuCsr.getTemporaryAllocations().peekHead();
 
     EXPECT_EQ(1u, tempAlloc->countSuccessors());
     EXPECT_EQ(pSrcSVM.get(), reinterpret_cast<void *>(tempAlloc->getGpuAddress()));
     EXPECT_EQ(pDstSVM.get(), reinterpret_cast<void *>(tempAlloc->next->getGpuAddress()));
 
     cmdQ->enqueueSVMMemcpy(true, pDstSVM.get(), pSrcSVM.get(), 256, 0, nullptr, nullptr);
-    EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
+    EXPECT_EQ(1u, gpgpuCsr.waitForTaskCountAndCleanAllocationListCalled);
 }
 
 HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyWhenEnqueueSVMMemcpyThenSvmMemcpyCommandIsCalledAndBcs) {
@@ -1159,7 +1158,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyTypeWhenEnqueueNonBlockingSV
 
     auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
     cmdQ->bcsEngine = &bcsEngineControl;
-    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
+    auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver();
     myMockCsr->gpgpuCsr = &gpgpuCsr;
 
     auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(256, {}, bcsMockContext->getRootDeviceIndices(), bcsMockContext->getDeviceBitfields());
@@ -1287,7 +1286,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedEnqueueWhenUsingBcsThenWaitForVal
 
     auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
     cmdQ->bcsEngine = &bcsEngineControl;
-    auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver();
+    auto &gpgpuCsr = cmdQ->getUltCommandStreamReceiver();
     myMockCsr->gpgpuCsr = &gpgpuCsr;
 
     cl_int retVal = CL_SUCCESS;
diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h
index a32b46fadd..deeb563111 100644
--- a/opencl/test/unit_test/mocks/mock_command_queue.h
+++ b/opencl/test/unit_test/mocks/mock_command_queue.h
@@ -311,7 +311,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
         return BaseClass::isCacheFlushForBcsRequired();
     }
 
-    bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) override {
+    bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const override {
         isBlitEnqueueImageAllowed = BaseClass::blitEnqueueImageAllowed(origin, region, image);
         return isBlitEnqueueImageAllowed;
     }
@@ -329,7 +329,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
     bool notifyEnqueueSVMMemcpyCalled = false;
     bool cpuDataTransferHandlerCalled = false;
     bool useBcsCsrOnNotifyEnabled = false;
-    bool isBlitEnqueueImageAllowed = false;
+    mutable bool isBlitEnqueueImageAllowed = false;
     struct OverrideReturnValue {
         bool enabled = false;
         bool returnValue = false;
diff --git a/shared/source/helpers/vec.h b/shared/source/helpers/vec.h
index 8a769e859a..98b5358d12 100644
--- a/shared/source/helpers/vec.h
+++ b/shared/source/helpers/vec.h
@@ -56,7 +56,7 @@ struct Vec3 {
         return values[i];
     }
 
-    T operator[](uint32_t i) const {
+    const T &operator[](uint32_t i) const {
         UNRECOVERABLE_IF(i > 2);
         return values[i];
     }