Improve obtaining LinearStream during enqueue call

- Move logic to enqueueHandler to cover all scenarios
- Create BlockedCommandsData not only for Kernel enqueue
- KernelOperation cleanup

Change-Id: Ie4a673cbbc986c685996a38ab296444d38e7bbd5
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
2026-01-06 10:26:29 +08:00 · 2019-07-18 21:15:50 +02:00
parent 1460713d69
commit 95c2dcd8b0
18 changed files with 264 additions and 220 deletions
--- a/runtime/command_queue/command_queue_hw.h
+++ b/runtime/command_queue/command_queue_hw.h
@@ -337,7 +337,7 @@ class CommandQueueHw : public CommandQueue {
                        bool &blocking,
                        const MultiDispatchInfo &multiDispatchInfo,
                        TimestampPacketContainer *previousTimestampPacketNodes,
-                        KernelOperation *blockedCommandsData,
+                        std::unique_ptr<KernelOperation> &blockedCommandsData,
                        EventsRequest &eventsRequest,
                        bool slmUsed,
                        EventBuilder &externalEventBuilder,
@@ -385,6 +385,29 @@ class CommandQueueHw : public CommandQueue {
    MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
                                                 AuxTranslationDirection auxTranslationDirection);
    // Selects the LinearStream that the current enqueue records its commands into.
    //
    // Blocked path (blockedQueue is true and multiDispatchInfo is not empty):
    //   a new LinearStream is created, the GPGPU command stream receiver is asked to
    //   ensure a command buffer allocation for it, and blockedCommandsData is set to a
    //   newly created KernelOperation constructed from that stream. The returned
    //   pointer is the same stream that was handed to the KernelOperation constructor.
    // Otherwise:
    //   the address of the stream returned by getCommandStream<GfxFamily, commandType>()
    //   is returned and blockedCommandsData is not modified.
    //
    // csrDependencies, profilingRequired, perfCountersRequired, blitEnqueue, surfaces and
    // numSurfaces are only forwarded to getCommandStream() in the non-blocked path.
    template <uint32_t commandType>
    LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool profilingRequired,
                                      bool perfCountersRequired, bool blitEnqueue, bool blockedQueue,
                                      const MultiDispatchInfo &multiDispatchInfo,
                                      std::unique_ptr<KernelOperation> &blockedCommandsData,
                                      Surface **surfaces, size_t numSurfaces) {
        LinearStream *commandStream = nullptr;
        if (blockedQueue && !multiDispatchInfo.empty()) {
            // Requested size is one 64k page minus csOverfetchSize, with csOverfetchSize
            // passed separately as the additional allocation size.
            constexpr size_t additionalAllocationSize = CSRequirements::csOverfetchSize;
            constexpr size_t allocationSize = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize;
            commandStream = new LinearStream();
            auto &gpgpuCsr = getGpgpuCommandStreamReceiver();
            gpgpuCsr.ensureCommandBufferAllocation(*commandStream, allocationSize, additionalAllocationSize);
            // The raw stream pointer is passed to KernelOperation together with the CSR's
            // internal allocation storage.
            blockedCommandsData = std::make_unique<KernelOperation>(commandStream, *gpgpuCsr.getInternalAllocationStorage());
        } else {
            commandStream = &getCommandStream<GfxFamily, commandType>(*this, csrDependencies, profilingRequired, perfCountersRequired,
                                                                      blitEnqueue, multiDispatchInfo, surfaces, numSurfaces);
        }
        return commandStream;
    }
  private:
    bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
    void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
@@ -414,7 +437,7 @@ class CommandQueueHw : public CommandQueue {
                                   bool blockQueue,
                                   DeviceQueueHw<GfxFamily> *devQueueHw,
                                   CsrDependencies &csrDeps,
-                                   KernelOperation *&blockedCommandsData,
+                                   KernelOperation *blockedCommandsData,
                                   TimestampPacketContainer &previousTimestampPacketNodes,
                                   PreemptionMode preemption);
 };
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@@ -165,7 +165,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
    bool profilingRequired = (this->isProfilingEnabled() && event != nullptr);
    bool perfCountersRequired = (this->isPerfCountersEnabled() && event != nullptr);
-    KernelOperation *blockedCommandsData = nullptr;
+    std::unique_ptr<KernelOperation> blockedCommandsData;
    std::unique_ptr<PrintfHandler> printfHandler;
    bool slmUsed = multiDispatchInfo.usesSlm() || parentKernel;
    auto preemption = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
@@ -227,8 +227,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
        }
    }
-    auto &commandStream = getCommandStream<GfxFamily, commandType>(*this, csrDeps, profilingRequired, perfCountersRequired,
+    auto &commandStream = *obtainCommandStream<commandType>(csrDeps, profilingRequired, perfCountersRequired, blitEnqueue, blockQueue,
-                                                                   blitEnqueue, multiDispatchInfo, surfacesForResidency, numSurfaceForResidency);
+                                                            multiDispatchInfo, blockedCommandsData, surfacesForResidency, numSurfaceForResidency);
    auto commandStreamStart = commandStream.getUsed();
    if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
@@ -241,7 +241,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
        processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest, commandStream, commandType);
    } else if (multiDispatchInfo.empty() == false) {
        processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
-                                               hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData,
+                                               hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(),
                                               previousTimestampPacketNodes, preemption);
    } else if (isCacheFlushCommand(commandType)) {
        processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
@@ -396,7 +396,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
                                                          bool blockQueue,
                                                          DeviceQueueHw<GfxFamily> *devQueueHw,
                                                          CsrDependencies &csrDeps,
-                                                          KernelOperation *&blockedCommandsData,
+                                                          KernelOperation *blockedCommandsData,
                                                          TimestampPacketContainer &previousTimestampPacketNodes,
                                                          PreemptionMode preemption) {
    TagNode<HwPerfCounter> *hwPerfCounter = nullptr;
@@ -437,13 +437,12 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
        *this,
        multiDispatchInfo,
        csrDeps,
-        &blockedCommandsData,
+        blockedCommandsData,
        hwTimeStamps,
        hwPerfCounter,
        &previousTimestampPacketNodes,
        timestampPacketContainer.get(),
        preemption,
        blockQueue,
        commandType);
    if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
@@ -738,7 +737,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
    bool &blocking,
    const MultiDispatchInfo &multiDispatchInfo,
    TimestampPacketContainer *previousTimestampPacketNodes,
-    KernelOperation *blockedCommandsData,
+    std::unique_ptr<KernelOperation> &blockedCommandsData,
    EventsRequest &eventsRequest,
    bool slmUsed,
    EventBuilder &externalEventBuilder,
@@ -795,10 +794,9 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
            allSurfaces.push_back(surface->duplicate());
        }
        PreemptionMode preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
        auto kernelOperation = std::unique_ptr<KernelOperation>(blockedCommandsData); // marking ownership
        auto cmd = std::make_unique<CommandComputeKernel>(
            *this,
-            std::move(kernelOperation),
+            std::move(blockedCommandsData),
            allSurfaces,
            shouldFlushDC(commandType, printfHandler.get()),
            slmUsed,
--- a/runtime/command_queue/hardware_interface.h
+++ b/runtime/command_queue/hardware_interface.h
@@ -39,14 +39,13 @@ class HardwareInterface {
        CommandQueue &commandQueue,
        const MultiDispatchInfo &multiDispatchInfo,
        const CsrDependencies &csrDependencies,
-        KernelOperation **blockedCommandsData,
+        KernelOperation *blockedCommandsData,
        TagNode<HwTimeStamps> *hwTimeStamps,
        TagNode<HwPerfCounter> *hwPerfCounter,
        TimestampPacketContainer *previousTimestampPacketNodes,
        TimestampPacketContainer *currentTimestampPacketNodes,
        PreemptionMode preemptionMode,
-        bool blockQueue,
+        uint32_t commandType);
        uint32_t commandType = 0);
    static void getDefaultDshSpace(
        const size_t &offsetInterfaceDescriptorTable,
--- a/runtime/command_queue/hardware_interface_base.inl
+++ b/runtime/command_queue/hardware_interface_base.inl
@@ -26,13 +26,12 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
    CommandQueue &commandQueue,
    const MultiDispatchInfo &multiDispatchInfo,
    const CsrDependencies &csrDependencies,
-    KernelOperation **blockedCommandsData,
+    KernelOperation *blockedCommandsData,
    TagNode<HwTimeStamps> *hwTimeStamps,
    TagNode<HwPerfCounter> *hwPerfCounter,
    TimestampPacketContainer *previousTimestampPacketNodes,
    TimestampPacketContainer *currentTimestampPacketNodes,
    PreemptionMode preemptionMode,
    bool blockQueue,
    uint32_t commandType) {
    LinearStream *commandStream = nullptr;
@@ -49,19 +48,11 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
    }
    // Allocate command stream and indirect heaps
-    obtainIndirectHeaps(commandQueue, multiDispatchInfo, blockQueue, dsh, ioh, ssh);
+    bool blockedQueue = (blockedCommandsData != nullptr);
-    if (blockQueue) {
+    obtainIndirectHeaps(commandQueue, multiDispatchInfo, blockedQueue, dsh, ioh, ssh);
-        constexpr static auto additionalAllocationSize = CSRequirements::csOverfetchSize;
+    if (blockedQueue) {
-        constexpr static auto allocationSize = MemoryConstants::pageSize64k - additionalAllocationSize;
+        blockedCommandsData->setHeaps(dsh, ioh, ssh);
-        commandStream = new LinearStream();
+        commandStream = blockedCommandsData->commandStream.get();
        commandQueue.getGpgpuCommandStreamReceiver().ensureCommandBufferAllocation(*commandStream, allocationSize, additionalAllocationSize);
        using UniqueIH = std::unique_ptr<IndirectHeap>;
        *blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh),
                                                   UniqueIH(ssh), *commandQueue.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
        if (parentKernel) {
            (*blockedCommandsData)->doNotFreeISH = true;
        }
    } else {
        commandStream = &commandQueue.getCS(0);
    }
--- a/runtime/helpers/CMakeLists.txt
+++ b/runtime/helpers/CMakeLists.txt
@@ -86,6 +86,7 @@ set(RUNTIME_SRCS_HELPERS_BASE
  ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.h
  ${CMAKE_CURRENT_SOURCE_DIR}/task_information.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/task_information.h
  ${CMAKE_CURRENT_SOURCE_DIR}/task_information.inl
  ${CMAKE_CURRENT_SOURCE_DIR}/uint16_avx2.h
  ${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4.h
  ${CMAKE_CURRENT_SOURCE_DIR}/validators.cpp
--- a/runtime/helpers/task_information.cpp
+++ b/runtime/helpers/task_information.cpp
@@ -18,23 +18,14 @@
 #include "runtime/gtpin/gtpin_notify.h"
 #include "runtime/helpers/aligned_memory.h"
 #include "runtime/helpers/csr_deps.h"
 #include "runtime/helpers/task_information.inl"
 #include "runtime/mem_obj/mem_obj.h"
 #include "runtime/memory_manager/internal_allocation_storage.h"
 #include "runtime/memory_manager/surface.h"
 namespace NEO {
-KernelOperation::~KernelOperation() {
+template void KernelOperation::ResourceCleaner::operator()<LinearStream>(LinearStream *);
-    storageForAllocations.storeAllocation(std::unique_ptr<GraphicsAllocation>(dsh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
+template void KernelOperation::ResourceCleaner::operator()<IndirectHeap>(IndirectHeap *);
    if (ioh.get() == dsh.get()) {
        ioh.release();
    }
    if (ioh) {
        storageForAllocations.storeAllocation(std::unique_ptr<GraphicsAllocation>(ioh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
    }
    storageForAllocations.storeAllocation(std::unique_ptr<GraphicsAllocation>(ssh->getGraphicsAllocation()), REUSABLE_ALLOCATION);
    storageForAllocations.storeAllocation(std::unique_ptr<GraphicsAllocation>(commandStream->getGraphicsAllocation()), REUSABLE_ALLOCATION);
 }
 CommandMapUnmap::CommandMapUnmap(MapOperationType op, MemObj &memObj, MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset, bool readOnly,
                                 CommandStreamReceiver &csr, CommandQueue &cmdQ)
@@ -105,9 +96,6 @@ CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::uniq
 }
 CommandComputeKernel::~CommandComputeKernel() {
    if (kernelOperation->ioh.get() == kernelOperation->dsh.get()) {
        kernelOperation->doNotFreeISH = true;
    }
    kernel->decRefInternal();
    auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
--- a/runtime/helpers/task_information.h
+++ b/runtime/helpers/task_information.h
@@ -66,23 +66,45 @@ class CommandMapUnmap : public Command {
 };
 struct KernelOperation {
-    KernelOperation(std::unique_ptr<LinearStream> commandStream, std::unique_ptr<IndirectHeap> dsh, std::unique_ptr<IndirectHeap> ioh, std::unique_ptr<IndirectHeap> ssh,
+  protected:
-                    InternalAllocationStorage &storageForAllocations)
+    struct ResourceCleaner {
-        : commandStream(std::move(commandStream)), dsh(std::move(dsh)),
+        ResourceCleaner() = delete;
-          ioh(std::move(ioh)), ssh(std::move(ssh)),
+        ResourceCleaner(InternalAllocationStorage *storageForAllocations) : storageForAllocations(storageForAllocations){};
-          surfaceStateHeapSizeEM(0), doNotFreeISH(false), storageForAllocations(storageForAllocations) {
+
        template <typename ObjectT>
        void operator()(ObjectT *object);
        InternalAllocationStorage *storageForAllocations = nullptr;
    } resourceCleaner{nullptr};
    using LinearStreamUniquePtrT = std::unique_ptr<LinearStream, ResourceCleaner>;
    using IndirectHeapUniquePtrT = std::unique_ptr<IndirectHeap, ResourceCleaner>;
  public:
    KernelOperation() = delete;
    KernelOperation(LinearStream *commandStream, InternalAllocationStorage &storageForAllocations) {
        resourceCleaner.storageForAllocations = &storageForAllocations;
        this->commandStream = LinearStreamUniquePtrT(commandStream, resourceCleaner);
    }
-    ~KernelOperation();
+    void setHeaps(IndirectHeap *dsh, IndirectHeap *ioh, IndirectHeap *ssh) {
        this->dsh = IndirectHeapUniquePtrT(dsh, resourceCleaner);
        this->ioh = IndirectHeapUniquePtrT(ioh, resourceCleaner);
        this->ssh = IndirectHeapUniquePtrT(ssh, resourceCleaner);
    }
-    std::unique_ptr<LinearStream> commandStream;
+    ~KernelOperation() {
-    std::unique_ptr<IndirectHeap> dsh;
+        if (ioh.get() == dsh.get()) {
-    std::unique_ptr<IndirectHeap> ioh;
+            ioh.release();
-    std::unique_ptr<IndirectHeap> ssh;
+        }
    }
-    size_t surfaceStateHeapSizeEM;
+    LinearStreamUniquePtrT commandStream{nullptr, resourceCleaner};
-    bool doNotFreeISH;
+    IndirectHeapUniquePtrT dsh{nullptr, resourceCleaner};
-    InternalAllocationStorage &storageForAllocations;
+    IndirectHeapUniquePtrT ioh{nullptr, resourceCleaner};
    IndirectHeapUniquePtrT ssh{nullptr, resourceCleaner};
    size_t surfaceStateHeapSizeEM = 0;
 };
 class CommandComputeKernel : public Command {
--- a/runtime/helpers/task_information.inl
+++ b/runtime/helpers/task_information.inl
@@ -0,0 +1,17 @@
 /*
 * Copyright (C) 2017-2019 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
 #include "runtime/helpers/task_information.h"
 namespace NEO {
 // Call operator of ResourceCleaner: passes the object's graphics allocation to
 // storageForAllocations as a REUSABLE_ALLOCATION, then deletes the object itself.
 // Both object and storageForAllocations are dereferenced without a null check.
 template <typename ObjectT>
 void KernelOperation::ResourceCleaner::operator()(ObjectT *object) {
    // The allocation is wrapped in a unique_ptr and handed to storeAllocation()
    // before the owning object is deleted.
    storageForAllocations->storeAllocation(std::unique_ptr<GraphicsAllocation>(object->getGraphicsAllocation()),
                                           REUSABLE_ALLOCATION);
    delete object;
 }
 } // namespace NEO
--- a/unit_tests/command_queue/dispatch_walker_tests.cpp
+++ b/unit_tests/command_queue/dispatch_walker_tests.cpp
@@ -83,6 +83,15 @@ struct DispatchWalkerTest : public CommandQueueFixture, public DeviceFixture, pu
        DeviceFixture::TearDown();
    }
    // Test helper: builds the KernelOperation that the blocked-queue tests pass to
    // dispatchWalker(). Creates a new LinearStream, asks the queue's GPGPU command
    // stream receiver to ensure a command buffer allocation for it, and returns a
    // KernelOperation constructed from that stream and the CSR's internal allocation storage.
    std::unique_ptr<KernelOperation> createBlockedCommandsData(CommandQueue &commandQueue) {
        auto commandStream = new LinearStream();
        auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver();
        // NOTE(review): the two size arguments are both 1 here; presumably the CSR rounds
        // the request up to its own minimum allocation size - confirm against
        // ensureCommandBufferAllocation().
        gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1);
        return std::make_unique<KernelOperation>(commandStream, *gpgpuCsr.getInternalAllocationStorage());
    }
    std::unique_ptr<MockProgram> program;
    SKernelBinaryHeaderCommon kernelHeader = {};
@@ -148,7 +157,7 @@ HWTEST_F(DispatchWalkerTest, shouldntChangeCommandStreamMemory) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase());
    EXPECT_LT(commandStreamStart, commandStream.getUsed());
@@ -196,7 +205,7 @@ HWTEST_F(DispatchWalkerTest, noLocalIdsShouldntCrash) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase());
    EXPECT_LT(commandStreamStart, commandStream.getUsed());
@@ -226,7 +235,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm)
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            false);
+            CL_COMMAND_NDRANGE_KERNEL);
        EXPECT_EQ(dimension, *kernel.workDim);
    }
@@ -257,7 +266,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithSquaredLwsAlgorithm)
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            false);
+            CL_COMMAND_NDRANGE_KERNEL);
        EXPECT_EQ(dimension, *kernel.workDim);
    }
 }
@@ -286,7 +295,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithNDLwsAlgorithm) {
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            false);
+            CL_COMMAND_NDRANGE_KERNEL);
        EXPECT_EQ(dimension, *kernel.workDim);
    }
 }
@@ -316,7 +325,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithOldLwsAlgorithm) {
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            false);
+            CL_COMMAND_NDRANGE_KERNEL);
        EXPECT_EQ(dimension, *kernel.workDim);
    }
 }
@@ -346,7 +355,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNumWorkGroups) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(2u, *kernel.numWorkGroupsX);
    EXPECT_EQ(5u, *kernel.numWorkGroupsY);
@@ -378,7 +387,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeND) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(2u, *kernel.localWorkSizeX);
    EXPECT_EQ(5u, *kernel.localWorkSizeY);
    EXPECT_EQ(1u, *kernel.localWorkSizeZ);
@@ -409,7 +418,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeND) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(2u, *kernel.localWorkSizeX);
    EXPECT_EQ(5u, *kernel.localWorkSizeY);
    EXPECT_EQ(10u, *kernel.localWorkSizeZ);
@@ -441,7 +450,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeSquared) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(2u, *kernel.localWorkSizeX);
    EXPECT_EQ(5u, *kernel.localWorkSizeY);
    EXPECT_EQ(1u, *kernel.localWorkSizeZ);
@@ -473,7 +482,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeSquaredAn
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(2u, *kernel.localWorkSizeX);
    EXPECT_EQ(5u, *kernel.localWorkSizeY);
    EXPECT_EQ(1u, *kernel.localWorkSizeZ);
@@ -503,7 +512,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSize) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(1u, *kernel.localWorkSizeX);
    EXPECT_EQ(2u, *kernel.localWorkSizeY);
    EXPECT_EQ(3u, *kernel.localWorkSizeZ);
@@ -536,7 +545,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizes) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_EQ(1u, *kernel.localWorkSizeX);
    EXPECT_EQ(2u, *kernel.localWorkSizeY);
    EXPECT_EQ(3u, *kernel.localWorkSizeZ);
@@ -573,7 +582,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizeForSplitKernel) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    auto dispatchId = 0;
    for (auto &dispatchInfo : multiDispatchInfo) {
@@ -624,7 +633,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizesForSplitWalker) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    for (auto &dispatchInfo : multiDispatchInfo) {
        auto &kernel = *dispatchInfo.getKernel();
@@ -661,10 +670,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerDoesntConsumeCommandStreamWhenQueueIs
    size_t workGroupSize[3] = {2, 5, 10};
    cl_uint dimensions = 1;
-    //block the queue
+    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
    auto blockQueue = true;
    KernelOperation *blockedCommandsData = nullptr;
    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
    MultiDispatchInfo multiDispatchInfo;
@@ -673,13 +679,13 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerDoesntConsumeCommandStreamWhenQueueIs
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
-        &blockedCommandsData,
+        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        blockQueue);
+        CL_COMMAND_NDRANGE_KERNEL);
    auto &commandStream = pCmdQ->getCS(1024);
    EXPECT_EQ(0u, commandStream.getUsed());
@@ -688,8 +694,6 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerDoesntConsumeCommandStreamWhenQueueIs
    EXPECT_NE(nullptr, blockedCommandsData->dsh);
    EXPECT_NE(nullptr, blockedCommandsData->ioh);
    EXPECT_NE(nullptr, blockedCommandsData->ssh);
    delete blockedCommandsData;
 }
 HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelWhenQueueIsBlocked) {
@@ -701,41 +705,53 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW
    size_t workGroupSize[3] = {2, 5, 10};
    cl_uint dimensions = 1;
-    //block the queue
+    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
    auto blockQueue = true;
    KernelOperation *blockedCommandsData = nullptr;
    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
-    MultiDispatchInfo multiDispatchInfo;
+    MultiDispatchInfo multiDispatchInfo(&kernel);
    multiDispatchInfo.push(dispatchInfo);
    HardwareInterface<FamilyType>::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
-        &blockedCommandsData,
+        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        blockQueue);
+        CL_COMMAND_NDRANGE_KERNEL);
    Vec3<size_t> localWorkgroupSize(workGroupSize);
    auto expectedSizeCSAllocation = MemoryConstants::pageSize64k;
    auto expectedSizeCS = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize;
    auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(kernel);
    auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(kernel, Math::computeTotalElementsCount(localWorkgroupSize));
    auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(kernel);
    EXPECT_EQ(expectedSizeCSAllocation, blockedCommandsData->commandStream->getGraphicsAllocation()->getUnderlyingBufferSize());
    EXPECT_EQ(expectedSizeCS, blockedCommandsData->commandStream->getMaxAvailableSpace());
    EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
    EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
    EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
 }
-    delete blockedCommandsData;
+HWTEST_F(DispatchWalkerTest, givenBlockedEnqueueWhenObtainingCommandStreamThenAllocateEnoughSpaceAndBlockedKernelData) {
    // Calls obtainCommandStream() with a non-empty MultiDispatchInfo and the
    // blockedQueue argument set to true, then checks the returned stream and the
    // KernelOperation written to blockedKernelData.
    DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo;
    multiDispatchInfo.push(dispatchInfo);
    std::unique_ptr<KernelOperation> blockedKernelData;
    MockCommandQueueHw<FamilyType> mockCmdQ(nullptr, pDevice, nullptr);
    // Expected: a 64k-page allocation, of which csOverfetchSize is not available to the stream.
    auto expectedSizeCSAllocation = MemoryConstants::pageSize64k;
    auto expectedSizeCS = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize;
    CsrDependencies csrDependencies;
    // Boolean arguments, in order: profilingRequired, perfCountersRequired, blitEnqueue, blockedQueue.
    auto cmdStream = mockCmdQ.template obtainCommandStream<CL_COMMAND_NDRANGE_KERNEL>(csrDependencies, false, false, false, true,
                                                                                      multiDispatchInfo, blockedKernelData, nullptr, 0u);
    EXPECT_EQ(expectedSizeCS, cmdStream->getMaxAvailableSpace());
    EXPECT_EQ(expectedSizeCSAllocation, cmdStream->getGraphicsAllocation()->getUnderlyingBufferSize());
    // The KernelOperation must have been created and must hold the returned stream.
    EXPECT_NE(nullptr, blockedKernelData);
    EXPECT_EQ(cmdStream, blockedKernelData->commandStream.get());
 }
 HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromMdiWhenQueueIsBlocked) {
@@ -744,36 +760,27 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromMdiWhen
    MockMultiDispatchInfo multiDispatchInfo(&kernel);
-    //block the queue
+    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
    auto blockQueue = true;
    KernelOperation *blockedCommandsData = nullptr;
    HardwareInterface<FamilyType>::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
-        &blockedCommandsData,
+        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        blockQueue);
+        CL_COMMAND_NDRANGE_KERNEL);
    auto expectedSizeCSAllocation = MemoryConstants::pageSize64k;
    auto expectedSizeCS = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize;
    auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
    auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
    auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
    EXPECT_EQ(expectedSizeCSAllocation, blockedCommandsData->commandStream->getGraphicsAllocation()->getUnderlyingBufferSize());
    EXPECT_EQ(expectedSizeCS, blockedCommandsData->commandStream->getMaxAvailableSpace());
    EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
    EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());
    EXPECT_LE(expectedSizeSSH, blockedCommandsData->ssh->getMaxAvailableSpace());
    delete blockedCommandsData;
 }
 HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenCommandStreamHasGpuAddress) {
@@ -781,24 +788,21 @@ HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenComm
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());
    MockMultiDispatchInfo multiDispatchInfo(&kernel);
-    const auto blockQueue = true;
+    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
    KernelOperation *blockedCommandsData = nullptr;
    HardwareInterface<FamilyType>::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
-        &blockedCommandsData,
+        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        blockQueue);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_NE(nullptr, blockedCommandsData->commandStream->getGraphicsAllocation());
    EXPECT_NE(0ull, blockedCommandsData->commandStream->getGraphicsAllocation()->getGpuAddress());
    delete blockedCommandsData;
 }
 HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerIsCalledThenCommandStreamObtainsReusableAllocation) {
@@ -812,24 +816,21 @@ HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerI
    csr.getInternalAllocationStorage()->storeAllocation(std::unique_ptr<GraphicsAllocation>{allocation}, REUSABLE_ALLOCATION);
    ASSERT_FALSE(csr.getInternalAllocationStorage()->getAllocationsForReuse().peekIsEmpty());
-    const auto blockQueue = true;
+    auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
    KernelOperation *blockedCommandsData = nullptr;
    HardwareInterface<FamilyType>::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
-        &blockedCommandsData,
+        blockedCommandsData.get(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        blockQueue);
+        CL_COMMAND_NDRANGE_KERNEL);
    EXPECT_TRUE(csr.getInternalAllocationStorage()->getAllocationsForReuse().peekIsEmpty());
    EXPECT_EQ(allocation, blockedCommandsData->commandStream->getGraphicsAllocation());
    delete blockedCommandsData;
 }
 HWTEST_F(DispatchWalkerTest, dispatchWalkerWithMultipleDispatchInfo) {
@@ -850,7 +851,7 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerWithMultipleDispatchInfo) {
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    for (auto &dispatchInfo : multiDispatchInfo) {
        auto &kernel = *dispatchInfo.getKernel();
@@ -892,7 +893,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    auto dshAfterMultiDisptach = indirectHeap.getUsed();
@@ -977,7 +978,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
@@ -1023,7 +1024,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
@@ -1074,7 +1075,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
@@ -1127,7 +1128,7 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationReq
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareParse hwParse;
    hwParse.parseCommands<FamilyType>(cmdStream);
@@ -1166,7 +1167,7 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoK
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareParse hwParse;
    hwParse.parseCommands<FamilyType>(cmdStream);
@@ -1206,7 +1207,7 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQ
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareInterface<FamilyType>::dispatchWalker(
        *pCmdQ,
@@ -1218,7 +1219,7 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQ
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareParse hwParse;
    hwParse.parseCommands<FamilyType>(cmdStream);
@@ -1241,7 +1242,8 @@ HWTEST_F(DispatchWalkerTest, givenMultiDispatchWhenWhitelistedRegisterForCoheren
    DispatchInfo di2(&kernel, 1, Vec3<size_t>(1, 1, 1), Vec3<size_t>(1, 1, 1), Vec3<size_t>(0, 0, 0));
    MockMultiDispatchInfo multiDispatchInfo(std::vector<DispatchInfo *>({&di1, &di2}));
-    HardwareInterface<FamilyType>::dispatchWalker(*pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr, pDevice->getPreemptionMode(), false);
+    HardwareInterface<FamilyType>::dispatchWalker(*pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr,
                                                  pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL);
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
@@ -1305,7 +1307,7 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredTh
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    auto sizeUsed = cmdStream.getUsed();
    GenCmdList cmdList;
@@ -1362,7 +1364,7 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredTh
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    auto sizeUsed = cmdStream.getUsed();
    GenCmdList cmdList;
--- a/unit_tests/command_stream/command_stream_receiver_flush_task_3_tests.cpp
+++ b/unit_tests/command_stream/command_stream_receiver_flush_task_3_tests.cpp
@@ -1449,10 +1449,9 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenItIsUnblocke
    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
-    using UniqueIH = std::unique_ptr<IndirectHeap>;
-    auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
+    auto blockedCommandsData = new KernelOperation(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-                                                   UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+    blockedCommandsData->setHeaps(dsh, ioh, ssh);
    std::vector<Surface *> surfaces;
    event->setCommand(std::make_unique<CommandComputeKernel>(*pCmdQ, std::unique_ptr<KernelOperation>(blockedCommandsData), surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel.get(), 1));
--- a/unit_tests/event/event_builder_tests.cpp
+++ b/unit_tests/event/event_builder_tests.cpp
@@ -69,7 +69,6 @@ TEST(EventBuilder, whenCreatingNewEventForwardsArgumentsToEventConstructor) {
 }
 TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) {
-    using UniqueIH = std::unique_ptr<IndirectHeap>;
    class MockCommandComputeKernel : public CommandComputeKernel {
      public:
        using CommandComputeKernel::eventsWaitlist;
@@ -88,8 +87,8 @@ TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) {
    auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({1, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
    std::vector<Surface *> surfaces;
-    auto kernelOperation = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(ih1), UniqueIH(ih2), UniqueIH(ih3),
+    auto kernelOperation = new KernelOperation(cmdStream, *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
-                                               *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
+    kernelOperation->setHeaps(ih1, ih2, ih3);
    std::unique_ptr<MockCommandComputeKernel> command = std::make_unique<MockCommandComputeKernel>(cmdQ, kernelOperation, surfaces, kernel);
@@ -119,7 +118,6 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA
        using VirtualEvent::submittedCmd;
    };
-    using UniqueIH = std::unique_ptr<IndirectHeap>;
    class MockCommandComputeKernel : public CommandComputeKernel {
      public:
        using CommandComputeKernel::eventsWaitlist;
@@ -138,8 +136,8 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA
    auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
    std::vector<Surface *> surfaces;
-    auto kernelOperation = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(ih1), UniqueIH(ih2), UniqueIH(ih3),
+    auto kernelOperation = new KernelOperation(cmdStream, *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
-                                               *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
+    kernelOperation->setHeaps(ih1, ih2, ih3);
    std::unique_ptr<MockCommandComputeKernel> command = std::make_unique<MockCommandComputeKernel>(cmdQ, kernelOperation, surfaces, kernel);
--- a/unit_tests/event/event_tests.cpp
+++ b/unit_tests/event/event_tests.cpp
@@ -465,10 +465,9 @@ TEST_F(InternalsEventTest, processBlockedCommandsKernelOperation) {
    cmdQ.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
    cmdQ.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
    cmdQ.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
-    using UniqueIH = std::unique_ptr<IndirectHeap>;
+
-    auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
+    auto blockedCommandsData = new KernelOperation(cmdStream, *cmdQ.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-                                                   UniqueIH(ioh), UniqueIH(ssh),
+    blockedCommandsData->setHeaps(dsh, ioh, ssh);
-                                                   *cmdQ.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    MockKernelWithInternals mockKernelWithInternals(*pDevice);
    auto pKernel = mockKernelWithInternals.mockKernel;
@@ -518,10 +517,9 @@ TEST_F(InternalsEventTest, processBlockedCommandsAbortKernelOperation) {
    cmdQ.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
    cmdQ.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
    cmdQ.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
-    using UniqueIH = std::unique_ptr<IndirectHeap>;
+
-    auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
+    auto blockedCommandsData = new KernelOperation(cmdStream, *cmdQ.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-                                                   UniqueIH(ioh), UniqueIH(ssh),
+    blockedCommandsData->setHeaps(dsh, ioh, ssh);
-                                                   *cmdQ.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    MockKernelWithInternals mockKernelWithInternals(*pDevice);
    auto pKernel = mockKernelWithInternals.mockKernel;
@@ -553,10 +551,9 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
    cmdQ.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
    cmdQ.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
    cmdQ.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
-    using UniqueIH = std::unique_ptr<IndirectHeap>;
+
-    auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
+    auto blockedCommandsData = new KernelOperation(cmdStream, *cmdQ.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-                                                   UniqueIH(ioh), UniqueIH(ssh),
+    blockedCommandsData->setHeaps(dsh, ioh, ssh);
-                                                   *cmdQ.getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    SPatchAllocateStatelessPrintfSurface *pPrintfSurface = new SPatchAllocateStatelessPrintfSurface();
    pPrintfSurface->DataParamOffset = 0;
@@ -912,7 +909,6 @@ TEST(Event, GivenNoContextOnDeletionDeletesSelf) {
 }
 HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCSROccurs) {
-    using UniqueIH = std::unique_ptr<IndirectHeap>;
    class MockCommandComputeKernel : public CommandComputeKernel {
      public:
        using CommandComputeKernel::eventsWaitlist;
@@ -936,8 +932,8 @@ HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCSROccurs) {
    auto cmdStream = new LinearStream(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties({4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
    std::vector<Surface *> surfaces;
-    auto kernelOperation = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(ih1), UniqueIH(ih2), UniqueIH(ih3),
+    auto kernelOperation = new KernelOperation(cmdStream, *pDevice->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
-                                               *pDevice->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
+    kernelOperation->setHeaps(ih1, ih2, ih3);
    std::unique_ptr<MockCommandComputeKernel> command = std::make_unique<MockCommandComputeKernel>(*pCmdQ, kernelOperation, surfaces, kernel);
@@ -1490,9 +1486,8 @@ HWTEST_F(InternalsEventTest, givenAbortedCommandWhenSubmitCalledThenDontUpdateFl
    pCmdQ->allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 4096u, dsh);
    pCmdQ->allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 4096u, ioh);
    pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);
-    using UniqueIH = std::unique_ptr<IndirectHeap>;
+    auto blockedCommandsData = new KernelOperation(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
-    auto blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(dsh),
+    blockedCommandsData->setHeaps(dsh, ioh, ssh);
-                                                   UniqueIH(ioh), UniqueIH(ssh), *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    PreemptionMode preemptionMode = pDevice->getPreemptionMode();
    std::vector<Surface *> v;
    auto cmd = new CommandComputeKernel(*pCmdQ, std::unique_ptr<KernelOperation>(blockedCommandsData), v, false, false, false, nullptr, preemptionMode, pKernel, 1);
--- a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp
+++ b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp
@@ -48,13 +48,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
-            &blockedCommandsData,
+            blockedCommandsData,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            false);
+            CL_COMMAND_NDRANGE_KERNEL);
        size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
        EXPECT_EQ(0u, dshUsedAfter);
@@ -104,13 +104,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
-            &blockedCommandsData,
+            blockedCommandsData,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            false);
+            CL_COMMAND_NDRANGE_KERNEL);
        auto iohUsed = ioh.getUsed();
        EXPECT_EQ(0u, iohUsed);
@@ -130,13 +130,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
-            &blockedCommandsData,
+            blockedCommandsData,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            false);
+            CL_COMMAND_NDRANGE_KERNEL);
        auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);
@@ -154,7 +154,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
    if (std::string(pPlatform->getDevice(0)->getDeviceInfo().clVersion).find("OpenCL 2.") != std::string::npos) {
-        KernelOperation *blockedCommandsData = nullptr;
+        auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};
@@ -166,13 +166,13 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
-            &blockedCommandsData,
+            blockedCommandsData.get(),
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            true);
+            CL_COMMAND_NDRANGE_KERNEL);
        ASSERT_NE(nullptr, blockedCommandsData);
        size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper<FamilyType>::getDefaultSshUsage();
@@ -191,8 +191,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
        EXPECT_GE(minRequiredSize, sshUsed);
        // Total SSH size including EM must be greater then ssh allocated
        EXPECT_GT(minRequiredSize + minRequiredSizeForEM, sshUsed);
-        delete blockedCommandsData;
    }
 }
@@ -251,6 +249,16 @@ class MockParentKernelDispatch : public ExecutionModelSchedulerTest,
    void TearDown() override {
        ExecutionModelSchedulerTest::TearDown();
    }
    // Test helper: builds the KernelOperation handed to dispatchWalker when a test
    // exercises the blocked-queue path. The operation is created around a fresh
    // LinearStream whose command buffer is obtained from the queue's GPGPU CSR, and
    // it is bound to that CSR's internal allocation storage.
    //
    // commandQueue - queue whose GPGPU command stream receiver supplies both the
    //                command buffer allocation and the internal allocation storage.
    // Returns the owning pointer to the new KernelOperation (heaps are not set here).
    std::unique_ptr<KernelOperation> createBlockedCommandsData(CommandQueue &commandQueue) {
        auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver();

        // Hold the stream in a unique_ptr while the CSR call runs, so it is not
        // leaked if that call fails before the stream is handed over.
        auto commandStream = std::make_unique<LinearStream>();

        // NOTE(review): the (1, 1) arguments look like minimal size requests that
        // merely force an allocation to be attached - confirm against the CSR API.
        gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1);

        // KernelOperation's constructor takes the stream as a raw pointer
        // (presumably assuming ownership - confirm), so release it at the hand-over.
        return std::make_unique<KernelOperation>(commandStream.release(), *gpgpuCsr.getInternalAllocationStorage());
    }
    DebugManagerStateRestore dbgRestore;
 };
@@ -259,7 +267,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenBlockedQueueWhenParen
    if (pDevice->getSupportedClVersion() >= 20) {
        MockParentKernel *mockParentKernel = MockParentKernel::create(*context);
-        KernelOperation *blockedCommandsData = nullptr;
+        auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};
@@ -270,18 +278,17 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenBlockedQueueWhenParen
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
-            &blockedCommandsData,
+            blockedCommandsData.get(),
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            true);
+            CL_COMMAND_NDRANGE_KERNEL);
        ASSERT_NE(nullptr, blockedCommandsData);
        EXPECT_EQ(blockedCommandsData->dsh.get(), blockedCommandsData->ioh.get());
-        delete blockedCommandsData;
        delete mockParentKernel;
    }
 }
@@ -304,13 +311,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispa
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
-            &blockedCommandsData,
+            blockedCommandsData,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            false);
+            CL_COMMAND_NDRANGE_KERNEL);
        LinearStream *commandStream = &pCmdQ->getCS(0);
@@ -362,13 +369,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenUsedSSHHeapWhenParent
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
-            &blockedCommandsData,
+            blockedCommandsData,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            false);
+            CL_COMMAND_NDRANGE_KERNEL);
        EXPECT_EQ(UnitTestHelper<FamilyType>::getDefaultSshUsage(), ssh.getUsed());
@@ -398,13 +405,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenNotUsedSSHHeapWhenPar
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
-            &blockedCommandsData,
+            blockedCommandsData,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            pDevice->getPreemptionMode(),
-            false);
+            CL_COMMAND_NDRANGE_KERNEL);
        EXPECT_EQ(bufferMemory, ssh.getCpuBase());
--- a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
+++ b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
@@ -97,11 +97,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritca
        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream(cmdStreamAllocation)),
+        KernelOperation *blockedCommandData = new KernelOperation(new LinearStream(cmdStreamAllocation),
-                                                                  std::unique_ptr<IndirectHeap>(dsh),
-                                                                  std::unique_ptr<IndirectHeap>(ioh),
-                                                                  std::unique_ptr<IndirectHeap>(ssh),
                                                                  *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+        blockedCommandData->setHeaps(dsh, ioh, ssh);
        blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
        PreemptionMode preemptionMode = device->getPreemptionMode();
@@ -157,11 +155,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
        EXPECT_EQ(colorCalcSizeDevQueue, usedDSHBeforeSubmit);
        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream(cmdStreamAllocation)),
+        KernelOperation *blockedCommandData = new KernelOperation(new LinearStream(cmdStreamAllocation),
-                                                                  std::unique_ptr<IndirectHeap>(dsh),
-                                                                  std::unique_ptr<IndirectHeap>(ioh),
-                                                                  std::unique_ptr<IndirectHeap>(ssh),
                                                                  *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+        blockedCommandData->setHeaps(dsh, ioh, ssh);
        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
@@ -200,11 +196,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
        dsh->getSpace(mockDevQueue.getDshOffset());
        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream(cmdStreamAllocation)),
+        KernelOperation *blockedCommandData = new KernelOperation(new LinearStream(cmdStreamAllocation),
-                                                                  std::unique_ptr<IndirectHeap>(dsh),
-                                                                  std::unique_ptr<IndirectHeap>(ioh),
-                                                                  std::unique_ptr<IndirectHeap>(ssh),
                                                                  *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+        blockedCommandData->setHeaps(dsh, ioh, ssh);
        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
@@ -240,11 +234,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentK
        dsh->getSpace(mockDevQueue.getDshOffset());
        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream(cmdStreamAllocation)),
+        KernelOperation *blockedCommandData = new KernelOperation(new LinearStream(cmdStreamAllocation),
-                                                                  std::unique_ptr<IndirectHeap>(dsh),
-                                                                  std::unique_ptr<IndirectHeap>(ioh),
-                                                                  std::unique_ptr<IndirectHeap>(ssh),
                                                                  *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+        blockedCommandData->setHeaps(dsh, ioh, ssh);
        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
@@ -283,11 +275,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
        dsh->getSpace(mockDevQueue.getDshOffset());
        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream(cmdStreamAllocation)),
+        KernelOperation *blockedCommandData = new KernelOperation(new LinearStream(cmdStreamAllocation),
-                                                                  std::unique_ptr<IndirectHeap>(dsh),
-                                                                  std::unique_ptr<IndirectHeap>(ioh),
-                                                                  std::unique_ptr<IndirectHeap>(ssh),
                                                                  *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+        blockedCommandData->setHeaps(dsh, ioh, ssh);
        size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
@@ -338,11 +328,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQue
        queueIoh.getSpace(usedSize);
        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream(cmdStreamAllocation)),
+        KernelOperation *blockedCommandData = new KernelOperation(new LinearStream(cmdStreamAllocation),
-                                                                  std::unique_ptr<IndirectHeap>(dsh),
-                                                                  std::unique_ptr<IndirectHeap>(ioh),
-                                                                  std::unique_ptr<IndirectHeap>(ssh),
                                                                  *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+        blockedCommandData->setHeaps(dsh, ioh, ssh);
        blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
        PreemptionMode preemptionMode = device->getPreemptionMode();
@@ -388,11 +376,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhen
        void *sshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase();
        auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
-        KernelOperation *blockedCommandData = new KernelOperation(std::unique_ptr<LinearStream>(new LinearStream(cmdStreamAllocation)),
+        KernelOperation *blockedCommandData = new KernelOperation(new LinearStream(cmdStreamAllocation),
-                                                                  std::unique_ptr<IndirectHeap>(dsh),
-                                                                  std::unique_ptr<IndirectHeap>(ioh),
-                                                                  std::unique_ptr<IndirectHeap>(ssh),
                                                                  *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
+        blockedCommandData->setHeaps(dsh, ioh, ssh);
        blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
        PreemptionMode preemptionMode = device->getPreemptionMode();
@@ -419,7 +405,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand
        parentKernel->createReflectionSurface();
        context->setDefaultDeviceQueue(&mockDevQueue);
-        KernelOperation *blockedCommandsData = nullptr;
+        auto blockedCommandsData = createBlockedCommandsData(*pCmdQ);
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};
@@ -430,13 +416,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand
            *pCmdQ,
            multiDispatchInfo,
            CsrDependencies(),
-            &blockedCommandsData,
+            blockedCommandsData.get(),
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            device->getPreemptionMode(),
-            true);
+            CL_COMMAND_NDRANGE_KERNEL);
        EXPECT_NE(nullptr, blockedCommandsData);
        EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize());
@@ -446,7 +432,5 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand
        EXPECT_NE(nullptr, blockedCommandsData->ioh->getGraphicsAllocation());
        EXPECT_NE(nullptr, blockedCommandsData->ssh->getGraphicsAllocation());
        EXPECT_EQ(blockedCommandsData->dsh->getGraphicsAllocation(), blockedCommandsData->ioh->getGraphicsAllocation());
-        delete blockedCommandsData;
    }
 }
--- a/unit_tests/fixtures/execution_model_fixture.h
+++ b/unit_tests/fixtures/execution_model_fixture.h
@@ -64,6 +64,16 @@ class ExecutionModelKernelTest : public ExecutionModelKernelFixture,
        CommandQueueHwFixture::TearDown();
        ExecutionModelKernelFixture::TearDown();
    }
    // Test helper: builds the KernelOperation used by tests that dispatch on a
    // blocked queue. The operation is created around a fresh LinearStream whose
    // command buffer is obtained from the queue's GPGPU CSR, and it is bound to
    // that CSR's internal allocation storage.
    //
    // commandQueue - queue whose GPGPU command stream receiver supplies both the
    //                command buffer allocation and the internal allocation storage.
    // Returns the owning pointer to the new KernelOperation (heaps are not set here).
    std::unique_ptr<KernelOperation> createBlockedCommandsData(CommandQueue &commandQueue) {
        auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver();

        // Hold the stream in a unique_ptr while the CSR call runs, so it is not
        // leaked if that call fails before the stream is handed over.
        auto commandStream = std::make_unique<LinearStream>();

        // NOTE(review): the (1, 1) arguments look like minimal size requests that
        // merely force an allocation to be attached - confirm against the CSR API.
        gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1);

        // KernelOperation's constructor takes the stream as a raw pointer
        // (presumably assuming ownership - confirm), so release it at the hand-over.
        return std::make_unique<KernelOperation>(commandStream.release(), *gpgpuCsr.getInternalAllocationStorage());
    }
    DebugManagerStateRestore dbgRestore;
 };
@@ -104,5 +114,15 @@ struct ParentKernelCommandQueueFixture : public CommandQueueHwFixture,
        CommandQueueHwFixture::TearDown();
        delete device;
    }
    // Test helper: builds the KernelOperation used by parent-kernel tests that
    // dispatch on a blocked queue. The operation is created around a fresh
    // LinearStream whose command buffer is obtained from the queue's GPGPU CSR,
    // and it is bound to that CSR's internal allocation storage.
    //
    // commandQueue - queue whose GPGPU command stream receiver supplies both the
    //                command buffer allocation and the internal allocation storage.
    // Returns the owning pointer to the new KernelOperation (heaps are not set here).
    std::unique_ptr<KernelOperation> createBlockedCommandsData(CommandQueue &commandQueue) {
        auto &gpgpuCsr = commandQueue.getGpgpuCommandStreamReceiver();

        // Hold the stream in a unique_ptr while the CSR call runs, so it is not
        // leaked if that call fails before the stream is handed over.
        auto commandStream = std::make_unique<LinearStream>();

        // NOTE(review): the (1, 1) arguments look like minimal size requests that
        // merely force an allocation to be attached - confirm against the CSR API.
        gpgpuCsr.ensureCommandBufferAllocation(*commandStream, 1, 1);

        // KernelOperation's constructor takes the stream as a raw pointer
        // (presumably assuming ownership - confirm), so release it at the hand-over.
        return std::make_unique<KernelOperation>(commandStream.release(), *gpgpuCsr.getInternalAllocationStorage());
    }
    MockDevice *device;
 };
--- a/unit_tests/helpers/task_information_tests.cpp
+++ b/unit_tests/helpers/task_information_tests.cpp
@@ -88,7 +88,6 @@ TEST(CommandTest, markerSubmitWithTerminateFlagAbortsFlush) {
 }
 TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeLocalCopyOfWaitlist) {
-    using UniqueIH = std::unique_ptr<IndirectHeap>;
    class MockCommandComputeKernel : public CommandComputeKernel {
      public:
        using CommandComputeKernel::eventsWaitlist;
@@ -107,8 +106,8 @@ TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeL
    auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({1, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
    std::vector<Surface *> surfaces;
-    auto kernelOperation = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(ih1), UniqueIH(ih2), UniqueIH(ih3),
+    auto kernelOperation = new KernelOperation(cmdStream, *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
-                                               *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
+    kernelOperation->setHeaps(ih1, ih2, ih3);
    UserEvent event1, event2, event3;
    cl_event waitlist[] = {&event1, &event2};
@@ -128,7 +127,6 @@ TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeL
 }
 TEST(KernelOperationDestruction, givenKernelOperationWhenItIsDestructedThenAllAllocationsAreStoredInInternalStorageForReuse) {
-    using UniqueIH = std::unique_ptr<IndirectHeap>;
    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(platformDevices[0]));
    CommandQueue cmdQ(nullptr, device.get(), nullptr);
    InternalAllocationStorage &allocationStorage = *device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage();
@@ -138,14 +136,15 @@ TEST(KernelOperationDestruction, givenKernelOperationWhenItIsDestructedThenAllAl
    cmdQ.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, 1, ih1);
    cmdQ.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, 1, ih2);
    cmdQ.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 1, ih3);
-    auto cmdStream = std::make_unique<LinearStream>(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({1, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
+    auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({1, GraphicsAllocation::AllocationType::COMMAND_BUFFER}));
    auto &heapAllocation1 = *ih1->getGraphicsAllocation();
    auto &heapAllocation2 = *ih2->getGraphicsAllocation();
    auto &heapAllocation3 = *ih3->getGraphicsAllocation();
    auto &cmdStreamAllocation = *cmdStream->getGraphicsAllocation();
-    auto kernelOperation = std::make_unique<KernelOperation>(std::move(cmdStream), UniqueIH(ih1), UniqueIH(ih2), UniqueIH(ih3), allocationStorage);
+    auto kernelOperation = std::make_unique<KernelOperation>(cmdStream, allocationStorage);
+    kernelOperation->setHeaps(ih1, ih2, ih3);
    EXPECT_TRUE(allocationsForReuse.peekIsEmpty());
    kernelOperation.reset();
--- a/unit_tests/helpers/timestamp_packet_tests.cpp
+++ b/unit_tests/helpers/timestamp_packet_tests.cpp
@@ -390,7 +390,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispat
        nullptr,
        &timestampPacket,
        device->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
@@ -435,7 +435,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketDisabledWh
        nullptr,
        &timestampPacket,
        device->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
@@ -885,7 +885,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh
        nullptr,
        &timestamp7,
        device->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
@@ -969,7 +969,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
        nullptr,
        &timestamp7,
        device->getPreemptionMode(),
-        false);
+        CL_COMMAND_NDRANGE_KERNEL);
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(cmdStream, 0);
--- a/unit_tests/mocks/mock_command_queue.h
+++ b/unit_tests/mocks/mock_command_queue.h
@@ -77,6 +77,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
    using BaseClass::commandStream;
    using BaseClass::gpgpuEngine;
    using BaseClass::multiEngineQueue;
+    using BaseClass::obtainCommandStream;
    using BaseClass::obtainNewTimestampPacketNodes;
    using BaseClass::requiresCacheFlushAfterWalker;
    using BaseClass::timestampPacketContainer;