From c7a49666d5776d5f925aa943fdb5725c3243603c Mon Sep 17 00:00:00 2001
From: "Dunajski, Bartosz" <bartosz.dunajski@intel.com>
Date: Thu, 16 Aug 2018 15:47:25 +0200
Subject: [PATCH] Refactor querying Main and Parent Kernel from
 MultiDispatchInfo

Change-Id: I723d91f2f445bc7af1bcb0de46f8ac07837f3449
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
---
 runtime/command_queue/enqueue_common.h        |  36 ++-
 runtime/command_queue/gpgpu_walker.h          |  23 +-
 runtime/command_queue/gpgpu_walker.inl        |  33 +--
 runtime/helpers/dispatch_info.cpp             |  11 +
 runtime/helpers/dispatch_info.h               |  12 +-
 .../command_queue/dispatch_walker_tests.cpp   | 139 ++++++------
 .../aub_command_stream_receiver_tests.cpp     |  12 +-
 .../command_stream/aub_subcapture_tests.cpp   | 107 +++++----
 ...nd_stream_receiver_with_aub_dump_tests.cpp |   4 +-
 .../parent_kernel_dispatch_tests.cpp          | 210 +++++++++---------
 .../submit_blocked_parent_kernel_tests.cpp    |  26 +--
 unit_tests/helpers/dispatch_info_tests.cpp    |  47 +++-
 unit_tests/mocks/mock_kernel.h                |   1 +
 unit_tests/mocks/mock_mdi.h                   |   4 +-
 unit_tests/profiling/profiling_tests.cpp      |   6 +-
 15 files changed, 353 insertions(+), 318 deletions(-)
diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h
index 23fddc6df1..365b84956c 100644
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@@ -79,7 +79,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
         enqueueHandler<commandType>(surfaces, blocking, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event);
     } else {
         BuiltInOwnershipWrapper builtInLock;
-        MultiDispatchInfo multiDispatchInfo;
+        MultiDispatchInfo multiDispatchInfo(kernel);
 
         if (DebugManager.flags.ForceDispatchScheduler.get()) {
             forceDispatchScheduler(multiDispatchInfo);
@@ -158,8 +158,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
         return;
     }
 
-    bool executionModelKernel = multiDispatchInfo.empty() ? false : multiDispatchInfo.begin()->getKernel()->isParentKernel;
-    Kernel *parentKernel = executionModelKernel ? multiDispatchInfo.begin()->getKernel() : nullptr;
+    Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
     auto devQueue = this->getContext().getDefaultDeviceQueue();
     DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
 
@@ -205,7 +204,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
 
     DBG_LOG(EventsDebugEnable, "blockQueue", blockQueue, "virtualEvent", virtualEvent, "taskLevel", taskLevel);
 
-    if (executionModelKernel && !blockQueue) {
+    if (parentKernel && !blockQueue) {
         while (!devQueueHw->isEMCriticalSectionFree())
             ;
     }
@@ -230,8 +229,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
         }
 
         if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
-            if (multiDispatchInfo.begin()->getKernel()->getProgram()->isKernelDebugEnabled()) {
-                setupDebugSurface(multiDispatchInfo.begin()->getKernel());
+            if (multiDispatchInfo.peekMainKernel()->getProgram()->isKernelDebugEnabled()) {
+                setupDebugSurface(multiDispatchInfo.peekMainKernel());
             }
         }
 
@@ -245,7 +244,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
             }
         }
 
-        if (executionModelKernel) {
+        if (parentKernel) {
             parentKernel->createReflectionSurface();
             parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
             parentKernel->patchEventPool(context->getDefaultDeviceQueue());
@@ -283,13 +282,13 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
 
     CompletionStamp completionStamp;
     if (!blockQueue) {
-        if (executionModelKernel) {
-            size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
+        if (parentKernel) {
+            size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*parentKernel));
 
             uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
             devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
                                                     *devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
-                                                    multiDispatchInfo.begin()->getKernel(),
+                                                    parentKernel,
                                                     (uint32_t)multiDispatchInfo.size(),
                                                     taskCount,
                                                     hwTimeStamps);
@@ -302,7 +301,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
                               devQueueHw->getEventPoolBuffer(),
                               devQueueHw->getSlbBuffer(),
                               devQueueHw->getDshBuffer(),
-                              multiDispatchInfo.begin()->getKernel()->getKernelReflectionSurface(),
+                              parentKernel->getKernelReflectionSurface(),
                               devQueueHw->getQueueStorageBuffer(),
                               this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
                               devQueueHw->getDebugQueue());
@@ -342,7 +341,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
                 eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
             }
 
-            if (executionModelKernel) {
+            if (parentKernel) {
                 commandStreamReceiver.overrideMediaVFEStateDirty(true);
 
                 if (devQueueHw->getSchedulerReturnInstance() > 0) {
@@ -354,7 +353,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
                                                       devQueueHw->getEventPoolBuffer(),
                                                       devQueueHw->getSlbBuffer(),
                                                       devQueueHw->getDshBuffer(),
-                                                      multiDispatchInfo.begin()->getKernel()->getKernelReflectionSurface(),
+                                                      parentKernel->getKernelReflectionSurface(),
                                                       devQueueHw->getQueueStorageBuffer(),
                                                       this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
                                                       devQueueHw->getDebugQueue());
@@ -398,8 +397,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
     }
 
     if (blockQueue) {
-        if (executionModelKernel) {
-            size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
+        if (parentKernel) {
+            size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*parentKernel));
             blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM;
         }
 
@@ -536,9 +535,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
 
     IndirectHeap *dsh = nullptr;
     IndirectHeap *ioh = nullptr;
-    const bool executionModelKernel = multiDispatchInfo.begin()->getKernel()->isParentKernel;
 
-    if (executionModelKernel) {
+    if (multiDispatchInfo.peekParentKernel()) {
         DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(this->getContext().getDefaultDeviceQueue());
         DEBUG_BREAK_IF(pDevQueue == nullptr);
         dsh = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
@@ -550,7 +548,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
         ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
     }
 
-    commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.begin()->getKernel()->getThreadArbitrationPolicy<GfxFamily>());
+    commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy<GfxFamily>());
 
     DispatchFlags dispatchFlags;
     dispatchFlags.blocking = blocking;
@@ -663,7 +661,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
             commandType == CL_COMMAND_NDRANGE_KERNEL,
             std::move(printfHandler),
             preemptionMode,
-            multiDispatchInfo.begin()->getKernel(),
+            multiDispatchInfo.peekMainKernel(),
             (uint32_t)multiDispatchInfo.size()));
         eventBuilder->getEvent()->setCommand(std::move(cmd));
     }
diff --git a/runtime/command_queue/gpgpu_walker.h b/runtime/command_queue/gpgpu_walker.h
index e4ef8b0434..09bdac2747 100644
--- a/runtime/command_queue/gpgpu_walker.h
+++ b/runtime/command_queue/gpgpu_walker.h
@@ -203,21 +203,6 @@ class GpgpuWalkerHelper {
         bool blockQueue,
         uint32_t commandType = 0);
 
-    static void dispatchWalker(
-        CommandQueue &commandQueue,
-        const Kernel &kernel,
-        cl_uint workDim,
-        const size_t globalOffsets[3],
-        const size_t workItems[3],
-        const size_t *localWorkSizesIn,
-        cl_uint numEventsInWaitList,
-        const cl_event *eventWaitList,
-        KernelOperation **blockedCommandsData,
-        HwTimeStamps *hwTimeStamps,
-        HwPerfCounter *hwPerfCounter,
-        PreemptionMode preemptionMode,
-        bool blockQueue);
-
     static void dispatchScheduler(
         CommandQueue &commandQueue,
         DeviceQueueHw<GfxFamily> &devQueueHw,
@@ -246,11 +231,11 @@ LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfiling
 template <typename GfxFamily, uint32_t eventType>
 LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, const MultiDispatchInfo &multiDispatchInfo) {
     size_t expectedSizeCS = 0;
-    Kernel *parentKernel = multiDispatchInfo.size() > 0 ? multiDispatchInfo.begin()->getKernel() : nullptr;
+    Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
     for (auto &dispatchInfo : multiDispatchInfo) {
         expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, dispatchInfo.getKernel());
     }
-    if (parentKernel && parentKernel->isParentKernel) {
+    if (parentKernel) {
         SchedulerKernel &scheduler = BuiltIns::getInstance().getSchedulerKernel(parentKernel->getContext());
         expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, &scheduler);
     }
@@ -270,9 +255,9 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf
     }
     // clang-format on
 
-    if (multiDispatchInfo.begin()->getKernel()->isParentKernel) {
+    if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) {
         if (heapType == IndirectHeap::SURFACE_STATE) {
-            expectedSize += KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<heapType>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
+            expectedSize += KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<heapType>(const_cast<const Kernel &>(*parentKernel));
         } else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT)
         {
             DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(commandQueue.getContext().getDefaultDeviceQueue());
diff --git a/runtime/command_queue/gpgpu_walker.inl b/runtime/command_queue/gpgpu_walker.inl
index 61ef0403e9..2c1d47584a 100644
--- a/runtime/command_queue/gpgpu_walker.inl
+++ b/runtime/command_queue/gpgpu_walker.inl
@@ -446,7 +446,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
 
     OCLRT::LinearStream *commandStream = nullptr;
     OCLRT::IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
-    bool executionModelKernel = multiDispatchInfo.begin()->getKernel()->isParentKernel;
+    Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
 
     for (auto &dispatchInfo : multiDispatchInfo) {
         // Compute local workgroup sizes
@@ -460,7 +460,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
     if (blockQueue) {
         using KCH = KernelCommandsHelper<GfxFamily>;
         commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), MemoryConstants::pageSize);
-        if (executionModelKernel) {
+        if (parentKernel) {
             uint32_t colorCalcSize = commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize;
 
             commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE,
@@ -470,7 +470,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
             dsh->getSpace(colorCalcSize);
             ioh = dsh;
             commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE,
-                                            KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*(multiDispatchInfo.begin()->getKernel())) +
+                                            KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel) +
                                                 KCH::getTotalSizeRequiredSSH(multiDispatchInfo),
                                             ssh);
         } else {
@@ -482,12 +482,12 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
         using UniqueIH = std::unique_ptr<IndirectHeap>;
         *blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh), UniqueIH(ssh),
                                                    *commandQueue.getDevice().getMemoryManager());
-        if (executionModelKernel) {
+        if (parentKernel) {
             (*blockedCommandsData)->doNotFreeISH = true;
         }
     } else {
         commandStream = &commandQueue.getCS(0);
-        if (executionModelKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) {
+        if (parentKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) {
             commandQueue.releaseIndirectHeap(IndirectHeap::SURFACE_STATE);
         }
         dsh = &getIndirectHeap<GfxFamily, IndirectHeap::DYNAMIC_STATE>(commandQueue, multiDispatchInfo);
@@ -505,7 +505,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
     size_t numDispatches = multiDispatchInfo.size();
     totalInterfaceDescriptorTableSize *= numDispatches;
 
-    if (!executionModelKernel) {
+    if (!parentKernel) {
         dsh->getSpace(totalInterfaceDescriptorTableSize);
     } else {
         dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed());
@@ -656,27 +656,6 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
     }
 }
 
-template <typename GfxFamily>
-void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
-    CommandQueue &commandQueue,
-    const Kernel &kernel,
-    cl_uint workDim,
-    const size_t globalOffsets[3],
-    const size_t workItems[3],
-    const size_t *localWorkSizesIn,
-    cl_uint numEventsInWaitList,
-    const cl_event *eventWaitList,
-    KernelOperation **blockedCommandsData,
-    HwTimeStamps *hwTimeStamps,
-    HwPerfCounter *hwPerfCounter,
-    PreemptionMode preemptionMode,
-    bool blockQueue) {
-
-    DispatchInfo dispatchInfo(const_cast<Kernel *>(&kernel), workDim, workItems, localWorkSizesIn, globalOffsets);
-    GpgpuWalkerHelper<GfxFamily>::dispatchWalker(commandQueue, dispatchInfo, numEventsInWaitList, eventWaitList,
-                                                 blockedCommandsData, hwTimeStamps, hwPerfCounter, preemptionMode, blockQueue);
-}
-
 template <typename GfxFamily>
 void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
     CommandQueue &commandQueue,
diff --git a/runtime/helpers/dispatch_info.cpp b/runtime/helpers/dispatch_info.cpp
index 98411b7315..9e41fc22ab 100644
--- a/runtime/helpers/dispatch_info.cpp
+++ b/runtime/helpers/dispatch_info.cpp
@@ -35,4 +35,15 @@ bool DispatchInfo::usesStatelessPrintfSurface() const {
 uint32_t DispatchInfo::getRequiredScratchSize() const {
     return (kernel == nullptr) ? 0 : kernel->getScratchSize();
 }
+
+Kernel *MultiDispatchInfo::peekMainKernel() const { // Returns the main kernel of this MultiDispatchInfo, or nullptr when it holds no DispatchInfo.
+    if (dispatchInfos.size() == 0) { // Nothing has been pushed yet: report no main kernel, even if mainKernel was supplied at construction.
+        return nullptr;
+    }
+    return mainKernel ? mainKernel : dispatchInfos.begin()->getKernel(); // Prefer the kernel given at construction; otherwise fall back to the first DispatchInfo's kernel.
+}
+
+Kernel *MultiDispatchInfo::peekParentKernel() const { // Returns mainKernel when it is set and flagged isParentKernel, otherwise nullptr.
+    return (mainKernel && mainKernel->isParentKernel) ? mainKernel : nullptr; // NOTE(review): only mainKernel is consulted - unlike peekMainKernel() there is no emptiness check and no fallback to dispatchInfos; confirm every parent-kernel enqueue builds MultiDispatchInfo(kernel).
+}
 } // namespace OCLRT
diff --git a/runtime/helpers/dispatch_info.h b/runtime/helpers/dispatch_info.h
index f60f64d008..2f9491d84b 100644
--- a/runtime/helpers/dispatch_info.h
+++ b/runtime/helpers/dispatch_info.h
@@ -79,18 +79,14 @@ class DispatchInfo {
 };
 
 struct MultiDispatchInfo {
-    MultiDispatchInfo(const DispatchInfo &dispatchInfo) {
-        dispatchInfos.push_back(dispatchInfo);
-    }
-
     ~MultiDispatchInfo() {
         for (MemObj *redescribedSurface : redescribedSurfaces) {
             redescribedSurface->release();
         }
     }
 
-    MultiDispatchInfo() {
-    }
+    explicit MultiDispatchInfo(Kernel *mainKernel) : mainKernel(mainKernel) {} // Records the main kernel; explicit so a Kernel* (or a literal nullptr) never converts silently into an empty MultiDispatchInfo.
+    MultiDispatchInfo() = default; // No main kernel supplied: mainKernel keeps its in-class nullptr initializer.
 
     MultiDispatchInfo &operator=(const MultiDispatchInfo &) = delete;
     MultiDispatchInfo(const MultiDispatchInfo &) = delete;
@@ -149,8 +145,12 @@ struct MultiDispatchInfo {
         redescribedSurfaces.push_back(memObj.release());
     }
 
+    Kernel *peekParentKernel() const; // mainKernel if it is set and its isParentKernel flag is true, otherwise nullptr
+    Kernel *peekMainKernel() const; // nullptr while no DispatchInfo is stored; otherwise mainKernel, or the first DispatchInfo's kernel when mainKernel is unset
+
   protected:
     StackVec<DispatchInfo, 9> dispatchInfos;
     StackVec<MemObj *, 2> redescribedSurfaces;
+    Kernel *mainKernel = nullptr; // kernel handed to the Kernel* constructor; stays nullptr for a default-constructed object
 };
 } // namespace OCLRT
diff --git a/unit_tests/command_queue/dispatch_walker_tests.cpp b/unit_tests/command_queue/dispatch_walker_tests.cpp
index 2ef22b3a3d..a0990e6176 100644
--- a/unit_tests/command_queue/dispatch_walker_tests.cpp
+++ b/unit_tests/command_queue/dispatch_walker_tests.cpp
@@ -140,13 +140,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, shouldntChangeCommandStreamMemor
     size_t globalOffsets[3] = {0, 0, 0};
     size_t workItems[3] = {1, 1, 1};
     cl_uint dimensions = 1;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
         0,
         nullptr,
         nullptr,
@@ -188,13 +187,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, noLocalIdsShouldntCrash) {
     size_t globalOffsets[3] = {0, 0, 0};
     size_t workItems[3] = {1, 1, 1};
     cl_uint dimensions = 1;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
         0,
         nullptr,
         nullptr,
@@ -217,13 +215,13 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm)
     size_t workItems[3] = {1, 1, 1};
     for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
         workItems[dimension - 1] = 256;
+
+        DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimension, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo;
+        multiDispatchInfo.push(dispatchInfo);
         GpgpuWalkerHelper<FamilyType>::dispatchWalker(
             *pCmdQ,
-            kernel,
-            dimension,
-            globalOffsets,
-            workItems,
-            nullptr,
+            multiDispatchInfo,
             0,
             nullptr,
             nullptr,
@@ -231,6 +229,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm)
             nullptr,
             pDevice->getPreemptionMode(),
             false);
+
         EXPECT_EQ(dimension, *kernel.workDim);
     }
 }
@@ -247,13 +246,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithSquaredLwsAlgorithm)
     size_t workItems[3] = {1, 1, 1};
     for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
         workItems[dimension - 1] = 256;
+        DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimension, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo;
+        multiDispatchInfo.push(dispatchInfo);
         GpgpuWalkerHelper<FamilyType>::dispatchWalker(
             *pCmdQ,
-            kernel,
-            dimension,
-            globalOffsets,
-            workItems,
-            nullptr,
+            multiDispatchInfo,
             0,
             nullptr,
             nullptr,
@@ -276,13 +274,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithNDLwsAlgorithm) {
     size_t workItems[3] = {1, 1, 1};
     for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
         workItems[dimension - 1] = 256;
+        DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimension, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo;
+        multiDispatchInfo.push(dispatchInfo);
         GpgpuWalkerHelper<FamilyType>::dispatchWalker(
             *pCmdQ,
-            kernel,
-            dimension,
-            globalOffsets,
-            workItems,
-            nullptr,
+            multiDispatchInfo,
             0,
             nullptr,
             nullptr,
@@ -306,13 +303,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithOldLwsAlgorithm) {
     size_t workItems[3] = {1, 1, 1};
     for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
         workItems[dimension - 1] = 256;
+        DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimension, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo;
+        multiDispatchInfo.push(dispatchInfo);
         GpgpuWalkerHelper<FamilyType>::dispatchWalker(
             *pCmdQ,
-            kernel,
-            dimension,
-            globalOffsets,
-            workItems,
-            nullptr,
+            multiDispatchInfo,
             0,
             nullptr,
             nullptr,
@@ -335,13 +331,13 @@ HWTEST_F(DispatchWalkerTest, dataParameterNumWorkGroups) {
     size_t workItems[3] = {2, 5, 10};
     size_t workGroupSize[3] = {1, 1, 1};
     cl_uint dimensions = 3;
+
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        workGroupSize,
+        multiDispatchInfo,
         0,
         nullptr,
         nullptr,
@@ -349,6 +345,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNumWorkGroups) {
         nullptr,
         pDevice->getPreemptionMode(),
         false);
+
     EXPECT_EQ(2u, *kernel.numWorkGroupsX);
     EXPECT_EQ(5u, *kernel.numWorkGroupsY);
     EXPECT_EQ(10u, *kernel.numWorkGroupsZ);
@@ -366,13 +363,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeND) {
     size_t globalOffsets[3] = {0, 0, 0};
     size_t workItems[3] = {2, 5, 10};
     cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
         0,
         nullptr,
         nullptr,
@@ -397,13 +393,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeND) {
     size_t globalOffsets[3] = {0, 0, 0};
     size_t workItems[3] = {2, 5, 10};
     cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
         0,
         nullptr,
         nullptr,
@@ -429,13 +424,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeSquared) {
     size_t globalOffsets[3] = {0, 0, 0};
     size_t workItems[3] = {2, 5, 10};
     cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
         0,
         nullptr,
         nullptr,
@@ -461,13 +455,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeSquaredAn
     size_t globalOffsets[3] = {0, 0, 0};
     size_t workItems[3] = {2, 5, 10};
     cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
         0,
         nullptr,
         nullptr,
@@ -491,13 +484,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSize) {
     size_t workItems[3] = {2, 5, 10};
     size_t workGroupSize[3] = {1, 2, 3};
     cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        workGroupSize,
+        multiDispatchInfo,
         0,
         nullptr,
         nullptr,
@@ -524,13 +516,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizes) {
     size_t workItems[3] = {2, 5, 10};
     size_t workGroupSize[3] = {1, 2, 3};
     cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        workGroupSize,
+        multiDispatchInfo,
         0,
         nullptr,
         nullptr,
@@ -649,13 +640,12 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerDoesntConsumeCommandStreamWhenQueueIs
 
     KernelOperation *blockedCommandsData = nullptr;
 
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        workGroupSize,
+        multiDispatchInfo,
         0,
         nullptr,
         &blockedCommandsData,
@@ -689,13 +679,12 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW
 
     KernelOperation *blockedCommandsData = nullptr;
 
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     GpgpuWalkerHelper<FamilyType>::dispatchWalker(
         *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        workGroupSize,
+        multiDispatchInfo,
         0,
         nullptr,
         &blockedCommandsData,
diff --git a/unit_tests/command_stream/aub_command_stream_receiver_tests.cpp b/unit_tests/command_stream/aub_command_stream_receiver_tests.cpp
index 3598373271..54810cb0d5 100644
--- a/unit_tests/command_stream/aub_command_stream_receiver_tests.cpp
+++ b/unit_tests/command_stream/aub_command_stream_receiver_tests.cpp
@@ -452,10 +452,12 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptur
     LinearStream cs(aubExecutionEnvironment->commandBuffer);
 
     const DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("");
     aubSubCaptureManagerMock->subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
     aubSubCaptureManagerMock->setSubCaptureToggleActive(true);
-    aubSubCaptureManagerMock->activateSubCapture(dispatchInfo);
+    aubSubCaptureManagerMock->activateSubCapture(multiDispatchInfo);
     aubCsr->subCaptureManager.reset(aubSubCaptureManagerMock);
     ASSERT_TRUE(aubCsr->subCaptureManager->isSubCaptureEnabled());
 
@@ -572,10 +574,12 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandalon
     LinearStream cs(commandBuffer);
 
     const DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("");
     aubSubCaptureManagerMock->subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
     aubSubCaptureManagerMock->setSubCaptureToggleActive(true);
-    aubSubCaptureManagerMock->activateSubCapture(dispatchInfo);
+    aubSubCaptureManagerMock->activateSubCapture(multiDispatchInfo);
     aubCsr->subCaptureManager.reset(aubSubCaptureManagerMock);
     ASSERT_TRUE(aubCsr->subCaptureManager->isSubCaptureEnabled());
 
@@ -913,7 +917,9 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptur
     aubCsr->subCaptureManager.reset(subCaptureManagerMock);
 
     const DispatchInfo dispatchInfo;
-    aubCsr->activateAubSubCapture(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
+    aubCsr->activateAubSubCapture(multiDispatchInfo);
 
     EXPECT_FALSE(aubCsr->subCaptureManager->isSubCaptureEnabled());
 }
diff --git a/unit_tests/command_stream/aub_subcapture_tests.cpp b/unit_tests/command_stream/aub_subcapture_tests.cpp
index fd0b718eb2..041821ef63 100644
--- a/unit_tests/command_stream/aub_subcapture_tests.cpp
+++ b/unit_tests/command_stream/aub_subcapture_tests.cpp
@@ -99,14 +99,15 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenActivateSubCaptureIsCalledWi
     AubSubCaptureManagerMock aubSubCaptureManager("");
 
     DispatchInfo dispatchInfo;
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     uint32_t kernelCurrentIndex = aubSubCaptureManager.getKernelCurrentIndex();
     ASSERT_EQ(0u, kernelCurrentIndex);
 
-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_EQ(kernelCurrentIndex + 1, aubSubCaptureManager.getKernelCurrentIndex());
 
-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_EQ(kernelCurrentIndex + 2, aubSubCaptureManager.getKernelCurrentIndex());
 }
 
@@ -114,10 +115,11 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenActivateSubCaptureIsCalledBu
     AubSubCaptureManagerMock aubSubCaptureManager("");
 
     DispatchInfo dispatchInfo;
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Off;
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_FALSE(active);
     EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive());
 }
@@ -126,11 +128,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenActivateSubCaptu
     AubSubCaptureManagerMock aubSubCaptureManager("");
 
     DispatchInfo dispatchInfo;
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
     aubSubCaptureManager.setSubCaptureToggleActive(true);
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_TRUE(active);
     EXPECT_TRUE(aubSubCaptureManager.isSubCaptureActive());
 }
@@ -139,11 +142,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenActivateSubCaptu
     AubSubCaptureManagerMock aubSubCaptureManager("");
 
     DispatchInfo dispatchInfo;
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
     aubSubCaptureManager.setSubCaptureToggleActive(false);
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_FALSE(active);
     EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive());
 }
@@ -154,10 +158,11 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_TRUE(active);
     EXPECT_TRUE(aubSubCaptureManager.isSubCaptureActive());
 }
@@ -168,11 +173,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
     aubSubCaptureManager.subCaptureFilter.dumpKernelStartIdx = 0;
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_TRUE(active);
     EXPECT_TRUE(aubSubCaptureManager.isSubCaptureActive());
 }
@@ -183,11 +189,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
     aubSubCaptureManager.subCaptureFilter.dumpKernelStartIdx = 1;
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_FALSE(active);
     EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive());
 }
@@ -198,12 +205,13 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
     aubSubCaptureManager.subCaptureFilter.dumpKernelEndIdx = 0;
     aubSubCaptureManager.setKernelCurrentIndex(1);
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_FALSE(active);
     EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive());
 }
@@ -214,11 +222,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
     aubSubCaptureManager.subCaptureFilter.dumpKernelName = "kernel_name";
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_TRUE(active);
     EXPECT_TRUE(aubSubCaptureManager.isSubCaptureActive());
 }
@@ -229,11 +238,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
     aubSubCaptureManager.subCaptureFilter.dumpKernelName = "invalid_kernel_name";
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_FALSE(active);
     EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive());
 }
@@ -255,13 +265,14 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureKeepsInactiveThenM
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.setSubCaptureIsActive(false);
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
     aubSubCaptureManager.setSubCaptureToggleActive(false);
 
-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_TRUE(DebugManager.flags.MakeEachEnqueueBlocking.get());
     EXPECT_FALSE(DebugManager.flags.ForceCsrFlushing.get());
     EXPECT_FALSE(DebugManager.flags.ForceCsrReprogramming.get());
@@ -273,13 +284,14 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureGetsActiveThenDont
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.setSubCaptureIsActive(false);
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
     aubSubCaptureManager.setSubCaptureToggleActive(true);
 
-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_FALSE(DebugManager.flags.ForceCsrFlushing.get());
     EXPECT_TRUE(DebugManager.flags.ForceCsrReprogramming.get());
     EXPECT_FALSE(DebugManager.flags.MakeEachEnqueueBlocking.get());
@@ -291,13 +303,14 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureKeepsActiveThenDon
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.setSubCaptureIsActive(true);
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
     aubSubCaptureManager.setSubCaptureToggleActive(true);
 
-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_FALSE(DebugManager.flags.ForceCsrFlushing.get());
     EXPECT_FALSE(DebugManager.flags.ForceCsrReprogramming.get());
     EXPECT_FALSE(DebugManager.flags.MakeEachEnqueueBlocking.get());
@@ -309,13 +322,14 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureGetsInactiveThenMa
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
 
     aubSubCaptureManager.setSubCaptureIsActive(true);
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
     aubSubCaptureManager.setSubCaptureToggleActive(false);
 
-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
     EXPECT_TRUE(DebugManager.flags.ForceCsrFlushing.get());
     EXPECT_FALSE(DebugManager.flags.ForceCsrReprogramming.get());
     EXPECT_TRUE(DebugManager.flags.MakeEachEnqueueBlocking.get());
@@ -344,41 +358,49 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureActiveStatesAreDet
 TEST_F(AubSubCaptureTest, givenSubCaptureManagerInOffModeWhenGetSubCaptureFileNameIsCalledThenItReturnsEmptyFileName) {
     AubSubCaptureManagerMock aubSubCaptureManager("");
     DispatchInfo dispatchInfo;
-    EXPECT_STREQ("", aubSubCaptureManager.getSubCaptureFileName(dispatchInfo).c_str());
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
+    EXPECT_STREQ("", aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str());
 }
 
 TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGetSubCaptureFileNameIsCalledAndExternalFileNameIsSpecifiedThenItReturnsItsName) {
     AubSubCaptureManagerMock aubSubCaptureManager("");
 
     DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     std::string externalFileName = "external_file_name.aub";
     aubSubCaptureManager.setExternalFileName(externalFileName);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
-    EXPECT_STREQ(externalFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(dispatchInfo).c_str());
+    EXPECT_STREQ(externalFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str());
 }
 
 TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFileNameIsCalledAndExternalFileNameIsSpecifiedThenItReturnsItsName) {
     AubSubCaptureManagerMock aubSubCaptureManager("");
 
     DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     std::string externalFileName = "external_file_name.aub";
     aubSubCaptureManager.setExternalFileName(externalFileName);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
-    EXPECT_STREQ(externalFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(dispatchInfo).c_str());
+    EXPECT_STREQ(externalFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str());
 }
 
 TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGetSubCaptureFileNameIsCalledAndExternalFileNameIsNotSpecifiedThenItGeneratesFilterFileName) {
     AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub");
 
     DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     std::string externalFileName = "";
     aubSubCaptureManager.setExternalFileName(externalFileName);
 
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
     std::string filterFileName = aubSubCaptureManager.generateFilterFileName();
-    EXPECT_STREQ(filterFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(dispatchInfo).c_str());
+    EXPECT_STREQ(filterFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str());
 }
 
 TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFileNameIsCalledAndExternalFileNameIsNotSpecifiedThenItGeneratesToggleFileName) {
@@ -387,7 +409,8 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFil
     DispatchInfo dispatchInfo;
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     std::string externalFileName = "";
     aubSubCaptureManager.setExternalFileName(externalFileName);
 
@@ -407,10 +430,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGetSubCaptureFil
     } aubSubCaptureManager("");
 
     DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
     EXPECT_EQ(1u, aubSubCaptureManager.generateFilterFileNameCount);
 }
 
@@ -425,10 +450,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFil
     } aubSubCaptureManager("");
 
     DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
     EXPECT_EQ(1u, aubSubCaptureManager.generateToggleFileNameCount);
 }
 
diff --git a/unit_tests/command_stream/command_stream_receiver_with_aub_dump_tests.cpp b/unit_tests/command_stream/command_stream_receiver_with_aub_dump_tests.cpp
index 17cc1a2656..3b79406362 100644
--- a/unit_tests/command_stream/command_stream_receiver_with_aub_dump_tests.cpp
+++ b/unit_tests/command_stream/command_stream_receiver_with_aub_dump_tests.cpp
@@ -218,8 +218,8 @@ HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAub
 
 HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenActivateAubSubCaptureIsCalledThenBaseCsrCommandStreamReceiverIsCalled) {
     const DispatchInfo dispatchInfo;
-    const MultiDispatchInfo multiDispatchInfo(dispatchInfo);
-
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     csrWithAubDump->activateAubSubCapture(multiDispatchInfo);
 
     EXPECT_TRUE(csrWithAubDump->activateAubSubCaptureParameterization.wasCalled);
diff --git a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp
index cfaeef13a5..1625101cca 100644
--- a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp
+++ b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp
@@ -53,19 +53,19 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDev
 
         size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();
 
-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *pKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false);
+        DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(pKernel);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);
 
         size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
         EXPECT_EQ(0u, dshUsedAfter);
@@ -109,19 +109,18 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDef
 
         auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
 
-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *pKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false);
+        DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);
 
         auto iohUsed = ioh.getUsed();
         EXPECT_EQ(0u, iohUsed);
@@ -135,20 +134,18 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSH
         const size_t workItems[3] = {1, 1, 1};
 
         MockMultiDispatchInfo multiDispatchInfo(pKernel);
-
-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *pKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false);
+        DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);
 
         auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);
 
@@ -170,21 +167,20 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSiz
         const size_t globalOffsets[3] = {0, 0, 0};
         const size_t workItems[3] = {1, 1, 1};
 
-        MockMultiDispatchInfo multiDispatchInfo(pKernel);
+        MultiDispatchInfo multiDispatchInfo(pKernel);
 
-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *pKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      true); // blockQueue
+        DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            true);
         ASSERT_NE(nullptr, blockedCommandsData);
 
         size_t minRequiredSize = KernelCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@@ -220,7 +216,7 @@ HWTEST_F(ParentKernelCommandStreamFixture, GivenDispatchInfoWithParentKernelWhen
         MockParentKernel *mockParentKernel = MockParentKernel::create(*context);
 
         DispatchInfo dispatchInfo(mockParentKernel, 1, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{0, 0, 0});
-        MultiDispatchInfo multiDispatchInfo;
+        MultiDispatchInfo multiDispatchInfo(mockParentKernel);
 
         size_t size = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, mockParentKernel);
         size_t numOfKernels = MemoryConstants::pageSize / size;
@@ -269,19 +265,19 @@ HWTEST_F(MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatched
         const size_t globalOffsets[3] = {0, 0, 0};
         const size_t workItems[3] = {1, 1, 1};
 
-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *mockParentKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      true); // blockQueue
+        DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(mockParentKernel);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            true);
 
         ASSERT_NE(nullptr, blockedCommandsData);
 
@@ -302,19 +298,19 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispa
         const size_t globalOffsets[3] = {0, 0, 0};
         const size_t workItems[3] = {1, 1, 1};
 
-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *mockParentKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false); // blockQueue
+        DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(mockParentKernel);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);
 
         LinearStream *commandStream = &pCmdQ->getCS(0);
 
@@ -358,19 +354,19 @@ HWTEST_F(MockParentKernelDispatch, GivenUsedSSHHeapWhenParentKernelIsDispatchedT
         // If parent is not using SSH, then heap obtained has zero usage and the same buffer
         ASSERT_EQ(0u, mockParentKernel->getKernelInfo().heapInfo.pKernelHeader->SurfaceStateHeapSize);
 
-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *mockParentKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false); // blockQueue
+        DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(mockParentKernel);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);
 
         EXPECT_EQ(0u, ssh.getUsed());
 
@@ -393,19 +389,19 @@ HWTEST_F(MockParentKernelDispatch, GivenNotUsedSSHHeapWhenParentKernelIsDispatch
 
         auto *bufferMemory = ssh.getCpuBase();
 
-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *mockParentKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false); // blockQueue
+        DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(mockParentKernel); // parent kernel set, as in the sibling parent-kernel tests
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);
 
         EXPECT_EQ(bufferMemory, ssh.getCpuBase());
 
diff --git a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
index 8f804a9e37..b05ad35b66 100644
--- a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
+++ b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
@@ -438,19 +438,19 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand
         const size_t globalOffsets[3] = {0, 0, 0};
         const size_t workItems[3] = {1, 1, 1};
 
-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *parentKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      device->getPreemptionMode(),
-                                                      true);
+        DispatchInfo dispatchInfo(parentKernel.get(), 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(parentKernel.get());
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            device->getPreemptionMode(),
+            true);
 
         EXPECT_NE(nullptr, blockedCommandsData);
         EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize());
diff --git a/unit_tests/helpers/dispatch_info_tests.cpp b/unit_tests/helpers/dispatch_info_tests.cpp
index c93d273cbe..1154dc8b56 100644
--- a/unit_tests/helpers/dispatch_info_tests.cpp
+++ b/unit_tests/helpers/dispatch_info_tests.cpp
@@ -189,7 +189,8 @@ TEST_F(DispatchInfoTest, MultiDispatchInfoWithRedescribedSurfaces) {
 TEST_F(DispatchInfoTest, MultiDispatchInfoWithNoGeometry) {
     DispatchInfo dispatchInfo;
 
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     EXPECT_FALSE(multiDispatchInfo.empty());
     EXPECT_EQ(0u, multiDispatchInfo.getRequiredScratchSize());
     EXPECT_FALSE(multiDispatchInfo.usesSlm());
@@ -203,7 +204,8 @@ TEST_F(DispatchInfoTest, MultiDispatchInfoWithUserGeometry) {
 
     DispatchInfo dispatchInfo(pKernel, 3, gws, elws, offset);
 
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     EXPECT_FALSE(multiDispatchInfo.empty());
     EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize());
     EXPECT_TRUE(multiDispatchInfo.usesSlm());
@@ -235,7 +237,8 @@ TEST_F(DispatchInfoTest, MultiDispatchInfoWithFullGeometry) {
 
     DispatchInfo dispatchInfo(pKernel, 3, gws, elws, offset, agws, lws, twgs, nwgs, swgs);
 
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     EXPECT_FALSE(multiDispatchInfo.empty());
     EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize());
     EXPECT_TRUE(multiDispatchInfo.usesSlm());
@@ -283,3 +286,41 @@ TEST_F(DispatchInfoTest, WorkGroupSetGet) {
     EXPECT_EQ(nwgs, dispatchInfo.getNumberOfWorkgroups());
     EXPECT_EQ(swgs, dispatchInfo.getStartOfWorkgroups());
 }
+
+TEST_F(DispatchInfoTest, givenKernelWhenMultiDispatchInfoIsCreatedThenQueryParentAndMainKernel) { // checks peekParentKernel()/peekMainKernel() for three MultiDispatchInfo setups
+    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*pContext));
+    std::unique_ptr<MockKernel> baseKernel(MockKernel::create(*pDevice, pProgram));
+    std::unique_ptr<MockKernel> builtInKernel(MockKernel::create(*pDevice, pProgram));
+    builtInKernel->isBuiltIn = true; // the only kernel in this test flagged as built-in
+    DispatchInfo parentKernelDispatchInfo(parentKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1});
+    DispatchInfo baseDispatchInfo(baseKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1});
+    DispatchInfo builtInDispatchInfo(builtInKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1});
+
+    { // Case 1: constructed with the parent kernel -> it is expected back as both parent and main kernel
+        MultiDispatchInfo multiDispatchInfo(parentKernel.get());
+        multiDispatchInfo.push(parentKernelDispatchInfo);
+        EXPECT_EQ(parentKernel.get(), multiDispatchInfo.peekParentKernel());
+        EXPECT_EQ(parentKernel.get(), multiDispatchInfo.peekMainKernel());
+    }
+
+    { // Case 2: constructed with a regular kernel -> no parent kernel expected; main kernel expected to be the constructor argument
+        MultiDispatchInfo multiDispatchInfo(baseKernel.get());
+        multiDispatchInfo.push(builtInDispatchInfo);
+        EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
+        EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); // don't pick the built-in kernel
+
+        multiDispatchInfo.push(baseDispatchInfo); // pushing the base kernel's own dispatch is expected to leave both queries unchanged
+        EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
+        EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel());
+    }
+
+    { // Case 3: default-constructed -> both queries expected to return nullptr while nothing has been pushed
+        MultiDispatchInfo multiDispatchInfo;
+        EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
+        EXPECT_EQ(nullptr, multiDispatchInfo.peekMainKernel());
+
+        multiDispatchInfo.push(builtInDispatchInfo); // with no kernel given at construction, the pushed built-in kernel is expected as main kernel
+        EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
+        EXPECT_EQ(builtInKernel.get(), multiDispatchInfo.peekMainKernel());
+    }
+}
diff --git a/unit_tests/mocks/mock_kernel.h b/unit_tests/mocks/mock_kernel.h
index 7724d75b9c..f38025433b 100644
--- a/unit_tests/mocks/mock_kernel.h
+++ b/unit_tests/mocks/mock_kernel.h
@@ -336,6 +336,7 @@ class MockParentKernel : public Kernel {
         info->patchInfo.threadPayload = threadPayload;
 
         SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment;
+        *executionEnvironment = {};
         executionEnvironment->HasDeviceEnqueue = 1;
         info->patchInfo.executionEnvironment = executionEnvironment;
 
diff --git a/unit_tests/mocks/mock_mdi.h b/unit_tests/mocks/mock_mdi.h
index 114f4d876b..6a22ada98b 100644
--- a/unit_tests/mocks/mock_mdi.h
+++ b/unit_tests/mocks/mock_mdi.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -28,7 +28,7 @@ using namespace OCLRT;
 
 class MockMultiDispatchInfo : public MultiDispatchInfo {
   public:
-    MockMultiDispatchInfo(Kernel *kernel) {
+    MockMultiDispatchInfo(Kernel *kernel) : MultiDispatchInfo(kernel) {
         DispatchInfo di(kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0});
         dispatchInfos.push_back(di);
     }
diff --git a/unit_tests/profiling/profiling_tests.cpp b/unit_tests/profiling/profiling_tests.cpp
index 3c2a7045bc..d2d3033b72 100644
--- a/unit_tests/profiling/profiling_tests.cpp
+++ b/unit_tests/profiling/profiling_tests.cpp
@@ -134,7 +134,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProfilingAndFor
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     DispatchInfo dispatchInfo;
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     multiDispatchInfo.push(dispatchInfo);
     auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, false, nullptr);
     auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(true, false, *pCmdQ, multiDispatchInfo);
@@ -547,7 +548,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
     MockKernel kernel(program.get(), kernelInfo, *pDevice);
     DispatchInfo dispatchInfo;
     dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
     multiDispatchInfo.push(dispatchInfo);
     auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, true, nullptr);
     auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(true, true, *pCmdQ, multiDispatchInfo);