Refactor querying Main and Parent Kernel from MultiDispatchInfo

Change-Id: I723d91f2f445bc7af1bcb0de46f8ac07837f3449
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
2018-08-16 15:47:25 +02:00 · 2018-08-16 15:47:25 +02:00 · c7a49666d5
parent 58b85e19af
commit c7a49666d5
15 changed files with 353 additions and 318 deletions
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@ -79,7 +79,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
        enqueueHandler<commandType>(surfaces, blocking, MultiDispatchInfo(), numEventsInWaitList, eventWaitList, event);
    } else {
        BuiltInOwnershipWrapper builtInLock;
-        MultiDispatchInfo multiDispatchInfo;
+        MultiDispatchInfo multiDispatchInfo(kernel);

        if (DebugManager.flags.ForceDispatchScheduler.get()) {
            forceDispatchScheduler(multiDispatchInfo);
@ -158,8 +158,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
        return;
    }

-    bool executionModelKernel = multiDispatchInfo.empty() ? false : multiDispatchInfo.begin()->getKernel()->isParentKernel;
-    Kernel *parentKernel = executionModelKernel ? multiDispatchInfo.begin()->getKernel() : nullptr;
+    Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
    auto devQueue = this->getContext().getDefaultDeviceQueue();
    DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);

@ -205,7 +204,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,

    DBG_LOG(EventsDebugEnable, "blockQueue", blockQueue, "virtualEvent", virtualEvent, "taskLevel", taskLevel);

-    if (executionModelKernel && !blockQueue) {
+    if (parentKernel && !blockQueue) {
        while (!devQueueHw->isEMCriticalSectionFree())
            ;
    }
@ -230,8 +229,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
        }

        if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
-            if (multiDispatchInfo.begin()->getKernel()->getProgram()->isKernelDebugEnabled()) {
-                setupDebugSurface(multiDispatchInfo.begin()->getKernel());
+            if (multiDispatchInfo.peekMainKernel()->getProgram()->isKernelDebugEnabled()) {
+                setupDebugSurface(multiDispatchInfo.peekMainKernel());
            }
        }

@ -245,7 +244,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
            }
        }

-        if (executionModelKernel) {
+        if (parentKernel) {
            parentKernel->createReflectionSurface();
            parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
            parentKernel->patchEventPool(context->getDefaultDeviceQueue());
@ -283,13 +282,13 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,

    CompletionStamp completionStamp;
    if (!blockQueue) {
-        if (executionModelKernel) {
-            size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
+        if (parentKernel) {
+            size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*parentKernel));

            uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
            devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
                                                    *devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
-                                                    multiDispatchInfo.begin()->getKernel(),
+                                                    parentKernel,
                                                    (uint32_t)multiDispatchInfo.size(),
                                                    taskCount,
                                                    hwTimeStamps);
@ -302,7 +301,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
                              devQueueHw->getEventPoolBuffer(),
                              devQueueHw->getSlbBuffer(),
                              devQueueHw->getDshBuffer(),
-                              multiDispatchInfo.begin()->getKernel()->getKernelReflectionSurface(),
+                              parentKernel->getKernelReflectionSurface(),
                              devQueueHw->getQueueStorageBuffer(),
                              this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
                              devQueueHw->getDebugQueue());
@ -342,7 +341,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
                eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
            }

-            if (executionModelKernel) {
+            if (parentKernel) {
                commandStreamReceiver.overrideMediaVFEStateDirty(true);

                if (devQueueHw->getSchedulerReturnInstance() > 0) {
@ -354,7 +353,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
                                                      devQueueHw->getEventPoolBuffer(),
                                                      devQueueHw->getSlbBuffer(),
                                                      devQueueHw->getDshBuffer(),
-                                                      multiDispatchInfo.begin()->getKernel()->getKernelReflectionSurface(),
+                                                      parentKernel->getKernelReflectionSurface(),
                                                      devQueueHw->getQueueStorageBuffer(),
                                                      this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
                                                      devQueueHw->getDebugQueue());
@ -398,8 +397,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
    }

    if (blockQueue) {
-        if (executionModelKernel) {
-            size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
+        if (parentKernel) {
+            size_t minSizeSSHForEM = KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(const_cast<const Kernel &>(*parentKernel));
            blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM;
        }

@ -536,9 +535,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(

    IndirectHeap *dsh = nullptr;
    IndirectHeap *ioh = nullptr;
-    const bool executionModelKernel = multiDispatchInfo.begin()->getKernel()->isParentKernel;

-    if (executionModelKernel) {
+    if (multiDispatchInfo.peekParentKernel()) {
        DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(this->getContext().getDefaultDeviceQueue());
        DEBUG_BREAK_IF(pDevQueue == nullptr);
        dsh = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
@ -550,7 +548,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
        ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
    }

-    commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.begin()->getKernel()->getThreadArbitrationPolicy<GfxFamily>());
+    commandStreamReceiver.requestThreadArbitrationPolicy(multiDispatchInfo.peekMainKernel()->getThreadArbitrationPolicy<GfxFamily>());

    DispatchFlags dispatchFlags;
    dispatchFlags.blocking = blocking;
@ -663,7 +661,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
            commandType == CL_COMMAND_NDRANGE_KERNEL,
            std::move(printfHandler),
            preemptionMode,
-            multiDispatchInfo.begin()->getKernel(),
+            multiDispatchInfo.peekMainKernel(),
            (uint32_t)multiDispatchInfo.size()));
        eventBuilder->getEvent()->setCommand(std::move(cmd));
    }
--- a/runtime/command_queue/gpgpu_walker.h
+++ b/runtime/command_queue/gpgpu_walker.h
@ -203,21 +203,6 @@ class GpgpuWalkerHelper {
        bool blockQueue,
        uint32_t commandType = 0);

-    static void dispatchWalker(
-        CommandQueue &commandQueue,
-        const Kernel &kernel,
-        cl_uint workDim,
-        const size_t globalOffsets[3],
-        const size_t workItems[3],
-        const size_t *localWorkSizesIn,
-        cl_uint numEventsInWaitList,
-        const cl_event *eventWaitList,
-        KernelOperation **blockedCommandsData,
-        HwTimeStamps *hwTimeStamps,
-        HwPerfCounter *hwPerfCounter,
-        PreemptionMode preemptionMode,
-        bool blockQueue);
-
    static void dispatchScheduler(
        CommandQueue &commandQueue,
        DeviceQueueHw<GfxFamily> &devQueueHw,
@ -246,11 +231,11 @@ LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfiling
 template <typename GfxFamily, uint32_t eventType>
 LinearStream &getCommandStream(CommandQueue &commandQueue, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, const MultiDispatchInfo &multiDispatchInfo) {
    size_t expectedSizeCS = 0;
-    Kernel *parentKernel = multiDispatchInfo.size() > 0 ? multiDispatchInfo.begin()->getKernel() : nullptr;
+    Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
    for (auto &dispatchInfo : multiDispatchInfo) {
        expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, dispatchInfo.getKernel());
    }
-    if (parentKernel && parentKernel->isParentKernel) {
+    if (parentKernel) {
        SchedulerKernel &scheduler = BuiltIns::getInstance().getSchedulerKernel(parentKernel->getContext());
        expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, commandQueue, &scheduler);
    }
@ -270,9 +255,9 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf
    }
    // clang-format on

-    if (multiDispatchInfo.begin()->getKernel()->isParentKernel) {
+    if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) {
        if (heapType == IndirectHeap::SURFACE_STATE) {
-            expectedSize += KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<heapType>(const_cast<const Kernel &>(*(multiDispatchInfo.begin()->getKernel())));
+            expectedSize += KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<heapType>(const_cast<const Kernel &>(*parentKernel));
        } else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT)
        {
            DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(commandQueue.getContext().getDefaultDeviceQueue());
--- a/runtime/command_queue/gpgpu_walker.inl
+++ b/runtime/command_queue/gpgpu_walker.inl
@ -446,7 +446,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(

    OCLRT::LinearStream *commandStream = nullptr;
    OCLRT::IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
-    bool executionModelKernel = multiDispatchInfo.begin()->getKernel()->isParentKernel;
+    Kernel *parentKernel = multiDispatchInfo.peekParentKernel();

    for (auto &dispatchInfo : multiDispatchInfo) {
        // Compute local workgroup sizes
@ -460,7 +460,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
    if (blockQueue) {
        using KCH = KernelCommandsHelper<GfxFamily>;
        commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), MemoryConstants::pageSize);
-        if (executionModelKernel) {
+        if (parentKernel) {
            uint32_t colorCalcSize = commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize;

            commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE,
@ -470,7 +470,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
            dsh->getSpace(colorCalcSize);
            ioh = dsh;
            commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE,
-                                            KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*(multiDispatchInfo.begin()->getKernel())) +
+                                            KernelCommandsHelper<GfxFamily>::template getSizeRequiredForExecutionModel<IndirectHeap::SURFACE_STATE>(*parentKernel) +
                                                KCH::getTotalSizeRequiredSSH(multiDispatchInfo),
                                            ssh);
        } else {
@ -482,12 +482,12 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
        using UniqueIH = std::unique_ptr<IndirectHeap>;
        *blockedCommandsData = new KernelOperation(std::unique_ptr<LinearStream>(commandStream), UniqueIH(dsh), UniqueIH(ioh), UniqueIH(ssh),
                                                   *commandQueue.getDevice().getMemoryManager());
-        if (executionModelKernel) {
+        if (parentKernel) {
            (*blockedCommandsData)->doNotFreeISH = true;
        }
    } else {
        commandStream = &commandQueue.getCS(0);
-        if (executionModelKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) {
+        if (parentKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) {
            commandQueue.releaseIndirectHeap(IndirectHeap::SURFACE_STATE);
        }
        dsh = &getIndirectHeap<GfxFamily, IndirectHeap::DYNAMIC_STATE>(commandQueue, multiDispatchInfo);
@ -505,7 +505,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
    size_t numDispatches = multiDispatchInfo.size();
    totalInterfaceDescriptorTableSize *= numDispatches;

-    if (!executionModelKernel) {
+    if (!parentKernel) {
        dsh->getSpace(totalInterfaceDescriptorTableSize);
    } else {
        dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed());
@ -656,27 +656,6 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
    }
 }

-template <typename GfxFamily>
-void GpgpuWalkerHelper<GfxFamily>::dispatchWalker(
-    CommandQueue &commandQueue,
-    const Kernel &kernel,
-    cl_uint workDim,
-    const size_t globalOffsets[3],
-    const size_t workItems[3],
-    const size_t *localWorkSizesIn,
-    cl_uint numEventsInWaitList,
-    const cl_event *eventWaitList,
-    KernelOperation **blockedCommandsData,
-    HwTimeStamps *hwTimeStamps,
-    HwPerfCounter *hwPerfCounter,
-    PreemptionMode preemptionMode,
-    bool blockQueue) {
-
-    DispatchInfo dispatchInfo(const_cast<Kernel *>(&kernel), workDim, workItems, localWorkSizesIn, globalOffsets);
-    GpgpuWalkerHelper<GfxFamily>::dispatchWalker(commandQueue, dispatchInfo, numEventsInWaitList, eventWaitList,
-                                                 blockedCommandsData, hwTimeStamps, hwPerfCounter, preemptionMode, blockQueue);
-}
-
 template <typename GfxFamily>
 void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
    CommandQueue &commandQueue,
--- a/runtime/helpers/dispatch_info.cpp
+++ b/runtime/helpers/dispatch_info.cpp
@ -35,4 +35,15 @@ bool DispatchInfo::usesStatelessPrintfSurface() const {
 uint32_t DispatchInfo::getRequiredScratchSize() const {
    return (kernel == nullptr) ? 0 : kernel->getScratchSize();
 }
+
+Kernel *MultiDispatchInfo::peekMainKernel() const { // Returns the kernel that represents this multi-dispatch, or nullptr when no dispatches are stored.
+    if (dispatchInfos.size() == 0) {
+        return nullptr; // an empty dispatch list yields nullptr even if mainKernel was set at construction
+    }
+    return mainKernel ? mainKernel : dispatchInfos.begin()->getKernel(); // an explicitly set mainKernel wins; otherwise fall back to the first dispatch's kernel
+}
+
+Kernel *MultiDispatchInfo::peekParentKernel() const { // Returns mainKernel when it is set and its isParentKernel flag is true; nullptr otherwise.
+    return (mainKernel && mainKernel->isParentKernel) ? mainKernel : nullptr; // only mainKernel is consulted here; the stored dispatchInfos are not inspected
+}
 } // namespace OCLRT
--- a/runtime/helpers/dispatch_info.h
+++ b/runtime/helpers/dispatch_info.h
@ -79,18 +79,14 @@ class DispatchInfo {
 };

 struct MultiDispatchInfo {
-    MultiDispatchInfo(const DispatchInfo &dispatchInfo) {
-        dispatchInfos.push_back(dispatchInfo);
-    }
-
    ~MultiDispatchInfo() {
        for (MemObj *redescribedSurface : redescribedSurfaces) {
            redescribedSurface->release();
        }
    }

-    MultiDispatchInfo() {
-    }
+    MultiDispatchInfo(Kernel *mainKernel) : mainKernel(mainKernel) {} // Stores the given kernel as mainKernel. NOTE(review): not marked explicit, so a Kernel* converts implicitly to MultiDispatchInfo — confirm this is intended.
+    MultiDispatchInfo() = default;

    MultiDispatchInfo &operator=(const MultiDispatchInfo &) = delete;
    MultiDispatchInfo(const MultiDispatchInfo &) = delete;
@ -149,8 +145,12 @@ struct MultiDispatchInfo {
        redescribedSurfaces.push_back(memObj.release());
    }

+    Kernel *peekParentKernel() const;
+    Kernel *peekMainKernel() const;
+
  protected:
    StackVec<DispatchInfo, 9> dispatchInfos;
    StackVec<MemObj *, 2> redescribedSurfaces;
+    Kernel *mainKernel = nullptr;
 };
 } // namespace OCLRT
--- a/unit_tests/command_queue/dispatch_walker_tests.cpp
+++ b/unit_tests/command_queue/dispatch_walker_tests.cpp
@ -140,13 +140,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, shouldntChangeCommandStreamMemor
    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    cl_uint dimensions = 1;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
        0,
        nullptr,
        nullptr,
@ -188,13 +187,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, noLocalIdsShouldntCrash) {
    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    cl_uint dimensions = 1;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
        0,
        nullptr,
        nullptr,
@ -217,13 +215,13 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm)
    size_t workItems[3] = {1, 1, 1};
    for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
        workItems[dimension - 1] = 256;
+
+        DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimension, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo;
+        multiDispatchInfo.push(dispatchInfo);
        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
            *pCmdQ,
-            kernel,
-            dimension,
-            globalOffsets,
-            workItems,
-            nullptr,
+            multiDispatchInfo,
            0,
            nullptr,
            nullptr,
@ -231,6 +229,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm)
            nullptr,
            pDevice->getPreemptionMode(),
            false);
+
        EXPECT_EQ(dimension, *kernel.workDim);
    }
 }
@ -247,13 +246,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithSquaredLwsAlgorithm)
    size_t workItems[3] = {1, 1, 1};
    for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
        workItems[dimension - 1] = 256;
+        DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimension, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo;
+        multiDispatchInfo.push(dispatchInfo);
        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
            *pCmdQ,
-            kernel,
-            dimension,
-            globalOffsets,
-            workItems,
-            nullptr,
+            multiDispatchInfo,
            0,
            nullptr,
            nullptr,
@ -276,13 +274,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithNDLwsAlgorithm) {
    size_t workItems[3] = {1, 1, 1};
    for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
        workItems[dimension - 1] = 256;
+        DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimension, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo;
+        multiDispatchInfo.push(dispatchInfo);
        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
            *pCmdQ,
-            kernel,
-            dimension,
-            globalOffsets,
-            workItems,
-            nullptr,
+            multiDispatchInfo,
            0,
            nullptr,
            nullptr,
@ -306,13 +303,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithOldLwsAlgorithm) {
    size_t workItems[3] = {1, 1, 1};
    for (uint32_t dimension = 1; dimension <= 3; ++dimension) {
        workItems[dimension - 1] = 256;
+        DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimension, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo;
+        multiDispatchInfo.push(dispatchInfo);
        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
            *pCmdQ,
-            kernel,
-            dimension,
-            globalOffsets,
-            workItems,
-            nullptr,
+            multiDispatchInfo,
            0,
            nullptr,
            nullptr,
@ -335,13 +331,13 @@ HWTEST_F(DispatchWalkerTest, dataParameterNumWorkGroups) {
    size_t workItems[3] = {2, 5, 10};
    size_t workGroupSize[3] = {1, 1, 1};
    cl_uint dimensions = 3;
+
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        workGroupSize,
+        multiDispatchInfo,
        0,
        nullptr,
        nullptr,
@ -349,6 +345,7 @@ HWTEST_F(DispatchWalkerTest, dataParameterNumWorkGroups) {
        nullptr,
        pDevice->getPreemptionMode(),
        false);
+
    EXPECT_EQ(2u, *kernel.numWorkGroupsX);
    EXPECT_EQ(5u, *kernel.numWorkGroupsY);
    EXPECT_EQ(10u, *kernel.numWorkGroupsZ);
@ -366,13 +363,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeND) {
    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
        0,
        nullptr,
        nullptr,
@ -397,13 +393,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeND) {
    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
        0,
        nullptr,
        nullptr,
@ -429,13 +424,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeSquared) {
    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
        0,
        nullptr,
        nullptr,
@ -461,13 +455,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeSquaredAn
    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {2, 5, 10};
    cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, nullptr, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        nullptr,
+        multiDispatchInfo,
        0,
        nullptr,
        nullptr,
@ -491,13 +484,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSize) {
    size_t workItems[3] = {2, 5, 10};
    size_t workGroupSize[3] = {1, 2, 3};
    cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        workGroupSize,
+        multiDispatchInfo,
        0,
        nullptr,
        nullptr,
@ -524,13 +516,12 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizes) {
    size_t workItems[3] = {2, 5, 10};
    size_t workGroupSize[3] = {1, 2, 3};
    cl_uint dimensions = 3;
+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        workGroupSize,
+        multiDispatchInfo,
        0,
        nullptr,
        nullptr,
@ -649,13 +640,12 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerDoesntConsumeCommandStreamWhenQueueIs

    KernelOperation *blockedCommandsData = nullptr;

+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        workGroupSize,
+        multiDispatchInfo,
        0,
        nullptr,
        &blockedCommandsData,
@ -689,13 +679,12 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW

    KernelOperation *blockedCommandsData = nullptr;

+    DispatchInfo dispatchInfo(const_cast<MockKernel *>(&kernel), dimensions, workItems, workGroupSize, globalOffsets);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    GpgpuWalkerHelper<FamilyType>::dispatchWalker(
        *pCmdQ,
-        kernel,
-        dimensions,
-        globalOffsets,
-        workItems,
-        workGroupSize,
+        multiDispatchInfo,
        0,
        nullptr,
        &blockedCommandsData,
--- a/unit_tests/command_stream/aub_command_stream_receiver_tests.cpp
+++ b/unit_tests/command_stream/aub_command_stream_receiver_tests.cpp
@ -452,10 +452,12 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptur
    LinearStream cs(aubExecutionEnvironment->commandBuffer);

    const DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("");
    aubSubCaptureManagerMock->subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManagerMock->setSubCaptureToggleActive(true);
-    aubSubCaptureManagerMock->activateSubCapture(dispatchInfo);
+    aubSubCaptureManagerMock->activateSubCapture(multiDispatchInfo);
    aubCsr->subCaptureManager.reset(aubSubCaptureManagerMock);
    ASSERT_TRUE(aubCsr->subCaptureManager->isSubCaptureEnabled());

@ -572,10 +574,12 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInStandalon
    LinearStream cs(commandBuffer);

    const DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    auto aubSubCaptureManagerMock = new AubSubCaptureManagerMock("");
    aubSubCaptureManagerMock->subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManagerMock->setSubCaptureToggleActive(true);
-    aubSubCaptureManagerMock->activateSubCapture(dispatchInfo);
+    aubSubCaptureManagerMock->activateSubCapture(multiDispatchInfo);
    aubCsr->subCaptureManager.reset(aubSubCaptureManagerMock);
    ASSERT_TRUE(aubCsr->subCaptureManager->isSubCaptureEnabled());

@ -913,7 +917,9 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverInSubCaptur
    aubCsr->subCaptureManager.reset(subCaptureManagerMock);

    const DispatchInfo dispatchInfo;
-    aubCsr->activateAubSubCapture(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
+    aubCsr->activateAubSubCapture(multiDispatchInfo);

    EXPECT_FALSE(aubCsr->subCaptureManager->isSubCaptureEnabled());
 }
--- a/unit_tests/command_stream/aub_subcapture_tests.cpp
+++ b/unit_tests/command_stream/aub_subcapture_tests.cpp
@ -99,14 +99,15 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenActivateSubCaptureIsCalledWi
    AubSubCaptureManagerMock aubSubCaptureManager("");

    DispatchInfo dispatchInfo;
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    uint32_t kernelCurrentIndex = aubSubCaptureManager.getKernelCurrentIndex();
    ASSERT_EQ(0u, kernelCurrentIndex);

-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_EQ(kernelCurrentIndex + 1, aubSubCaptureManager.getKernelCurrentIndex());

-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_EQ(kernelCurrentIndex + 2, aubSubCaptureManager.getKernelCurrentIndex());
 }

@ -114,10 +115,11 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenActivateSubCaptureIsCalledBu
    AubSubCaptureManagerMock aubSubCaptureManager("");

    DispatchInfo dispatchInfo;
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Off;
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_FALSE(active);
    EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive());
 }
@ -126,11 +128,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenActivateSubCaptu
    AubSubCaptureManagerMock aubSubCaptureManager("");

    DispatchInfo dispatchInfo;
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManager.setSubCaptureToggleActive(true);
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_TRUE(active);
    EXPECT_TRUE(aubSubCaptureManager.isSubCaptureActive());
 }
@ -139,11 +142,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenActivateSubCaptu
    AubSubCaptureManagerMock aubSubCaptureManager("");

    DispatchInfo dispatchInfo;
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManager.setSubCaptureToggleActive(false);
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_FALSE(active);
    EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive());
 }
@ -154,10 +158,11 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_TRUE(active);
    EXPECT_TRUE(aubSubCaptureManager.isSubCaptureActive());
 }
@ -168,11 +173,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
    aubSubCaptureManager.subCaptureFilter.dumpKernelStartIdx = 0;
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_TRUE(active);
    EXPECT_TRUE(aubSubCaptureManager.isSubCaptureActive());
 }
@ -183,11 +189,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
    aubSubCaptureManager.subCaptureFilter.dumpKernelStartIdx = 1;
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_FALSE(active);
    EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive());
 }
@ -198,12 +205,13 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
    aubSubCaptureManager.subCaptureFilter.dumpKernelEndIdx = 0;
    aubSubCaptureManager.setKernelCurrentIndex(1);
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_FALSE(active);
    EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive());
 }
@ -214,11 +222,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
    aubSubCaptureManager.subCaptureFilter.dumpKernelName = "kernel_name";
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_TRUE(active);
    EXPECT_TRUE(aubSubCaptureManager.isSubCaptureActive());
 }
@ -229,11 +238,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenActivateSubCaptu
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
    aubSubCaptureManager.subCaptureFilter.dumpKernelName = "invalid_kernel_name";
-    bool active = aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    bool active = aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_FALSE(active);
    EXPECT_FALSE(aubSubCaptureManager.isSubCaptureActive());
 }
@ -255,13 +265,14 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureKeepsInactiveThenM
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.setSubCaptureIsActive(false);
    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManager.setSubCaptureToggleActive(false);

-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_TRUE(DebugManager.flags.MakeEachEnqueueBlocking.get());
    EXPECT_FALSE(DebugManager.flags.ForceCsrFlushing.get());
    EXPECT_FALSE(DebugManager.flags.ForceCsrReprogramming.get());
@ -273,13 +284,14 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureGetsActiveThenDont
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.setSubCaptureIsActive(false);
    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManager.setSubCaptureToggleActive(true);

-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_FALSE(DebugManager.flags.ForceCsrFlushing.get());
    EXPECT_TRUE(DebugManager.flags.ForceCsrReprogramming.get());
    EXPECT_FALSE(DebugManager.flags.MakeEachEnqueueBlocking.get());
@ -291,13 +303,14 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureKeepsActiveThenDon
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.setSubCaptureIsActive(true);
    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManager.setSubCaptureToggleActive(true);

-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_FALSE(DebugManager.flags.ForceCsrFlushing.get());
    EXPECT_FALSE(DebugManager.flags.ForceCsrReprogramming.get());
    EXPECT_FALSE(DebugManager.flags.MakeEachEnqueueBlocking.get());
@ -309,13 +322,14 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureGetsInactiveThenMa
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);

    aubSubCaptureManager.setSubCaptureIsActive(true);
    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
    aubSubCaptureManager.setSubCaptureToggleActive(false);

-    aubSubCaptureManager.activateSubCapture(dispatchInfo);
+    aubSubCaptureManager.activateSubCapture(multiDispatchInfo);
    EXPECT_TRUE(DebugManager.flags.ForceCsrFlushing.get());
    EXPECT_FALSE(DebugManager.flags.ForceCsrReprogramming.get());
    EXPECT_TRUE(DebugManager.flags.MakeEachEnqueueBlocking.get());
@ -344,41 +358,49 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerWhenSubCaptureActiveStatesAreDet
 TEST_F(AubSubCaptureTest, givenSubCaptureManagerInOffModeWhenGetSubCaptureFileNameIsCalledThenItReturnsEmptyFileName) {
    AubSubCaptureManagerMock aubSubCaptureManager("");
    DispatchInfo dispatchInfo;
-    EXPECT_STREQ("", aubSubCaptureManager.getSubCaptureFileName(dispatchInfo).c_str());
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
+    EXPECT_STREQ("", aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str());
 }

 TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGetSubCaptureFileNameIsCalledAndExternalFileNameIsSpecifiedThenItReturnsItsName) {
    AubSubCaptureManagerMock aubSubCaptureManager("");

    DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    std::string externalFileName = "external_file_name.aub";
    aubSubCaptureManager.setExternalFileName(externalFileName);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
-    EXPECT_STREQ(externalFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(dispatchInfo).c_str());
+    EXPECT_STREQ(externalFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str());
 }

 TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFileNameIsCalledAndExternalFileNameIsSpecifiedThenItReturnsItsName) {
    AubSubCaptureManagerMock aubSubCaptureManager("");

    DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    std::string externalFileName = "external_file_name.aub";
    aubSubCaptureManager.setExternalFileName(externalFileName);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
-    EXPECT_STREQ(externalFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(dispatchInfo).c_str());
+    EXPECT_STREQ(externalFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str());
 }

 TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGetSubCaptureFileNameIsCalledAndExternalFileNameIsNotSpecifiedThenItGeneratesFilterFileName) {
    AubSubCaptureManagerMock aubSubCaptureManager("aubfile.aub");

    DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    std::string externalFileName = "";
    aubSubCaptureManager.setExternalFileName(externalFileName);

    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
    std::string filterFileName = aubSubCaptureManager.generateFilterFileName();
-    EXPECT_STREQ(filterFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(dispatchInfo).c_str());
+    EXPECT_STREQ(filterFileName.c_str(), aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo).c_str());
 }

 TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFileNameIsCalledAndExternalFileNameIsNotSpecifiedThenItGeneratesToggleFileName) {
@ -387,7 +409,8 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFil
    DispatchInfo dispatchInfo;
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    std::string externalFileName = "";
    aubSubCaptureManager.setExternalFileName(externalFileName);

@ -407,10 +430,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInFilterModeWhenGetSubCaptureFil
    } aubSubCaptureManager("");

    DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Filter;
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
    EXPECT_EQ(1u, aubSubCaptureManager.generateFilterFileNameCount);
 }

@ -425,10 +450,12 @@ TEST_F(AubSubCaptureTest, givenSubCaptureManagerInToggleModeWhenGetSubCaptureFil
    } aubSubCaptureManager("");

    DispatchInfo dispatchInfo;
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    aubSubCaptureManager.subCaptureMode = AubSubCaptureManager::SubCaptureMode::Toggle;
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
-    aubSubCaptureManager.getSubCaptureFileName(dispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
+    aubSubCaptureManager.getSubCaptureFileName(multiDispatchInfo);
    EXPECT_EQ(1u, aubSubCaptureManager.generateToggleFileNameCount);
 }

--- a/unit_tests/command_stream/command_stream_receiver_with_aub_dump_tests.cpp
+++ b/unit_tests/command_stream/command_stream_receiver_with_aub_dump_tests.cpp
@ -218,8 +218,8 @@ HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAub

 HWTEST_P(CommandStreamReceiverWithAubDumpTest, givenCommandStreamReceiverWithAubDumpWhenActivateAubSubCaptureIsCalledThenBaseCsrCommandStreamReceiverIsCalled) {
    const DispatchInfo dispatchInfo;
-    const MultiDispatchInfo multiDispatchInfo(dispatchInfo);
-
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    csrWithAubDump->activateAubSubCapture(multiDispatchInfo);

    EXPECT_TRUE(csrWithAubDump->activateAubSubCaptureParameterization.wasCalled);
--- a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp
+++ b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp
@ -53,19 +53,19 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDev

        size_t executionModelDSHUsedBefore = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE)->getUsed();

-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *pKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false);
+        DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(pKernel);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);

        size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
        EXPECT_EQ(0u, dshUsedAfter);
@ -109,19 +109,18 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenDef

        auto &ioh = pCmdQ->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);

-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *pKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false);
+        DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);

        auto iohUsed = ioh.getUsed();
        EXPECT_EQ(0u, iohUsed);
@ -135,20 +134,18 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsNotBlockedThenSSH
        const size_t workItems[3] = {1, 1, 1};

        MockMultiDispatchInfo multiDispatchInfo(pKernel);
-
-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *pKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false);
+        DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);

        auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);

@ -170,21 +167,20 @@ HWTEST_P(ParentKernelDispatchTest, givenParentKernelWhenQueueIsBlockedThenSSHSiz
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};

-        MockMultiDispatchInfo multiDispatchInfo(pKernel);
+        MultiDispatchInfo multiDispatchInfo(pKernel);

-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *pKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      true); // blockQueue
+        DispatchInfo dispatchInfo(pKernel, 1, workItems, nullptr, globalOffsets);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            true);
        ASSERT_NE(nullptr, blockedCommandsData);

        size_t minRequiredSize = KernelCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@ -220,7 +216,7 @@ HWTEST_F(ParentKernelCommandStreamFixture, GivenDispatchInfoWithParentKernelWhen
        MockParentKernel *mockParentKernel = MockParentKernel::create(*context);

        DispatchInfo dispatchInfo(mockParentKernel, 1, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{24, 1, 1}, Vec3<size_t>{0, 0, 0});
-        MultiDispatchInfo multiDispatchInfo;
+        MultiDispatchInfo multiDispatchInfo(mockParentKernel);

        size_t size = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, mockParentKernel);
        size_t numOfKernels = MemoryConstants::pageSize / size;
@ -269,19 +265,19 @@ HWTEST_F(MockParentKernelDispatch, GivenBlockedQueueWhenParentKernelIsDispatched
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};

-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *mockParentKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      true); // blockQueue
+        DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(mockParentKernel);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            true);

        ASSERT_NE(nullptr, blockedCommandsData);

@ -302,19 +298,19 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispa
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};

-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *mockParentKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false); // blockQueue
+        DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(mockParentKernel);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);

        LinearStream *commandStream = &pCmdQ->getCS(0);

@ -358,19 +354,19 @@ HWTEST_F(MockParentKernelDispatch, GivenUsedSSHHeapWhenParentKernelIsDispatchedT
        // If parent is not using SSH, then heap obtained has zero usage and the same buffer
        ASSERT_EQ(0u, mockParentKernel->getKernelInfo().heapInfo.pKernelHeader->SurfaceStateHeapSize);

-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *mockParentKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false); // blockQueue
+        DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(mockParentKernel);
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);

        EXPECT_EQ(0u, ssh.getUsed());

@ -393,19 +389,19 @@ HWTEST_F(MockParentKernelDispatch, GivenNotUsedSSHHeapWhenParentKernelIsDispatch

        auto *bufferMemory = ssh.getCpuBase();

-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *mockParentKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      pDevice->getPreemptionMode(),
-                                                      false); // blockQueue
+        DispatchInfo dispatchInfo(mockParentKernel, 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo;
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            pDevice->getPreemptionMode(),
+            false);

        EXPECT_EQ(bufferMemory, ssh.getCpuBase());

--- a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
+++ b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
@ -438,19 +438,19 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand
        const size_t globalOffsets[3] = {0, 0, 0};
        const size_t workItems[3] = {1, 1, 1};

-        GpgpuWalkerHelper<FamilyType>::dispatchWalker(*pCmdQ,
-                                                      *parentKernel,
-                                                      1,
-                                                      globalOffsets,
-                                                      workItems,
-                                                      nullptr,
-                                                      0,
-                                                      nullptr,
-                                                      &blockedCommandsData,
-                                                      nullptr,
-                                                      nullptr,
-                                                      device->getPreemptionMode(),
-                                                      true);
+        DispatchInfo dispatchInfo(parentKernel.get(), 1, workItems, nullptr, globalOffsets);
+        MultiDispatchInfo multiDispatchInfo(parentKernel.get());
+        multiDispatchInfo.push(dispatchInfo);
+        GpgpuWalkerHelper<FamilyType>::dispatchWalker(
+            *pCmdQ,
+            multiDispatchInfo,
+            0,
+            nullptr,
+            &blockedCommandsData,
+            nullptr,
+            nullptr,
+            device->getPreemptionMode(),
+            true);

        EXPECT_NE(nullptr, blockedCommandsData);
        EXPECT_EQ(blockedCommandsData->dsh->getMaxAvailableSpace(), mockDevQueue.getDshBuffer()->getUnderlyingBufferSize());
--- a/unit_tests/helpers/dispatch_info_tests.cpp
+++ b/unit_tests/helpers/dispatch_info_tests.cpp
@ -189,7 +189,8 @@ TEST_F(DispatchInfoTest, MultiDispatchInfoWithRedescribedSurfaces) {
 TEST_F(DispatchInfoTest, MultiDispatchInfoWithNoGeometry) {
    DispatchInfo dispatchInfo;

-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    EXPECT_FALSE(multiDispatchInfo.empty());
    EXPECT_EQ(0u, multiDispatchInfo.getRequiredScratchSize());
    EXPECT_FALSE(multiDispatchInfo.usesSlm());
@ -203,7 +204,8 @@ TEST_F(DispatchInfoTest, MultiDispatchInfoWithUserGeometry) {

    DispatchInfo dispatchInfo(pKernel, 3, gws, elws, offset);

-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    EXPECT_FALSE(multiDispatchInfo.empty());
    EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize());
    EXPECT_TRUE(multiDispatchInfo.usesSlm());
@ -235,7 +237,8 @@ TEST_F(DispatchInfoTest, MultiDispatchInfoWithFullGeometry) {

    DispatchInfo dispatchInfo(pKernel, 3, gws, elws, offset, agws, lws, twgs, nwgs, swgs);

-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    EXPECT_FALSE(multiDispatchInfo.empty());
    EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize());
    EXPECT_TRUE(multiDispatchInfo.usesSlm());
@ -283,3 +286,41 @@ TEST_F(DispatchInfoTest, WorkGroupSetGet) {
    EXPECT_EQ(nwgs, dispatchInfo.getNumberOfWorkgroups());
    EXPECT_EQ(swgs, dispatchInfo.getStartOfWorkgroups());
 }
+
+TEST_F(DispatchInfoTest, givenKernelWhenMultiDispatchInfoIsCreatedThenQueryParentAndMainKernel) { // checks peekParentKernel()/peekMainKernel() for three ways of constructing MultiDispatchInfo
+    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*pContext));
+    std::unique_ptr<MockKernel> baseKernel(MockKernel::create(*pDevice, pProgram));
+    std::unique_ptr<MockKernel> builtInKernel(MockKernel::create(*pDevice, pProgram));
+    builtInKernel->isBuiltIn = true; // the only difference between builtInKernel and baseKernel set up in this test
+    DispatchInfo parentKernelDispatchInfo(parentKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1});
+    DispatchInfo baseDispatchInfo(baseKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1});
+    DispatchInfo builtInDispatchInfo(builtInKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1});
+
+    { // case 1: constructed with the parent kernel - it is expected as both parent and main kernel
+        MultiDispatchInfo multiDispatchInfo(parentKernel.get());
+        multiDispatchInfo.push(parentKernelDispatchInfo);
+        EXPECT_EQ(parentKernel.get(), multiDispatchInfo.peekParentKernel());
+        EXPECT_EQ(parentKernel.get(), multiDispatchInfo.peekMainKernel());
+    }
+
+    { // case 2: constructed with baseKernel - no parent kernel expected, baseKernel stays the main kernel after each push
+        MultiDispatchInfo multiDispatchInfo(baseKernel.get());
+        multiDispatchInfo.push(builtInDispatchInfo);
+        EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
+        EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel()); // don't pick the built-in kernel
+
+        multiDispatchInfo.push(baseDispatchInfo);
+        EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
+        EXPECT_EQ(baseKernel.get(), multiDispatchInfo.peekMainKernel());
+    }
+
+    { // case 3: default-constructed - both queries are expected to return nullptr while nothing has been pushed
+        MultiDispatchInfo multiDispatchInfo;
+        EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
+        EXPECT_EQ(nullptr, multiDispatchInfo.peekMainKernel());
+
+        multiDispatchInfo.push(builtInDispatchInfo);
+        EXPECT_EQ(nullptr, multiDispatchInfo.peekParentKernel());
+        EXPECT_EQ(builtInKernel.get(), multiDispatchInfo.peekMainKernel()); // no kernel was given at construction, so the pushed built-in kernel is expected here
+    }
+}
--- a/unit_tests/mocks/mock_kernel.h
+++ b/unit_tests/mocks/mock_kernel.h
@ -336,6 +336,7 @@ class MockParentKernel : public Kernel {
        info->patchInfo.threadPayload = threadPayload;

        SPatchExecutionEnvironment *executionEnvironment = new SPatchExecutionEnvironment;
+        *executionEnvironment = {};
        executionEnvironment->HasDeviceEnqueue = 1;
        info->patchInfo.executionEnvironment = executionEnvironment;

--- a/unit_tests/mocks/mock_mdi.h
+++ b/unit_tests/mocks/mock_mdi.h
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@ -28,7 +28,7 @@ using namespace OCLRT;

 class MockMultiDispatchInfo : public MultiDispatchInfo {
  public:
-    MockMultiDispatchInfo(Kernel *kernel) {
+    MockMultiDispatchInfo(Kernel *kernel) : MultiDispatchInfo(kernel) {
        DispatchInfo di(kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0});
        dispatchInfos.push_back(di);
    }
--- a/unit_tests/profiling/profiling_tests.cpp
+++ b/unit_tests/profiling/profiling_tests.cpp
@ -134,7 +134,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProfilingAndFor
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    DispatchInfo dispatchInfo;
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    multiDispatchInfo.push(dispatchInfo);
    auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, false, nullptr);
    auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(true, false, *pCmdQ, multiDispatchInfo);
@ -547,7 +548,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    DispatchInfo dispatchInfo;
    dispatchInfo.setKernel(&kernel);
-    MultiDispatchInfo multiDispatchInfo(dispatchInfo);
+    MultiDispatchInfo multiDispatchInfo;
+    multiDispatchInfo.push(dispatchInfo);
    multiDispatchInfo.push(dispatchInfo);
    auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, true, true, nullptr);
    auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(true, true, *pCmdQ, multiDispatchInfo);