diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp
index 434224c412..dd36d37975 100644
--- a/runtime/command_queue/command_queue.cpp
+++ b/runtime/command_queue/command_queue.cpp
@@ -612,4 +612,24 @@ bool CommandQueue::isBlockedCommandStreamRequired(uint32_t commandType, const Ev
 
     return false;
 }
+
+// With AUBDumpSubCaptureMode set, may force `blocking` or `clearAllDependencies`; for CSR types above CSR_HW, adds each kernel name as an AUB comment.
+void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo) {
+    if (DebugManager.flags.AUBDumpSubCaptureMode.get()) {
+        auto status = getGpgpuCommandStreamReceiver().checkAndActivateAubSubCapture(multiDispatchInfo);
+        if (!status.isActive) {
+            // make each enqueue blocking when subcapture is not active to split batch buffer
+            blocking = true;
+        } else if (!status.wasActiveInPreviousEnqueue) {
+            // omit timestamp packet dependencies upon subcapture activation
+            clearAllDependencies = true;
+        }
+    }
+
+    if (getGpgpuCommandStreamReceiver().getType() > CommandStreamReceiverType::CSR_HW) {
+        for (auto &dispatchInfo : multiDispatchInfo) {
+            getGpgpuCommandStreamReceiver().addAubComment(dispatchInfo.getKernel()->getKernelInfo().name.c_str());
+        }
+    }
+}
 } // namespace NEO
diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h
index 4e9bfb388e..791df24d89 100644
--- a/runtime/command_queue/command_queue.h
+++ b/runtime/command_queue/command_queue.h
@@ -438,6 +438,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
     void providePerformanceHint(TransferProperties &transferProperties);
     bool queueDependenciesClearRequired() const;
     bool blitEnqueueAllowed(bool queueBlocked, cl_command_type cmdType);
+    void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
 
     Context *context = nullptr;
     Device *device = nullptr;
diff --git a/runtime/command_queue/command_queue_hw.h b/runtime/command_queue/command_queue_hw.h
index 75e6d05ae7..da51d6361f 100644
--- a/runtime/command_queue/command_queue_hw.h
+++ b/runtime/command_queue/command_queue_hw.h
@@ -328,7 +328,6 @@ class CommandQueueHw : public CommandQueue {
                                       EventsRequest &eventsRequest,
                                       EventBuilder &eventBuilder,
                                       uint32_t taskLevel,
-                                      bool slmUsed,
                                       PrintfHandler *printfHandler);
 
     template <uint32_t commandType>
@@ -339,7 +338,6 @@ class CommandQueueHw : public CommandQueue {
                         TimestampPacketContainer *previousTimestampPacketNodes,
                         std::unique_ptr<KernelOperation> &blockedCommandsData,
                         EventsRequest &eventsRequest,
-                        bool slmUsed,
                         EventBuilder &externalEventBuilder,
                         std::unique_ptr<PrintfHandler> printfHandler);
 
@@ -386,13 +384,15 @@ class CommandQueueHw : public CommandQueue {
                                                  AuxTranslationDirection auxTranslationDirection);
 
     template <uint32_t commandType>
-    LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool profilingRequired,
-                                      bool perfCountersRequired, bool blitEnqueue, bool blockedQueue,
-                                      const MultiDispatchInfo &multiDispatchInfo,
-                                      const EventsRequest &eventsRequest,
+    LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool blitEnqueue, bool blockedQueue,
+                                      const MultiDispatchInfo &multiDispatchInfo, const EventsRequest &eventsRequest,
                                       std::unique_ptr<KernelOperation> &blockedCommandsData,
                                       Surface **surfaces, size_t numSurfaces) {
         LinearStream *commandStream = nullptr;
+
+        bool profilingRequired = (this->isProfilingEnabled() && eventsRequest.outEvent);
+        bool perfCountersRequired = (this->isPerfCountersEnabled() && eventsRequest.outEvent);
+
         if (isBlockedCommandStreamRequired(commandType, eventsRequest, blockedQueue)) {
             constexpr size_t additionalAllocationSize = CSRequirements::csOverfetchSize;
             constexpr size_t allocationSize = MemoryConstants::pageSize64k - CSRequirements::csOverfetchSize;
@@ -422,11 +422,9 @@ class CommandQueueHw : public CommandQueue {
                                                    size_t bufferSlicePitch,
                                                    size_t hostRowPitch,
                                                    size_t hostSlicePitch);
-    void processDeviceEnqueue(Kernel *parentKernel,
-                              DeviceQueueHw<GfxFamily> *devQueueHw,
+    void processDeviceEnqueue(DeviceQueueHw<GfxFamily> *devQueueHw,
                               const MultiDispatchInfo &multiDispatchInfo,
                               TagNode<HwTimeStamps> *hwTimeStamps,
-                              PreemptionMode preemption,
                               bool &blocking);
 
     template <uint32_t commandType>
@@ -434,12 +432,10 @@ class CommandQueueHw : public CommandQueue {
                                    std::unique_ptr<PrintfHandler> &printfHandler,
                                    Event *event,
                                    TagNode<NEO::HwTimeStamps> *&hwTimeStamps,
-                                   Kernel *parentKernel,
                                    bool blockQueue,
                                    DeviceQueueHw<GfxFamily> *devQueueHw,
                                    CsrDependencies &csrDeps,
                                    KernelOperation *blockedCommandsData,
-                                   TimestampPacketContainer &previousTimestampPacketNodes,
-                                   PreemptionMode preemption);
+                                   TimestampPacketContainer &previousTimestampPacketNodes);
 };
 } // namespace NEO
diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h
index 3550b6aa89..83fed699db 100644
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@@ -163,12 +163,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
         DBG_LOG(EventsDebugEnable, "enqueueHandler commandType", commandType, "output Event", eventBuilder.getEvent());
     }
 
-    bool profilingRequired = (this->isProfilingEnabled() && event != nullptr);
-    bool perfCountersRequired = (this->isPerfCountersEnabled() && event != nullptr);
     std::unique_ptr<KernelOperation> blockedCommandsData;
     std::unique_ptr<PrintfHandler> printfHandler;
-    bool slmUsed = multiDispatchInfo.usesSlm() || parentKernel;
-    auto preemption = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
     TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
 
     auto blockQueue = false;
@@ -185,23 +181,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
 
     enqueueHandlerHook(commandType, multiDispatchInfo);
 
-    if (DebugManager.flags.AUBDumpSubCaptureMode.get()) {
-        auto status = getGpgpuCommandStreamReceiver().checkAndActivateAubSubCapture(multiDispatchInfo);
-        if (!status.isActive) {
-            // make each enqueue blocking when subcapture is not active to split batch buffer
-            blocking = true;
-        } else if (!status.wasActiveInPreviousEnqueue) {
-            // omit timestamp packet dependencies dependencies upon subcapture activation
-            clearAllDependencies = true;
-        }
-    }
-
-    if (getGpgpuCommandStreamReceiver().getType() > CommandStreamReceiverType::CSR_HW) {
-        for (auto &dispatchInfo : multiDispatchInfo) {
-            auto kernelName = dispatchInfo.getKernel()->getKernelInfo().name;
-            getGpgpuCommandStreamReceiver().addAubComment(kernelName.c_str());
-        }
-    }
+    aubCaptureHook(blocking, clearAllDependencies, multiDispatchInfo);
 
     if (DebugManager.flags.MakeEachEnqueueBlocking.get()) {
         blocking = true;
@@ -227,9 +207,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
         }
     }
 
-    auto &commandStream = *obtainCommandStream<commandType>(csrDeps, profilingRequired, perfCountersRequired, blitEnqueue, blockQueue,
-                                                            multiDispatchInfo, eventsRequest, blockedCommandsData, surfacesForResidency,
-                                                            numSurfaceForResidency);
+    auto &commandStream = *obtainCommandStream<commandType>(csrDeps, blitEnqueue, blockQueue, multiDispatchInfo, eventsRequest,
+                                                            blockedCommandsData, surfacesForResidency, numSurfaceForResidency);
     auto commandStreamStart = commandStream.getUsed();
 
     if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
@@ -242,8 +221,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
         processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest, commandStream, commandType);
     } else if (multiDispatchInfo.empty() == false) {
         processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
-                                               hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(),
-                                               previousTimestampPacketNodes, preemption);
+                                               hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(),
+                                               previousTimestampPacketNodes);
     } else if (isCacheFlushCommand(commandType)) {
         processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
     } else if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
@@ -268,7 +247,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
     CompletionStamp completionStamp = {Event::eventNotReady, taskLevel, 0};
     if (!blockQueue) {
         if (parentKernel) {
-            processDeviceEnqueue(parentKernel, devQueueHw, multiDispatchInfo, hwTimeStamps, preemption, blocking);
+            processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking);
         }
 
         auto kernelSubmissionRequired = !isCommandWithoutKernel(commandType) && !blitEnqueue;
@@ -285,7 +264,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
                 eventsRequest,
                 eventBuilder,
                 taskLevel,
-                slmUsed,
                 printfHandler.get());
 
             if (parentKernel) {
@@ -365,7 +343,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
             &previousTimestampPacketNodes,
             blockedCommandsData,
             eventsRequest,
-            slmUsed,
             eventBuilder,
             std::move(printfHandler));
     }
@@ -393,19 +370,17 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
                                                           std::unique_ptr<PrintfHandler> &printfHandler,
                                                           Event *event,
                                                           TagNode<HwTimeStamps> *&hwTimeStamps,
-                                                          Kernel *parentKernel,
                                                           bool blockQueue,
                                                           DeviceQueueHw<GfxFamily> *devQueueHw,
                                                           CsrDependencies &csrDeps,
                                                           KernelOperation *blockedCommandsData,
-                                                          TimestampPacketContainer &previousTimestampPacketNodes,
-                                                          PreemptionMode preemption) {
+                                                          TimestampPacketContainer &previousTimestampPacketNodes) {
     TagNode<HwPerfCounter> *hwPerfCounter = nullptr;
     DebugManager.dumpKernelArgs(&multiDispatchInfo);
 
     printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device));
     if (printfHandler) {
-        printfHandler.get()->prepareDispatch(multiDispatchInfo);
+        printfHandler->prepareDispatch(multiDispatchInfo);
     }
 
     if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
@@ -419,7 +394,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
         hwTimeStamps = event->getHwTimeStampNode();
     }
 
-    if (parentKernel) {
+    if (auto parentKernel = multiDispatchInfo.peekParentKernel()) {
         parentKernel->createReflectionSurface();
         parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
         parentKernel->patchEventPool(context->getDefaultDeviceQueue());
@@ -443,7 +418,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
         hwPerfCounter,
         &previousTimestampPacketNodes,
         timestampPacketContainer.get(),
-        preemption,
         commandType);
 
     if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
@@ -502,12 +476,11 @@ void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
 }
 
 template <typename GfxFamily>
-void CommandQueueHw<GfxFamily>::processDeviceEnqueue(Kernel *parentKernel,
-                                                     DeviceQueueHw<GfxFamily> *devQueueHw,
+void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *devQueueHw,
                                                      const MultiDispatchInfo &multiDispatchInfo,
                                                      TagNode<HwTimeStamps> *hwTimeStamps,
-                                                     PreemptionMode preemption,
                                                      bool &blocking) {
+    auto parentKernel = multiDispatchInfo.peekParentKernel();
     size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSizeRequiredForExecutionModel(IndirectHeap::SURFACE_STATE, *parentKernel);
 
     uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;
@@ -531,10 +504,11 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(Kernel *parentKernel,
                       this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
                       devQueueHw->getDebugQueue());
 
+    auto preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
     GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
         *this->commandStream,
         *devQueueHw,
-        preemption,
+        preemptionMode,
         scheduler,
         &getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
         devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE));
@@ -604,7 +578,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
     EventsRequest &eventsRequest,
     EventBuilder &eventBuilder,
     uint32_t taskLevel,
-    bool slmUsed,
     PrintfHandler *printfHandler) {
 
     UNRECOVERABLE_IF(multiDispatchInfo.empty());
@@ -693,7 +666,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
     DispatchFlags dispatchFlags;
     dispatchFlags.blocking = blocking;
     dispatchFlags.dcFlush = shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC;
-    dispatchFlags.useSLM = slmUsed;
+    dispatchFlags.useSLM = multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel();
     dispatchFlags.guardCommandBufferWithPipeControl = true;
     dispatchFlags.GSBA32BitRequired = commandType == CL_COMMAND_NDRANGE_KERNEL;
     dispatchFlags.mediaSamplerRequired = mediaSamplerRequired;
@@ -740,7 +713,6 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
     TimestampPacketContainer *previousTimestampPacketNodes,
     std::unique_ptr<KernelOperation> &blockedCommandsData,
     EventsRequest &eventsRequest,
-    bool slmUsed,
     EventBuilder &externalEventBuilder,
     std::unique_ptr<PrintfHandler> printfHandler) {
 
@@ -790,6 +762,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
             allSurfaces.push_back(surface->duplicate());
         }
         PreemptionMode preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
+        bool slmUsed = multiDispatchInfo.usesSlm() || multiDispatchInfo.peekParentKernel();
         command = std::make_unique<CommandComputeKernel>(*this,
                                                          blockedCommandsData,
                                                          allSurfaces,
diff --git a/runtime/command_queue/hardware_interface.h b/runtime/command_queue/hardware_interface.h
index 40059a19cc..095a6c2f3e 100644
--- a/runtime/command_queue/hardware_interface.h
+++ b/runtime/command_queue/hardware_interface.h
@@ -44,7 +44,6 @@ class HardwareInterface {
         TagNode<HwPerfCounter> *hwPerfCounter,
         TimestampPacketContainer *previousTimestampPacketNodes,
         TimestampPacketContainer *currentTimestampPacketNodes,
-        PreemptionMode preemptionMode,
         uint32_t commandType);
 
     static void getDefaultDshSpace(
diff --git a/runtime/command_queue/hardware_interface_base.inl b/runtime/command_queue/hardware_interface_base.inl
index 5f56d24554..3b3ffdadcb 100644
--- a/runtime/command_queue/hardware_interface_base.inl
+++ b/runtime/command_queue/hardware_interface_base.inl
@@ -32,13 +32,13 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
     TagNode<HwPerfCounter> *hwPerfCounter,
     TimestampPacketContainer *previousTimestampPacketNodes,
     TimestampPacketContainer *currentTimestampPacketNodes,
-    PreemptionMode preemptionMode,
     uint32_t commandType) {
 
     LinearStream *commandStream = nullptr;
     IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr;
     auto parentKernel = multiDispatchInfo.peekParentKernel();
     auto mainKernel = multiDispatchInfo.peekMainKernel();
+    auto preemptionMode = PreemptionHelper::taskPreemptionMode(commandQueue.getDevice(), multiDispatchInfo);
 
     for (auto &dispatchInfo : multiDispatchInfo) {
         // Compute local workgroup sizes
diff --git a/unit_tests/command_queue/dispatch_walker_tests.cpp b/unit_tests/command_queue/dispatch_walker_tests.cpp
index df2276c759..7029d75958 100644
--- a/unit_tests/command_queue/dispatch_walker_tests.cpp
+++ b/unit_tests/command_queue/dispatch_walker_tests.cpp
@@ -156,7 +156,6 @@ HWTEST_F(DispatchWalkerTest, shouldntChangeCommandStreamMemory) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase());
@@ -204,7 +203,6 @@ HWTEST_F(DispatchWalkerTest, noLocalIdsShouldntCrash) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     EXPECT_EQ(commandStreamBuffer, commandStream.getCpuBase());
@@ -234,7 +232,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithDefaultLwsAlgorithm)
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
 
         EXPECT_EQ(dimension, *kernel.workDim);
@@ -265,7 +262,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithSquaredLwsAlgorithm)
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
         EXPECT_EQ(dimension, *kernel.workDim);
     }
@@ -294,7 +290,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithNDLwsAlgorithm) {
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
         EXPECT_EQ(dimension, *kernel.workDim);
     }
@@ -324,7 +319,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterWorkDimensionswithOldLwsAlgorithm) {
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
         EXPECT_EQ(dimension, *kernel.workDim);
     }
@@ -354,7 +348,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterNumWorkGroups) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     EXPECT_EQ(2u, *kernel.numWorkGroupsX);
@@ -386,7 +379,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeND) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
     EXPECT_EQ(2u, *kernel.localWorkSizeX);
     EXPECT_EQ(5u, *kernel.localWorkSizeY);
@@ -417,7 +409,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeND) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
     EXPECT_EQ(2u, *kernel.localWorkSizeX);
     EXPECT_EQ(5u, *kernel.localWorkSizeY);
@@ -449,7 +440,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithComputeSquared) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
     EXPECT_EQ(2u, *kernel.localWorkSizeX);
     EXPECT_EQ(5u, *kernel.localWorkSizeY);
@@ -481,7 +471,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterNoLocalWorkSizeWithOutComputeSquaredAn
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
     EXPECT_EQ(2u, *kernel.localWorkSizeX);
     EXPECT_EQ(5u, *kernel.localWorkSizeY);
@@ -511,7 +500,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSize) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
     EXPECT_EQ(1u, *kernel.localWorkSizeX);
     EXPECT_EQ(2u, *kernel.localWorkSizeY);
@@ -544,7 +532,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizes) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
     EXPECT_EQ(1u, *kernel.localWorkSizeX);
     EXPECT_EQ(2u, *kernel.localWorkSizeY);
@@ -581,7 +568,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizeForSplitKernel) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     auto dispatchId = 0;
@@ -632,7 +618,6 @@ HWTEST_F(DispatchWalkerTest, dataParameterLocalWorkSizesForSplitWalker) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     for (auto &dispatchInfo : multiDispatchInfo) {
@@ -684,7 +669,6 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerDoesntConsumeCommandStreamWhenQueueIs
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     auto &commandStream = pCmdQ->getCS(1024);
@@ -719,7 +703,6 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromKernelW
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     Vec3<size_t> localWorkgroupSize(workGroupSize);
@@ -746,7 +729,7 @@ HWTEST_F(DispatchWalkerTest, givenBlockedEnqueueWhenObtainingCommandStreamThenAl
 
     CsrDependencies csrDependencies;
     EventsRequest eventsRequest(0, nullptr, nullptr);
-    auto cmdStream = mockCmdQ.template obtainCommandStream<CL_COMMAND_NDRANGE_KERNEL>(csrDependencies, false, false, false, true,
+    auto cmdStream = mockCmdQ.template obtainCommandStream<CL_COMMAND_NDRANGE_KERNEL>(csrDependencies, false, true,
                                                                                       multiDispatchInfo, eventsRequest, blockedKernelData,
                                                                                       nullptr, 0u);
 
@@ -773,7 +756,6 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerShouldGetRequiredHeapSizesFromMdiWhen
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
@@ -800,7 +782,6 @@ HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenComm
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     EXPECT_NE(nullptr, blockedCommandsData->commandStream->getGraphicsAllocation());
@@ -828,7 +809,6 @@ HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerI
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     EXPECT_TRUE(csr.getInternalAllocationStorage()->getAllocationsForReuse().peekIsEmpty());
@@ -852,7 +832,6 @@ HWTEST_F(DispatchWalkerTest, dispatchWalkerWithMultipleDispatchInfo) {
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     for (auto &dispatchInfo : multiDispatchInfo) {
@@ -894,7 +873,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     auto dshAfterMultiDisptach = indirectHeap.getUsed();
@@ -979,7 +957,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareParse hwParser;
@@ -1025,7 +1002,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareParse hwParser;
@@ -1076,7 +1052,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, dispatchWalkerWithMultipleDispat
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareParse hwParser;
@@ -1129,7 +1104,6 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationReq
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareParse hwParse;
@@ -1168,7 +1142,6 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoK
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareParse hwParse;
@@ -1208,7 +1181,6 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQ
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareInterface<FamilyType>::dispatchWalker(
@@ -1220,7 +1192,6 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQ
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareParse hwParse;
@@ -1243,7 +1214,7 @@ HWTEST_F(DispatchWalkerTest, givenMultiDispatchWhenWhitelistedRegisterForCoheren
     MockMultiDispatchInfo multiDispatchInfo(std::vector<DispatchInfo *>({&di1, &di2}));
 
     HardwareInterface<FamilyType>::dispatchWalker(*pCmdQ, multiDispatchInfo, CsrDependencies(), nullptr, nullptr, nullptr, nullptr, nullptr,
-                                                  pDevice->getPreemptionMode(), CL_COMMAND_NDRANGE_KERNEL);
+                                                  CL_COMMAND_NDRANGE_KERNEL);
 
     hwParser.parseCommands<FamilyType>(cmdStream, 0);
 
@@ -1306,7 +1277,6 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredTh
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     auto sizeUsed = cmdStream.getUsed();
@@ -1363,7 +1333,6 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredTh
         nullptr,
         nullptr,
         nullptr,
-        pDevice->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     auto sizeUsed = cmdStream.getUsed();
diff --git a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp
index 2f753d4568..dcfccd0486 100644
--- a/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp
+++ b/unit_tests/execution_model/parent_kernel_dispatch_tests.cpp
@@ -53,7 +53,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
 
         size_t dshUsedAfter = pCmdQ->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u).getUsed();
@@ -109,7 +108,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
 
         auto iohUsed = ioh.getUsed();
@@ -135,7 +133,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
 
         auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u);
@@ -171,7 +168,6 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
         ASSERT_NE(nullptr, blockedCommandsData);
 
@@ -283,7 +279,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenBlockedQueueWhenParen
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
 
         ASSERT_NE(nullptr, blockedCommandsData);
@@ -316,7 +311,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenParentKernelWhenDispa
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
 
         LinearStream *commandStream = &pCmdQ->getCS(0);
@@ -374,7 +368,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenUsedSSHHeapWhenParent
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
 
         EXPECT_EQ(UnitTestHelper<FamilyType>::getDefaultSshUsage(), ssh.getUsed());
@@ -410,7 +403,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenNotUsedSSHHeapWhenPar
             nullptr,
             nullptr,
             nullptr,
-            pDevice->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
 
         EXPECT_EQ(bufferMemory, ssh.getCpuBase());
diff --git a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
index a41fab8b70..c7da186cfd 100644
--- a/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
+++ b/unit_tests/execution_model/submit_blocked_parent_kernel_tests.cpp
@@ -421,7 +421,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedCommand
             nullptr,
             nullptr,
             nullptr,
-            device->getPreemptionMode(),
             CL_COMMAND_NDRANGE_KERNEL);
 
         EXPECT_NE(nullptr, blockedCommandsData);
diff --git a/unit_tests/helpers/timestamp_packet_tests.cpp b/unit_tests/helpers/timestamp_packet_tests.cpp
index 1a102bdb5f..e8b4f85972 100644
--- a/unit_tests/helpers/timestamp_packet_tests.cpp
+++ b/unit_tests/helpers/timestamp_packet_tests.cpp
@@ -390,7 +390,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispat
         nullptr,
         nullptr,
         &timestampPacket,
-        device->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareParse hwParser;
@@ -435,7 +434,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketDisabledWh
         nullptr,
         nullptr,
         &timestampPacket,
-        device->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareParse hwParser;
@@ -885,7 +883,6 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh
         nullptr,
         nullptr,
         &timestamp7,
-        device->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareParse hwParser;
@@ -969,7 +966,6 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
         nullptr,
         nullptr,
         &timestamp7,
-        device->getPreemptionMode(),
         CL_COMMAND_NDRANGE_KERNEL);
 
     HardwareParse hwParser;