Remove device enqueue part 1

Remove:
- tests with matcher for device enqueue
- classes: MockDeviceQueueHw, DeviceQueueHw, SchedulerSimulation, DeviceQueueHwTest, KernelArgDevQueueTest
- functions: forceDispatchScheduler, processDeviceEnqueue, dispatchScheduler

Related-To: NEO-6559
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
2026-01-03 06:49:52 +08:00 · 2022-01-03 17:29:57 +00:00
parent 61e5e0687d
commit a9ebb49fb5
61 changed files with 61 additions and 4980 deletions
--- a/opencl/source/builtin_kernels_simulation/CMakeLists.txt
+++ b/opencl/source/builtin_kernels_simulation/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2018-2021 Intel Corporation
+# Copyright (C) 2018-2022 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -8,9 +8,6 @@ set(BUILTIN_KERNELS_SIMULATION_SRCS
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.inl
-    ${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.h
 )

 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
--- a/opencl/source/builtin_kernels_simulation/scheduler_simulation.h
+++ b/opencl/source/builtin_kernels_simulation/scheduler_simulation.h
@@ -1,78 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#pragma once
-#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
-
-#include <cstdint>
-#include <thread>
-namespace NEO {
-class GraphicsAllocation;
-}
-
-namespace BuiltinKernelsSimulation {
-
-extern bool conditionReady;
-extern std::thread threads[];
-
-template <typename GfxFamily>
-class SchedulerSimulation {
-  public:
-    void runSchedulerSimulation(NEO::GraphicsAllocation *queue,
-                                NEO::GraphicsAllocation *commandsStack,
-                                NEO::GraphicsAllocation *eventsPool,
-                                NEO::GraphicsAllocation *secondaryBatchBuffer,
-                                NEO::GraphicsAllocation *dsh,
-                                NEO::GraphicsAllocation *reflectionSurface,
-                                NEO::GraphicsAllocation *queueStorageBuffer,
-                                NEO::GraphicsAllocation *ssh,
-                                NEO::GraphicsAllocation *debugQueue);
-
-    void cleanSchedulerSimulation();
-
-    static void startScheduler(uint32_t index,
-                               NEO::GraphicsAllocation *queue,
-                               NEO::GraphicsAllocation *commandsStack,
-                               NEO::GraphicsAllocation *eventsPool,
-                               NEO::GraphicsAllocation *secondaryBatchBuffer,
-                               NEO::GraphicsAllocation *dsh,
-                               NEO::GraphicsAllocation *reflectionSurface,
-                               NEO::GraphicsAllocation *queueStorageBuffer,
-                               NEO::GraphicsAllocation *ssh,
-                               NEO::GraphicsAllocation *debugQueue);
-
-    void initializeSchedulerSimulation(NEO::GraphicsAllocation *queue,
-                                       NEO::GraphicsAllocation *commandsStack,
-                                       NEO::GraphicsAllocation *eventsPool,
-                                       NEO::GraphicsAllocation *secondaryBatchBuffer,
-                                       NEO::GraphicsAllocation *dsh,
-                                       NEO::GraphicsAllocation *reflectionSurface,
-                                       NEO::GraphicsAllocation *queueStorageBuffer,
-                                       NEO::GraphicsAllocation *ssh,
-                                       NEO::GraphicsAllocation *debugQueue);
-
-    static void patchGpGpuWalker(uint secondLevelBatchOffset,
-                                 __global uint *secondaryBatchBuffer,
-                                 uint interfaceDescriptorOffset,
-                                 uint simdSize,
-                                 uint totalLocalWorkSize,
-                                 uint3 dimSize,
-                                 uint3 startPoint,
-                                 uint numberOfHwThreadsPerWg,
-                                 uint indirectPayloadSize,
-                                 uint ioHoffset);
-    static bool enabled;
-    static bool simulationRun;
-};
-
-template <typename GfxFamily>
-bool SchedulerSimulation<GfxFamily>::enabled = true;
-
-template <typename GfxFamily>
-bool SchedulerSimulation<GfxFamily>::simulationRun = false;
-
-} // namespace BuiltinKernelsSimulation
--- a/opencl/source/builtin_kernels_simulation/scheduler_simulation.inl
+++ b/opencl/source/builtin_kernels_simulation/scheduler_simulation.inl
@@ -1,97 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#include "shared/source/memory_manager/graphics_allocation.h"
-
-#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
-
-#include <cstdint>
-#include <mutex>
-#include <thread>
-
-using namespace NEO;
-
-namespace BuiltinKernelsSimulation {
-
-template <typename GfxFamily>
-void SchedulerSimulation<GfxFamily>::cleanSchedulerSimulation() {
-    threadIDToLocalIDmap.clear();
-    delete pGlobalBarrier;
-}
-
-template <typename GfxFamily>
-void SchedulerSimulation<GfxFamily>::initializeSchedulerSimulation(GraphicsAllocation *queue,
-                                                                   GraphicsAllocation *commandsStack,
-                                                                   GraphicsAllocation *eventsPool,
-                                                                   GraphicsAllocation *secondaryBatchBuffer,
-                                                                   GraphicsAllocation *dsh,
-                                                                   GraphicsAllocation *reflectionSurface,
-                                                                   GraphicsAllocation *queueStorageBuffer,
-                                                                   GraphicsAllocation *ssh,
-                                                                   GraphicsAllocation *debugQueue) {
-
-    localSize[0] = NUM_OF_THREADS;
-    localSize[1] = 1;
-    localSize[2] = 1;
-
-    threadIDToLocalIDmap.clear();
-    pGlobalBarrier = new SynchronizationBarrier(NUM_OF_THREADS);
-
-    // Spawn Thread ID == 0 on main thread
-    for (uint32_t i = 1; i < NUM_OF_THREADS; i++) {
-        threads[i] = std::thread(startScheduler, i, queue, commandsStack, eventsPool, secondaryBatchBuffer, dsh, reflectionSurface, queueStorageBuffer, ssh, debugQueue);
-    }
-
-    conditionReady = true;
-}
-
-template <typename GfxFamily>
-void SchedulerSimulation<GfxFamily>::runSchedulerSimulation(GraphicsAllocation *queue,
-                                                            GraphicsAllocation *commandsStack,
-                                                            GraphicsAllocation *eventsPool,
-                                                            GraphicsAllocation *secondaryBatchBuffer,
-                                                            GraphicsAllocation *dsh,
-                                                            GraphicsAllocation *reflectionSurface,
-                                                            GraphicsAllocation *queueStorageBuffer,
-                                                            GraphicsAllocation *ssh,
-                                                            GraphicsAllocation *debugQueue) {
-    simulationRun = true;
-    if (enabled) {
-        initializeSchedulerSimulation(queue,
-                                      commandsStack,
-                                      eventsPool,
-                                      secondaryBatchBuffer,
-                                      dsh,
-                                      reflectionSurface,
-                                      queueStorageBuffer,
-                                      ssh,
-                                      debugQueue);
-
-        // start main thread with LID == 0
-        startScheduler(0,
-                       queue,
-                       commandsStack,
-                       eventsPool,
-                       secondaryBatchBuffer,
-                       dsh,
-                       reflectionSurface,
-                       queueStorageBuffer,
-                       ssh,
-                       debugQueue);
-
-        // Wait for all threads on main thread
-        if (threadIDToLocalIDmap[std::this_thread::get_id()] == 0) {
-
-            for (uint32_t i = 1; i < NUM_OF_THREADS; i++)
-                threads[i].join();
-
-            cleanSchedulerSimulation();
-        }
-    }
-};
-
-} // namespace BuiltinKernelsSimulation
--- a/opencl/source/command_queue/CMakeLists.txt
+++ b/opencl/source/command_queue/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2018-2021 Intel Corporation
+# Copyright (C) 2018-2022 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -13,8 +13,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
    ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h
    ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl
-    ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_disabled_device_enqueue.inl
-    ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_enabled_device_enqueue.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_state.h
    ${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args.h
@@ -43,8 +41,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
    ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker.h
    ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_base.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_bdw_and_later.inl
-    ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_disabled_device_enqueue.inl
-    ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_enabled_device_enqueue.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface.h
    ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_base.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_bdw_and_later.inl
--- a/opencl/source/command_queue/command_queue_hw.h
+++ b/opencl/source/command_queue/command_queue_hw.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -15,7 +15,6 @@
 #include "opencl/source/cl_device/cl_device.h"
 #include "opencl/source/command_queue/command_queue.h"
 #include "opencl/source/command_queue/gpgpu_walker.h"
-#include "opencl/source/device_queue/device_queue_hw.h"
 #include "opencl/source/helpers/dispatch_info.h"
 #include "opencl/source/helpers/queue_helpers.h"
 #include "opencl/source/mem_obj/mem_obj.h"
@@ -502,8 +501,6 @@ class CommandQueueHw : public CommandQueue {

    bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
    void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
-    void forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo);
-    void runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel);
    static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
                                                   size_t *hostOffset,
                                                   const size_t *bufferOrigin,
@@ -513,10 +510,6 @@ class CommandQueueHw : public CommandQueue {
                                                   size_t bufferSlicePitch,
                                                   size_t hostRowPitch,
                                                   size_t hostSlicePitch);
-    void processDeviceEnqueue(DeviceQueueHw<GfxFamily> *devQueueHw,
-                              const MultiDispatchInfo &multiDispatchInfo,
-                              TagNodeBase *hwTimeStamps,
-                              bool &blocking);

    template <uint32_t commandType>
    void processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo,
@@ -524,7 +517,6 @@ class CommandQueueHw : public CommandQueue {
                                   Event *event,
                                   TagNodeBase *&hwTimeStamps,
                                   bool blockQueue,
-                                   DeviceQueueHw<GfxFamily> *devQueueHw,
                                   CsrDependencies &csrDeps,
                                   KernelOperation *blockedCommandsData,
                                   TimestampPacketDependencies &timestampPacketDependencies);
--- a/opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl
+++ b/opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl
@@ -1,15 +0,0 @@
-/*
- * Copyright (C) 2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#include "opencl/source/command_queue/command_queue_hw.h"
-
-namespace NEO {
-
-template <typename GfxFamily>
-void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
-}
-} // namespace NEO
--- a/opencl/source/command_queue/command_queue_hw_enabled_device_enqueue.inl
+++ b/opencl/source/command_queue/command_queue_hw_enabled_device_enqueue.inl
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) 2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#include "opencl/source/command_queue/command_queue_hw.h"
-
-namespace NEO {
-
-template <typename GfxFamily>
-void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
-    BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
-    simulation.runSchedulerSimulation(devQueueHw.getQueueBuffer(),
-                                      devQueueHw.getStackBuffer(),
-                                      devQueueHw.getEventPoolBuffer(),
-                                      devQueueHw.getSlbBuffer(),
-                                      devQueueHw.getDshBuffer(),
-                                      parentKernel.getKernelReflectionSurface(),
-                                      devQueueHw.getQueueStorageBuffer(),
-                                      this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
-                                      devQueueHw.getDebugQueue());
-}
-} // namespace NEO
--- a/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl
+++ b/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -9,7 +9,6 @@

 #include "opencl/extensions/public/cl_ext_private.h"
 #include "opencl/source/command_queue/command_queue_hw_base.inl"
-#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
 #include "opencl/source/memory_manager/resource_surface.h"

 namespace NEO {
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -22,7 +22,6 @@
 #include "shared/source/utilities/tag_allocator.h"

 #include "opencl/source/built_ins/builtins_dispatch_builder.h"
-#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
 #include "opencl/source/command_queue/command_queue_hw.h"
 #include "opencl/source/command_queue/gpgpu_walker.h"
 #include "opencl/source/command_queue/hardware_interface.h"
@@ -67,45 +66,40 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount

    auto auxTranslationMode = AuxTranslationMode::None;

-    if (DebugManager.flags.ForceDispatchScheduler.get()) {
-        forceDispatchScheduler(multiDispatchInfo);
+    kernel->updateAuxTranslationRequired();
+    if (kernel->isAuxTranslationRequired()) {
+        kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
+        multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
+
+        if (!kernelObjsForAuxTranslation.empty()) {
+            auxTranslationMode = HwHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo());
+        }
+    }
+
+    if (AuxTranslationMode::Builtin == auxTranslationMode) {
+        auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
+        builtInLock.takeOwnership(builder, this->context);
+
+        dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
+    }
+
+    if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
+        DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
+        builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
+        builder.setKernel(kernel);
+        builder.bake(multiDispatchInfo);
    } else {
+        auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
+        builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);

-        kernel->updateAuxTranslationRequired();
-        if (kernel->isAuxTranslationRequired()) {
-            kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
-            multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
-
-            if (!kernelObjsForAuxTranslation.empty()) {
-                auxTranslationMode = HwHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo());
-            }
+        if (multiDispatchInfo.size() == 0) {
+            return;
        }
+    }

-        if (AuxTranslationMode::Builtin == auxTranslationMode) {
-            auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
-            builtInLock.takeOwnership(builder, this->context);
-
-            dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
-        }
-
-        if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
-            DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
-            builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
-            builder.setKernel(kernel);
-            builder.bake(multiDispatchInfo);
-        } else {
-            auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
-            builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);
-
-            if (multiDispatchInfo.size() == 0) {
-                return;
-            }
-        }
-
-        if (AuxTranslationMode::Builtin == auxTranslationMode) {
-            UNRECOVERABLE_IF(kernel->isParentKernel);
-            dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
-        }
+    if (AuxTranslationMode::Builtin == auxTranslationMode) {
+        UNRECOVERABLE_IF(kernel->isParentKernel);
+        dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
    }

    if (AuxTranslationMode::Blit == auxTranslationMode) {
@@ -115,35 +109,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
    enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
 }

-template <typename GfxFamily>
-void CommandQueueHw<GfxFamily>::forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo) {
-    SchedulerKernel &scheduler = getContext().getSchedulerKernel();
-
-    auto devQueue = this->getContext().getDefaultDeviceQueue();
-    DeviceQueueHw<GfxFamily> *devQueueHw = castToObjectOrAbort<DeviceQueueHw<GfxFamily>>(devQueue);
-
-    DispatchInfo dispatchInfo(devQueue->getClDevice(), &scheduler, 1, Vec3<size_t>(scheduler.getGws(), 1, 1), Vec3<size_t>(scheduler.getLws(), 1, 1), Vec3<size_t>(0, 0, 0));
-    Vec3<size_t> workGroupCount = generateWorkgroupsNumber(dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize());
-    dispatchInfo.setTotalNumberOfWorkgroups(workGroupCount);
-    dispatchInfo.setNumberOfWorkgroups(workGroupCount);
-
-    scheduler.createReflectionSurface();
-    GraphicsAllocation *reflectionSurface = scheduler.getKernelReflectionSurface();
-
-    devQueueHw->resetDeviceQueue();
-
-    scheduler.setArgs(devQueueHw->getQueueBuffer(),
-                      devQueueHw->getStackBuffer(),
-                      devQueueHw->getEventPoolBuffer(),
-                      devQueueHw->getSlbBuffer(),
-                      devQueueHw->getDshBuffer(),
-                      reflectionSurface,
-                      devQueueHw->getQueueStorageBuffer(),
-                      this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation());
-
-    multiDispatchInfo.push(dispatchInfo);
-}
-
 template <typename GfxFamily>
 template <uint32_t commandType>
 void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
@@ -163,8 +128,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
    }

    Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
-    auto devQueue = this->getContext().getDefaultDeviceQueue();
-    DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);

    TagNodeBase *hwTimeStamps = nullptr;
    CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
@@ -183,11 +146,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
    auto taskLevel = 0u;
    obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType);

-    if (parentKernel && !blockQueue) {
-        while (!devQueueHw->isEMCriticalSectionFree())
-            ;
-    }
-
    enqueueHandlerHook(commandType, multiDispatchInfo);

    bool clearDependenciesForSubCapture = false;
@@ -262,7 +220,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,

    if (multiDispatchInfo.empty() == false) {
        processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
-                                               hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(),
+                                               hwTimeStamps, blockQueue, csrDeps, blockedCommandsData.get(),
                                               timestampPacketDependencies);
    } else if (isCacheFlushCommand(commandType)) {
        processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
@@ -310,9 +268,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
        }
    }
    if (!blockQueue) {
-        if (parentKernel) {
-            processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking);
-        }

        if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) {
            csrDeps.makeResident(computeCommandStreamReceiver);
@@ -332,15 +287,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
                taskLevel,
                printfHandler.get(),
                getBcsForAuxTranslation());
-
-            if (parentKernel) {
-                computeCommandStreamReceiver.setMediaVFEStateDirty(true);
-
-                if (devQueueHw->getSchedulerReturnInstance() > 0) {
-                    waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
-                    this->runSchedulerSimulation(*devQueueHw, *parentKernel);
-                }
-            }
        } else if (enqueueProperties.isFlushWithoutKernelRequired()) {
            completionStamp = enqueueCommandWithoutKernel(
                surfacesForResidency,
@@ -438,7 +384,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
                                                          Event *event,
                                                          TagNodeBase *&hwTimeStamps,
                                                          bool blockQueue,
-                                                          DeviceQueueHw<GfxFamily> *devQueueHw,
                                                          CsrDependencies &csrDeps,
                                                          KernelOperation *blockedCommandsData,
                                                          TimestampPacketDependencies &timestampPacketDependencies) {
@@ -474,10 +419,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
        parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
        parentKernel->patchEventPool(context->getDefaultDeviceQueue());
        parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get());
-        if (!blockQueue) {
-            devQueueHw->resetDeviceQueue();
-            devQueueHw->acquireEMCriticalSection();
-        }
    }

    if (event && this->isPerfCountersEnabled()) {
@@ -651,56 +592,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(Comm
    EncodeStoreMMIO<GfxFamily>::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress);
 }

-template <typename GfxFamily>
-void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *devQueueHw,
-                                                     const MultiDispatchInfo &multiDispatchInfo,
-                                                     TagNodeBase *hwTimeStamps,
-                                                     bool &blocking) {
-    auto parentKernel = multiDispatchInfo.peekParentKernel();
-    size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
-    bool isCcsUsed = EngineHelpers::isCcs(gpgpuEngine->osContext->getEngineType());
-
-    uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;
-    devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
-                                            *devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
-                                            parentKernel,
-                                            (uint32_t)multiDispatchInfo.size(),
-                                            getGpgpuCommandStreamReceiver().getTagAllocation()->getGpuAddress(),
-                                            taskCount,
-                                            hwTimeStamps,
-                                            isCcsUsed);
-
-    SchedulerKernel &scheduler = getContext().getSchedulerKernel();
-
-    scheduler.setArgs(devQueueHw->getQueueBuffer(),
-                      devQueueHw->getStackBuffer(),
-                      devQueueHw->getEventPoolBuffer(),
-                      devQueueHw->getSlbBuffer(),
-                      devQueueHw->getDshBuffer(),
-                      parentKernel->getKernelReflectionSurface(),
-                      devQueueHw->getQueueStorageBuffer(),
-                      this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
-                      devQueueHw->getDebugQueue());
-
-    auto preemptionMode = ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo);
-    GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
-        *this->commandStream,
-        *devQueueHw,
-        preemptionMode,
-        scheduler,
-        &getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
-        devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
-        isCcsUsed);
-
-    scheduler.makeResident(getGpgpuCommandStreamReceiver());
-
-    parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getGpgpuCommandStreamReceiver());
-
-    if (parentKernel->isAuxTranslationRequired()) {
-        blocking = true;
-    }
-}
-
 template <typename GfxFamily>
 void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) {
    auto isQueueBlockedStatus = isQueueBlocked();
@@ -842,17 +733,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
    IndirectHeap *dsh = nullptr;
    IndirectHeap *ioh = nullptr;

-    if (multiDispatchInfo.peekParentKernel()) {
-        DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(this->getContext().getDefaultDeviceQueue());
-        DEBUG_BREAK_IF(pDevQueue == nullptr);
-        dsh = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
-        // In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks.
-        ioh = dsh;
-        implicitFlush = true;
-    } else {
-        dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u);
-        ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
-    }
+    dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u);
+    ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);

    auto allocNeedsFlushDC = false;
    if (!device->isFullRangeSvm()) {
--- a/opencl/source/command_queue/gpgpu_walker.h
+++ b/opencl/source/command_queue/gpgpu_walker.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -20,7 +20,6 @@
 #include "opencl/source/command_queue/cl_local_work_size.h"
 #include "opencl/source/command_queue/command_queue.h"
 #include "opencl/source/context/context.h"
-#include "opencl/source/device_queue/device_queue_hw.h"
 #include "opencl/source/helpers/dispatch_info.h"
 #include "opencl/source/helpers/hardware_commands_helper.h"
 #include "opencl/source/helpers/task_information.h"
@@ -84,15 +83,6 @@ class GpgpuWalkerHelper {
        TagNodeBase *timestampPacketNode,
        const RootDeviceEnvironment &rootDeviceEnvironment);

-    static void dispatchScheduler(
-        LinearStream &commandStream,
-        DeviceQueueHw<GfxFamily> &devQueueHw,
-        PreemptionMode preemptionMode,
-        SchedulerKernel &scheduler,
-        IndirectHeap *ssh,
-        IndirectHeap *dsh,
-        bool isCcsUsed);
-
    static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxFamily> *storeCmd);

  private:
@@ -142,11 +132,6 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf
    if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) {
        if (heapType == IndirectHeap::SURFACE_STATE) {
            expectedSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
-        } else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT)
-        {
-            DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(commandQueue.getContext().getDefaultDeviceQueue());
-            DEBUG_BREAK_IF(pDevQueue == nullptr);
-            ih = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
        }
    }

--- a/opencl/source/command_queue/gpgpu_walker_base.inl
+++ b/opencl/source/command_queue/gpgpu_walker_base.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -191,17 +191,12 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
        return expectedSizeCS;
    }

-    Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
    for (auto &dispatchInfo : multiDispatchInfo) {
        expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel(), dispatchInfo);
        size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0;
        expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
        expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
    }
-    if (parentKernel) {
-        SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
-        expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler, DispatchInfo{});
-    }
    if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
        expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps);
        expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
--- a/opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl
+++ b/opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#include "opencl/source/command_queue/gpgpu_walker.h"
-
-namespace NEO {
-template <typename GfxFamily>
-void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
-    LinearStream &commandStream,
-    DeviceQueueHw<GfxFamily> &devQueueHw,
-    PreemptionMode preemptionMode,
-    SchedulerKernel &scheduler,
-    IndirectHeap *ssh,
-    IndirectHeap *dsh,
-    bool isCcsUsed) {
-    UNRECOVERABLE_IF(true);
-}
-} // namespace NEO
--- a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl
+++ b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -16,7 +16,6 @@
 #include "shared/source/helpers/simd_helper.h"

 #include "opencl/source/command_queue/gpgpu_walker_base.inl"
-#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
 #include "opencl/source/platform/platform.h"

 namespace NEO {
--- a/opencl/source/device_queue/CMakeLists.txt
+++ b/opencl/source/device_queue/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2018-2021 Intel Corporation
+# Copyright (C) 2018-2022 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -8,10 +8,6 @@ set(RUNTIME_SRCS_DEVICE_QUEUE
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/device_queue.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/device_queue.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_base.inl
-    ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_skl_and_later.inl
-    ${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_profiling.inl
 )
 target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DEVICE_QUEUE})
 set_property(GLOBAL PROPERTY RUNTIME_SRCS_DEVICE_QUEUE ${RUNTIME_SRCS_DEVICE_QUEUE})
--- a/opencl/source/device_queue/device_queue.cpp
+++ b/opencl/source/device_queue/device_queue.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -12,7 +12,6 @@

 #include "opencl/source/cl_device/cl_device.h"
 #include "opencl/source/context/context.h"
-#include "opencl/source/device_queue/device_queue_hw.h"
 #include "opencl/source/helpers/dispatch_info.h"
 #include "opencl/source/helpers/queue_helpers.h"

@@ -186,10 +185,6 @@ void DeviceQueue::resetDeviceQueue() {
    return;
 }

-void DeviceQueue::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) {
-    return;
-}
-
 IndirectHeap *DeviceQueue::getIndirectHeap(IndirectHeap::Type type) {
    return nullptr;
 }
--- a/opencl/source/device_queue/device_queue.h
+++ b/opencl/source/device_queue/device_queue.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -83,7 +83,6 @@ class DeviceQueue : public BaseObject<_device_queue> {
    }

    virtual void resetDeviceQueue();
-    virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed);
    virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);

    void acquireEMCriticalSection() {
--- a/opencl/source/device_queue/device_queue_hw.h
+++ b/opencl/source/device_queue/device_queue_hw.h
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#pragma once
-#include "shared/source/command_stream/linear_stream.h"
-#include "shared/source/helpers/ptr_math.h"
-#include "shared/source/indirect_heap/indirect_heap.h"
-
-#include "opencl/source/device_queue/device_queue.h"
-#include "opencl/source/kernel/kernel.h"
-#include "opencl/source/program/program.h"
-#include "opencl/source/scheduler/scheduler_kernel.h"
-
-#include "hw_cmds.h"
-
-namespace NEO {
-
-template <typename GfxFamily>
-class DeviceQueueHw : public DeviceQueue {
-    using BaseClass = DeviceQueue;
-    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
-    using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
-    using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
-    using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
-    using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
-    using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
-    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
-    using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
-    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
-    using MI_MATH = typename GfxFamily::MI_MATH;
-    using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
-
-  public:
-    DeviceQueueHw(Context *context,
-                  ClDevice *device,
-                  cl_queue_properties &properties) : BaseClass(context, device, properties) {
-        allocateSlbBuffer();
-        offsetDsh = colorCalcStateSize + (uint32_t)sizeof(INTERFACE_DESCRIPTOR_DATA) * interfaceDescriptorEntries * numberOfIDTables;
-        igilQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
-    }
-
-    static DeviceQueue *create(Context *context,
-                               ClDevice *device,
-                               cl_queue_properties &properties) {
-        return new (std::nothrow) DeviceQueueHw<GfxFamily>(context, device, properties);
-    }
-
-    IndirectHeap *getIndirectHeap(IndirectHeap::Type type) override;
-
-    LinearStream *getSlbCS() { return &slbCS; }
-    void resetDSH();
-
-    size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler);
-
-    void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override;
-
-    void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNodeBase *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override;
-    void resetDeviceQueue() override;
-    void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override;
-
-    uint32_t getSchedulerReturnInstance() {
-        return igilQueue->m_controls.m_SchedulerEarlyReturn;
-    }
-
-    static size_t getCSPrefetchSize();
-
-  protected:
-    void allocateSlbBuffer();
-    size_t getMinimumSlbSize();
-    size_t getWaCommandsSize();
-    void addArbCheckCmdWa();
-    void addMiAtomicCmdWa(uint64_t atomicOpPlaceholder);
-    void addLriCmdWa(bool setArbCheck);
-    void addLriCmd(bool setArbCheck);
-    void addPipeControlCmdWa(bool isNoopCmd = false);
-    void initPipeControl(PIPE_CONTROL *pc);
-    void buildSlbDummyCommands();
-    void addDcFlushToPipeControlWa(PIPE_CONTROL *pc);
-
-    void addProfilingEndCmds(uint64_t timestampAddress);
-    static size_t getProfilingEndCmdsSize();
-
-    MOCKABLE_VIRTUAL void addMediaStateClearCmds();
-    static size_t getMediaStateClearCmdsSize();
-
-    static size_t getExecutionModelCleanupSectionSize();
-    static uint64_t getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed);
-
-    LinearStream slbCS;
-    IGIL_CommandQueue *igilQueue = nullptr;
-};
-} // namespace NEO
--- a/opencl/source/device_queue/device_queue_hw_profiling.inl
+++ b/opencl/source/device_queue/device_queue_hw_profiling.inl
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2018-2021 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#include "opencl/source/command_queue/gpgpu_walker.h"
-#include "opencl/source/device_queue/device_queue_hw.h"
-
-namespace NEO {
-
-template <typename GfxFamily>
-void DeviceQueueHw<GfxFamily>::addProfilingEndCmds(uint64_t timestampAddress) {
-
-    auto pipeControlSpace = (PIPE_CONTROL *)slbCS.getSpace(sizeof(PIPE_CONTROL));
-    auto pipeControlCmd = GfxFamily::cmdInitPipeControl;
-    pipeControlCmd.setCommandStreamerStallEnable(true);
-    *pipeControlSpace = pipeControlCmd;
-
-    //low part
-    auto mICmdLowSpace = (MI_STORE_REGISTER_MEM *)slbCS.getSpace(sizeof(MI_STORE_REGISTER_MEM));
-    auto mICmdLow = GfxFamily::cmdInitStoreRegisterMem;
-    GpgpuWalkerHelper<GfxFamily>::adjustMiStoreRegMemMode(&mICmdLow);
-    mICmdLow.setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
-    mICmdLow.setMemoryAddress(timestampAddress);
-    *mICmdLowSpace = mICmdLow;
-}
-} // namespace NEO
--- a/opencl/source/gen11/command_queue_gen11.cpp
+++ b/opencl/source/gen11/command_queue_gen11.cpp
@@ -10,7 +10,6 @@

 #include "opencl/source/command_queue/command_queue_hw.h"
 #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
-#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
 #include "opencl/source/command_queue/enqueue_resource_barrier.h"

 namespace NEO {
--- a/opencl/source/gen11/enable_family_full_ocl_gen11.cpp
+++ b/opencl/source/gen11/enable_family_full_ocl_gen11.cpp
@@ -8,7 +8,6 @@
 #include "shared/source/helpers/populate_factory.h"

 #include "opencl/source/command_queue/command_queue_hw.h"
-#include "opencl/source/device_queue/device_queue_hw.h"
 #include "opencl/source/helpers/cl_hw_helper.h"
 #include "opencl/source/mem_obj/buffer.h"
 #include "opencl/source/mem_obj/image.h"
--- a/opencl/source/gen11/gpgpu_walker_gen11.cpp
+++ b/opencl/source/gen11/gpgpu_walker_gen11.cpp
@@ -8,7 +8,6 @@
 #include "shared/source/gen11/hw_info.h"

 #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
-#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
 #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"

 namespace NEO {
--- a/opencl/source/gen12lp/command_queue_gen12lp.cpp
+++ b/opencl/source/gen12lp/command_queue_gen12lp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2021 Intel Corporation
+ * Copyright (C) 2019-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -9,7 +9,6 @@

 #include "opencl/source/command_queue/command_queue_hw.h"
 #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
-#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"

 #include "command_queue_helpers_gen12lp.inl"

--- a/opencl/source/gen12lp/enable_family_full_ocl_gen12lp.cpp
+++ b/opencl/source/gen12lp/enable_family_full_ocl_gen12lp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -8,7 +8,6 @@
 #include "shared/source/helpers/populate_factory.h"

 #include "opencl/source/command_queue/command_queue_hw.h"
-#include "opencl/source/device_queue/device_queue_hw.h"
 #include "opencl/source/helpers/cl_hw_helper.h"
 #include "opencl/source/mem_obj/buffer.h"
 #include "opencl/source/mem_obj/image.h"
--- a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp
+++ b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2021 Intel Corporation
+ * Copyright (C) 2019-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -11,7 +11,6 @@
 #include "shared/source/memory_manager/graphics_allocation.h"

 #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
-#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
 #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"

 namespace NEO {
--- a/opencl/source/gen8/command_queue_gen8.cpp
+++ b/opencl/source/gen8/command_queue_gen8.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -10,7 +10,6 @@

 #include "opencl/source/command_queue/command_queue_hw.h"
 #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
-#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
 #include "opencl/source/command_queue/enqueue_resource_barrier.h"

 namespace NEO {
--- a/opencl/source/gen8/gpgpu_walker_gen8.cpp
+++ b/opencl/source/gen8/gpgpu_walker_gen8.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -8,7 +8,6 @@
 #include "shared/source/gen8/hw_info.h"

 #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
-#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
 #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"

 namespace NEO {
--- a/opencl/source/gen9/command_queue_gen9.cpp
+++ b/opencl/source/gen9/command_queue_gen9.cpp
@@ -10,7 +10,6 @@

 #include "opencl/source/command_queue/command_queue_hw.h"
 #include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
-#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
 #include "opencl/source/command_queue/enqueue_resource_barrier.h"

 namespace NEO {
--- a/opencl/source/gen9/enable_family_full_ocl_gen9.cpp
+++ b/opencl/source/gen9/enable_family_full_ocl_gen9.cpp
@@ -8,7 +8,6 @@
 #include "shared/source/helpers/populate_factory.h"

 #include "opencl/source/command_queue/command_queue_hw.h"
-#include "opencl/source/device_queue/device_queue_hw.h"
 #include "opencl/source/helpers/cl_hw_helper.h"
 #include "opencl/source/mem_obj/buffer.h"
 #include "opencl/source/mem_obj/image.h"
--- a/opencl/source/gen9/gpgpu_walker_gen9.cpp
+++ b/opencl/source/gen9/gpgpu_walker_gen9.cpp
@@ -8,7 +8,6 @@
 #include "shared/source/gen9/hw_cmds_base.h"

 #include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
-#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
 #include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"

 namespace NEO {
--- a/opencl/source/helpers/task_information.cpp
+++ b/opencl/source/helpers/task_information.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -145,7 +145,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
    auto bcsCsrForAuxTranslation = commandQueue.getBcsForAuxTranslation();

    auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership();
-    bool isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());

    if (executionModelKernel) {
        while (!devQueue->isEMCriticalSectionFree())
@@ -175,39 +174,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
    }
    makeTimestampPacketsResident(commandStreamReceiver);

-    if (executionModelKernel) {
-        uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
-        devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount,
-                                              commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp, isCcsUsed);
-
-        SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
-
-        scheduler.setArgs(devQueue->getQueueBuffer(),
-                          devQueue->getStackBuffer(),
-                          devQueue->getEventPoolBuffer(),
-                          devQueue->getSlbBuffer(),
-                          dsh->getGraphicsAllocation(),
-                          kernel->getKernelReflectionSurface(),
-                          devQueue->getQueueStorageBuffer(),
-                          ssh->getGraphicsAllocation(),
-                          devQueue->getDebugQueue());
-
-        devQueue->dispatchScheduler(
-            *kernelOperation->commandStream,
-            scheduler,
-            preemptionMode,
-            ssh,
-            dsh,
-            isCcsUsed);
-
-        scheduler.makeResident(commandStreamReceiver);
-
-        // Update SLM usage
-        slmUsed |= scheduler.getSlmTotalSize() > 0;
-
-        this->kernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver);
-    }
-
    if (kernelOperation->blitPropertiesContainer.size() > 0) {
        CsrDependencies csrDeps;
        eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *bcsCsrForAuxTranslation, CsrDependencies::DependenciesType::All);
--- a/opencl/source/xe_hp_core/enable_family_full_ocl_xe_hp_core.cpp
+++ b/opencl/source/xe_hp_core/enable_family_full_ocl_xe_hp_core.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -8,7 +8,6 @@
 #include "shared/source/helpers/populate_factory.h"

 #include "opencl/source/command_queue/command_queue_hw.h"
-#include "opencl/source/device_queue/device_queue_hw.h"
 #include "opencl/source/helpers/cl_hw_helper.h"
 #include "opencl/source/mem_obj/buffer.h"
 #include "opencl/source/mem_obj/image.h"
--- a/opencl/source/xe_hpc_core/enable_family_full_ocl_xe_hpc_core.cpp
+++ b/opencl/source/xe_hpc_core/enable_family_full_ocl_xe_hpc_core.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -8,7 +8,6 @@
 #include "shared/source/helpers/populate_factory.h"

 #include "opencl/source/command_queue/command_queue_hw.h"
-#include "opencl/source/device_queue/device_queue_hw.h"
 #include "opencl/source/helpers/cl_hw_helper.h"
 #include "opencl/source/mem_obj/buffer.h"
 #include "opencl/source/mem_obj/image.h"
--- a/opencl/source/xe_hpg_core/enable_family_full_ocl_xe_hpg_core.cpp
+++ b/opencl/source/xe_hpg_core/enable_family_full_ocl_xe_hpg_core.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -8,7 +8,6 @@
 #include "shared/source/helpers/populate_factory.h"

 #include "opencl/source/command_queue/command_queue_hw.h"
-#include "opencl/source/device_queue/device_queue_hw.h"
 #include "opencl/source/helpers/cl_hw_helper.h"
 #include "opencl/source/mem_obj/buffer.h"
 #include "opencl/source/mem_obj/image.h"