refactor: adjust file names after pre-gen12 removal 4/n

Related-To: NEO-12681
Signed-off-by: Michał Pryba <michal.pryba@intel.com>
2025-12-30 01:35:20 +08:00 · 2025-01-22 13:10:44 +00:00
parent b8504913e3
commit 3c027cfedc
3 changed files with 680 additions and 695 deletions
--- a/shared/source/command_container/CMakeLists.txt
+++ b/shared/source/command_container/CMakeLists.txt
@@ -10,7 +10,6 @@ set(NEO_CORE_COMMAND_CONTAINER
    ${CMAKE_CURRENT_SOURCE_DIR}/cmdcontainer.h
    ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.h
    ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.inl
-    ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_bdw_and_later.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_enablers.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tgllp_and_later.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/encode_alu_helper.h
--- a/shared/source/command_container/command_encoder_bdw_and_later.inl
+++ b/shared/source/command_container/command_encoder_bdw_and_later.inl
@@ -1,692 +0,0 @@
-/*
- * Copyright (C) 2020-2025 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#pragma once
-#include "shared/source/command_container/command_encoder.h"
-#include "shared/source/command_container/encode_surface_state.h"
-#include "shared/source/command_stream/linear_stream.h"
-#include "shared/source/command_stream/memory_compression_state.h"
-#include "shared/source/command_stream/preemption.h"
-#include "shared/source/execution_environment/execution_environment.h"
-#include "shared/source/gmm_helper/gmm_helper.h"
-#include "shared/source/helpers/api_specific_config.h"
-#include "shared/source/helpers/cache_policy.h"
-#include "shared/source/helpers/gfx_core_helper.h"
-#include "shared/source/helpers/in_order_cmd_helpers.h"
-#include "shared/source/helpers/pause_on_gpu_properties.h"
-#include "shared/source/helpers/pipe_control_args.h"
-#include "shared/source/helpers/pipeline_select_args.h"
-#include "shared/source/helpers/simd_helper.h"
-#include "shared/source/helpers/state_base_address.h"
-#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
-#include "shared/source/kernel/implicit_args_helper.h"
-
-#include <algorithm>
-
-namespace NEO {
-
-template <typename Family>
-template <typename InterfaceDescriptorType>
-void EncodeDispatchKernel<Family>::setGrfInfo(InterfaceDescriptorType *pInterfaceDescriptor, uint32_t grfCount,
-                                              const size_t &sizeCrossThreadData, const size_t &sizePerThreadData,
-                                              const RootDeviceEnvironment &rootDeviceEnvironment) {
-    auto grfSize = sizeof(typename Family::GRF);
-    DEBUG_BREAK_IF((sizeCrossThreadData % grfSize) != 0);
-    auto numGrfCrossThreadData = static_cast<uint32_t>(sizeCrossThreadData / grfSize);
-    DEBUG_BREAK_IF(numGrfCrossThreadData == 0);
-    pInterfaceDescriptor->setCrossThreadConstantDataReadLength(numGrfCrossThreadData);
-
-    DEBUG_BREAK_IF((sizePerThreadData % grfSize) != 0);
-    auto numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
-
-    // at least 1 GRF of perThreadData for each thread in a thread group when sizeCrossThreadData != 0
-    numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
-    pInterfaceDescriptor->setConstantIndirectUrbEntryReadLength(numGrfPerThreadData);
-}
-
-template <typename Family>
-template <typename WalkerType>
-void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) {
-
-    using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
-    using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS;
-
-    auto &kernelDescriptor = args.dispatchInterface->getKernelDescriptor();
-    auto sizeCrossThreadData = args.dispatchInterface->getCrossThreadDataSize();
-    auto sizePerThreadData = args.dispatchInterface->getPerThreadDataSize();
-    auto sizePerThreadDataForWholeGroup = args.dispatchInterface->getPerThreadDataSizeForWholeThreadGroup();
-    auto pImplicitArgs = args.dispatchInterface->getImplicitArgs();
-
-    auto &hwInfo = args.device->getHardwareInfo();
-    auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment();
-
-    LinearStream *listCmdBufferStream = container.getCommandStream();
-
-    auto threadGroupDims = static_cast<const uint32_t *>(args.threadGroupDimensions);
-
-    DefaultWalkerType cmd = Family::cmdInitGpgpuWalker;
-    auto idd = Family::cmdInitInterfaceDescriptorData;
-    {
-        auto alloc = args.dispatchInterface->getIsaAllocation();
-        UNRECOVERABLE_IF(nullptr == alloc);
-        auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
-        idd.setKernelStartPointer(offset);
-        idd.setKernelStartPointerHigh(0u);
-    }
-
-    if (args.dispatchInterface->getKernelDescriptor().kernelAttributes.flags.usesAssert && args.device->getL0Debugger() != nullptr) {
-        idd.setSoftwareExceptionEnable(1);
-    }
-
-    auto numThreadsPerThreadGroup = args.dispatchInterface->getNumThreadsPerThreadGroup();
-    idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);
-
-    EncodeDispatchKernel<Family>::programBarrierEnable(idd,
-                                                       kernelDescriptor,
-                                                       hwInfo);
-    auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
-    idd.setSharedLocalMemorySize(slmSize);
-
-    uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
-    uint32_t bindingTablePointer = 0u;
-    bool isBindlessKernel = NEO::KernelDescriptor::isBindlessAddressingKernel(kernelDescriptor);
-
-    if (!isBindlessKernel) {
-        container.prepareBindfulSsh();
-        if (bindingTableStateCount > 0u) {
-            auto ssh = args.surfaceStateHeap;
-            if (ssh == nullptr) {
-                ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::surfaceState, args.dispatchInterface->getSurfaceStateHeapDataSize(), NEO::EncodeDispatchKernel<Family>::getDefaultSshAlignment());
-            }
-            bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
-                *ssh,
-                args.dispatchInterface->getSurfaceStateHeapData(),
-                args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
-                kernelDescriptor.payloadMappings.bindingTable.tableOffset));
-        }
-    } else {
-        bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr;
-        auto sshHeapSize = args.dispatchInterface->getSurfaceStateHeapDataSize();
-
-        if (sshHeapSize > 0u) {
-            auto ssh = args.surfaceStateHeap;
-            if (ssh == nullptr) {
-                container.prepareBindfulSsh();
-                ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::surfaceState, sshHeapSize, NEO::EncodeDispatchKernel<Family>::getDefaultSshAlignment());
-            }
-            uint64_t bindlessSshBaseOffset = ptrDiff(ssh->getSpace(0), ssh->getCpuBase());
-            if (globalBindlessSsh) {
-                bindlessSshBaseOffset += ptrDiff(ssh->getGraphicsAllocation()->getGpuAddress(), ssh->getGraphicsAllocation()->getGpuBaseAddress());
-            }
-
-            DEBUG_BREAK_IF(bindingTableStateCount > 0u);
-
-            // Allocate space for new ssh data
-            auto dstSurfaceState = ssh->getSpace(sshHeapSize);
-            memcpy_s(dstSurfaceState, sshHeapSize, args.dispatchInterface->getSurfaceStateHeapData(), sshHeapSize);
-
-            args.dispatchInterface->patchBindlessOffsetsInCrossThreadData(bindlessSshBaseOffset);
-        }
-    }
-    idd.setBindingTablePointer(bindingTablePointer);
-
-    PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode);
-
-    uint32_t samplerStateOffset = 0;
-    uint32_t samplerCount = 0;
-
-    if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
-        auto dsHeap = args.dynamicStateHeap;
-        if (dsHeap == nullptr) {
-            dsHeap = container.getIndirectHeap(HeapType::dynamicState);
-            auto dshSizeRequired = NEO::EncodeDispatchKernel<Family>::getSizeRequiredDsh(kernelDescriptor, container.getNumIddPerBlock());
-            if (dsHeap->getAvailableSpace() <= dshSizeRequired) {
-                dsHeap = container.getHeapWithRequiredSizeAndAlignment(HeapType::dynamicState, dsHeap->getMaxAvailableSpace(), NEO::EncodeDispatchKernel<Family>::getDefaultDshAlignment());
-            }
-        }
-        UNRECOVERABLE_IF(!dsHeap);
-
-        samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
-        samplerStateOffset = EncodeStates<Family>::copySamplerState(dsHeap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
-                                                                    kernelDescriptor.payloadMappings.samplerTable.numSamplers,
-                                                                    kernelDescriptor.payloadMappings.samplerTable.borderColor,
-                                                                    args.dispatchInterface->getDynamicStateHeapData(),
-                                                                    args.device->getBindlessHeapsHelper(), args.device->getRootDeviceEnvironment());
-    }
-
-    idd.setSamplerStatePointer(samplerStateOffset);
-    if (!isBindlessKernel) {
-        EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
-    }
-
-    EncodeDispatchKernel<Family>::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData,
-                                             sizePerThreadData, rootDeviceEnvironment);
-
-    uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData;
-    bool isHwLocalIdGeneration = false;
-    uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, isHwLocalIdGeneration, rootDeviceEnvironment);
-    uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching;
-    uint64_t offsetThreadData = 0u;
-    {
-        auto heapIndirect = container.getIndirectHeap(HeapType::indirectObject);
-        UNRECOVERABLE_IF(!(heapIndirect));
-        heapIndirect->align(Family::cacheLineSize);
-        void *ptr = nullptr;
-        if (args.isKernelDispatchedFromImmediateCmdList) {
-            ptr = container.getHeapWithRequiredSizeAndAlignment(HeapType::indirectObject, iohRequiredSize, DefaultWalkerType::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)->getSpace(iohRequiredSize);
-        } else {
-            ptr = container.getHeapSpaceAllowGrow(HeapType::indirectObject, iohRequiredSize);
-        }
-        UNRECOVERABLE_IF(!(ptr));
-        offsetThreadData = heapIndirect->getHeapGpuStartOffset() + static_cast<uint64_t>(heapIndirect->getUsed() - sizeThreadData);
-
-        uint64_t implicitArgsGpuVA = 0u;
-        if (pImplicitArgs) {
-            implicitArgsGpuVA = heapIndirect->getGraphicsAllocation()->getGpuAddress() + static_cast<uint64_t>(heapIndirect->getUsed() - iohRequiredSize);
-            auto implicitArgsCrossThreadPtr = ptrOffset(const_cast<uint64_t *>(reinterpret_cast<const uint64_t *>(args.dispatchInterface->getCrossThreadData())), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
-            *implicitArgsCrossThreadPtr = implicitArgsGpuVA;
-
-            ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, nullptr);
-        }
-
-        memcpy_s(ptr, sizeCrossThreadData,
-                 args.dispatchInterface->getCrossThreadData(), sizeCrossThreadData);
-
-        if (args.isIndirect) {
-            auto crossThreadDataGpuVA = heapIndirect->getGraphicsAllocation()->getGpuAddress() + heapIndirect->getUsed() - sizeThreadData;
-            EncodeIndirectParams<Family>::encode(container, crossThreadDataGpuVA, args.dispatchInterface, implicitArgsGpuVA);
-        }
-
-        ptr = ptrOffset(ptr, sizeCrossThreadData);
-        memcpy_s(ptr, sizePerThreadDataForWholeGroup,
-                 args.dispatchInterface->getPerThreadData(), sizePerThreadDataForWholeGroup);
-    }
-
-    uint32_t numIDD = 0u;
-    void *iddPtr = getInterfaceDescriptor(container, args.dynamicStateHeap, numIDD);
-
-    auto slmSizeNew = args.dispatchInterface->getSlmTotalSize();
-    bool dirtyHeaps = container.isAnyHeapDirty();
-    bool flush = container.slmSizeRef() != slmSizeNew || dirtyHeaps || args.requiresUncachedMocs;
-
-    if (flush) {
-        PipeControlArgs syncArgs;
-        syncArgs.dcFlushEnable = args.dcFlushEnable;
-        if (dirtyHeaps) {
-            syncArgs.hdcPipelineFlush = true;
-        }
-        MemorySynchronizationCommands<Family>::addSingleBarrier(*container.getCommandStream(), syncArgs);
-
-        if (dirtyHeaps || args.requiresUncachedMocs) {
-            STATE_BASE_ADDRESS sba;
-            auto gmmHelper = container.getDevice()->getGmmHelper();
-            uint32_t statelessMocsIndex =
-                args.requiresUncachedMocs ? (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1);
-            auto l1CachePolicy = container.l1CachePolicyDataRef()->getL1CacheValue(false);
-            auto l1CachePolicyDebuggerActive = container.l1CachePolicyDataRef()->getL1CacheValue(true);
-            EncodeStateBaseAddressArgs<Family> encodeStateBaseAddressArgs = {
-                &container,                  // container
-                sba,                         // sbaCmd
-                nullptr,                     // sbaProperties
-                statelessMocsIndex,          // statelessMocsIndex
-                l1CachePolicy,               // l1CachePolicy
-                l1CachePolicyDebuggerActive, // l1CachePolicyDebuggerActive
-                false,                       // multiOsContextCapable
-                args.isRcs,                  // isRcs
-                container.doubleSbaWaRef(),  // doubleSbaWa
-                false,                       // heaplessModeEnabled
-            };
-            EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
-            container.setDirtyStateForAllHeaps(false);
-            args.requiresUncachedMocs = false;
-        }
-
-        if (container.slmSizeRef() != slmSizeNew) {
-            EncodeL3State<Family>::encode(container, slmSizeNew != 0u);
-            container.slmSizeRef() = slmSizeNew;
-        }
-    }
-
-    if (numIDD == 0 || flush) {
-        EncodeMediaInterfaceDescriptorLoad<Family>::encode(container, args.dynamicStateHeap);
-    }
-
-    cmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
-    cmd.setIndirectDataLength(sizeThreadData);
-    cmd.setInterfaceDescriptorOffset(numIDD);
-
-    EncodeDispatchKernel<Family>::encodeThreadData(cmd,
-                                                   nullptr,
-                                                   threadGroupDims,
-                                                   args.dispatchInterface->getGroupSize(),
-                                                   kernelDescriptor.kernelAttributes.simdSize,
-                                                   kernelDescriptor.kernelAttributes.numLocalIdChannels,
-                                                   numThreadsPerThreadGroup,
-                                                   args.dispatchInterface->getThreadExecutionMask(),
-                                                   true,
-                                                   false,
-                                                   args.isIndirect,
-                                                   args.dispatchInterface->getRequiredWorkgroupOrder(),
-                                                   rootDeviceEnvironment);
-
-    cmd.setPredicateEnable(args.isPredicate);
-
-    auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
-    EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadGroupDims, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, numThreadsPerThreadGroup, cmd);
-
-    EncodeWalkerArgs walkerArgs{
-        .kernelExecutionType = KernelExecutionType::defaultType,
-        .requiredDispatchWalkOrder = args.requiredDispatchWalkOrder,
-        .localRegionSize = args.localRegionSize,
-        .maxFrontEndThreads = args.device->getDeviceInfo().maxFrontEndThreads,
-        .requiredSystemFence = args.requiresSystemMemoryFence(),
-        .hasSample = false};
-
-    EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, cmd, walkerArgs);
-    EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(cmd, walkerArgs);
-    EncodeDispatchKernel<Family>::template encodeComputeDispatchAllWalker<WalkerType, INTERFACE_DESCRIPTOR_DATA>(cmd, nullptr, rootDeviceEnvironment, walkerArgs);
-
-    memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));
-
-    if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
-        void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), false));
-        args.additionalCommands->push_back(commandBuffer);
-
-        EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
-    }
-
-    auto buffer = listCmdBufferStream->getSpaceForCmd<DefaultWalkerType>();
-    *buffer = cmd;
-
-    {
-        auto mediaStateFlush = listCmdBufferStream->getSpaceForCmd<MEDIA_STATE_FLUSH>();
-        *mediaStateFlush = Family::cmdInitMediaStateFlush;
-    }
-
-    args.partitionCount = 1;
-
-    if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
-        void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), false));
-        args.additionalCommands->push_back(commandBuffer);
-
-        EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
-    }
-}
-
-template <typename Family>
-void EncodeMediaInterfaceDescriptorLoad<Family>::encode(CommandContainer &container, IndirectHeap *childDsh) {
-    using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
-    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
-    void *heapBase = nullptr;
-    if (childDsh != nullptr) {
-        heapBase = childDsh->getCpuBase();
-    } else {
-        heapBase = container.getIndirectHeap(HeapType::dynamicState)->getCpuBase();
-    }
-
-    auto mediaStateFlush = container.getCommandStream()->getSpaceForCmd<MEDIA_STATE_FLUSH>();
-    *mediaStateFlush = Family::cmdInitMediaStateFlush;
-
-    auto iddOffset = static_cast<uint32_t>(ptrDiff(container.getIddBlock(), heapBase));
-
-    MEDIA_INTERFACE_DESCRIPTOR_LOAD cmd = Family::cmdInitMediaInterfaceDescriptorLoad;
-    cmd.setInterfaceDescriptorDataStartAddress(iddOffset);
-    cmd.setInterfaceDescriptorTotalLength(sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock());
-
-    auto buffer = container.getCommandStream()->getSpace(sizeof(cmd));
-    *(decltype(cmd) *)buffer = cmd;
-}
-
-template <typename Family>
-inline bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
-                                                                              const size_t *lws,
-                                                                              std::array<uint8_t, 3> walkOrder,
-                                                                              bool requireInputWalkOrder,
-                                                                              uint32_t &requiredWalkOrder,
-                                                                              uint32_t simd) {
-    requiredWalkOrder = 0u;
-    return true;
-}
-
-template <typename Family>
-template <typename WalkerType>
-void EncodeDispatchKernel<Family>::encodeThreadData(WalkerType &walkerCmd,
-                                                    const uint32_t *startWorkGroup,
-                                                    const uint32_t *numWorkGroups,
-                                                    const uint32_t *workGroupSizes,
-                                                    uint32_t simd,
-                                                    uint32_t localIdDimensions,
-                                                    uint32_t threadsPerThreadGroup,
-                                                    uint32_t threadExecutionMask,
-                                                    bool localIdsGenerationByRuntime,
-                                                    bool inlineDataProgrammingRequired,
-                                                    bool isIndirect,
-                                                    uint32_t requiredWorkGroupOrder,
-                                                    const RootDeviceEnvironment &rootDeviceEnvironment) {
-
-    if (isIndirect) {
-        walkerCmd.setIndirectParameterEnable(true);
-    } else {
-        walkerCmd.setThreadGroupIdXDimension(static_cast<uint32_t>(numWorkGroups[0]));
-        walkerCmd.setThreadGroupIdYDimension(static_cast<uint32_t>(numWorkGroups[1]));
-        walkerCmd.setThreadGroupIdZDimension(static_cast<uint32_t>(numWorkGroups[2]));
-    }
-
-    if (startWorkGroup) {
-        walkerCmd.setThreadGroupIdStartingX(static_cast<uint32_t>(startWorkGroup[0]));
-        walkerCmd.setThreadGroupIdStartingY(static_cast<uint32_t>(startWorkGroup[1]));
-        walkerCmd.setThreadGroupIdStartingResumeZ(static_cast<uint32_t>(startWorkGroup[2]));
-    }
-
-    walkerCmd.setSimdSize(getSimdConfig<WalkerType>(simd));
-
-    auto localWorkSize = static_cast<uint32_t>(workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2]);
-    if (threadsPerThreadGroup == 0) {
-        threadsPerThreadGroup = getThreadsPerWG(simd, localWorkSize);
-    }
-    walkerCmd.setThreadWidthCounterMaximum(threadsPerThreadGroup);
-
-    uint64_t executionMask = threadExecutionMask;
-    if (executionMask == 0) {
-        auto remainderSimdLanes = localWorkSize & (simd - 1);
-        executionMask = maxNBitValue(remainderSimdLanes);
-        if (!executionMask)
-            executionMask = ~executionMask;
-    }
-
-    constexpr uint32_t maxDword = std::numeric_limits<uint32_t>::max();
-    walkerCmd.setRightExecutionMask(static_cast<uint32_t>(executionMask));
-    walkerCmd.setBottomExecutionMask(maxDword);
-}
-
-template <typename Family>
-template <typename InterfaceDescriptorType>
-void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor,
-                                                        const KernelDescriptor &kernelDescriptor,
-                                                        const HardwareInfo &hwInfo) {
-    interfaceDescriptor.setBarrierEnable(kernelDescriptor.kernelAttributes.barrierCount);
-}
-
-template <typename Family>
-template <typename WalkerType>
-inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
-
-template <typename Family>
-template <typename WalkerType>
-inline void EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
-
-template <typename Family>
-template <typename WalkerType, typename InterfaceDescriptorType>
-inline void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs) {}
-
-template <typename Family>
-template <typename InterfaceDescriptorType>
-void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
-
-template <typename Family>
-inline bool EncodeDispatchKernel<Family>::isDshNeeded(const DeviceInfo &deviceInfo) {
-    return true;
-}
-
-template <typename Family>
-void EncodeStateBaseAddress<Family>::setSbaAddressesForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd) {
-    sbaAddress.indirectObjectBaseAddress = sbaCmd.getIndirectObjectBaseAddress();
-    sbaAddress.bindlessSurfaceStateBaseAddress = sbaCmd.getBindlessSurfaceStateBaseAddress();
-    sbaAddress.dynamicStateBaseAddress = sbaCmd.getDynamicStateBaseAddress();
-    sbaAddress.generalStateBaseAddress = sbaCmd.getGeneralStateBaseAddress();
-    sbaAddress.instructionBaseAddress = sbaCmd.getInstructionBaseAddress();
-    sbaAddress.surfaceStateBaseAddress = sbaCmd.getSurfaceStateBaseAddress();
-}
-
-template <typename Family>
-void EncodeStateBaseAddress<Family>::encode(EncodeStateBaseAddressArgs<Family> &args) {
-    auto &device = *args.container->getDevice();
-
-    if (args.container->isAnyHeapDirty()) {
-        EncodeWA<Family>::encodeAdditionalPipelineSelect(*args.container->getCommandStream(), {}, true, device.getRootDeviceEnvironment(), args.isRcs);
-    }
-
-    auto gmmHelper = device.getGmmHelper();
-
-    auto dsh = args.container->isHeapDirty(HeapType::dynamicState) ? args.container->getIndirectHeap(HeapType::dynamicState) : nullptr;
-    auto ioh = args.container->isHeapDirty(HeapType::indirectObject) ? args.container->getIndirectHeap(HeapType::indirectObject) : nullptr;
-    auto ssh = args.container->isHeapDirty(HeapType::surfaceState) ? args.container->getIndirectHeap(HeapType::surfaceState) : nullptr;
-    auto isDebuggerActive = device.getDebugger() != nullptr;
-    uint64_t globalHeapsBase = 0;
-    uint64_t bindlessSurfStateBase = 0;
-    bool useGlobalSshAndDsh = false;
-
-    if (device.getBindlessHeapsHelper()) {
-        bindlessSurfStateBase = device.getBindlessHeapsHelper()->getGlobalHeapsBase();
-        globalHeapsBase = device.getBindlessHeapsHelper()->getGlobalHeapsBase();
-        useGlobalSshAndDsh = true;
-    }
-
-    StateBaseAddressHelperArgs<Family> stateBaseAddressHelperArgs = {
-        0,                                                  // generalStateBaseAddress
-        args.container->getIndirectObjectHeapBaseAddress(), // indirectObjectHeapBaseAddress
-        args.container->getInstructionHeapBaseAddress(),    // instructionHeapBaseAddress
-        globalHeapsBase,                                    // globalHeapsBaseAddress
-        0,                                                  // surfaceStateBaseAddress
-        bindlessSurfStateBase,                              // bindlessSurfaceStateBaseAddress
-        &args.sbaCmd,                                       // stateBaseAddressCmd
-        args.sbaProperties,                                 // sbaProperties
-        dsh,                                                // dsh
-        ioh,                                                // ioh
-        ssh,                                                // ssh
-        gmmHelper,                                          // gmmHelper
-        args.statelessMocsIndex,                            // statelessMocsIndex
-        args.l1CachePolicy,                                 // l1CachePolicy
-        args.l1CachePolicyDebuggerActive,                   // l1CachePolicyDebuggerActive
-        NEO::MemoryCompressionState::notApplicable,         // memoryCompressionState
-        false,                                              // setInstructionStateBaseAddress
-        false,                                              // setGeneralStateBaseAddress
-        useGlobalSshAndDsh,                                 // useGlobalHeapsBaseAddress
-        false,                                              // isMultiOsContextCapable
-        false,                                              // areMultipleSubDevicesInContext
-        false,                                              // overrideSurfaceStateBaseAddress
-        isDebuggerActive,                                   // isDebuggerActive
-        args.doubleSbaWa,                                   // doubleSbaWa
-        args.heaplessModeEnabled                            // heaplessModeEnabled
-    };
-
-    StateBaseAddressHelper<Family>::programStateBaseAddressIntoCommandStream(stateBaseAddressHelperArgs,
-                                                                             *args.container->getCommandStream());
-
-    EncodeWA<Family>::encodeAdditionalPipelineSelect(*args.container->getCommandStream(), {}, false, device.getRootDeviceEnvironment(), args.isRcs);
-}
-
-template <typename Family>
-size_t EncodeStateBaseAddress<Family>::getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container, bool isRcs) {
-    return sizeof(typename Family::STATE_BASE_ADDRESS) + 2 * EncodeWA<Family>::getAdditionalPipelineSelectSize(device, isRcs);
-}
-
-template <typename GfxFamily>
-void EncodeMiFlushDW<GfxFamily>::adjust(MI_FLUSH_DW *miFlushDwCmd, const ProductHelper &productHelper) {}
-
-template <typename GfxFamily>
-inline void EncodeWA<GfxFamily>::addPipeControlPriorToNonPipelinedStateCommand(LinearStream &commandStream, PipeControlArgs args,
-                                                                               const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs) {
-    MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStream, args);
-}
-
-template <typename GfxFamily>
-inline void EncodeWA<GfxFamily>::adjustCompressionFormatForPlanarImage(uint32_t &compressionFormat, int plane) {
-}
-
-template <typename Family>
-void EncodeSurfaceState<Family>::setCoherencyType(R_SURFACE_STATE *surfaceState, COHERENCY_TYPE coherencyType) {
-    surfaceState->setCoherencyType(coherencyType);
-}
-
-template <typename Family>
-void EncodeSemaphore<Family>::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd,
-                                                     uint64_t compareAddress,
-                                                     uint64_t compareData,
-                                                     COMPARE_OPERATION compareMode,
-                                                     bool registerPollMode,
-                                                     bool waitMode,
-                                                     bool useQwordData,
-                                                     bool indirect,
-                                                     bool switchOnUnsuccessful) {
-    constexpr uint64_t upper32b = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) << 32;
-    UNRECOVERABLE_IF(useQwordData || (compareData & upper32b));
-    UNRECOVERABLE_IF(indirect);
-
-    MI_SEMAPHORE_WAIT localCmd = Family::cmdInitMiSemaphoreWait;
-    localCmd.setCompareOperation(compareMode);
-    localCmd.setSemaphoreDataDword(static_cast<uint32_t>(compareData));
-    localCmd.setSemaphoreGraphicsAddress(compareAddress);
-    localCmd.setWaitMode(waitMode ? MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE : MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_SIGNAL_MODE);
-
-    *cmd = localCmd;
-}
-
-template <typename GfxFamily>
-void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer) {
-}
-
-template <typename Family>
-inline void EncodeStoreMemory<Family>::programStoreDataImm(MI_STORE_DATA_IMM *cmdBuffer,
-                                                           uint64_t gpuAddress,
-                                                           uint32_t dataDword0,
-                                                           uint32_t dataDword1,
-                                                           bool storeQword,
-                                                           bool workloadPartitionOffset) {
-    MI_STORE_DATA_IMM storeDataImmediate = Family::cmdInitStoreDataImm;
-    storeDataImmediate.setAddress(gpuAddress);
-    storeDataImmediate.setStoreQword(storeQword);
-    storeDataImmediate.setDataDword0(dataDword0);
-    if (storeQword) {
-        storeDataImmediate.setDataDword1(dataDword1);
-        storeDataImmediate.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_QWORD);
-    } else {
-        storeDataImmediate.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD);
-    }
-    EncodeStoreMemory<Family>::encodeForceCompletionCheck(storeDataImmediate);
-
-    *cmdBuffer = storeDataImmediate;
-}
-
-template <typename Family>
-template <typename WalkerType>
-void EncodeDispatchKernel<Family>::setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) {}
-
-template <typename Family>
-template <typename WalkerType>
-void EncodeDispatchKernel<Family>::setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {}
-
-template <typename Family>
-template <typename WalkerType>
-void EncodeDispatchKernel<Family>::setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {}
-
-template <typename Family>
-template <typename WalkerType>
-void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {}
-
-template <typename Family>
-size_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t iddCount) {
-    return iddCount * sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA);
-}
-
-template <typename Family>
-inline size_t EncodeDispatchKernel<Family>::getInlineDataOffset(EncodeDispatchKernelArgs &args) {
-    return 0;
-}
-
-template <typename Family>
-template <typename WalkerType>
-void EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd) {
-}
-
-template <typename Family>
-uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
-    if (slmSize == 0u) {
-        return 0u;
-    }
-    slmSize = std::max(slmSize, 1024u);
-    slmSize = Math::nextPowerOfTwo(slmSize);
-    UNRECOVERABLE_IF(slmSize > 64u * MemoryConstants::kiloByte);
-    return slmSize;
-}
-
-template <typename Family>
-uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
-    auto value = std::max(slmSize, 1024u);
-    value = Math::nextPowerOfTwo(value);
-    value = Math::getMinLsbSet(value);
-    value = value - 9;
-    DEBUG_BREAK_IF(value > 7);
-    return value * !!slmSize;
-}
-
-template <typename Family>
-bool EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(bool cooperativeKernel) {
-    return cooperativeKernel;
-}
-
-template <typename Family>
-size_t EncodeStates<Family>::getSshHeapSize() {
-    return 64 * MemoryConstants::kiloByte;
-}
-
-template <typename Family>
-void InOrderPatchCommandHelpers::PatchCmd<Family>::patchComputeWalker(uint64_t appendCounterValue) {
-    UNRECOVERABLE_IF(true);
-}
-
-template <typename Family>
-template <typename WalkerType, typename InterfaceDescriptorType>
-void EncodeDispatchKernel<Family>::overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor) {
-}
-
-template <typename Family>
-template <typename WalkerType, typename InterfaceDescriptorType>
-void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo,
-                                                             const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup,
-                                                             WalkerType &walkerCmd) {
-}
-
-template <typename Family>
-size_t EncodeDispatchKernel<Family>::getScratchPtrOffsetOfImplicitArgs() {
-    return 0;
-}
-
-template <typename Family>
-void EncodeSurfaceState<Family>::setPitchForScratch(R_SURFACE_STATE *surfaceState, uint32_t pitch, const ProductHelper &productHelper) {
-    surfaceState->setSurfacePitch(pitch);
-}
-
-template <typename Family>
-uint32_t EncodeSurfaceState<Family>::getPitchForScratchInBytes(R_SURFACE_STATE *surfaceState, const ProductHelper &productHelper) {
-    return surfaceState->getSurfacePitch();
-}
-
-template <typename Family>
-void EncodeSemaphore<Family>::appendSemaphoreCommand(MI_SEMAPHORE_WAIT &cmd, uint64_t compareData, bool indirect, bool useQwordData, bool switchOnUnsuccessful) {
-    constexpr uint64_t upper32b = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) << 32;
-    UNRECOVERABLE_IF(useQwordData || (compareData & upper32b));
-}
-
-template <typename Family>
-template <bool isHeapless>
-void EncodeDispatchKernel<Family>::setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr) {
-}
-
-template <typename Family>
-template <typename InterfaceDescriptorType>
-void EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy) {
-}
-
-template <typename Family>
-template <typename WalkerType>
-void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t threadGroupCount, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder) {}
-
-template <typename Family>
-template <typename WalkerType>
-void EncodeDispatchKernel<Family>::adjustTimestampPacket(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {}
-} // namespace NEO
--- a/shared/source/gen12lp/command_encoder_gen12lp.cpp
+++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -7,20 +7,37 @@

 #include "shared/source/command_container/command_encoder.h"
 #include "shared/source/command_container/command_encoder.inl"
-#include "shared/source/command_container/command_encoder_bdw_and_later.inl"
 #include "shared/source/command_container/command_encoder_from_gen12lp_to_xe2_hpg.inl"
 #include "shared/source/command_container/command_encoder_gen12lp_and_xe_hpg.inl"
 #include "shared/source/command_container/command_encoder_pre_xe2_hpg_core.inl"
 #include "shared/source/command_container/command_encoder_tgllp_and_later.inl"
+#include "shared/source/command_container/encode_surface_state.h"
+#include "shared/source/command_stream/linear_stream.h"
+#include "shared/source/command_stream/memory_compression_state.h"
+#include "shared/source/command_stream/preemption.h"
 #include "shared/source/command_stream/stream_properties.h"
+#include "shared/source/execution_environment/execution_environment.h"
 #include "shared/source/gen12lp/hw_cmds_base.h"
 #include "shared/source/gen12lp/reg_configs.h"
 #include "shared/source/gmm_helper/gmm_helper.h"
+#include "shared/source/helpers/api_specific_config.h"
+#include "shared/source/helpers/cache_policy.h"
+#include "shared/source/helpers/gfx_core_helper.h"
+#include "shared/source/helpers/in_order_cmd_helpers.h"
+#include "shared/source/helpers/pause_on_gpu_properties.h"
+#include "shared/source/helpers/pipe_control_args.h"
+#include "shared/source/helpers/pipeline_select_args.h"
 #include "shared/source/helpers/preamble.h"
+#include "shared/source/helpers/simd_helper.h"
+#include "shared/source/helpers/state_base_address.h"
+#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
+#include "shared/source/kernel/implicit_args_helper.h"
 #include "shared/source/release_helper/release_helper.h"

 #include "encode_surface_state_args.h"

+#include <algorithm>
+
 using Family = NEO::Gen12LpFamily;

 #include "shared/source/command_container/command_encoder_heap_addressing.inl"
@@ -28,6 +45,667 @@ using Family = NEO::Gen12LpFamily;

 namespace NEO {

+template <typename Family>
+template <typename InterfaceDescriptorType>
+void EncodeDispatchKernel<Family>::setGrfInfo(InterfaceDescriptorType *pInterfaceDescriptor, uint32_t grfCount,
+                                              const size_t &sizeCrossThreadData, const size_t &sizePerThreadData,
+                                              const RootDeviceEnvironment &rootDeviceEnvironment) {
+    // Both payload sizes arrive in bytes and are programmed into the descriptor as whole-GRF counts.
+    const auto bytesPerGrf = sizeof(typename Family::GRF);
+    DEBUG_BREAK_IF((sizeCrossThreadData % bytesPerGrf) != 0);
+    const auto crossThreadGrfs = static_cast<uint32_t>(sizeCrossThreadData / bytesPerGrf);
+    DEBUG_BREAK_IF(crossThreadGrfs == 0);
+    pInterfaceDescriptor->setCrossThreadConstantDataReadLength(crossThreadGrfs);
+
+    DEBUG_BREAK_IF((sizePerThreadData % bytesPerGrf) != 0);
+
+    // at least 1 GRF of perThreadData for each thread in a thread group when sizeCrossThreadData != 0
+    const auto perThreadGrfs = std::max(static_cast<uint32_t>(sizePerThreadData / bytesPerGrf), 1u);
+    pInterfaceDescriptor->setConstantIndirectUrbEntryReadLength(perThreadGrfs);
+}
+
+template <typename Family>
+template <typename WalkerType>
+void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) {
+    // Fills an interface descriptor and a DefaultWalkerType command from args, emits any required barrier/SBA/L3 reprogramming, then writes the walker and a MEDIA_STATE_FLUSH to the container's command stream.
+    using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
+    using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS;
+
+    auto &kernelDescriptor = args.dispatchInterface->getKernelDescriptor();
+    auto sizeCrossThreadData = args.dispatchInterface->getCrossThreadDataSize();
+    auto sizePerThreadData = args.dispatchInterface->getPerThreadDataSize();
+    auto sizePerThreadDataForWholeGroup = args.dispatchInterface->getPerThreadDataSizeForWholeThreadGroup();
+    auto pImplicitArgs = args.dispatchInterface->getImplicitArgs();
+
+    auto &hwInfo = args.device->getHardwareInfo();
+    auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment();
+
+    LinearStream *listCmdBufferStream = container.getCommandStream();
+
+    auto threadGroupDims = static_cast<const uint32_t *>(args.threadGroupDimensions);
+
+    DefaultWalkerType cmd = Family::cmdInitGpgpuWalker;
+    auto idd = Family::cmdInitInterfaceDescriptorData;
+    {
+        auto alloc = args.dispatchInterface->getIsaAllocation();
+        UNRECOVERABLE_IF(nullptr == alloc);
+        auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
+        idd.setKernelStartPointer(offset);
+        idd.setKernelStartPointerHigh(0u);
+    }
+    // Software exceptions are enabled only when the kernel uses assert and an L0 debugger is present on the device.
+    if (args.dispatchInterface->getKernelDescriptor().kernelAttributes.flags.usesAssert && args.device->getL0Debugger() != nullptr) {
+        idd.setSoftwareExceptionEnable(1);
+    }
+
+    auto numThreadsPerThreadGroup = args.dispatchInterface->getNumThreadsPerThreadGroup();
+    idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);
+
+    EncodeDispatchKernel<Family>::programBarrierEnable(idd,
+                                                       kernelDescriptor,
+                                                       hwInfo);
+    auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
+    idd.setSharedLocalMemorySize(slmSize);
+    // Bindful kernels push a binding table plus surface states into the SSH; bindless kernels copy the raw surface-state data and patch its base offset into cross-thread data.
+    uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
+    uint32_t bindingTablePointer = 0u;
+    bool isBindlessKernel = NEO::KernelDescriptor::isBindlessAddressingKernel(kernelDescriptor);
+
+    if (!isBindlessKernel) {
+        container.prepareBindfulSsh();
+        if (bindingTableStateCount > 0u) {
+            auto ssh = args.surfaceStateHeap;
+            if (ssh == nullptr) {
+                ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::surfaceState, args.dispatchInterface->getSurfaceStateHeapDataSize(), NEO::EncodeDispatchKernel<Family>::getDefaultSshAlignment());
+            }
+            bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
+                *ssh,
+                args.dispatchInterface->getSurfaceStateHeapData(),
+                args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
+                kernelDescriptor.payloadMappings.bindingTable.tableOffset));
+        }
+    } else {
+        bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr;
+        auto sshHeapSize = args.dispatchInterface->getSurfaceStateHeapDataSize();
+
+        if (sshHeapSize > 0u) {
+            auto ssh = args.surfaceStateHeap;
+            if (ssh == nullptr) {
+                container.prepareBindfulSsh();
+                ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::surfaceState, sshHeapSize, NEO::EncodeDispatchKernel<Family>::getDefaultSshAlignment());
+            }
+            uint64_t bindlessSshBaseOffset = ptrDiff(ssh->getSpace(0), ssh->getCpuBase());
+            if (globalBindlessSsh) {
+                bindlessSshBaseOffset += ptrDiff(ssh->getGraphicsAllocation()->getGpuAddress(), ssh->getGraphicsAllocation()->getGpuBaseAddress());
+            }
+
+            DEBUG_BREAK_IF(bindingTableStateCount > 0u);
+
+            // Allocate space for new ssh data
+            auto dstSurfaceState = ssh->getSpace(sshHeapSize);
+            memcpy_s(dstSurfaceState, sshHeapSize, args.dispatchInterface->getSurfaceStateHeapData(), sshHeapSize);
+
+            args.dispatchInterface->patchBindlessOffsetsInCrossThreadData(bindlessSshBaseOffset);
+        }
+    }
+    idd.setBindingTablePointer(bindingTablePointer);
+
+    PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode);
+    // Sampler state is copied into the dynamic state heap only when the kernel declares samplers; otherwise offset and count stay zero.
+    uint32_t samplerStateOffset = 0;
+    uint32_t samplerCount = 0;
+
+    if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
+        auto dsHeap = args.dynamicStateHeap;
+        if (dsHeap == nullptr) {
+            dsHeap = container.getIndirectHeap(HeapType::dynamicState);
+            auto dshSizeRequired = NEO::EncodeDispatchKernel<Family>::getSizeRequiredDsh(kernelDescriptor, container.getNumIddPerBlock());
+            if (dsHeap->getAvailableSpace() <= dshSizeRequired) {
+                dsHeap = container.getHeapWithRequiredSizeAndAlignment(HeapType::dynamicState, dsHeap->getMaxAvailableSpace(), NEO::EncodeDispatchKernel<Family>::getDefaultDshAlignment());
+            }
+        }
+        UNRECOVERABLE_IF(!dsHeap);
+
+        samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
+        samplerStateOffset = EncodeStates<Family>::copySamplerState(dsHeap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
+                                                                    kernelDescriptor.payloadMappings.samplerTable.numSamplers,
+                                                                    kernelDescriptor.payloadMappings.samplerTable.borderColor,
+                                                                    args.dispatchInterface->getDynamicStateHeapData(),
+                                                                    args.device->getBindlessHeapsHelper(), args.device->getRootDeviceEnvironment());
+    }
+
+    idd.setSamplerStatePointer(samplerStateOffset);
+    if (!isBindlessKernel) {
+        EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
+    }
+
+    EncodeDispatchKernel<Family>::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData,
+                                             sizePerThreadData, rootDeviceEnvironment);
+    // IOH space is sized for the implicit-args patch area plus cross-thread and per-thread data; offsetThreadData addresses the trailing sizeThreadData bytes.
+    uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData;
+    bool isHwLocalIdGeneration = false;
+    uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, isHwLocalIdGeneration, rootDeviceEnvironment);
+    uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching;
+    uint64_t offsetThreadData = 0u;
+    {
+        auto heapIndirect = container.getIndirectHeap(HeapType::indirectObject);
+        UNRECOVERABLE_IF(!(heapIndirect));
+        heapIndirect->align(Family::cacheLineSize);
+        void *ptr = nullptr;
+        if (args.isKernelDispatchedFromImmediateCmdList) {
+            ptr = container.getHeapWithRequiredSizeAndAlignment(HeapType::indirectObject, iohRequiredSize, DefaultWalkerType::INDIRECTDATASTARTADDRESS_ALIGN_SIZE)->getSpace(iohRequiredSize);
+        } else {
+            ptr = container.getHeapSpaceAllowGrow(HeapType::indirectObject, iohRequiredSize);
+        }
+        UNRECOVERABLE_IF(!(ptr));
+        offsetThreadData = heapIndirect->getHeapGpuStartOffset() + static_cast<uint64_t>(heapIndirect->getUsed() - sizeThreadData);
+
+        uint64_t implicitArgsGpuVA = 0u;
+        if (pImplicitArgs) {
+            implicitArgsGpuVA = heapIndirect->getGraphicsAllocation()->getGpuAddress() + static_cast<uint64_t>(heapIndirect->getUsed() - iohRequiredSize);
+            auto implicitArgsCrossThreadPtr = ptrOffset(const_cast<uint64_t *>(reinterpret_cast<const uint64_t *>(args.dispatchInterface->getCrossThreadData())), kernelDescriptor.payloadMappings.implicitArgs.implicitArgsBuffer);
+            *implicitArgsCrossThreadPtr = implicitArgsGpuVA;
+
+            ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, {}, rootDeviceEnvironment, nullptr);
+        }
+
+        memcpy_s(ptr, sizeCrossThreadData,
+                 args.dispatchInterface->getCrossThreadData(), sizeCrossThreadData);
+
+        if (args.isIndirect) {
+            auto crossThreadDataGpuVA = heapIndirect->getGraphicsAllocation()->getGpuAddress() + heapIndirect->getUsed() - sizeThreadData;
+            EncodeIndirectParams<Family>::encode(container, crossThreadDataGpuVA, args.dispatchInterface, implicitArgsGpuVA);
+        }
+
+        ptr = ptrOffset(ptr, sizeCrossThreadData);
+        memcpy_s(ptr, sizePerThreadDataForWholeGroup,
+                 args.dispatchInterface->getPerThreadData(), sizePerThreadDataForWholeGroup);
+    }
+
+    uint32_t numIDD = 0u;
+    void *iddPtr = getInterfaceDescriptor(container, args.dynamicStateHeap, numIDD);
+
+    auto slmSizeNew = args.dispatchInterface->getSlmTotalSize();
+    bool dirtyHeaps = container.isAnyHeapDirty();
+    bool flush = container.slmSizeRef() != slmSizeNew || dirtyHeaps || args.requiresUncachedMocs;
+    // A barrier is emitted when the SLM size changed, any heap is dirty, or uncached MOCS were requested; SBA and L3 state are then reprogrammed as needed.
+    if (flush) {
+        PipeControlArgs syncArgs;
+        syncArgs.dcFlushEnable = args.dcFlushEnable;
+        if (dirtyHeaps) {
+            syncArgs.hdcPipelineFlush = true;
+        }
+        MemorySynchronizationCommands<Family>::addSingleBarrier(*container.getCommandStream(), syncArgs);
+
+        if (dirtyHeaps || args.requiresUncachedMocs) {
+            STATE_BASE_ADDRESS sba;
+            auto gmmHelper = container.getDevice()->getGmmHelper();
+            uint32_t statelessMocsIndex =
+                args.requiresUncachedMocs ? (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1);
+            auto l1CachePolicy = container.l1CachePolicyDataRef()->getL1CacheValue(false);
+            auto l1CachePolicyDebuggerActive = container.l1CachePolicyDataRef()->getL1CacheValue(true);
+            EncodeStateBaseAddressArgs<Family> encodeStateBaseAddressArgs = {
+                &container,                  // container
+                sba,                         // sbaCmd
+                nullptr,                     // sbaProperties
+                statelessMocsIndex,          // statelessMocsIndex
+                l1CachePolicy,               // l1CachePolicy
+                l1CachePolicyDebuggerActive, // l1CachePolicyDebuggerActive
+                false,                       // multiOsContextCapable
+                args.isRcs,                  // isRcs
+                container.doubleSbaWaRef(),  // doubleSbaWa
+                false,                       // heaplessModeEnabled
+            };
+            EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
+            container.setDirtyStateForAllHeaps(false);
+            args.requiresUncachedMocs = false;
+        }
+
+        if (container.slmSizeRef() != slmSizeNew) {
+            EncodeL3State<Family>::encode(container, slmSizeNew != 0u);
+            container.slmSizeRef() = slmSizeNew;
+        }
+    }
+    // MEDIA_INTERFACE_DESCRIPTOR_LOAD is (re)emitted when numIDD is zero or when a flush was emitted above.
+    if (numIDD == 0 || flush) {
+        EncodeMediaInterfaceDescriptorLoad<Family>::encode(container, args.dynamicStateHeap);
+    }
+
+    cmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
+    cmd.setIndirectDataLength(sizeThreadData);
+    cmd.setInterfaceDescriptorOffset(numIDD);
+
+    EncodeDispatchKernel<Family>::encodeThreadData(cmd,
+                                                   nullptr,
+                                                   threadGroupDims,
+                                                   args.dispatchInterface->getGroupSize(),
+                                                   kernelDescriptor.kernelAttributes.simdSize,
+                                                   kernelDescriptor.kernelAttributes.numLocalIdChannels,
+                                                   numThreadsPerThreadGroup,
+                                                   args.dispatchInterface->getThreadExecutionMask(),
+                                                   true,
+                                                   false,
+                                                   args.isIndirect,
+                                                   args.dispatchInterface->getRequiredWorkgroupOrder(),
+                                                   rootDeviceEnvironment);
+
+    cmd.setPredicateEnable(args.isPredicate);
+
+    auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
+    EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(idd, *args.device, hwInfo, threadGroupDims, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, numThreadsPerThreadGroup, cmd);
+
+    EncodeWalkerArgs walkerArgs{
+        .kernelExecutionType = KernelExecutionType::defaultType,
+        .requiredDispatchWalkOrder = args.requiredDispatchWalkOrder,
+        .localRegionSize = args.localRegionSize,
+        .maxFrontEndThreads = args.device->getDeviceInfo().maxFrontEndThreads,
+        .requiredSystemFence = args.requiresSystemMemoryFence(),
+        .hasSample = false};
+
+    EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, cmd, walkerArgs);
+    EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(cmd, walkerArgs);
+    EncodeDispatchKernel<Family>::template encodeComputeDispatchAllWalker<WalkerType, INTERFACE_DESCRIPTOR_DATA>(cmd, nullptr, rootDeviceEnvironment, walkerArgs);
+
+    memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));
+
+    if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
+        void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), false));
+        args.additionalCommands->push_back(commandBuffer);
+
+        EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
+    }
+
+    auto buffer = listCmdBufferStream->getSpaceForCmd<DefaultWalkerType>();
+    *buffer = cmd;
+
+    {
+        auto mediaStateFlush = listCmdBufferStream->getSpaceForCmd<MEDIA_STATE_FLUSH>();
+        *mediaStateFlush = Family::cmdInitMediaStateFlush;
+    }
+    // The partition count is written back through args as 1 on this path.
+    args.partitionCount = 1;
+
+    if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
+        void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), false));
+        args.additionalCommands->push_back(commandBuffer);
+
+        EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
+    }
+}
+
+template <typename Family>
+void EncodeMediaInterfaceDescriptorLoad<Family>::encode(CommandContainer &container, IndirectHeap *childDsh) {
+    // Emits MEDIA_STATE_FLUSH followed by MEDIA_INTERFACE_DESCRIPTOR_LOAD for the container's current IDD block.
+    // childDsh, when non-null, is the heap whose CPU base the IDD offset is measured from; otherwise the container's dynamic state heap is used.
+    using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
+    using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
+
+    void *heapBase = (childDsh != nullptr) ? childDsh->getCpuBase() : container.getIndirectHeap(HeapType::dynamicState)->getCpuBase();
+
+    auto mediaStateFlush = container.getCommandStream()->getSpaceForCmd<MEDIA_STATE_FLUSH>();
+    *mediaStateFlush = Family::cmdInitMediaStateFlush;
+
+    // The descriptor start address is programmed as the IDD block's offset from the selected heap's CPU base.
+    auto iddOffset = static_cast<uint32_t>(ptrDiff(container.getIddBlock(), heapBase));
+
+    MEDIA_INTERFACE_DESCRIPTOR_LOAD cmd = Family::cmdInitMediaInterfaceDescriptorLoad;
+    cmd.setInterfaceDescriptorDataStartAddress(iddOffset);
+    cmd.setInterfaceDescriptorTotalLength(sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock());
+
+    // Typed stream allocation, same as the MEDIA_STATE_FLUSH write above; avoids a C-style cast on an untyped buffer.
+    auto buffer = container.getCommandStream()->getSpaceForCmd<MEDIA_INTERFACE_DESCRIPTOR_LOAD>();
+    *buffer = cmd;
+}
+
+template <typename Family>
+inline bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
+                                                                              const size_t *lws,
+                                                                              std::array<uint8_t, 3> walkOrder,
+                                                                              bool requireInputWalkOrder,
+                                                                              uint32_t &requiredWalkOrder,
+                                                                              uint32_t simd) {
+    requiredWalkOrder = 0u; // output is always reset to 0; none of the input parameters are inspected
+    return true; // this implementation unconditionally reports that runtime local-id generation is required
+}
+
+template <typename Family>
+template <typename WalkerType>
+void EncodeDispatchKernel<Family>::encodeThreadData(WalkerType &walkerCmd,
+                                                    const uint32_t *startWorkGroup,
+                                                    const uint32_t *numWorkGroups,
+                                                    const uint32_t *workGroupSizes,
+                                                    uint32_t simd,
+                                                    uint32_t localIdDimensions,
+                                                    uint32_t threadsPerThreadGroup,
+                                                    uint32_t threadExecutionMask,
+                                                    bool localIdsGenerationByRuntime,
+                                                    bool inlineDataProgrammingRequired,
+                                                    bool isIndirect,
+                                                    uint32_t requiredWorkGroupOrder,
+                                                    const RootDeviceEnvironment &rootDeviceEnvironment) {
+    // Group counts are programmed only for direct dispatches; indirect ones just set the indirect-parameter enable.
+    if (!isIndirect) {
+        walkerCmd.setThreadGroupIdXDimension(numWorkGroups[0]);
+        walkerCmd.setThreadGroupIdYDimension(numWorkGroups[1]);
+        walkerCmd.setThreadGroupIdZDimension(numWorkGroups[2]);
+    } else {
+        walkerCmd.setIndirectParameterEnable(true);
+    }
+
+    if (startWorkGroup != nullptr) {
+        walkerCmd.setThreadGroupIdStartingX(startWorkGroup[0]);
+        walkerCmd.setThreadGroupIdStartingY(startWorkGroup[1]);
+        walkerCmd.setThreadGroupIdStartingResumeZ(startWorkGroup[2]);
+    }
+
+    walkerCmd.setSimdSize(getSimdConfig<WalkerType>(simd));
+
+    const auto workItemsPerGroup = static_cast<uint32_t>(workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2]);
+    // A zero thread count from the caller means it is derived from the SIMD width and the work-group size.
+    const uint32_t widthCounter = (threadsPerThreadGroup != 0) ? threadsPerThreadGroup
+                                                               : getThreadsPerWG(simd, workItemsPerGroup);
+    walkerCmd.setThreadWidthCounterMaximum(widthCounter);
+
+    uint64_t rightMask = threadExecutionMask;
+    if (rightMask == 0) {
+        // No explicit mask: cover the lanes left over in the last SIMD thread; if that yields an empty mask, fall back to all ones.
+        const auto tailLanes = workItemsPerGroup & (simd - 1);
+        rightMask = maxNBitValue(tailLanes);
+        rightMask = (rightMask != 0) ? rightMask : ~rightMask;
+    }
+
+    walkerCmd.setRightExecutionMask(static_cast<uint32_t>(rightMask));
+    constexpr uint32_t allLanes = std::numeric_limits<uint32_t>::max();
+    walkerCmd.setBottomExecutionMask(allLanes);
+}
+
+template <typename Family>
+template <typename InterfaceDescriptorType>
+void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType &interfaceDescriptor,
+                                                        const KernelDescriptor &kernelDescriptor,
+                                                        const HardwareInfo &hwInfo) {
+    interfaceDescriptor.setBarrierEnable(kernelDescriptor.kernelAttributes.barrierCount); // barrierCount is forwarded as the enable value; hwInfo is not used here
+}
+
+template <typename Family>
+template <typename WalkerType>
+inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {} // no-op: walkerCmd is left untouched by this implementation
+
+template <typename Family>
+template <typename WalkerType>
+inline void EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {} // no-op: no post-sync fields are written to walkerCmd by this implementation
+
+template <typename Family>
+template <typename WalkerType, typename InterfaceDescriptorType>
+inline void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs) {} // no-op: neither walkerCmd nor idd is read or modified by this implementation
+
+template <typename Family>
+template <typename InterfaceDescriptorType>
+void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {} // no-op: the interface descriptor is not modified by this implementation
+
+template <typename Family>
+inline bool EncodeDispatchKernel<Family>::isDshNeeded(const DeviceInfo &deviceInfo) {
+    return true; // unconditionally true; deviceInfo is not consulted
+}
+
+template <typename Family>
+void EncodeStateBaseAddress<Family>::setSbaAddressesForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd) {
+    sbaAddress.generalStateBaseAddress = sbaCmd.getGeneralStateBaseAddress();
+    sbaAddress.surfaceStateBaseAddress = sbaCmd.getSurfaceStateBaseAddress();
+    sbaAddress.dynamicStateBaseAddress = sbaCmd.getDynamicStateBaseAddress();
+    sbaAddress.indirectObjectBaseAddress = sbaCmd.getIndirectObjectBaseAddress();
+    sbaAddress.instructionBaseAddress = sbaCmd.getInstructionBaseAddress();
+    sbaAddress.bindlessSurfaceStateBaseAddress = sbaCmd.getBindlessSurfaceStateBaseAddress();
+}
+
+// Programs a state base address command into the command stream of args.container.
+//
+// - If the container reports any dirty heap, an additional pipeline select is encoded first
+//   (third argument true). The counterpart call (third argument false) is always encoded last.
+// - Each of the dynamic-state / indirect-object / surface-state heaps is handed to the helper
+//   only when the container marks it dirty; otherwise nullptr is passed for that heap.
+// - When the device exposes a bindless heaps helper, its global heaps base is used for both the
+//   global heaps base address and the bindless surface state base address.
+template <typename Family>
+void EncodeStateBaseAddress<Family>::encode(EncodeStateBaseAddressArgs<Family> &args) {
+    auto &container = *args.container;
+    auto &device = *container.getDevice();
+
+    if (container.isAnyHeapDirty()) {
+        EncodeWA<Family>::encodeAdditionalPipelineSelect(*container.getCommandStream(), {}, true, device.getRootDeviceEnvironment(), args.isRcs);
+    }
+
+    auto gmmHelper = device.getGmmHelper();
+
+    // Yields the heap of the requested type when it is dirty, nullptr otherwise.
+    const auto dirtyHeapOrNull = [&container](HeapType heapType) {
+        return container.isHeapDirty(heapType) ? container.getIndirectHeap(heapType) : nullptr;
+    };
+    auto dynamicStateHeap = dirtyHeapOrNull(HeapType::dynamicState);
+    auto indirectObjectHeap = dirtyHeapOrNull(HeapType::indirectObject);
+    auto surfaceStateHeap = dirtyHeapOrNull(HeapType::surfaceState);
+
+    const bool debuggerPresent = device.getDebugger() != nullptr;
+
+    uint64_t globalHeapsBaseAddress = 0;
+    uint64_t bindlessSurfaceStateBaseAddress = 0;
+    bool globalHeapsInUse = false;
+    if (auto bindlessHeapsHelper = device.getBindlessHeapsHelper()) {
+        bindlessSurfaceStateBaseAddress = bindlessHeapsHelper->getGlobalHeapsBase();
+        globalHeapsBaseAddress = bindlessHeapsHelper->getGlobalHeapsBase();
+        globalHeapsInUse = true;
+    }
+
+    // Positional aggregate: the order below must match the member order of StateBaseAddressHelperArgs.
+    StateBaseAddressHelperArgs<Family> helperArgs = {
+        0,                                             // generalStateBaseAddress
+        container.getIndirectObjectHeapBaseAddress(),  // indirectObjectHeapBaseAddress
+        container.getInstructionHeapBaseAddress(),     // instructionHeapBaseAddress
+        globalHeapsBaseAddress,                        // globalHeapsBaseAddress
+        0,                                             // surfaceStateBaseAddress
+        bindlessSurfaceStateBaseAddress,               // bindlessSurfaceStateBaseAddress
+        &args.sbaCmd,                                  // stateBaseAddressCmd
+        args.sbaProperties,                            // sbaProperties
+        dynamicStateHeap,                              // dsh
+        indirectObjectHeap,                            // ioh
+        surfaceStateHeap,                              // ssh
+        gmmHelper,                                     // gmmHelper
+        args.statelessMocsIndex,                       // statelessMocsIndex
+        args.l1CachePolicy,                            // l1CachePolicy
+        args.l1CachePolicyDebuggerActive,              // l1CachePolicyDebuggerActive
+        NEO::MemoryCompressionState::notApplicable,    // memoryCompressionState
+        false,                                         // setInstructionStateBaseAddress
+        false,                                         // setGeneralStateBaseAddress
+        globalHeapsInUse,                              // useGlobalHeapsBaseAddress
+        false,                                         // isMultiOsContextCapable
+        false,                                         // areMultipleSubDevicesInContext
+        false,                                         // overrideSurfaceStateBaseAddress
+        debuggerPresent,                               // isDebuggerActive
+        args.doubleSbaWa,                              // doubleSbaWa
+        args.heaplessModeEnabled                       // heaplessModeEnabled
+    };
+
+    StateBaseAddressHelper<Family>::programStateBaseAddressIntoCommandStream(helperArgs, *container.getCommandStream());
+
+    EncodeWA<Family>::encodeAdditionalPipelineSelect(*container.getCommandStream(), {}, false, device.getRootDeviceEnvironment(), args.isRcs);
+}
+
+// Returns the byte size of one STATE_BASE_ADDRESS command plus two additional pipeline selects
+// (as sized by EncodeWA for the given device / isRcs). The container argument is not used.
+template <typename Family>
+size_t EncodeStateBaseAddress<Family>::getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container, bool isRcs) {
+    const size_t sbaCmdSize = sizeof(typename Family::STATE_BASE_ADDRESS);
+    const size_t singlePipelineSelectSize = EncodeWA<Family>::getAdditionalPipelineSelectSize(device, isRcs);
+    return sbaCmdSize + 2 * singlePipelineSelectSize;
+}
+
+template <typename GfxFamily>
+void EncodeMiFlushDW<GfxFamily>::adjust(MI_FLUSH_DW *miFlushDwCmd, const ProductHelper &productHelper) {} // empty implementation: miFlushDwCmd is left unmodified and productHelper is unused
+
+template <typename GfxFamily>
+inline void EncodeWA<GfxFamily>::addPipeControlPriorToNonPipelinedStateCommand(LinearStream &commandStream, PipeControlArgs args,
+                                                                               const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs) {
+    MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStream, args); // plain forward of the caller's args; rootDeviceEnvironment and isRcs are unused here
+}
+
+template <typename GfxFamily>
+inline void EncodeWA<GfxFamily>::adjustCompressionFormatForPlanarImage(uint32_t &compressionFormat, int plane) { // empty implementation: compressionFormat is left unchanged regardless of plane
+}
+
+template <typename Family>
+void EncodeSurfaceState<Family>::setCoherencyType(R_SURFACE_STATE *surfaceState, COHERENCY_TYPE coherencyType) {
+    surfaceState->setCoherencyType(coherencyType); // direct pass-through to the surface state's own setter; surfaceState is dereferenced without a null check
+}
+
+// Builds an MI_SEMAPHORE_WAIT from Family::cmdInitMiSemaphoreWait and stores it in *cmd.
+//
+// This variant only accepts 32-bit compare data with direct addressing: UNRECOVERABLE_IF is hit
+// when useQwordData is set, when any of the upper 32 bits of compareData is set, or when
+// indirect is set. registerPollMode and switchOnUnsuccessful are not used here.
+// waitMode == true selects WAIT_MODE_POLLING_MODE, otherwise WAIT_MODE_SIGNAL_MODE.
+template <typename Family>
+void EncodeSemaphore<Family>::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd,
+                                                     uint64_t compareAddress,
+                                                     uint64_t compareData,
+                                                     COMPARE_OPERATION compareMode,
+                                                     bool registerPollMode,
+                                                     bool waitMode,
+                                                     bool useQwordData,
+                                                     bool indirect,
+                                                     bool switchOnUnsuccessful) {
+    // Non-zero when compareData does not fit in a single dword.
+    const bool dataExceeds32Bits = (compareData >> 32) != 0u;
+    UNRECOVERABLE_IF(useQwordData || dataExceeds32Bits);
+    UNRECOVERABLE_IF(indirect);
+
+    const auto selectedWaitMode = waitMode ? MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE
+                                           : MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_SIGNAL_MODE;
+
+    // Assemble in a local copy first, then publish to the destination with one assignment.
+    MI_SEMAPHORE_WAIT semaphoreCmd = Family::cmdInitMiSemaphoreWait;
+    semaphoreCmd.setCompareOperation(compareMode);
+    semaphoreCmd.setSemaphoreDataDword(static_cast<uint32_t>(compareData));
+    semaphoreCmd.setSemaphoreGraphicsAddress(compareAddress);
+    semaphoreCmd.setWaitMode(selectedWaitMode);
+
+    *cmd = semaphoreCmd;
+}
+
+template <typename GfxFamily>
+void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, uint64_t backBuffer) { // empty implementation: nothing is written to commandStream and backBuffer is unused
+}
+
+// Builds an MI_STORE_DATA_IMM from Family::cmdInitStoreDataImm and stores it in *cmdBuffer.
+//
+// dataDword0 is always programmed; dataDword1 is programmed only when storeQword is true.
+// The dword length is DWORD_LENGTH_STORE_QWORD for qword stores and DWORD_LENGTH_STORE_DWORD
+// otherwise. workloadPartitionOffset is not used here.
+template <typename Family>
+inline void EncodeStoreMemory<Family>::programStoreDataImm(MI_STORE_DATA_IMM *cmdBuffer,
+                                                           uint64_t gpuAddress,
+                                                           uint32_t dataDword0,
+                                                           uint32_t dataDword1,
+                                                           bool storeQword,
+                                                           bool workloadPartitionOffset) {
+    MI_STORE_DATA_IMM storeCmd = Family::cmdInitStoreDataImm;
+    storeCmd.setAddress(gpuAddress);
+    storeCmd.setStoreQword(storeQword);
+    storeCmd.setDataDword0(dataDword0);
+
+    // The second data dword is only meaningful for a qword store.
+    if (storeQword) {
+        storeCmd.setDataDword1(dataDword1);
+    }
+    storeCmd.setDwordLength(storeQword ? MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_QWORD
+                                       : MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD);
+
+    EncodeStoreMemory<Family>::encodeForceCompletionCheck(storeCmd);
+
+    // Publish the fully assembled command with a single assignment.
+    *cmdBuffer = storeCmd;
+}
+
+template <typename Family>
+template <typename WalkerType>
+void EncodeDispatchKernel<Family>::setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) {} // empty implementation: walkerCmd is left unmodified; dcFlush is ignored
+
+template <typename Family>
+template <typename WalkerType>
+void EncodeDispatchKernel<Family>::setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {} // empty implementation: walkerCmd is left unmodified and args is unused
+
+template <typename Family>
+template <typename WalkerType>
+void EncodeDispatchKernel<Family>::setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {} // empty implementation: walkerCmd is left unmodified and args is unused
+
+template <typename Family>
+template <typename WalkerType>
+void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {} // empty implementation: requiredWorkGroupOrder is ignored and walkerCmd is left unmodified
+
+// Returns the number of bytes occupied by iddCount INTERFACE_DESCRIPTOR_DATA structures.
+template <typename Family>
+size_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t iddCount) {
+    constexpr size_t singleIddSize = sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA);
+    return iddCount * singleIddSize;
+}
+
+template <typename Family>
+inline size_t EncodeDispatchKernel<Family>::getInlineDataOffset(EncodeDispatchKernelArgs &args) {
+    return 0; // constant result: args is not consulted
+}
+
+template <typename Family>
+template <typename WalkerType>
+void EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd) { // empty implementation: walkerCmd is left unmodified
+}
+
+// Rounds a non-zero SLM size up to at least 1024 and then through Math::nextPowerOfTwo.
+// A size of 0 is returned unchanged. Hits UNRECOVERABLE_IF when the rounded size
+// exceeds 64 * MemoryConstants::kiloByte.
+template <typename Family>
+uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
+    if (slmSize != 0u) {
+        const uint32_t clampedSize = std::max(slmSize, 1024u);
+        slmSize = Math::nextPowerOfTwo(clampedSize);
+        UNRECOVERABLE_IF(slmSize > 64u * MemoryConstants::kiloByte);
+    }
+    return slmSize;
+}
+
+// Converts an SLM size into the value returned to the caller for programming.
+//
+// The size is raised to at least 1024, passed through Math::nextPowerOfTwo and then
+// Math::getMinLsbSet, and 9 is subtracted from that result. A debug break is requested when
+// the outcome is greater than 7. An slmSize of 0 always yields 0. hwInfo is not used here.
+template <typename Family>
+uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
+    const uint32_t clampedSize = std::max(slmSize, 1024u);
+    const uint32_t powerOfTwoSize = Math::nextPowerOfTwo(clampedSize);
+    const uint32_t minLsbSet = Math::getMinLsbSet(powerOfTwoSize);
+    const uint32_t encodedValue = minLsbSet - 9;
+    DEBUG_BREAK_IF(encodedValue > 7);
+
+    // The computation above also runs for a zero size; its result is discarded in that case.
+    return (slmSize != 0u) ? encodedValue : 0u;
+}
+
+template <typename Family>
+bool EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(bool cooperativeKernel) {
+    return cooperativeKernel; // the answer is exactly the cooperativeKernel flag; no other state is consulted
+}
+
+// Returns a fixed size of 64 * MemoryConstants::kiloByte.
+template <typename Family>
+size_t EncodeStates<Family>::getSshHeapSize() {
+    const size_t sshHeapSize = 64 * MemoryConstants::kiloByte;
+    return sshHeapSize;
+}
+
+template <typename Family>
+void InOrderPatchCommandHelpers::PatchCmd<Family>::patchComputeWalker(uint64_t appendCounterValue) {
+    UNRECOVERABLE_IF(true); // condition is always true, so every call trips UNRECOVERABLE_IF; appendCounterValue is unused
+}
+
+template <typename Family>
+template <typename WalkerType, typename InterfaceDescriptorType>
+void EncodeDispatchKernel<Family>::overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor) { // empty implementation: neither walkerCmd nor interfaceDescriptor is modified
+}
+
+template <typename Family>
+template <typename WalkerType, typename InterfaceDescriptorType>
+void EncodeDispatchKernel<Family>::encodeThreadGroupDispatch(InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo,
+                                                             const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t grfCount, const uint32_t threadsPerThreadGroup,
+                                                             WalkerType &walkerCmd) { // empty implementation: interfaceDescriptor and walkerCmd are left unmodified; threadGroupDimensions is never dereferenced
+}
+
+template <typename Family>
+size_t EncodeDispatchKernel<Family>::getScratchPtrOffsetOfImplicitArgs() {
+    return 0; // constant result in this implementation
+}
+
+template <typename Family>
+void EncodeSurfaceState<Family>::setPitchForScratch(R_SURFACE_STATE *surfaceState, uint32_t pitch, const ProductHelper &productHelper) {
+    surfaceState->setSurfacePitch(pitch); // pitch is passed through unmodified; productHelper is unused here
+}
+
+template <typename Family>
+uint32_t EncodeSurfaceState<Family>::getPitchForScratchInBytes(R_SURFACE_STATE *surfaceState, const ProductHelper &productHelper) {
+    return surfaceState->getSurfacePitch(); // returned exactly as read from the surface state, no scaling applied; productHelper is unused here
+}
+
+// Validation only: cmd is not modified by this implementation.
+// Hits UNRECOVERABLE_IF when useQwordData is set or when any of the upper 32 bits of
+// compareData is set. indirect and switchOnUnsuccessful are not used here.
+template <typename Family>
+void EncodeSemaphore<Family>::appendSemaphoreCommand(MI_SEMAPHORE_WAIT &cmd, uint64_t compareData, bool indirect, bool useQwordData, bool switchOnUnsuccessful) {
+    const bool dataExceeds32Bits = (compareData >> 32) != 0u;
+    UNRECOVERABLE_IF(useQwordData || dataExceeds32Bits);
+}
+
+template <typename Family>
+template <bool isHeapless>
+void EncodeDispatchKernel<Family>::setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr) { // empty implementation: scratchAddress keeps the caller's value; ssh is never dereferenced
+}
+
+template <typename Family>
+template <typename InterfaceDescriptorType>
+void EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy) { // empty implementation: pInterfaceDescriptor is not touched and the policy argument is ignored
+}
+
+template <typename Family>
+template <typename WalkerType>
+void EncodeDispatchKernel<Family>::setWalkerRegionSettings(WalkerType &walkerCmd, const NEO::Device &device, uint32_t partitionCount, uint32_t workgroupSize, uint32_t threadGroupCount, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder) {} // empty implementation: walkerCmd is left unmodified and all count/size arguments are ignored
+
+template <typename Family>
+template <typename WalkerType>
+void EncodeDispatchKernel<Family>::adjustTimestampPacket(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {} // empty implementation: walkerCmd is left unmodified and args is unused
+
 template <>
 size_t EncodeWA<Family>::getAdditionalPipelineSelectSize(Device &device, bool isRcs) {
    size_t size = 0;