From 307f45f5d261b5b33380bc6cc2e01ecb78728d7e Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Thu, 21 Dec 2023 12:10:25 +0000 Subject: [PATCH] refactor: add common command encoder enablers Related-To: NEO-7621 Signed-off-by: Kamil Kopryk --- .../source/command_container/CMakeLists.txt | 1 + .../command_encoder_enablers.inl | 47 +++++++++++++++++ shared/source/gen11/command_encoder_gen11.cpp | 49 ++++-------------- .../gen12lp/command_encoder_gen12lp.cpp | 49 ++++-------------- shared/source/gen8/command_encoder_gen8.cpp | 50 ++++--------------- shared/source/gen9/command_encoder_gen9.cpp | 49 ++++-------------- .../command_encoder_xe_hpc_core.cpp | 44 +++------------- .../command_encoder_xe_hpg_core.cpp | 47 +++-------------- 8 files changed, 97 insertions(+), 239 deletions(-) create mode 100644 shared/source/command_container/command_encoder_enablers.inl diff --git a/shared/source/command_container/CMakeLists.txt b/shared/source/command_container/CMakeLists.txt index d36a729e3c..98b849c872 100644 --- a/shared/source/command_container/CMakeLists.txt +++ b/shared/source/command_container/CMakeLists.txt @@ -11,6 +11,7 @@ set(NEO_CORE_COMMAND_CONTAINER ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.h ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_bdw_and_later.inl + ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_enablers.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tgllp_and_later.inl ${CMAKE_CURRENT_SOURCE_DIR}/encode_alu_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_bdw_and_later.inl diff --git a/shared/source/command_container/command_encoder_enablers.inl b/shared/source/command_container/command_encoder_enablers.inl new file mode 100644 index 0000000000..71494d4249 --- /dev/null +++ b/shared/source/command_container/command_encoder_enablers.inl @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/command_container/command_encoder.h" + +template struct NEO::EncodeDispatchKernel; +template void NEO::EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); +template void NEO::EncodeDispatchKernel::adjustTimestampPacket(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); +template void NEO::EncodeDispatchKernel::setupPostSyncForRegularEvent(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); +template void NEO::EncodeDispatchKernel::setupPostSyncForInOrderExec(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); +template void NEO::EncodeDispatchKernel::setGrfInfo(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); +template void NEO::EncodeDispatchKernel::appendAdditionalIDDFields(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); +template void NEO::EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::DefaultWalkerType &walkerCmd); +template void NEO::EncodeDispatchKernel::setupPostSyncMocs(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); +template void NEO::EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); +template void NEO::EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void NEO::EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); + +template struct NEO::EncodeStates; +template struct NEO::EncodeMath; +template struct NEO::EncodeMathMMIO; +template struct NEO::EncodeIndirectParams; +template struct NEO::EncodeSetMMIO; +template struct NEO::EncodeMediaInterfaceDescriptorLoad; +template struct NEO::EncodeStateBaseAddress; +template struct NEO::EncodeStoreMMIO; +template struct NEO::EncodeSurfaceState; +template struct NEO::EncodeComputeMode; +template struct NEO::EncodeAtomic; +template struct NEO::EncodeSemaphore; +template struct NEO::EncodeBatchBufferStartOrEnd; +template struct NEO::EncodeMiFlushDW; +template struct NEO::EncodeMiPredicate; +template struct NEO::EncodeMemoryPrefetch; +template struct NEO::EncodeMiArbCheck; +template struct NEO::EncodeWA; +template struct NEO::EncodeEnableRayTracing; +template struct NEO::EncodeNoop; +template struct NEO::EncodeStoreMemory; +template struct NEO::EncodeMemoryFence; +template struct NEO::EnodeUserInterrupt; diff --git a/shared/source/gen11/command_encoder_gen11.cpp b/shared/source/gen11/command_encoder_gen11.cpp index 7fba93f4b7..1743472a65 100644 --- a/shared/source/gen11/command_encoder_gen11.cpp +++ b/shared/source/gen11/command_encoder_gen11.cpp @@ -71,44 +71,13 @@ void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, Sta } } -template struct EncodeDispatchKernel; -template void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); -template void EncodeDispatchKernel::adjustTimestampPacket(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForRegularEvent(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForInOrderExec(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setGrfInfo(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::appendAdditionalIDDFields(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); -template void EncodeDispatchKernel::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); -template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::DefaultWalkerType &walkerCmd); -template void EncodeDispatchKernel::setupPostSyncMocs(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); -template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); - -template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); - -template struct EncodeStates; -template struct EncodeMath; -template struct EncodeMathMMIO; -template struct EncodeIndirectParams; -template struct EncodeSetMMIO; -template struct EncodeL3State; -template struct EncodeMediaInterfaceDescriptorLoad; -template struct EncodeStateBaseAddress; -template struct EncodeStoreMMIO; -template struct EncodeSurfaceState; -template struct EncodeAtomic; -template struct EncodeSemaphore; -template struct EncodeBatchBufferStartOrEnd; -template struct EncodeMiFlushDW; -template struct EncodeMiPredicate; -template struct EncodeMemoryPrefetch; -template struct EncodeWA; -template struct EncodeMiArbCheck; -template struct EncodeComputeMode; -template struct EncodeEnableRayTracing; -template struct EncodeNoop; -template struct EncodeStoreMemory; -template struct EncodeMemoryFence; -template struct EnodeUserInterrupt; +} // namespace NEO + +#include "shared/source/command_container/command_encoder_enablers.inl" + +namespace NEO { +template struct EncodeL3State; + +template void EncodeDispatchKernel::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); } // namespace NEO diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index d038f022cd..dcdd2e6d42 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -111,44 +111,13 @@ void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container container.getDevice()->getRootDeviceEnvironment()); } -template struct EncodeDispatchKernel; -template void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); -template void EncodeDispatchKernel::adjustTimestampPacket(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForRegularEvent(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForInOrderExec(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setGrfInfo(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::appendAdditionalIDDFields(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); -template void EncodeDispatchKernel::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); -template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::DefaultWalkerType &walkerCmd); -template void EncodeDispatchKernel::setupPostSyncMocs(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); -template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); - -template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); - -template struct EncodeStates; -template struct EncodeMath; -template struct EncodeMathMMIO; -template struct EncodeIndirectParams; -template struct EncodeSetMMIO; -template struct EncodeL3State; -template struct EncodeMediaInterfaceDescriptorLoad; -template struct EncodeStateBaseAddress; -template struct EncodeStoreMMIO; -template struct EncodeSurfaceState; -template struct EncodeAtomic; -template struct EncodeSemaphore; -template struct EncodeBatchBufferStartOrEnd; -template struct EncodeMiFlushDW; -template struct EncodeMiPredicate; -template struct EncodeWA; -template struct EncodeMemoryPrefetch; -template struct EncodeMiArbCheck; -template struct EncodeComputeMode; -template struct EncodeEnableRayTracing; -template struct EncodeNoop; -template struct EncodeStoreMemory; -template struct EncodeMemoryFence; -template struct EnodeUserInterrupt; +} // namespace NEO + +#include "shared/source/command_container/command_encoder_enablers.inl" + +namespace NEO { +template struct EncodeL3State; + +template void EncodeDispatchKernel::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); } // namespace NEO diff --git a/shared/source/gen8/command_encoder_gen8.cpp b/shared/source/gen8/command_encoder_gen8.cpp index b91d48ec63..f0820e2229 100644 --- a/shared/source/gen8/command_encoder_gen8.cpp +++ b/shared/source/gen8/command_encoder_gen8.cpp @@ -55,45 +55,13 @@ template <> void EncodeBatchBufferStartOrEnd::appendBatchBufferStart(MI_BATCH_BUFFER_START &cmd, bool indirect, bool predicate) { } -template struct EncodeDispatchKernel; -template void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); -template void EncodeDispatchKernel::adjustTimestampPacket(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForRegularEvent(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForInOrderExec(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setGrfInfo(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::appendAdditionalIDDFields(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); -template void EncodeDispatchKernel::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); -template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::DefaultWalkerType &walkerCmd); -template void EncodeDispatchKernel::setupPostSyncMocs(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); -template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); - -template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); - -template struct EncodeStates; -template struct EncodeMath; -template struct EncodeMathMMIO; -template struct EncodeIndirectParams; -template struct EncodeSetMMIO; -template struct EncodeL3State; -template struct EncodeMediaInterfaceDescriptorLoad; -template struct EncodeStateBaseAddress; -template struct EncodeStoreMMIO; -template struct EncodeSurfaceState; -template struct EncodeAtomic; -template struct EncodeSemaphore; -template struct EncodeBatchBufferStartOrEnd; -template struct EncodeMiFlushDW; -template struct EncodeMiPredicate; -template struct EncodeMemoryPrefetch; -template struct EncodeWA; -template struct EncodeMiArbCheck; -template struct EncodeComputeMode; -template struct EncodeEnableRayTracing; -template struct EncodeNoop; -template struct EncodeStoreMemory; -template struct EncodeMemoryFence; -template struct EnodeUserInterrupt; - +} // namespace NEO + +#include "shared/source/command_container/command_encoder_enablers.inl" + +namespace NEO { +template struct EncodeL3State; + +template void EncodeDispatchKernel::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); } // namespace NEO diff --git a/shared/source/gen9/command_encoder_gen9.cpp b/shared/source/gen9/command_encoder_gen9.cpp index df25cb2d9c..9718ee11a0 100644 --- a/shared/source/gen9/command_encoder_gen9.cpp +++ b/shared/source/gen9/command_encoder_gen9.cpp @@ -56,44 +56,13 @@ void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, Sta } } -template struct EncodeDispatchKernel; -template void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); -template void EncodeDispatchKernel::adjustTimestampPacket(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForRegularEvent(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForInOrderExec(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setGrfInfo(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::appendAdditionalIDDFields(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); -template void EncodeDispatchKernel::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); -template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::DefaultWalkerType &walkerCmd); -template void EncodeDispatchKernel::setupPostSyncMocs(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); -template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); - -template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); - -template struct EncodeStates; -template struct EncodeMath; -template struct EncodeMathMMIO; -template struct EncodeIndirectParams; -template struct EncodeSetMMIO; -template struct EncodeL3State; -template struct EncodeMediaInterfaceDescriptorLoad; -template struct EncodeStateBaseAddress; -template struct EncodeStoreMMIO; -template struct EncodeSurfaceState; -template struct EncodeAtomic; -template struct EncodeSemaphore; -template struct EncodeBatchBufferStartOrEnd; -template struct EncodeMiFlushDW; -template struct EncodeMiPredicate; -template struct EncodeMemoryPrefetch; -template struct EncodeWA; -template struct EncodeMiArbCheck; -template struct EncodeComputeMode; -template struct EncodeEnableRayTracing; -template struct EncodeNoop; -template struct EncodeStoreMemory; -template struct EncodeMemoryFence; -template struct EnodeUserInterrupt; +} // namespace NEO + +#include "shared/source/command_container/command_encoder_enablers.inl" + +namespace NEO { +template struct EncodeL3State; + +template void EncodeDispatchKernel::programBarrierEnable(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); } // namespace NEO diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index c0d92072c5..51d8405020 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -366,42 +366,10 @@ void EncodeDispatchKernel::adjustBindingTablePrefetch(INTERFACE_DESCRIPT } } -template struct EncodeDispatchKernel; -template void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); -template void EncodeDispatchKernel::adjustTimestampPacket(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForRegularEvent(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForInOrderExec(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setGrfInfo(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::appendAdditionalIDDFields(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); -template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::DefaultWalkerType &walkerCmd); -template void EncodeDispatchKernel::setupPostSyncMocs(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); -template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); - -template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); - -template struct EncodeStates; -template struct EncodeMath; -template struct EncodeMathMMIO; -template struct EncodeIndirectParams; -template struct EncodeSetMMIO; -template struct EncodeMediaInterfaceDescriptorLoad; -template struct EncodeStateBaseAddress; -template struct EncodeStoreMMIO; -template struct EncodeSurfaceState; -template struct EncodeComputeMode; -template struct EncodeAtomic; -template struct EncodeSemaphore; -template struct EncodeBatchBufferStartOrEnd; -template struct EncodeMiFlushDW; -template struct EncodeMiPredicate; -template struct EncodeMemoryPrefetch; -template struct EncodeMiArbCheck; -template struct EncodeWA; -template struct EncodeEnableRayTracing; -template struct EncodeNoop; -template struct EncodeStoreMemory; -template struct EncodeMemoryFence; -template struct EnodeUserInterrupt; +} // namespace NEO + +#include "shared/source/command_container/command_encoder_enablers.inl" + +namespace NEO { +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); } // namespace NEO diff --git a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp index e53cb481c6..30b4349b2b 100644 --- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp @@ -217,44 +217,11 @@ void EncodeBatchBufferStartOrEnd::appendBatchBufferStart(MI_BATCH_BUFFER cmd.setPredicationEnable(predicate); } -template void flushGpuCache(LinearStream *commandStream, const Range &ranges, uint64_t postSyncAddress, const HardwareInfo &hwInfo); - -template struct EncodeDispatchKernel; -template void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); -template void EncodeDispatchKernel::adjustTimestampPacket(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForRegularEvent(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setupPostSyncForInOrderExec(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::setGrfInfo(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::appendAdditionalIDDFields(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); -template void EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf, Family::DefaultWalkerType &walkerCmd); -template void EncodeDispatchKernel::setupPostSyncMocs(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); -template void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); -template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); -template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); - -template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); - -template struct EncodeStates; -template struct EncodeMath; -template struct EncodeMathMMIO; -template struct EncodeIndirectParams; -template struct EncodeSetMMIO; -template struct EncodeMediaInterfaceDescriptorLoad; -template struct EncodeStateBaseAddress; -template struct EncodeStoreMMIO; -template struct EncodeSurfaceState; -template struct EncodeComputeMode; -template struct EncodeAtomic; -template struct EncodeSemaphore; -template struct EncodeBatchBufferStartOrEnd; -template struct EncodeMiFlushDW; -template struct EncodeMiPredicate; -template struct EncodeMemoryPrefetch; -template struct EncodeMiArbCheck; -template struct EncodeWA; -template struct EncodeEnableRayTracing; -template struct EncodeNoop; -template struct EncodeStoreMemory; -template struct EncodeMemoryFence; -template struct EnodeUserInterrupt; +} // namespace NEO + +#include "shared/source/command_container/command_encoder_enablers.inl" + +namespace NEO { +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); +template void flushGpuCache(LinearStream *commandStream, const Range &ranges, uint64_t postSyncAddress, const HardwareInfo &hwInfo); } // namespace NEO