From 0f275ab893b7beff339b68ccc12261c43f09ab58 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Thu, 21 Dec 2023 13:16:43 +0000 Subject: [PATCH] refactor: improve InOrder Walker patching Signed-off-by: Dunajski, Bartosz --- .../command_encoder_bdw_and_later.inl | 6 ++++++ .../command_encoder_xehp_and_later.inl | 7 +++++++ shared/source/gen11/command_encoder_gen11.cpp | 2 ++ shared/source/gen12lp/command_encoder_gen12lp.cpp | 2 ++ shared/source/gen8/command_encoder_gen8.cpp | 2 ++ shared/source/gen9/command_encoder_gen9.cpp | 2 ++ shared/source/helpers/in_order_cmd_helpers.h | 10 +--------- .../source/xe_hpc_core/command_encoder_xe_hpc_core.cpp | 2 ++ .../source/xe_hpg_core/command_encoder_xe_hpg_core.cpp | 2 ++ 9 files changed, 26 insertions(+), 9 deletions(-) diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 428cfbad0c..a5517da76c 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -16,6 +16,7 @@ #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/gfx_core_helper.h" +#include "shared/source/helpers/in_order_cmd_helpers.h" #include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/pipeline_select_args.h" @@ -640,4 +641,9 @@ void EncodeBatchBufferStartOrEnd::appendBatchBufferStart(MI_BATCH_BUFFER cmd.setPredicationEnable(predicate); } +template +void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue) { + UNRECOVERABLE_IF(true); +} + } // namespace NEO diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index bee5607ef0..a1219ba956 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -918,4 +918,11 @@ size_t EncodeStates::getSshHeapSize() { return 2 * MemoryConstants::megaByte; } +template +void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue) { + auto walkerCmd = reinterpret_cast(cmd1); + auto &postSync = walkerCmd->getPostSync(); + postSync.setImmediateData(baseCounterValue + appendCounterValue); +} + } // namespace NEO diff --git a/shared/source/gen11/command_encoder_gen11.cpp b/shared/source/gen11/command_encoder_gen11.cpp index 9be45908cd..7fba93f4b7 100644 --- a/shared/source/gen11/command_encoder_gen11.cpp +++ b/shared/source/gen11/command_encoder_gen11.cpp @@ -85,6 +85,8 @@ template void EncodeDispatchKernel::encode(Co template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); + template struct EncodeStates; template struct EncodeMath; template struct EncodeMathMMIO; diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 76c423db15..d038f022cd 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -125,6 +125,8 @@ template void EncodeDispatchKernel::encode(Co template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); + template struct EncodeStates; template struct EncodeMath; template struct EncodeMathMMIO; diff --git a/shared/source/gen8/command_encoder_gen8.cpp b/shared/source/gen8/command_encoder_gen8.cpp index 879feb3387..b91d48ec63 100644 --- a/shared/source/gen8/command_encoder_gen8.cpp +++ b/shared/source/gen8/command_encoder_gen8.cpp @@ -69,6 +69,8 @@ template void EncodeDispatchKernel::encode(Co template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); + template struct EncodeStates; template struct EncodeMath; template struct EncodeMathMMIO; diff --git a/shared/source/gen9/command_encoder_gen9.cpp b/shared/source/gen9/command_encoder_gen9.cpp index 4f56bb769c..df25cb2d9c 100644 --- a/shared/source/gen9/command_encoder_gen9.cpp +++ b/shared/source/gen9/command_encoder_gen9.cpp @@ -70,6 +70,8 @@ template void EncodeDispatchKernel::encode(Co template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); + template struct EncodeStates; template struct EncodeMath; template struct EncodeMathMMIO; diff --git a/shared/source/helpers/in_order_cmd_helpers.h b/shared/source/helpers/in_order_cmd_helpers.h index 49def072f9..b8ce851358 100644 --- a/shared/source/helpers/in_order_cmd_helpers.h +++ b/shared/source/helpers/in_order_cmd_helpers.h @@ -137,15 +137,7 @@ struct PatchCmd { semaphoreCmd->setSemaphoreDataDword(static_cast(baseCounterValue + appendCounterValue)); } - void patchComputeWalker(uint64_t appendCounterValue) { - if constexpr (GfxFamily::walkerPostSyncSupport) { - auto walkerCmd = reinterpret_cast(cmd1); - auto &postSync = walkerCmd->getPostSync(); - postSync.setImmediateData(baseCounterValue + appendCounterValue); - } else { - UNRECOVERABLE_IF(true); - } - } + void patchComputeWalker(uint64_t appendCounterValue); void patchLri64b(uint64_t appendCounterValue) { if (isExternalDependency()) { diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index fad9f1fee9..c0d92072c5 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -379,6 +379,8 @@ template void EncodeDispatchKernel::encode(Co template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); + template struct EncodeStates; template struct EncodeMath; template struct EncodeMathMMIO; diff --git a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp index eaacbec6e8..e53cb481c6 100644 --- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp @@ -232,6 +232,8 @@ template void EncodeDispatchKernel::encode(Co template void EncodeDispatchKernel::encodeThreadData(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); template void EncodeDispatchKernel::adjustWalkOrder(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); +template void InOrderPatchCommandHelpers::PatchCmd::patchComputeWalker(uint64_t appendCounterValue); + template struct EncodeStates; template struct EncodeMath; template struct EncodeMathMMIO;