refactor: unify programming of preferred slm size 5/n

- remove xe2 hpg encode preferred slm size
- add xe2 release helper preferred slm array
- add dedicated method to calculate thread count per sub slice

Related-To: NEO-12639
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
2025-12-21 01:04:57 +08:00 · 2024-10-08 17:33:33 +00:00
parent 946e421f77
commit 46a63d3e0e
10 changed files with 117 additions and 92 deletions
--- a/shared/source/command_container/command_encoder.h
+++ b/shared/source/command_container/command_encoder.h
@@ -132,6 +132,8 @@ struct EncodeDispatchKernel {
    static void setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
                                      const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);

+    static uint32_t getThreadCountPerSubslice(const HardwareInfo &hwInfo);
+
    template <typename InterfaceDescriptorType>
    static void encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);

--- a/shared/source/command_container/command_encoder_pre_xe2_hpg_core.inl
+++ b/shared/source/command_container/command_encoder_pre_xe2_hpg_core.inl
@@ -10,4 +10,10 @@ template <typename Family>
 size_t EncodeDispatchKernel<Family>::getDefaultIOHAlignment() {
    return 1;
 }
+
+template <typename Family>
+uint32_t EncodeDispatchKernel<Family>::getThreadCountPerSubslice(const HardwareInfo &hwInfo) {
+    return hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
+}
+
 } // namespace NEO
--- a/shared/source/command_container/command_encoder_xe2_hpg_core_and_later.inl
+++ b/shared/source/command_container/command_encoder_xe2_hpg_core_and_later.inl
@@ -15,4 +15,9 @@ size_t EncodeDispatchKernel<Family>::getDefaultIOHAlignment() {
    return alignment;
 }

+template <typename Family>
+uint32_t EncodeDispatchKernel<Family>::getThreadCountPerSubslice(const HardwareInfo &hwInfo) {
+    return hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.SubSliceCount;
+}
+
 } // namespace NEO
--- a/shared/source/command_container/command_encoder_xehp_and_later.inl
+++ b/shared/source/command_container/command_encoder_xehp_and_later.inl
@@ -1066,7 +1066,7 @@ template <typename InterfaceDescriptorType>
 void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
    using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;
    auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
-    const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
+    const uint32_t threadsPerDssCount = EncodeDispatchKernel<Family>::getThreadCountPerSubslice(hwInfo);
    const uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));

    uint32_t slmSize = 0u;