From 227ce579fe18e9cb48c80afbb284b74f89d10522 Mon Sep 17 00:00:00 2001
From: Zbigniew Zdanowicz
Date: Thu, 10 Oct 2024 18:22:42 +0000
Subject: [PATCH] fix: align total slm size for preferred size

Related-To: NEO-12639

Signed-off-by: Zbigniew Zdanowicz
---
 shared/source/command_container/command_encoder.h          | 1 +
 .../command_container/command_encoder_pre_xe2_hpg_core.inl | 5 +++++
 .../command_encoder_xe2_hpg_core_and_later.inl             | 5 +++++
 .../command_container/command_encoder_xehp_and_later.inl   | 2 ++
 4 files changed, 13 insertions(+)

diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h
index c75582554f..c8ea787f60 100644
--- a/shared/source/command_container/command_encoder.h
+++ b/shared/source/command_container/command_encoder.h
@@ -133,6 +133,7 @@ struct EncodeDispatchKernel {
                                       const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
 
     static uint32_t getThreadCountPerSubslice(const HardwareInfo &hwInfo);
+    static uint32_t alignPreferredSlmSize(uint32_t slmSize);
 
     template <typename InterfaceDescriptorType>
     static void encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);
diff --git a/shared/source/command_container/command_encoder_pre_xe2_hpg_core.inl b/shared/source/command_container/command_encoder_pre_xe2_hpg_core.inl
index 5f24d7a877..136233a8fb 100644
--- a/shared/source/command_container/command_encoder_pre_xe2_hpg_core.inl
+++ b/shared/source/command_container/command_encoder_pre_xe2_hpg_core.inl
@@ -16,4 +16,9 @@ uint32_t EncodeDispatchKernel<Family>::getThreadCountPerSubslice(const HardwareI
     return hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
 }
 
+template <typename Family>
+uint32_t EncodeDispatchKernel<Family>::alignPreferredSlmSize(uint32_t slmSize) {
+    return EncodeDispatchKernel<Family>::alignSlmSize(slmSize);
+}
+
 } // namespace NEO
diff --git a/shared/source/command_container/command_encoder_xe2_hpg_core_and_later.inl b/shared/source/command_container/command_encoder_xe2_hpg_core_and_later.inl
index 7570b4cb01..1f2b7f0772 100644
--- a/shared/source/command_container/command_encoder_xe2_hpg_core_and_later.inl
+++ b/shared/source/command_container/command_encoder_xe2_hpg_core_and_later.inl
@@ -20,4 +20,9 @@ uint32_t EncodeDispatchKernel<Family>::getThreadCountPerSubslice(const HardwareI
     return hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.SubSliceCount;
 }
 
+template <typename Family>
+uint32_t EncodeDispatchKernel<Family>::alignPreferredSlmSize(uint32_t slmSize) {
+    return slmSize;
+}
+
 } // namespace NEO
diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl
index e5f583593b..56892e7171 100644
--- a/shared/source/command_container/command_encoder_xehp_and_later.inl
+++ b/shared/source/command_container/command_encoder_xehp_and_later.inl
@@ -1070,6 +1070,8 @@ void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType
     const uint32_t threadsPerDssCount = EncodeDispatchKernel<Family>::getThreadCountPerSubslice(hwInfo);
     const uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));
 
+    slmTotalSize = EncodeDispatchKernel<Family>::alignPreferredSlmSize(slmTotalSize);
+
     uint32_t slmSize = 0u;
 
     switch (slmPolicy) {