refactor: unify programming of preferred slm size 5/n

- remove xe2 hpg encode preferred slm size
- add xe2 release helper preferred slm array
- add dedicated method to calculate thread count per sub slice

Related-To: NEO-12639

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2024-10-08 17:33:33 +00:00
committed by Compute-Runtime-Automation
parent 946e421f77
commit 46a63d3e0e
10 changed files with 117 additions and 92 deletions

View File

@@ -132,6 +132,8 @@ struct EncodeDispatchKernel {
static void setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
// Returns the number of threads per sub-slice for the given hardware info.
// The per-family definitions divide gtSystemInfo.ThreadCount by either
// DualSubSliceCount or SubSliceCount.
static uint32_t getThreadCountPerSubslice(const HardwareInfo &hwInfo);
// Declaration only; the definition is not part of this view.
// NOTE(review): presumably programs the EU scheduling (thread arbitration)
// policy into *pInterfaceDescriptor, using the kernel descriptor and the
// supplied default policy -- confirm against the definition.
template <typename InterfaceDescriptorType>
static void encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);

View File

@@ -10,4 +10,10 @@ template <typename Family>
// Returns the default IOH alignment for this family variant; it is always 1.
size_t EncodeDispatchKernel<Family>::getDefaultIOHAlignment() {
return 1;
}
// Computes the thread count per sub-slice for this family variant as the
// total thread count divided by the number of dual sub-slices.
// The divisor is used as-is; there is no guard for a zero DualSubSliceCount.
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::getThreadCountPerSubslice(const HardwareInfo &hwInfo) {
    const auto &gtInfo = hwInfo.gtSystemInfo;
    return gtInfo.ThreadCount / gtInfo.DualSubSliceCount;
}
} // namespace NEO

View File

@@ -15,4 +15,9 @@ size_t EncodeDispatchKernel<Family>::getDefaultIOHAlignment() {
return alignment;
}
// Computes the thread count per sub-slice for this family variant as the
// total thread count divided by the number of sub-slices.
// The divisor is used as-is; there is no guard for a zero SubSliceCount.
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::getThreadCountPerSubslice(const HardwareInfo &hwInfo) {
    const auto &gtInfo = hwInfo.gtSystemInfo;
    return gtInfo.ThreadCount / gtInfo.SubSliceCount;
}
} // namespace NEO

View File

@@ -1066,7 +1066,7 @@ template <typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
const uint32_t threadsPerDssCount = EncodeDispatchKernel<Family>::getThreadCountPerSubslice(hwInfo);
const uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));
uint32_t slmSize = 0u;