mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
refactor: unify programming of preferred slm size 5/n
- remove xe2 hpg encode preferred slm size
- add xe2 release helper preferred slm array
- add dedicated method to calculate thread count per sub slice

Related-To: NEO-12639
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
946e421f77
commit
46a63d3e0e
@@ -132,6 +132,8 @@ struct EncodeDispatchKernel {
|
||||
static void setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
|
||||
const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
|
||||
|
||||
// Calculates the thread count per sub slice from the given hardware info.
// Used by setupPreferredSlmSize in place of an inline division.
static uint32_t getThreadCountPerSubslice(const HardwareInfo &hwInfo);
|
||||
|
||||
template <typename InterfaceDescriptorType>
|
||||
static void encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);
|
||||
|
||||
|
||||
@@ -10,4 +10,10 @@ template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::getDefaultIOHAlignment() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Returns the thread count per sub slice for this family, computed as
// gtSystemInfo.ThreadCount divided by gtSystemInfo.DualSubSliceCount.
// NOTE(review): no zero check on DualSubSliceCount is visible here; presumably
// callers only pass hardware info with a non-zero count - confirm.
template <typename Family>
|
||||
uint32_t EncodeDispatchKernel<Family>::getThreadCountPerSubslice(const HardwareInfo &hwInfo) {
|
||||
return hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -15,4 +15,9 @@ size_t EncodeDispatchKernel<Family>::getDefaultIOHAlignment() {
|
||||
return alignment;
|
||||
}
|
||||
|
||||
// Returns the thread count per sub slice for this family, computed as
// gtSystemInfo.ThreadCount divided by gtSystemInfo.SubSliceCount
// (this variant divides by SubSliceCount, not DualSubSliceCount).
// NOTE(review): no zero check on SubSliceCount is visible here; presumably
// callers only pass hardware info with a non-zero count - confirm.
template <typename Family>
|
||||
uint32_t EncodeDispatchKernel<Family>::getThreadCountPerSubslice(const HardwareInfo &hwInfo) {
|
||||
return hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.SubSliceCount;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1066,7 +1066,7 @@ template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
const uint32_t threadsPerDssCount = EncodeDispatchKernel<Family>::getThreadCountPerSubslice(hwInfo);
|
||||
const uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));
|
||||
|
||||
uint32_t slmSize = 0u;
|
||||
|
||||
Reference in New Issue
Block a user