mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
refactor: unify programming of preferred slm size 3/n
- add shared implementation to encode preferred slm size
- add pvc release helper preferred slm array
- drop pvc preproduction steppings values for preferred slm size
- remove obsolete product helper method

Related-To: NEO-12639
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
3685852ce0
commit
d6016e1b91
@@ -185,64 +185,6 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDevice
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
const uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));
|
||||
|
||||
const uint32_t workgroupSlmSize = EncodeDispatchKernel<Family>::alignSlmSize(slmTotalSize);
|
||||
|
||||
uint32_t slmSize = 0u;
|
||||
|
||||
switch (slmPolicy) {
|
||||
case SlmPolicy::slmPolicyLargeData:
|
||||
slmSize = workgroupSlmSize;
|
||||
break;
|
||||
case SlmPolicy::slmPolicyLargeSlm:
|
||||
default:
|
||||
slmSize = workgroupSlmSize * workGroupCountPerDss;
|
||||
break;
|
||||
}
|
||||
|
||||
struct SizeToPreferredSlmValue {
|
||||
uint32_t upperLimit;
|
||||
PREFERRED_SLM_ALLOCATION_SIZE valueToProgram;
|
||||
};
|
||||
const std::array<SizeToPreferredSlmValue, 6> ranges = {{
|
||||
// upper limit, retVal
|
||||
{0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K},
|
||||
{16 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K},
|
||||
{32 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K},
|
||||
{64 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K},
|
||||
{96 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K},
|
||||
}};
|
||||
|
||||
auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K;
|
||||
for (auto &range : ranges) {
|
||||
if (slmSize <= range.upperLimit) {
|
||||
programmableIdPreferredSlmSize = range.valueToProgram;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
|
||||
if ((slmSize == 0) && (productHelper.isAdjustProgrammableIdPreferredSlmSizeRequired(hwInfo))) {
|
||||
programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K;
|
||||
}
|
||||
|
||||
pInterfaceDescriptor->setPreferredSlmAllocationSize(programmableIdPreferredSlmSize);
|
||||
|
||||
if (debugManager.flags.OverridePreferredSlmAllocationSizePerDss.get() != -1) {
|
||||
auto toProgram =
|
||||
static_cast<PREFERRED_SLM_ALLOCATION_SIZE>(debugManager.flags.OverridePreferredSlmAllocationSizePerDss.get());
|
||||
pInterfaceDescriptor->setPreferredSlmAllocationSize(toProgram);
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount) {
|
||||
auto enablePrefetch = EncodeSurfaceState<Family>::doBindingTablePrefetch();
|
||||
|
||||
@@ -155,6 +155,11 @@ struct XeHpcCoreFamily : public XeHpcCore {
|
||||
// Compile-time query for heapless mode; this implementation always returns false.
static constexpr bool isHeaplessMode() {
    constexpr bool heaplessEnabled = false;
    return heaplessEnabled;
}
|
||||
|
||||
// Compile-time query for heapless mode of a given interface descriptor type;
// this implementation returns false for every InterfaceDescriptorType.
template <typename InterfaceDescriptorType>
static constexpr bool isInterfaceDescriptorHeaplessMode() {
    constexpr bool heaplessDescriptor = false;
    return heaplessDescriptor;
}
|
||||
using WalkerVariant = std::variant<COMPUTE_WALKER *>;
|
||||
};
|
||||
|
||||
|
||||
@@ -89,11 +89,6 @@ bool ProductHelperHw<gfxProduct>::isGlobalFenceInCommandStreamRequired(const Har
|
||||
return !PVC::isXlA0(hwInfo);
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ProductHelperHw<gfxProduct>::isAdjustProgrammableIdPreferredSlmSizeRequired(const HardwareInfo &hwInfo) const {
|
||||
return PVC::isXlA0(hwInfo);
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ProductHelperHw<gfxProduct>::isCooperativeEngineSupported(const HardwareInfo &hwInfo) const {
|
||||
return getSteppingFromHwRevId(hwInfo) >= REVISION_B;
|
||||
|
||||
Reference in New Issue
Block a user