mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
refactor: unify programming of preferred slm size 5/n
- remove xe2 hpg encode preferred slm size
- add xe2 release helper preferred slm array
- add dedicated method to calculate thread count per sub slice

Related-To: NEO-12639
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
946e421f77
commit
46a63d3e0e
@@ -132,6 +132,8 @@ struct EncodeDispatchKernel {
|
||||
static void setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
|
||||
const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
|
||||
|
||||
// Returns a per-sub-slice thread count computed from the given hardware info.
// The visible definitions divide gtSystemInfo.ThreadCount by either
// DualSubSliceCount or SubSliceCount, depending on which implementation is used.
static uint32_t getThreadCountPerSubslice(const HardwareInfo &hwInfo);
|
||||
|
||||
template <typename InterfaceDescriptorType>
|
||||
static void encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);
|
||||
|
||||
|
||||
@@ -10,4 +10,10 @@ template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::getDefaultIOHAlignment() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Returns the thread count per dual sub-slice: gtSystemInfo.ThreadCount divided
// (integer division, remainder dropped) by gtSystemInfo.DualSubSliceCount.
// NOTE(review): no guard against DualSubSliceCount == 0 is visible here —
// confirm callers always pass populated hardware info.
template <typename Family>
|
||||
uint32_t EncodeDispatchKernel<Family>::getThreadCountPerSubslice(const HardwareInfo &hwInfo) {
|
||||
return hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -15,4 +15,9 @@ size_t EncodeDispatchKernel<Family>::getDefaultIOHAlignment() {
|
||||
return alignment;
|
||||
}
|
||||
|
||||
// Returns the thread count per sub-slice: gtSystemInfo.ThreadCount divided
// (integer division, remainder dropped) by gtSystemInfo.SubSliceCount.
// This variant uses SubSliceCount rather than DualSubSliceCount as the divisor.
// NOTE(review): no guard against SubSliceCount == 0 is visible here —
// confirm callers always pass populated hardware info.
template <typename Family>
|
||||
uint32_t EncodeDispatchKernel<Family>::getThreadCountPerSubslice(const HardwareInfo &hwInfo) {
|
||||
return hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.SubSliceCount;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1066,7 +1066,7 @@ template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
const uint32_t threadsPerDssCount = EncodeDispatchKernel<Family>::getThreadCountPerSubslice(hwInfo);
|
||||
const uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));
|
||||
|
||||
uint32_t slmSize = 0u;
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/release_helper/release_helper.h"
|
||||
#include "shared/source/release_helper/release_helper_base.inl"
|
||||
#include "shared/source/xe2_hpg_core/hw_cmds_base.h"
|
||||
|
||||
#include "release_definitions.h"
|
||||
|
||||
@@ -32,6 +33,22 @@ template <>
|
||||
bool ReleaseHelperHw<release>::isBindlessAddressingDisabled() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns a reference to a function-local static table that pairs an SLM size
// upper limit (in bytes) with the Xe2 HPG PREFERRED_SLM_ALLOCATION_SIZE value
// to program. Entries are listed in ascending order of upper limit.
// The `isHeapless` argument is not read by this implementation.
// NOTE(review): SizeToPreferredSlmValueArray is declared elsewhere; presumably
// its element count matches the seven entries listed here — confirm.
template <>
|
||||
const SizeToPreferredSlmValueArray &ReleaseHelperHw<release>::getSizeToPreferredSlmValue(bool isHeapless) const {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename Xe2HpgCoreFamily::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
static const SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {{
|
||||
{0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K},
|
||||
{16 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K},
|
||||
{32 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K},
|
||||
{64 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K},
|
||||
{96 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K},
|
||||
{128 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K},
|
||||
// Catch-all entry: any size above 128 KiB maps to the 160K setting.
{std::numeric_limits<uint32_t>::max(), PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_160K},
|
||||
}};
|
||||
return sizeToPreferredSlmValue;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
#include "shared/source/release_helper/release_helper_common_xe2_hpg.inl"
|
||||
|
||||
@@ -27,12 +27,6 @@ inline bool ReleaseHelperHw<release>::isAuxSurfaceModeOverrideRequired() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Clamps the requested preferred-SLM enum value so it never exceeds
// PREFERRED_SLM_ALLOCATION_SIZE_128K; smaller values are returned unchanged.
// The comparison is done on the integer value of the enum.
template <>
|
||||
int ReleaseHelperHw<release>::getProductMaxPreferredSlmSize(int preferredEnumValue) const {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename Xe2HpgCoreFamily::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
return std::min(preferredEnumValue, static_cast<int>(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K));
|
||||
}
|
||||
|
||||
template <>
|
||||
bool ReleaseHelperHw<release>::isLocalOnlyAllowed() const {
|
||||
return false;
|
||||
@@ -43,6 +37,20 @@ bool ReleaseHelperHw<release>::isBindlessAddressingDisabled() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns a reference to a function-local static table that pairs an SLM size
// upper limit (in bytes) with the Xe2 HPG PREFERRED_SLM_ALLOCATION_SIZE value
// to program. Entries are listed in ascending order of upper limit.
// The `isHeapless` argument is not read by this implementation.
// NOTE(review): SizeToPreferredSlmValueArray is declared elsewhere; if it holds
// more than the six entries listed here, the rest are value-initialized — confirm.
template <>
|
||||
const SizeToPreferredSlmValueArray &ReleaseHelperHw<release>::getSizeToPreferredSlmValue(bool isHeapless) const {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename Xe2HpgCoreFamily::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
static const SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {{
|
||||
{0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K},
|
||||
{16 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K},
|
||||
{32 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K},
|
||||
{64 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K},
|
||||
{96 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K},
|
||||
// Catch-all entry: any size above 96 KiB maps to the 128K setting (no 160K here).
{std::numeric_limits<uint32_t>::max(), PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K},
|
||||
}};
|
||||
return sizeToPreferredSlmValue;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
#include "shared/source/release_helper/release_helper_common_xe2_hpg.inl"
|
||||
|
||||
@@ -253,64 +253,6 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDevice
|
||||
walkerCmd.setComputeDispatchAllWalkerEnable(computeDispatchAllWalkerEnable);
|
||||
}
|
||||
|
||||
// Programs the preferred SLM allocation size field of an interface descriptor.
//
// Steps, as visible in this body:
//   1. Compute threads per sub-slice (ThreadCount / SubSliceCount) and from it the
//      number of work groups per sub-slice (rounded up).
//   2. Pick the SLM size to map: slmTotalSize alone for slmPolicyLargeData,
//      slmTotalSize * workGroupCountPerDss for every other policy.
//   3. Map that size to an enum value through a local range table; sizes above the
//      last range keep the release-helper-limited 160K default.
//   4. Let the OverridePreferredSlmAllocationSizePerDss debug flag (when != -1)
//      overwrite the programmed value.
// NOTE(review): releaseHelper is dereferenced without a null check — confirm it is
// always available on this path.
template <>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
// Divisor is SubSliceCount here (not DualSubSliceCount).
uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount /
|
||||
hwInfo.gtSystemInfo.SubSliceCount;
|
||||
uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));
|
||||
|
||||
uint32_t slmSize = 0u;
|
||||
|
||||
switch (slmPolicy) {
|
||||
case SlmPolicy::slmPolicyLargeData:
|
||||
slmSize = slmTotalSize;
|
||||
break;
|
||||
case SlmPolicy::slmPolicyLargeSlm:
|
||||
default:
|
||||
slmSize = slmTotalSize * workGroupCountPerDss;
|
||||
break;
|
||||
}
|
||||
|
||||
struct SizeToPreferredSlmValue {
|
||||
uint32_t upperLimit;
|
||||
PREFERRED_SLM_ALLOCATION_SIZE valueToProgram;
|
||||
};
|
||||
// The array is sized 10 but only six entries are listed; the remaining four are
// value-initialized (upperLimit 0), so they can only match slmSize == 0, which the
// first entry already handles.
const std::array<SizeToPreferredSlmValue, 10> ranges = {{
|
||||
// upper limit, retVal
|
||||
{0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K},
|
||||
{16 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K},
|
||||
{32 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K},
|
||||
{64 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K},
|
||||
{96 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K},
|
||||
{128 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K},
|
||||
}};
|
||||
|
||||
// Default used when slmSize exceeds every listed upper limit.
auto programmableIdPreferredSlmSize = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_160K;
|
||||
|
||||
auto *releaseHelper = rootDeviceEnvironment.getReleaseHelper();
|
||||
// The release helper may lower the 160K default to a product-specific maximum.
programmableIdPreferredSlmSize = static_cast<PREFERRED_SLM_ALLOCATION_SIZE>(
|
||||
releaseHelper->getProductMaxPreferredSlmSize(programmableIdPreferredSlmSize));
|
||||
|
||||
// First range whose upper limit covers slmSize wins.
for (auto &range : ranges) {
|
||||
if (slmSize <= range.upperLimit) {
|
||||
programmableIdPreferredSlmSize = range.valueToProgram;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pInterfaceDescriptor->setPreferredSlmAllocationSize(programmableIdPreferredSlmSize);
|
||||
|
||||
// Debug override: any flag value other than -1 replaces the value programmed above.
if (debugManager.flags.OverridePreferredSlmAllocationSizePerDss.get() != -1) {
|
||||
auto toProgram =
|
||||
static_cast<PREFERRED_SLM_ALLOCATION_SIZE>(debugManager.flags.OverridePreferredSlmAllocationSizePerDss.get());
|
||||
pInterfaceDescriptor->setPreferredSlmAllocationSize(toProgram);
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::setGrfInfo(InterfaceDescriptorType *pInterfaceDescriptor, uint32_t grfCount,
|
||||
|
||||
@@ -1644,3 +1644,15 @@ HWTEST2_F(CommandEncodeStatesTest, givenEncodeDispatchKernelWhenForcingDifferent
|
||||
debugManager.flags.ForceIOHAlignment.set(expectedAlignemnt);
|
||||
EXPECT_EQ(NEO::EncodeDispatchKernel<FamilyType>::getDefaultIOHAlignment(), expectedAlignemnt);
|
||||
}
|
||||
|
||||
// Checks, for the families selected by IsAtMostXeHpcCore, that
// getThreadCountPerSubslice equals ThreadCount / DualSubSliceCount taken from
// the device's hardware info.
HWTEST2_F(CommandEncodeStatesTest, givenEncodeDispatchKernelWhenGettingThreadCountPerSubsliceThenUseDualSubSliceAsDenominator, IsAtMostXeHpcCore) {
|
||||
auto &hwInfo = pDevice->getHardwareInfo();
|
||||
auto expectedValue = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
EXPECT_EQ(expectedValue, NEO::EncodeDispatchKernel<FamilyType>::getThreadCountPerSubslice(hwInfo));
|
||||
}
|
||||
|
||||
// Checks, for the families selected by IsAtLeastXe2HpgCore, that
// getThreadCountPerSubslice equals ThreadCount / SubSliceCount taken from
// the device's hardware info.
HWTEST2_F(CommandEncodeStatesTest, givenEncodeDispatchKernelWhenGettingThreadCountPerSubsliceThenUseSubSliceAsDenominator, IsAtLeastXe2HpgCore) {
|
||||
auto &hwInfo = pDevice->getHardwareInfo();
|
||||
auto expectedValue = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.SubSliceCount;
|
||||
EXPECT_EQ(expectedValue, NEO::EncodeDispatchKernel<FamilyType>::getThreadCountPerSubslice(hwInfo));
|
||||
}
|
||||
|
||||
@@ -75,6 +75,34 @@ TEST_F(ReleaseHelper2001Tests, whenIsLocalOnlyAllowedCalledThenFalseReturned) {
|
||||
whenIsLocalOnlyAllowedCalledThenFalseReturned();
|
||||
}
|
||||
|
||||
TEST_F(ReleaseHelper2001Tests, whenGettingPreferredSlmSizeThenAllEntriesEmpty) {
|
||||
whenGettingPreferredSlmSizeThenAllEntriesEmpty();
|
||||
TEST_F(ReleaseHelper2001Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorrectValues) {
|
||||
for (auto &revision : getRevisions()) {
|
||||
ipVersion.revision = revision;
|
||||
releaseHelper = ReleaseHelper::create(ipVersion);
|
||||
ASSERT_NE(nullptr, releaseHelper);
|
||||
|
||||
constexpr uint32_t kB = 1024;
|
||||
|
||||
auto &preferredSlmValueArray = releaseHelper->getSizeToPreferredSlmValue(false);
|
||||
EXPECT_EQ(0u, preferredSlmValueArray[0].upperLimit);
|
||||
EXPECT_EQ(0u, preferredSlmValueArray[0].valueToProgram);
|
||||
|
||||
EXPECT_EQ(16 * kB, preferredSlmValueArray[1].upperLimit);
|
||||
EXPECT_EQ(1u, preferredSlmValueArray[1].valueToProgram);
|
||||
|
||||
EXPECT_EQ(32 * kB, preferredSlmValueArray[2].upperLimit);
|
||||
EXPECT_EQ(2u, preferredSlmValueArray[2].valueToProgram);
|
||||
|
||||
EXPECT_EQ(64 * kB, preferredSlmValueArray[3].upperLimit);
|
||||
EXPECT_EQ(3u, preferredSlmValueArray[3].valueToProgram);
|
||||
|
||||
EXPECT_EQ(96 * kB, preferredSlmValueArray[4].upperLimit);
|
||||
EXPECT_EQ(4u, preferredSlmValueArray[4].valueToProgram);
|
||||
|
||||
EXPECT_EQ(128 * kB, preferredSlmValueArray[5].upperLimit);
|
||||
EXPECT_EQ(5u, preferredSlmValueArray[5].valueToProgram);
|
||||
|
||||
EXPECT_EQ(std::numeric_limits<uint32_t>::max(), preferredSlmValueArray[6].upperLimit);
|
||||
EXPECT_EQ(6u, preferredSlmValueArray[6].valueToProgram);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,28 +44,8 @@ TEST_F(ReleaseHelper2004Tests, whenGettingCapabilitiesThenCorrectPropertiesAreRe
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ReleaseHelper2004Tests, whenGettingMaxPreferredSlmSizeThenSizeSizeIsLimitedBy128K) {
|
||||
for (auto &revision : getRevisions()) {
|
||||
ipVersion.revision = revision;
|
||||
releaseHelper = ReleaseHelper::create(ipVersion);
|
||||
ASSERT_NE(nullptr, releaseHelper);
|
||||
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename Xe2HpgCoreFamily::INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
for (auto &preferredSlmSize : {PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0K,
|
||||
PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K,
|
||||
PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K,
|
||||
PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K,
|
||||
PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_96K,
|
||||
PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K}) {
|
||||
|
||||
auto maxPreferredSlmValue = releaseHelper->getProductMaxPreferredSlmSize(preferredSlmSize);
|
||||
EXPECT_EQ(maxPreferredSlmValue, preferredSlmSize);
|
||||
}
|
||||
auto preferredSlmSize128k = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_128K;
|
||||
auto preferredSlmSize160k = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_160K;
|
||||
auto maxPreferredSlmValue = releaseHelper->getProductMaxPreferredSlmSize(preferredSlmSize160k);
|
||||
EXPECT_EQ(maxPreferredSlmValue, preferredSlmSize128k);
|
||||
}
|
||||
TEST_F(ReleaseHelper2004Tests, whenGettingMaxPreferredSlmSizeThenSizeIsNotModified) {
|
||||
whenGettingMaxPreferredSlmSizeThenSizeIsNotModified();
|
||||
}
|
||||
|
||||
TEST_F(ReleaseHelper2004Tests, whenShouldAdjustCalledThenTrueReturned) {
|
||||
@@ -96,6 +76,31 @@ TEST_F(ReleaseHelper2004Tests, whenIsLocalOnlyAllowedCalledThenFalseReturned) {
|
||||
whenIsLocalOnlyAllowedCalledThenFalseReturned();
|
||||
}
|
||||
|
||||
TEST_F(ReleaseHelper2004Tests, whenGettingPreferredSlmSizeThenAllEntriesEmpty) {
|
||||
whenGettingPreferredSlmSizeThenAllEntriesEmpty();
|
||||
TEST_F(ReleaseHelper2004Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorrectValues) {
|
||||
for (auto &revision : getRevisions()) {
|
||||
ipVersion.revision = revision;
|
||||
releaseHelper = ReleaseHelper::create(ipVersion);
|
||||
ASSERT_NE(nullptr, releaseHelper);
|
||||
|
||||
constexpr uint32_t kB = 1024;
|
||||
|
||||
auto &preferredSlmValueArray = releaseHelper->getSizeToPreferredSlmValue(false);
|
||||
EXPECT_EQ(0u, preferredSlmValueArray[0].upperLimit);
|
||||
EXPECT_EQ(0u, preferredSlmValueArray[0].valueToProgram);
|
||||
|
||||
EXPECT_EQ(16 * kB, preferredSlmValueArray[1].upperLimit);
|
||||
EXPECT_EQ(1u, preferredSlmValueArray[1].valueToProgram);
|
||||
|
||||
EXPECT_EQ(32 * kB, preferredSlmValueArray[2].upperLimit);
|
||||
EXPECT_EQ(2u, preferredSlmValueArray[2].valueToProgram);
|
||||
|
||||
EXPECT_EQ(64 * kB, preferredSlmValueArray[3].upperLimit);
|
||||
EXPECT_EQ(3u, preferredSlmValueArray[3].valueToProgram);
|
||||
|
||||
EXPECT_EQ(96 * kB, preferredSlmValueArray[4].upperLimit);
|
||||
EXPECT_EQ(4u, preferredSlmValueArray[4].valueToProgram);
|
||||
|
||||
EXPECT_EQ(std::numeric_limits<uint32_t>::max(), preferredSlmValueArray[5].upperLimit);
|
||||
EXPECT_EQ(5u, preferredSlmValueArray[5].valueToProgram);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user