refactor: Add extra parameters to computeSlmValues function

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2025-02-20 12:53:27 +00:00
committed by Compute-Runtime-Automation
parent b7d21b135c
commit 794b21a3fa
17 changed files with 53 additions and 36 deletions

View File

@@ -29,6 +29,7 @@ class GmmHelper;
class IndirectHeap;
class InOrderExecInfo;
class ProductHelper;
class ReleaseHelper;
struct DeviceInfo;
struct DispatchKernelEncoderI;
@@ -229,7 +230,7 @@ struct EncodeDispatchKernel {
static void forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd);
static uint32_t alignSlmSize(uint32_t slmSize);
static uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize);
static uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless);
static bool singleTileExecImplicitScalingRequired(bool cooperativeKernel);

View File

@@ -115,7 +115,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(&idd, kernelDescriptor, args.defaultPipelinedThreadArbitrationPolicy);
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
auto releaseHelper = rootDeviceEnvironment.getReleaseHelper();
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize(), releaseHelper, heaplessModeEnabled);
if (debugManager.flags.OverrideSlmAllocationSize.get() != -1) {
slmSize = static_cast<uint32_t>(debugManager.flags.OverrideSlmAllocationSize.get());
@@ -953,7 +954,7 @@ uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
}
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) {
using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
if (slmSize == 0u) {

View File

@@ -104,7 +104,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
kernelDescriptor,
hwInfo);
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize(), nullptr, false);
idd.setSharedLocalMemorySize(slmSize);
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
@@ -631,7 +631,7 @@ uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
}
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) {
auto value = std::max(slmSize, 1024u);
value = Math::nextPowerOfTwo(value);
value = Math::getMinLsbSet(value);

View File

@@ -45,6 +45,7 @@ struct RootDeviceEnvironment;
struct PipeControlArgs;
struct KernelDescriptor;
class ProductHelper;
class ReleaseHelper;
class GfxCoreHelper;
class AILConfiguration;
@@ -101,7 +102,7 @@ class GfxCoreHelper {
virtual uint32_t calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const = 0;
virtual uint32_t calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const = 0;
virtual uint32_t alignSlmSize(uint32_t slmSize) const = 0;
virtual uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const = 0;
virtual uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) const = 0;
virtual bool isWaDisableRccRhwoOptimizationRequired() const = 0;
virtual uint32_t getMinimalSIMDSize() const = 0;
@@ -313,7 +314,7 @@ class GfxCoreHelperHw : public GfxCoreHelper {
uint32_t alignSlmSize(uint32_t slmSize) const override;
uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const override;
uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) const override;
static AuxTranslationMode getAuxTranslationMode(const HardwareInfo &hwInfo);

View File

@@ -406,8 +406,8 @@ uint32_t GfxCoreHelperHw<GfxFamily>::alignSlmSize(uint32_t slmSize) const {
}
// Delegates to EncodeDispatchKernel<GfxFamily>::computeSlmValues, passing the received arguments through unchanged
// and returning whatever that call returns.
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const {
return EncodeDispatchKernel<GfxFamily>::computeSlmValues(hwInfo, slmSize);
uint32_t GfxCoreHelperHw<GfxFamily>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) const {
return EncodeDispatchKernel<GfxFamily>::computeSlmValues(hwInfo, slmSize, releaseHelper, isHeapless);
}
template <typename GfxFamily>

View File

@@ -63,6 +63,7 @@ class ReleaseHelper {
virtual const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const = 0;
virtual bool isNumRtStacksPerDssFixedValue() const = 0;
virtual bool getFtrXe2Compression() const = 0;
virtual uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const = 0;
protected:
ReleaseHelper(HardwareIpVersion hardwareIpVersion) : hardwareIpVersion(hardwareIpVersion) {}
@@ -105,6 +106,7 @@ class ReleaseHelperHw : public ReleaseHelper {
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override;
bool isNumRtStacksPerDssFixedValue() const override;
bool getFtrXe2Compression() const override;
uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const override;
protected:
ReleaseHelperHw(HardwareIpVersion hardwareIpVersion) : ReleaseHelper(hardwareIpVersion) {}

View File

@@ -162,4 +162,10 @@ template <ReleaseType releaseType>
bool ReleaseHelperHw<releaseType>::getFtrXe2Compression() const {
return true;
}
// This definition does not read slmSize or isHeapless; it unconditionally yields zero.
template <ReleaseType releaseType>
uint32_t ReleaseHelperHw<releaseType>::computeSlmValues(uint32_t slmSize, bool isHeapless) const {
    constexpr uint32_t result = 0u;
    return result;
}
} // namespace NEO

View File

@@ -144,7 +144,7 @@ uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
}
template <>
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) {
auto slmValue = std::max(slmSize, 1024u);
slmValue = Math::nextPowerOfTwo(slmValue);
slmValue = Math::getMinLsbSet(slmValue);

View File

@@ -42,6 +42,7 @@ class MockReleaseHelper : public ReleaseHelper {
ADDMETHOD_CONST_NOBASE(isNumRtStacksPerDssFixedValue, bool, true, ());
ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ());
ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ());
ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless));
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override {
static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {};

View File

@@ -263,7 +263,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterT
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
auto &gfxcoreHelper = this->getHelper<GfxCoreHelper>();
uint32_t expectedValue = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(
gfxcoreHelper.computeSlmValues(pDevice->getHardwareInfo(), slmTotalSize));
gfxcoreHelper.computeSlmValues(pDevice->getHardwareInfo(), slmTotalSize, nullptr, false));
EXPECT_EQ(expectedValue, interfaceDescriptorData->getSharedLocalMemorySize());
}

View File

@@ -10,11 +10,13 @@
#include "shared/source/command_container/walker_partition_xehp_and_later.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/source/release_helper/release_helper.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
@@ -59,9 +61,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterTha
auto cmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &idd = cmd->getInterfaceDescriptor();
auto &gfxcoreHelper = this->getHelper<GfxCoreHelper>();
auto releaseHelper = ReleaseHelper::create(pDevice->getHardwareInfo().ipVersion);
bool isHeapless = pDevice->getCompilerProductHelper().isHeaplessModeEnabled();
uint32_t expectedValue = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(
gfxcoreHelper.computeSlmValues(pDevice->getHardwareInfo(), slmTotalSize));
gfxcoreHelper.computeSlmValues(pDevice->getHardwareInfo(), slmTotalSize, releaseHelper.get(), isHeapless));
EXPECT_EQ(expectedValue, idd.getSharedLocalMemorySize());
}

View File

@@ -1192,28 +1192,28 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, GfxCoreHelperTest, GivenVariousValuesWhenAlignSlm
// Checks the value returned by GfxCoreHelper::computeSlmValues for SLM sizes from 0 to 65536 bytes:
// 0 is expected to map to 0, any size from 1 to 1024 to 1, and each following power-of-two bucket
// (1025-2048, 2049-4096, ...) to the next integer, ending with 7 for 32769-65536.
// Both the two-argument calls and the four-argument calls (nullptr release helper, isHeapless == false)
// listed here carry identical expectations.
HWCMDTEST_F(IGFX_GEN12LP_CORE, GfxCoreHelperTest, GivenVariousValuesWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) {
auto hwInfo = *defaultHwInfo;
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
EXPECT_EQ(0u, gfxCoreHelper.computeSlmValues(hwInfo, 0));
EXPECT_EQ(1u, gfxCoreHelper.computeSlmValues(hwInfo, 1));
EXPECT_EQ(1u, gfxCoreHelper.computeSlmValues(hwInfo, 1024));
EXPECT_EQ(2u, gfxCoreHelper.computeSlmValues(hwInfo, 1025));
EXPECT_EQ(2u, gfxCoreHelper.computeSlmValues(hwInfo, 2048));
EXPECT_EQ(3u, gfxCoreHelper.computeSlmValues(hwInfo, 2049));
EXPECT_EQ(3u, gfxCoreHelper.computeSlmValues(hwInfo, 4096));
EXPECT_EQ(4u, gfxCoreHelper.computeSlmValues(hwInfo, 4097));
EXPECT_EQ(4u, gfxCoreHelper.computeSlmValues(hwInfo, 8192));
EXPECT_EQ(5u, gfxCoreHelper.computeSlmValues(hwInfo, 8193));
EXPECT_EQ(5u, gfxCoreHelper.computeSlmValues(hwInfo, 16384));
EXPECT_EQ(6u, gfxCoreHelper.computeSlmValues(hwInfo, 16385));
EXPECT_EQ(6u, gfxCoreHelper.computeSlmValues(hwInfo, 32768));
EXPECT_EQ(7u, gfxCoreHelper.computeSlmValues(hwInfo, 32769));
EXPECT_EQ(7u, gfxCoreHelper.computeSlmValues(hwInfo, 65536));
EXPECT_EQ(0u, gfxCoreHelper.computeSlmValues(hwInfo, 0, nullptr, false));
EXPECT_EQ(1u, gfxCoreHelper.computeSlmValues(hwInfo, 1, nullptr, false));
EXPECT_EQ(1u, gfxCoreHelper.computeSlmValues(hwInfo, 1024, nullptr, false));
EXPECT_EQ(2u, gfxCoreHelper.computeSlmValues(hwInfo, 1025, nullptr, false));
EXPECT_EQ(2u, gfxCoreHelper.computeSlmValues(hwInfo, 2048, nullptr, false));
EXPECT_EQ(3u, gfxCoreHelper.computeSlmValues(hwInfo, 2049, nullptr, false));
EXPECT_EQ(3u, gfxCoreHelper.computeSlmValues(hwInfo, 4096, nullptr, false));
EXPECT_EQ(4u, gfxCoreHelper.computeSlmValues(hwInfo, 4097, nullptr, false));
EXPECT_EQ(4u, gfxCoreHelper.computeSlmValues(hwInfo, 8192, nullptr, false));
EXPECT_EQ(5u, gfxCoreHelper.computeSlmValues(hwInfo, 8193, nullptr, false));
EXPECT_EQ(5u, gfxCoreHelper.computeSlmValues(hwInfo, 16384, nullptr, false));
EXPECT_EQ(6u, gfxCoreHelper.computeSlmValues(hwInfo, 16385, nullptr, false));
EXPECT_EQ(6u, gfxCoreHelper.computeSlmValues(hwInfo, 32768, nullptr, false));
EXPECT_EQ(7u, gfxCoreHelper.computeSlmValues(hwInfo, 32769, nullptr, false));
EXPECT_EQ(7u, gfxCoreHelper.computeSlmValues(hwInfo, 65536, nullptr, false));
}
// Verifies that GfxCoreHelper::computeSlmValues, called with an SLM size of 0, returns a value that
// converts to SHARED_LOCAL_MEMORY_SIZE_SLM_ENCODES_0K of the family's INTERFACE_DESCRIPTOR_DATA.
HWTEST_F(GfxCoreHelperTest, GivenZeroSlmSizeWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) {
using SHARED_LOCAL_MEMORY_SIZE = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
auto hwInfo = *defaultHwInfo;
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
auto receivedSlmSize = static_cast<SHARED_LOCAL_MEMORY_SIZE>(gfxCoreHelper.computeSlmValues(hwInfo, 0));
auto receivedSlmSize = static_cast<SHARED_LOCAL_MEMORY_SIZE>(gfxCoreHelper.computeSlmValues(hwInfo, 0, nullptr, false));
EXPECT_EQ(SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_SLM_ENCODES_0K, receivedSlmSize);
}

View File

@@ -84,8 +84,8 @@ XE2_HPG_CORETEST_F(Xe2HpgCoreDeviceCaps, givenSlmSizeWhenEncodingThenReturnCorre
{11, 128 * MemoryConstants::kiloByte}};
for (const auto &testInput : computeSlmValuesXe2AndLaterTestsInput) {
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize));
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize, nullptr, false));
}
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 128 * MemoryConstants::kiloByte + 1), std::exception);
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 128 * MemoryConstants::kiloByte + 1, nullptr, false), std::exception);
}

View File

@@ -96,8 +96,8 @@ XE3_CORETEST_F(Xe3CoreDeviceCaps, givenSlmSizeWhenEncodingThenReturnCorrectValue
{11, 128 * MemoryConstants::kiloByte}};
for (const auto &testInput : computeSlmValuesXe3AndLaterTestsInput) {
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize));
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize, nullptr, false));
}
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 128 * MemoryConstants::kiloByte + 1), std::exception);
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 128 * MemoryConstants::kiloByte + 1, nullptr, false), std::exception);
}

View File

@@ -51,10 +51,10 @@ XE_HPC_CORETEST_F(GfxCoreHelperXeHpcCoreTest, givenSlmSizeWhenEncodingThenReturn
auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<GfxCoreHelper>();
for (auto &testInput : computeSlmValuesXeHpcTestsInput) {
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize));
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize, nullptr, false));
}
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 129 * MemoryConstants::kiloByte), std::exception);
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 129 * MemoryConstants::kiloByte, nullptr, false), std::exception);
}
XE_HPC_CORETEST_F(GfxCoreHelperXeHpcCoreTest, WhenGettingIsCpuImageTransferPreferredThenTrueIsReturned) {

View File

@@ -320,7 +320,7 @@ XE_HPG_CORETEST_F(GfxCoreHelperTestXeHpgCore, GivenVariousValuesWhenComputeSlmSi
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
auto hardwareInfo = *defaultHwInfo;
for (auto &testInput : computeSlmValuesXeHpgTestsInput) {
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hardwareInfo, testInput.slmSize));
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hardwareInfo, testInput.slmSize, nullptr, false));
}
}