mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
refactor: Add extra parameters to computeSlmValues function
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b7d21b135c
commit
794b21a3fa
@@ -29,6 +29,7 @@ class GmmHelper;
|
||||
class IndirectHeap;
|
||||
class InOrderExecInfo;
|
||||
class ProductHelper;
|
||||
class ReleaseHelper;
|
||||
|
||||
struct DeviceInfo;
|
||||
struct DispatchKernelEncoderI;
|
||||
@@ -229,7 +230,7 @@ struct EncodeDispatchKernel {
|
||||
static void forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd);
|
||||
|
||||
static uint32_t alignSlmSize(uint32_t slmSize);
|
||||
static uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize);
|
||||
static uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless);
|
||||
|
||||
static bool singleTileExecImplicitScalingRequired(bool cooperativeKernel);
|
||||
|
||||
|
||||
@@ -115,7 +115,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
|
||||
EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(&idd, kernelDescriptor, args.defaultPipelinedThreadArbitrationPolicy);
|
||||
|
||||
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
|
||||
auto releaseHelper = rootDeviceEnvironment.getReleaseHelper();
|
||||
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize(), releaseHelper, heaplessModeEnabled);
|
||||
|
||||
if (debugManager.flags.OverrideSlmAllocationSize.get() != -1) {
|
||||
slmSize = static_cast<uint32_t>(debugManager.flags.OverrideSlmAllocationSize.get());
|
||||
@@ -953,7 +954,7 @@ uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
|
||||
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) {
|
||||
using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
|
||||
|
||||
if (slmSize == 0u) {
|
||||
|
||||
@@ -104,7 +104,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
|
||||
kernelDescriptor,
|
||||
hwInfo);
|
||||
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
|
||||
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize(), nullptr, false);
|
||||
idd.setSharedLocalMemorySize(slmSize);
|
||||
|
||||
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||
@@ -631,7 +631,7 @@ uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
|
||||
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) {
|
||||
auto value = std::max(slmSize, 1024u);
|
||||
value = Math::nextPowerOfTwo(value);
|
||||
value = Math::getMinLsbSet(value);
|
||||
|
||||
@@ -45,6 +45,7 @@ struct RootDeviceEnvironment;
|
||||
struct PipeControlArgs;
|
||||
struct KernelDescriptor;
|
||||
class ProductHelper;
|
||||
class ReleaseHelper;
|
||||
class GfxCoreHelper;
|
||||
class AILConfiguration;
|
||||
|
||||
@@ -101,7 +102,7 @@ class GfxCoreHelper {
|
||||
virtual uint32_t calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const = 0;
|
||||
virtual uint32_t calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const = 0;
|
||||
virtual uint32_t alignSlmSize(uint32_t slmSize) const = 0;
|
||||
virtual uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const = 0;
|
||||
virtual uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) const = 0;
|
||||
|
||||
virtual bool isWaDisableRccRhwoOptimizationRequired() const = 0;
|
||||
virtual uint32_t getMinimalSIMDSize() const = 0;
|
||||
@@ -313,7 +314,7 @@ class GfxCoreHelperHw : public GfxCoreHelper {
|
||||
|
||||
uint32_t alignSlmSize(uint32_t slmSize) const override;
|
||||
|
||||
uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const override;
|
||||
uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) const override;
|
||||
|
||||
static AuxTranslationMode getAuxTranslationMode(const HardwareInfo &hwInfo);
|
||||
|
||||
|
||||
@@ -406,8 +406,8 @@ uint32_t GfxCoreHelperHw<GfxFamily>::alignSlmSize(uint32_t slmSize) const {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t GfxCoreHelperHw<GfxFamily>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const {
|
||||
return EncodeDispatchKernel<GfxFamily>::computeSlmValues(hwInfo, slmSize);
|
||||
uint32_t GfxCoreHelperHw<GfxFamily>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) const {
|
||||
return EncodeDispatchKernel<GfxFamily>::computeSlmValues(hwInfo, slmSize, releaseHelper, isHeapless);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -63,6 +63,7 @@ class ReleaseHelper {
|
||||
virtual const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const = 0;
|
||||
virtual bool isNumRtStacksPerDssFixedValue() const = 0;
|
||||
virtual bool getFtrXe2Compression() const = 0;
|
||||
virtual uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const = 0;
|
||||
|
||||
protected:
|
||||
ReleaseHelper(HardwareIpVersion hardwareIpVersion) : hardwareIpVersion(hardwareIpVersion) {}
|
||||
@@ -105,6 +106,7 @@ class ReleaseHelperHw : public ReleaseHelper {
|
||||
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override;
|
||||
bool isNumRtStacksPerDssFixedValue() const override;
|
||||
bool getFtrXe2Compression() const override;
|
||||
uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const override;
|
||||
|
||||
protected:
|
||||
ReleaseHelperHw(HardwareIpVersion hardwareIpVersion) : ReleaseHelper(hardwareIpVersion) {}
|
||||
|
||||
@@ -162,4 +162,10 @@ template <ReleaseType releaseType>
|
||||
bool ReleaseHelperHw<releaseType>::getFtrXe2Compression() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Generic ReleaseHelperHw implementation of computeSlmValues.
// Neither slmSize nor isHeapless is read here; the function always returns 0.
// NOTE(review): presumably a placeholder for releases without a dedicated
// SLM encoding - confirm against callers before relying on the returned value.
template <ReleaseType releaseType>
|
||||
uint32_t ReleaseHelperHw<releaseType>::computeSlmValues(uint32_t slmSize, bool isHeapless) const {
|
||||
return 0u;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -144,7 +144,7 @@ uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
|
||||
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize, ReleaseHelper *releaseHelper, bool isHeapless) {
|
||||
auto slmValue = std::max(slmSize, 1024u);
|
||||
slmValue = Math::nextPowerOfTwo(slmValue);
|
||||
slmValue = Math::getMinLsbSet(slmValue);
|
||||
|
||||
@@ -42,6 +42,7 @@ class MockReleaseHelper : public ReleaseHelper {
|
||||
ADDMETHOD_CONST_NOBASE(isNumRtStacksPerDssFixedValue, bool, true, ());
|
||||
ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ());
|
||||
ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless));
|
||||
|
||||
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override {
|
||||
static SizeToPreferredSlmValueArray sizeToPreferredSlmValue = {};
|
||||
|
||||
@@ -263,7 +263,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterT
|
||||
auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(cmdContainer->getIddBlock());
|
||||
auto &gfxcoreHelper = this->getHelper<GfxCoreHelper>();
|
||||
uint32_t expectedValue = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(
|
||||
gfxcoreHelper.computeSlmValues(pDevice->getHardwareInfo(), slmTotalSize));
|
||||
gfxcoreHelper.computeSlmValues(pDevice->getHardwareInfo(), slmTotalSize, nullptr, false));
|
||||
|
||||
EXPECT_EQ(expectedValue, interfaceDescriptorData->getSharedLocalMemorySize());
|
||||
}
|
||||
|
||||
@@ -10,11 +10,13 @@
|
||||
#include "shared/source/command_container/walker_partition_xehp_and_later.h"
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/simd_helper.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
#include "shared/source/release_helper/release_helper.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
@@ -59,9 +61,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterTha
|
||||
auto cmd = genCmdCast<DefaultWalkerType *>(*itor);
|
||||
auto &idd = cmd->getInterfaceDescriptor();
|
||||
auto &gfxcoreHelper = this->getHelper<GfxCoreHelper>();
|
||||
auto releaseHelper = ReleaseHelper::create(pDevice->getHardwareInfo().ipVersion);
|
||||
bool isHeapless = pDevice->getCompilerProductHelper().isHeaplessModeEnabled();
|
||||
|
||||
uint32_t expectedValue = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(
|
||||
gfxcoreHelper.computeSlmValues(pDevice->getHardwareInfo(), slmTotalSize));
|
||||
gfxcoreHelper.computeSlmValues(pDevice->getHardwareInfo(), slmTotalSize, releaseHelper.get(), isHeapless));
|
||||
|
||||
EXPECT_EQ(expectedValue, idd.getSharedLocalMemorySize());
|
||||
}
|
||||
|
||||
@@ -1192,28 +1192,28 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, GfxCoreHelperTest, GivenVariousValuesWhenAlignSlm
|
||||
HWCMDTEST_F(IGFX_GEN12LP_CORE, GfxCoreHelperTest, GivenVariousValuesWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) {
|
||||
auto hwInfo = *defaultHwInfo;
|
||||
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
EXPECT_EQ(0u, gfxCoreHelper.computeSlmValues(hwInfo, 0));
|
||||
EXPECT_EQ(1u, gfxCoreHelper.computeSlmValues(hwInfo, 1));
|
||||
EXPECT_EQ(1u, gfxCoreHelper.computeSlmValues(hwInfo, 1024));
|
||||
EXPECT_EQ(2u, gfxCoreHelper.computeSlmValues(hwInfo, 1025));
|
||||
EXPECT_EQ(2u, gfxCoreHelper.computeSlmValues(hwInfo, 2048));
|
||||
EXPECT_EQ(3u, gfxCoreHelper.computeSlmValues(hwInfo, 2049));
|
||||
EXPECT_EQ(3u, gfxCoreHelper.computeSlmValues(hwInfo, 4096));
|
||||
EXPECT_EQ(4u, gfxCoreHelper.computeSlmValues(hwInfo, 4097));
|
||||
EXPECT_EQ(4u, gfxCoreHelper.computeSlmValues(hwInfo, 8192));
|
||||
EXPECT_EQ(5u, gfxCoreHelper.computeSlmValues(hwInfo, 8193));
|
||||
EXPECT_EQ(5u, gfxCoreHelper.computeSlmValues(hwInfo, 16384));
|
||||
EXPECT_EQ(6u, gfxCoreHelper.computeSlmValues(hwInfo, 16385));
|
||||
EXPECT_EQ(6u, gfxCoreHelper.computeSlmValues(hwInfo, 32768));
|
||||
EXPECT_EQ(7u, gfxCoreHelper.computeSlmValues(hwInfo, 32769));
|
||||
EXPECT_EQ(7u, gfxCoreHelper.computeSlmValues(hwInfo, 65536));
|
||||
EXPECT_EQ(0u, gfxCoreHelper.computeSlmValues(hwInfo, 0, nullptr, false));
|
||||
EXPECT_EQ(1u, gfxCoreHelper.computeSlmValues(hwInfo, 1, nullptr, false));
|
||||
EXPECT_EQ(1u, gfxCoreHelper.computeSlmValues(hwInfo, 1024, nullptr, false));
|
||||
EXPECT_EQ(2u, gfxCoreHelper.computeSlmValues(hwInfo, 1025, nullptr, false));
|
||||
EXPECT_EQ(2u, gfxCoreHelper.computeSlmValues(hwInfo, 2048, nullptr, false));
|
||||
EXPECT_EQ(3u, gfxCoreHelper.computeSlmValues(hwInfo, 2049, nullptr, false));
|
||||
EXPECT_EQ(3u, gfxCoreHelper.computeSlmValues(hwInfo, 4096, nullptr, false));
|
||||
EXPECT_EQ(4u, gfxCoreHelper.computeSlmValues(hwInfo, 4097, nullptr, false));
|
||||
EXPECT_EQ(4u, gfxCoreHelper.computeSlmValues(hwInfo, 8192, nullptr, false));
|
||||
EXPECT_EQ(5u, gfxCoreHelper.computeSlmValues(hwInfo, 8193, nullptr, false));
|
||||
EXPECT_EQ(5u, gfxCoreHelper.computeSlmValues(hwInfo, 16384, nullptr, false));
|
||||
EXPECT_EQ(6u, gfxCoreHelper.computeSlmValues(hwInfo, 16385, nullptr, false));
|
||||
EXPECT_EQ(6u, gfxCoreHelper.computeSlmValues(hwInfo, 32768, nullptr, false));
|
||||
EXPECT_EQ(7u, gfxCoreHelper.computeSlmValues(hwInfo, 32769, nullptr, false));
|
||||
EXPECT_EQ(7u, gfxCoreHelper.computeSlmValues(hwInfo, 65536, nullptr, false));
|
||||
}
|
||||
|
||||
HWTEST_F(GfxCoreHelperTest, GivenZeroSlmSizeWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned) {
|
||||
using SHARED_LOCAL_MEMORY_SIZE = typename FamilyType::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
|
||||
auto hwInfo = *defaultHwInfo;
|
||||
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
auto receivedSlmSize = static_cast<SHARED_LOCAL_MEMORY_SIZE>(gfxCoreHelper.computeSlmValues(hwInfo, 0));
|
||||
auto receivedSlmSize = static_cast<SHARED_LOCAL_MEMORY_SIZE>(gfxCoreHelper.computeSlmValues(hwInfo, 0, nullptr, false));
|
||||
EXPECT_EQ(SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_SLM_ENCODES_0K, receivedSlmSize);
|
||||
}
|
||||
|
||||
|
||||
@@ -84,8 +84,8 @@ XE2_HPG_CORETEST_F(Xe2HpgCoreDeviceCaps, givenSlmSizeWhenEncodingThenReturnCorre
|
||||
{11, 128 * MemoryConstants::kiloByte}};
|
||||
|
||||
for (const auto &testInput : computeSlmValuesXe2AndLaterTestsInput) {
|
||||
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize));
|
||||
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize, nullptr, false));
|
||||
}
|
||||
|
||||
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 128 * MemoryConstants::kiloByte + 1), std::exception);
|
||||
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 128 * MemoryConstants::kiloByte + 1, nullptr, false), std::exception);
|
||||
}
|
||||
|
||||
@@ -96,8 +96,8 @@ XE3_CORETEST_F(Xe3CoreDeviceCaps, givenSlmSizeWhenEncodingThenReturnCorrectValue
|
||||
{11, 128 * MemoryConstants::kiloByte}};
|
||||
|
||||
for (const auto &testInput : computeSlmValuesXe3AndLaterTestsInput) {
|
||||
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize));
|
||||
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize, nullptr, false));
|
||||
}
|
||||
|
||||
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 128 * MemoryConstants::kiloByte + 1), std::exception);
|
||||
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 128 * MemoryConstants::kiloByte + 1, nullptr, false), std::exception);
|
||||
}
|
||||
|
||||
@@ -51,10 +51,10 @@ XE_HPC_CORETEST_F(GfxCoreHelperXeHpcCoreTest, givenSlmSizeWhenEncodingThenReturn
|
||||
auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<GfxCoreHelper>();
|
||||
|
||||
for (auto &testInput : computeSlmValuesXeHpcTestsInput) {
|
||||
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize));
|
||||
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hwInfo, testInput.slmSize, nullptr, false));
|
||||
}
|
||||
|
||||
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 129 * MemoryConstants::kiloByte), std::exception);
|
||||
EXPECT_THROW(gfxCoreHelper.computeSlmValues(hwInfo, 129 * MemoryConstants::kiloByte, nullptr, false), std::exception);
|
||||
}
|
||||
|
||||
XE_HPC_CORETEST_F(GfxCoreHelperXeHpcCoreTest, WhenGettingIsCpuImageTransferPreferredThenTrueIsReturned) {
|
||||
|
||||
@@ -320,7 +320,7 @@ XE_HPG_CORETEST_F(GfxCoreHelperTestXeHpgCore, GivenVariousValuesWhenComputeSlmSi
|
||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
auto hardwareInfo = *defaultHwInfo;
|
||||
for (auto &testInput : computeSlmValuesXeHpgTestsInput) {
|
||||
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hardwareInfo, testInput.slmSize));
|
||||
EXPECT_EQ(testInput.expected, gfxCoreHelper.computeSlmValues(hardwareInfo, testInput.slmSize, nullptr, false));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user