refactor: correct programming of SLM size

Unify reading of the HW-available SLM size:
Pre-Xe2 SLMSizeInKb stores total SLM size across all DSS
Xe2+ SLMSizeInKb stores SLM size per SS

apply restrictions for preferred/programmable SLM size

Related-To: NEO-12949
Signed-off-by: Marcel Skierkowski <marcel.skierkowski@intel.com>
This commit is contained in:
Marcel Skierkowski
2025-09-30 12:38:50 +00:00
committed by Compute-Runtime-Automation
parent 162f38972f
commit 047f6851f7
48 changed files with 302 additions and 17 deletions

View File

@@ -234,6 +234,9 @@ struct EncodeDispatchKernel : public EncodeDispatchKernelBase<GfxFamily> {
static void setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
// Programs the shared local memory size of the given interface descriptor.
// The definitions visible in this change clamp slmTotalSize (bytes) to the smaller of
// capabilityTable.maxProgrammableSlmSize and ProductHelper::getActualHwSlmSize() (both in KB),
// pass the result through computeSlmValues() and write it with setSharedLocalMemorySize().
// A value of OverrideSlmAllocationSize other than -1 replaces the computed value.
template <typename InterfaceDescriptorType>
static void setupProgrammableSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t slmTotalSize, bool heaplessModeEnabled);
static uint32_t getThreadCountPerSubslice(const HardwareInfo &hwInfo);
static uint32_t alignPreferredSlmSize(uint32_t slmSize);

View File

@@ -11,6 +11,7 @@ template struct NEO::EncodeDispatchKernel<Family>;
template void NEO::EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields<Family::DefaultWalkerType>(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
template void NEO::EncodeDispatchKernel<Family>::setGrfInfo<Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, uint32_t grfCount, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment);
template void NEO::EncodeDispatchKernel<Family>::setupPreferredSlmSize<Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
template void NEO::EncodeDispatchKernel<Family>::setupProgrammableSlmSize<Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t slmTotalSize, bool heaplessModeEnabled);
template void NEO::EncodeDispatchKernel<Family>::encodeThreadGroupDispatch<Family::DefaultWalkerType, Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t requiredThreadGroupDispatchSize, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, Family::DefaultWalkerType &walkerCmd);
template void NEO::EncodeDispatchKernel<Family>::encode<Family::DefaultWalkerType>(CommandContainer &container, EncodeDispatchKernelArgs &args);
template void NEO::EncodeDispatchKernel<Family>::encodeThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);

View File

@@ -120,13 +120,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(&idd, kernelDescriptor, args.defaultPipelinedThreadArbitrationPolicy);
auto releaseHelper = rootDeviceEnvironment.getReleaseHelper();
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize(), releaseHelper, heaplessModeEnabled);
if (debugManager.flags.OverrideSlmAllocationSize.get() != -1) {
slmSize = static_cast<uint32_t>(debugManager.flags.OverrideSlmAllocationSize.get());
}
idd.setSharedLocalMemorySize(slmSize);
EncodeDispatchKernel<Family>::setupProgrammableSlmSize(&idd, rootDeviceEnvironment, args.dispatchInterface->getSlmTotalSize(), heaplessModeEnabled);
auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
bool sshProgrammingRequired = true;
@@ -1042,6 +1036,9 @@ void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType
break;
}
uint32_t actualHwSlmSizeKb = rootDeviceEnvironment.getProductHelper().getActualHwSlmSize(rootDeviceEnvironment);
slmSize = std::min(slmSize, static_cast<uint32_t>(actualHwSlmSizeKb * MemoryConstants::kiloByte));
constexpr bool isHeapless = Family::template isInterfaceDescriptorHeaplessMode<InterfaceDescriptorType>();
auto releaseHelper = rootDeviceEnvironment.getReleaseHelper();
@@ -1062,6 +1059,24 @@ void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType
pInterfaceDescriptor->setPreferredSlmAllocationSize(static_cast<PREFERRED_SLM_ALLOCATION_SIZE>(programmableIdPreferredSlmSize));
}
// Programs the shared local memory size field of the interface descriptor.
//
// pInterfaceDescriptor  - descriptor that receives the value via setSharedLocalMemorySize()
// rootDeviceEnvironment - source of the hardware info, release helper and product helper
// slmTotalSize          - requested SLM size in bytes
// heaplessModeEnabled   - forwarded unchanged to computeSlmValues()
template <typename Family>
template <typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::setupProgrammableSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t slmTotalSize, bool heaplessModeEnabled) {
    const auto &hardwareInfo = *rootDeviceEnvironment.getHardwareInfo();
    auto releaseHelper = rootDeviceEnvironment.getReleaseHelper();

    // The request may exceed neither the programmable maximum from the capability table
    // nor the SLM size reported by the product helper (both expressed in KB).
    const uint32_t hwSlmLimitKb = rootDeviceEnvironment.getProductHelper().getActualHwSlmSize(rootDeviceEnvironment);
    const auto slmLimitKb = std::min(hardwareInfo.capabilityTable.maxProgrammableSlmSize, hwSlmLimitKb);
    const auto slmLimitBytes = static_cast<uint32_t>(slmLimitKb * MemoryConstants::kiloByte);
    const auto clampedSlmSize = std::min(slmTotalSize, slmLimitBytes);

    auto descriptorSlmValue = EncodeDispatchKernel<Family>::computeSlmValues(hardwareInfo, clampedSlmSize, releaseHelper, heaplessModeEnabled);

    // The debug flag, when set to anything other than -1, replaces the computed value.
    const auto slmOverride = debugManager.flags.OverrideSlmAllocationSize.get();
    if (slmOverride != -1) {
        descriptorSlmValue = static_cast<uint32_t>(slmOverride);
    }

    pInterfaceDescriptor->setSharedLocalMemorySize(descriptorSlmValue);
}
template <typename Family>
size_t EncodeStates<Family>::getSshHeapSize() {
return 2 * MemoryConstants::megaByte;

View File

@@ -103,8 +103,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
kernelDescriptor,
hwInfo);
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize(), nullptr, false);
idd.setSharedLocalMemorySize(slmSize);
EncodeDispatchKernel<Family>::setupProgrammableSlmSize(&idd, args.device->getRootDeviceEnvironment(), args.dispatchInterface->getSlmTotalSize(), false);
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
uint32_t bindingTablePointer = 0u;
@@ -444,6 +444,24 @@ template <typename Family>
template <typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
// Programs the shared local memory size field of the interface descriptor.
// This variant always passes a null release helper to computeSlmValues().
//
// pInterfaceDescriptor  - descriptor that receives the value via setSharedLocalMemorySize()
// rootDeviceEnvironment - source of the hardware info and product helper
// slmTotalSize          - requested SLM size in bytes
// heaplessModeEnabled   - forwarded unchanged to computeSlmValues()
template <typename Family>
template <typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::setupProgrammableSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t slmTotalSize, bool heaplessModeEnabled) {
    const auto &hardwareInfo = *rootDeviceEnvironment.getHardwareInfo();

    // Upper bound in KB: the smaller of the capability-table maximum and the size
    // reported by the product helper.
    const uint32_t hwSlmLimitKb = rootDeviceEnvironment.getProductHelper().getActualHwSlmSize(rootDeviceEnvironment);
    const auto slmLimitKb = std::min(hardwareInfo.capabilityTable.maxProgrammableSlmSize, hwSlmLimitKb);
    const auto slmLimitBytes = static_cast<uint32_t>(slmLimitKb * MemoryConstants::kiloByte);
    const auto clampedSlmSize = std::min(slmTotalSize, slmLimitBytes);

    auto descriptorSlmValue = EncodeDispatchKernel<Family>::computeSlmValues(hardwareInfo, clampedSlmSize, nullptr, heaplessModeEnabled);

    // The debug flag, when set to anything other than -1, replaces the computed value.
    const auto slmOverride = debugManager.flags.OverrideSlmAllocationSize.get();
    if (slmOverride != -1) {
        descriptorSlmValue = static_cast<uint32_t>(slmOverride);
    }

    pInterfaceDescriptor->setSharedLocalMemorySize(descriptorSlmValue);
}
template <typename Family>
inline bool EncodeDispatchKernel<Family>::isDshNeeded(const DeviceInfo &deviceInfo) {
return true;

View File

@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_N;
#include "shared/source/gen12lp/adln/os_agnostic_product_helper_adln.inl"
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
template class NEO::ProductHelperHw<gfxProduct>;

View File

@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_P;
#include "shared/source/gen12lp/adlp/os_agnostic_product_helper_adlp.inl"
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
namespace NEO {
template <>

View File

@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_S;
#include "shared/source/gen12lp/adls/os_agnostic_product_helper_adls.inl"
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
template class NEO::ProductHelperHw<gfxProduct>;

View File

@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_DG1;
#include "shared/source/gen12lp/dg1/os_agnostic_product_helper_dg1.inl"
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
namespace NEO {
template <>

View File

@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_ROCKETLAKE;
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/gen12lp/rkl/os_agnostic_product_helper_rkl.inl"
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
namespace NEO {
template <>

View File

@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_TIGERLAKE_LP;
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/gen12lp/tgllp/os_agnostic_product_helper_tgllp.inl"
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
namespace NEO {

View File

@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_N;
#include "shared/source/gen12lp/adln/os_agnostic_product_helper_adln.inl"
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
template class NEO::ProductHelperHw<gfxProduct>;

View File

@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_P;
#include "shared/source/gen12lp/adlp/os_agnostic_product_helper_adlp.inl"
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
template class NEO::ProductHelperHw<gfxProduct>;

View File

@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_S;
#include "shared/source/gen12lp/adls/os_agnostic_product_helper_adls.inl"
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
template class NEO::ProductHelperHw<gfxProduct>;

View File

@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_DG1;
#include "shared/source/gen12lp/dg1/os_agnostic_product_helper_dg1.inl"
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
namespace NEO {

View File

@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ROCKETLAKE;
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/gen12lp/rkl/os_agnostic_product_helper_rkl.inl"
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
template class NEO::ProductHelperHw<gfxProduct>;

View File

@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_TIGERLAKE_LP;
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
#include "shared/source/gen12lp/tgllp/os_agnostic_product_helper_tgllp.inl"
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
namespace NEO {

View File

@@ -146,6 +146,8 @@ bool DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(ExecutionE
rootDeviceEnvironment.setRcsExposure();
hardwareInfo->gtSystemInfo.SLMSizeInKb = hardwareInfo->capabilityTable.maxProgrammableSlmSize;
if (debugManager.flags.OverrideSlmSize.get() != -1) {
hardwareInfo->capabilityTable.maxProgrammableSlmSize = debugManager.flags.OverrideSlmSize.get();
hardwareInfo->gtSystemInfo.SLMSizeInKb = debugManager.flags.OverrideSlmSize.get();
@@ -194,6 +196,7 @@ static bool initHwDeviceIdResources(ExecutionEnvironment &executionEnvironment,
executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo()->platform.usRevId =
static_cast<unsigned short>(debugManager.flags.OverrideRevision.get());
}
if (debugManager.flags.OverrideSlmSize.get() != -1) {
auto hardwareInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo();
hardwareInfo->capabilityTable.maxProgrammableSlmSize = debugManager.flags.OverrideSlmSize.get();

View File

@@ -110,6 +110,18 @@ if(SUPPORT_PVC_AND_LATER)
list(APPEND NEO_CORE_OS_INTERFACE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/product_helper_xe_hpc_and_later.inl)
endif()
# Add the Xe2-and-later DRM SLM product helper snippet to the Linux OS interface
# sources when Xe2+ support is enabled.
if(SUPPORT_XE2_AND_LATER)
list(APPEND NEO_CORE_OS_INTERFACE_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/product_helper_xe2_and_later_drm_slm.inl
)
endif()
# Add the before-Xe2 DRM SLM product helper snippet to the Linux OS interface
# sources when Xe HPC (or earlier) support is enabled.
if(SUPPORT_XE_HPC_AND_BEFORE)
list(APPEND NEO_CORE_OS_INTERFACE_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/product_helper_before_xe2_drm_slm.inl
)
endif()
if(DISABLE_WDDM_LINUX)
list(APPEND NEO_CORE_OS_INTERFACE_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/product_helper_wddm_stub.cpp

View File

@@ -514,6 +514,10 @@ int Drm::setupHardwareInfo(const DeviceDescriptor *device, bool setupFeatureTabl
hwInfo->gtSystemInfo.NumThreadsPerEu = systemInfo->getNumThreadsPerEu();
}
if (hwInfo->gtSystemInfo.SLMSizeInKb == 0) {
hwInfo->gtSystemInfo.SLMSizeInKb = hwInfo->capabilityTable.maxProgrammableSlmSize;
}
auto &productHelper = rootDeviceEnvironment.getProductHelper();
auto capsReader = productHelper.getDeviceCapsReader(*this);
if (capsReader) {

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/os_interface/product_helper_hw.h"
namespace NEO {
// Returns the SLM size in KB read from gtSystemInfo.SLMSizeInKb.
// When the root device environment reports WDDM on Linux, the stored value is divided
// by DualSubSliceCount; otherwise it is returned unchanged.
// A DualSubSliceCount of zero would make the division undefined behavior, so in that
// case the stored value is returned undivided.
template <>
uint32_t ProductHelperHw<gfxProduct>::getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const {
    const auto &gtSystemInfo = rootDeviceEnvironment.getHardwareInfo()->gtSystemInfo;
    if (rootDeviceEnvironment.isWddmOnLinux() && gtSystemInfo.DualSubSliceCount != 0) {
        return gtSystemInfo.SLMSizeInKb / gtSystemInfo.DualSubSliceCount;
    }
    return gtSystemInfo.SLMSizeInKb;
}
} // namespace NEO

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/os_interface/product_helper_hw.h"
namespace NEO {
// Returns gtSystemInfo.SLMSizeInKb of the given root device environment unchanged.
template <>
uint32_t ProductHelperHw<gfxProduct>::getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const {
    return rootDeviceEnvironment.getHardwareInfo()->gtSystemInfo.SLMSizeInKb;
}
} // namespace NEO

View File

@@ -280,6 +280,8 @@ class ProductHelper {
virtual bool isInterruptSupported() const = 0;
virtual bool isCompressionFormatFromGmmRequired() const = 0;
// Returns the hardware SLM size in KB derived from gtSystemInfo.SLMSizeInKb.
// The implementations visible in this change either return the stored value directly
// or divide it by DualSubSliceCount, depending on product and OS interface.
virtual uint32_t getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
virtual bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const = 0;
virtual ~ProductHelper() = default;

View File

@@ -214,6 +214,7 @@ class ProductHelperHw : public ProductHelper {
bool isPidFdOrSocketForIpcSupported() const override;
bool checkBcsForDirectSubmissionStop() const override;
bool shouldRegisterEnqueuedWalkerWithProfiling() const override;
uint32_t getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
bool isInterruptSupported() const override;
bool isCompressionFormatFromGmmRequired() const override;

View File

@@ -123,6 +123,18 @@ set(NEO_CORE_OS_INTERFACE_WDDM
${CMAKE_CURRENT_SOURCE_DIR}/wddm${BRANCH_DIR_SUFFIX}/wddm_features_extra.cpp
)
# Add the Xe2-and-later WDDM product helper snippet to the WDDM OS interface
# sources when Xe2+ support is enabled.
if(SUPPORT_XE2_AND_LATER)
list(APPEND NEO_CORE_OS_INTERFACE_WDDM
${CMAKE_CURRENT_SOURCE_DIR}/product_helper_xe2_and_later_wddm.inl
)
endif()
# Add the before-Xe2 WDDM product helper snippet to the WDDM OS interface
# sources when Xe HPC (or earlier) support is enabled.
if(SUPPORT_XE_HPC_AND_BEFORE)
list(APPEND NEO_CORE_OS_INTERFACE_WDDM
${CMAKE_CURRENT_SOURCE_DIR}/product_helper_before_xe2_wddm.inl
)
endif()
if(NOT WIN32 AND NOT DISABLE_WDDM_LINUX)
list(APPEND NEO_CORE_OS_INTERFACE_WDDM
${CMAKE_CURRENT_SOURCE_DIR}/wddm/adapter_factory_create_dxcore.cpp

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/os_interface/product_helper_hw.h"
namespace NEO {
// Returns gtSystemInfo.SLMSizeInKb divided by gtSystemInfo.DualSubSliceCount, in KB.
// A DualSubSliceCount of zero would make the division undefined behavior, so in that
// case the stored value is returned undivided.
template <>
uint32_t ProductHelperHw<gfxProduct>::getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const {
    const auto &gtSystemInfo = rootDeviceEnvironment.getHardwareInfo()->gtSystemInfo;
    if (gtSystemInfo.DualSubSliceCount == 0) {
        return gtSystemInfo.SLMSizeInKb;
    }
    return gtSystemInfo.SLMSizeInKb / gtSystemInfo.DualSubSliceCount;
}
} // namespace NEO

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/execution_environment/root_device_environment.h"
namespace NEO {
// Returns gtSystemInfo.SLMSizeInKb of the given root device environment unchanged.
template <>
uint32_t ProductHelperHw<gfxProduct>::getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const {
    return rootDeviceEnvironment.getHardwareInfo()->gtSystemInfo.SLMSizeInKb;
}
} // namespace NEO

View File

@@ -108,6 +108,10 @@ bool Wddm::init() {
hardwareInfo->capabilityTable.instrumentationEnabled =
(hardwareInfo->capabilityTable.instrumentationEnabled && instrumentationEnabled);
if (hardwareInfo->gtSystemInfo.SLMSizeInKb == 0) {
hardwareInfo->gtSystemInfo.SLMSizeInKb = hardwareInfo->capabilityTable.maxProgrammableSlmSize;
}
DEBUG_BREAK_IF(hardwareInfo->gtSystemInfo.NumThreadsPerEu != hardwareInfo->gtSystemInfo.ThreadCount / hardwareInfo->gtSystemInfo.EUCount);
hardwareInfo->gtSystemInfo.NumThreadsPerEu = hardwareInfo->gtSystemInfo.ThreadCount / hardwareInfo->gtSystemInfo.EUCount;

View File

@@ -13,6 +13,7 @@
constexpr static auto gfxProduct = IGFX_BMG;
#include "shared/source/os_interface/linux/product_helper_mtl_and_later.inl"
#include "shared/source/os_interface/linux/product_helper_xe2_and_later_drm_slm.inl"
#include "shared/source/xe2_hpg_core/bmg/os_agnostic_product_helper_bmg.inl"
#include "shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl"

View File

@@ -13,6 +13,7 @@
constexpr static auto gfxProduct = IGFX_LUNARLAKE;
#include "shared/source/os_interface/linux/product_helper_mtl_and_later.inl"
#include "shared/source/os_interface/linux/product_helper_xe2_and_later_drm_slm.inl"
#include "shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl"
#include "shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl"

View File

@@ -12,6 +12,7 @@
constexpr static auto gfxProduct = IGFX_BMG;
#include "shared/source/helpers/windows/product_helper_dg2_and_later_discrete.inl"
#include "shared/source/os_interface/windows/product_helper_xe2_and_later_wddm.inl"
#include "shared/source/xe2_hpg_core/bmg/os_agnostic_product_helper_bmg.inl"
#include "shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl"

View File

@@ -11,6 +11,7 @@
constexpr static auto gfxProduct = IGFX_LUNARLAKE;
#include "shared/source/os_interface/windows/product_helper_xe2_and_later_wddm.inl"
#include "shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl"
#include "shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl"

View File

@@ -13,6 +13,7 @@
constexpr static auto gfxProduct = IGFX_PTL;
#include "shared/source/os_interface/linux/product_helper_mtl_and_later.inl"
#include "shared/source/os_interface/linux/product_helper_xe2_and_later_drm_slm.inl"
#include "shared/source/xe3_core/os_agnostic_product_helper_xe3_core.inl"
#include "shared/source/xe3_core/ptl/os_agnostic_product_helper_ptl.inl"

View File

@@ -11,6 +11,7 @@
constexpr static auto gfxProduct = IGFX_PTL;
#include "shared/source/os_interface/windows/product_helper_xe2_and_later_wddm.inl"
#include "shared/source/xe3_core/os_agnostic_product_helper_xe3_core.inl"
#include "shared/source/xe3_core/ptl/os_agnostic_product_helper_ptl.inl"

View File

@@ -27,6 +27,7 @@ const std::map<std::string, std::pair<uint32_t, uint32_t>> guidUuidOffsetMap = {
#include "shared/source/os_interface/linux/product_helper_uuid_xehp_and_later.inl"
#include "shared/source/os_interface/linux/product_helper_xe_hpc_and_later.inl"
} // namespace NEO
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
#include "shared/source/xe_hpc_core/os_agnostic_product_helper_xe_hpc_core.inl"
#include "shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl"
namespace NEO {

View File

@@ -11,6 +11,7 @@
constexpr static auto gfxProduct = IGFX_PVC;
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
#include "shared/source/xe_hpc_core/os_agnostic_product_helper_xe_hpc_core.inl"
#include "shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl"

View File

@@ -10,6 +10,7 @@
constexpr static auto gfxProduct = IGFX_ARROWLAKE;
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"
#include "shared/source/xe_hpg_core/xe_lpg/linux/product_helper_xe_lpg_linux.inl"
#include "shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl"

View File

@@ -14,6 +14,7 @@
constexpr static auto gfxProduct = IGFX_DG2;
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
#include "shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl"
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"

View File

@@ -10,6 +10,7 @@
constexpr static auto gfxProduct = IGFX_METEORLAKE;
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"
#include "shared/source/xe_hpg_core/xe_lpg/linux/product_helper_xe_lpg_linux.inl"
#include "shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl"

View File

@@ -10,6 +10,7 @@
constexpr static auto gfxProduct = IGFX_ARROWLAKE;
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"
#include "shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl"
#include "shared/source/xe_hpg_core/xe_lpg/windows/product_helper_xe_lpg_windows.inl"

View File

@@ -12,6 +12,7 @@
constexpr static auto gfxProduct = IGFX_DG2;
#include "shared/source/helpers/windows/product_helper_dg2_and_later_discrete.inl"
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
#include "shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl"
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"

View File

@@ -10,6 +10,7 @@
constexpr static auto gfxProduct = IGFX_METEORLAKE;
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"
#include "shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl"
#include "shared/source/xe_hpg_core/xe_lpg/windows/product_helper_xe_lpg_windows.inl"

View File

@@ -6,6 +6,7 @@
*/
#include "shared/source/aub_mem_dump/aub_mem_dump.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/hw_info.h"
@@ -496,6 +497,12 @@ template <>
void ProductHelperHw<IGFX_UNKNOWN>::overrideDirectSubmissionTimeouts(uint64_t &timeoutUs, uint64_t &maxTimeoutUs) const {
}
// Generic implementation: returns gtSystemInfo.SLMSizeInKb of the given root device
// environment unchanged.
template <PRODUCT_FAMILY gfxProduct>
uint32_t ProductHelperHw<gfxProduct>::getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const {
    return rootDeviceEnvironment.getHardwareInfo()->gtSystemInfo.SLMSizeInKb;
}
} // namespace NEO
#include "shared/source/os_interface/product_helper.inl"

View File

@@ -51,7 +51,7 @@ void adjustHwInfoForTests(HardwareInfo &hwInfoForTests, uint32_t euPerSubSlice,
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);
gtSystemInfo.MaxSlicesSupported = std::max(gtSystemInfo.MaxSlicesSupported, gtSystemInfo.SliceCount);
gtSystemInfo.MaxSubSlicesSupported = std::max(gtSystemInfo.MaxSubSlicesSupported, gtSystemInfo.SubSliceCount);
gtSystemInfo.SLMSizeInKb = 1;
gtSystemInfo.SLMSizeInKb = 999;
}
void adjustCsrType(TestMode testMode) {
if (testMode == TestMode::aubTestsWithTbx) {

View File

@@ -14,6 +14,7 @@
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/unit_test/fixtures/command_container_fixture.h"
@@ -187,3 +188,91 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenOverridePreferredSlmAllocatio
EXPECT_EQ(5u, static_cast<uint32_t>(idd.getPreferredSlmAllocationSize()));
}
// With SLMSizeInKb set to 32, requests above 32KB are expected to be encoded as 32K.
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenSlmTotalSizeExceedsHardwareLimitWhenSetPreferredSlmIsCalledThenSlmSizeIsClampedToHardwareLimit, IsAtLeastXe2HpgCore) {
    using InterfaceDescriptorType = typename FamilyType::DefaultWalkerType::InterfaceDescriptorType;
    using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    rootDeviceEnvironment.getMutableHardwareInfo()->gtSystemInfo.SLMSizeInKb = 32;

    const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest = {
        // requests within the 32KB limit keep their own encoding
        {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_SLM_ENCODES_0K},
        {16 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_SLM_ENCODES_16K},
        {32 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_SLM_ENCODES_32K},
        // requests above the limit are clamped to 32K
        {64 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_SLM_ENCODES_32K},
        {96 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_SLM_ENCODES_32K},
    };

    verifyPreferredSlmValues<FamilyType>(valuesToTest, rootDeviceEnvironment);
}
// With SLMSizeInKb = 32 and DualSubSliceCount = 2, the expected clamp depends on whether
// the product helper divides the stored size by the dual-subslice count (16KB) or
// returns it unchanged (32KB).
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenSlmTotalSizeExceedsHardwareLimitWhenSetPreferredSlmIsCalledThenSlmSizeIsClampedToHardwareLimit, IsXeCore) {
    using InterfaceDescriptorType = typename FamilyType::DefaultWalkerType::InterfaceDescriptorType;
    using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    auto &gtSystemInfo = rootDeviceEnvironment.getMutableHardwareInfo()->gtSystemInfo;
    gtSystemInfo.DualSubSliceCount = 2;
    gtSystemInfo.SubSliceCount = 2;
    gtSystemInfo.SLMSizeInKb = 32;

    const uint32_t reportedSlmSizeKb = rootDeviceEnvironment.getProductHelper().getActualHwSlmSize(rootDeviceEnvironment);
    const bool sizeIsDividedByDss = (reportedSlmSizeKb == gtSystemInfo.SLMSizeInKb / gtSystemInfo.DualSubSliceCount);

    // Divided case: the limit is 32KB / 2 DSS = 16KB, so 32KB and 64KB requests clamp to 16KB.
    // Undivided case: the limit is the full 32KB, so 32KB fits and 64KB clamps to 32KB.
    const auto clampedEncoding = sizeIsDividedByDss ? PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16KB
                                                    : PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32KB;

    const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest = {
        {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0KB},
        {16 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16KB},
        {32 * MemoryConstants::kiloByte, clampedEncoding},
        {64 * MemoryConstants::kiloByte, clampedEncoding},
    };

    verifyPreferredSlmValues<FamilyType>(valuesToTest, rootDeviceEnvironment);
}
// Forces the WDDM-on-Linux flag on the mock root device environment and checks that,
// with SLMSizeInKb = 32 and DualSubSliceCount = 2, requests above 16KB are encoded as 16KB.
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenWddmOnLinuxAndSlmTotalSizeExceedsHardwareLimitWhenSetPreferredSlmIsCalledThenSlmSizeIsClampedToHardwareLimit, IsXeCore) {
    using InterfaceDescriptorType = typename FamilyType::DefaultWalkerType::InterfaceDescriptorType;
    using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;

    auto *mockRootDeviceEnvironment = reinterpret_cast<MockRootDeviceEnvironment *>(&pDevice->getRootDeviceEnvironmentRef());
    mockRootDeviceEnvironment->isWddmOnLinuxEnable = true;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    auto &gtSystemInfo = rootDeviceEnvironment.getMutableHardwareInfo()->gtSystemInfo;
    gtSystemInfo.DualSubSliceCount = 2;
    gtSystemInfo.SLMSizeInKb = 32;

    // 32KB / 2 DSS = 16KB limit: anything larger is expected to clamp to the 16KB encoding.
    const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest = {
        {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0KB},
        {16 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16KB},
        {32 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16KB},
        {64 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16KB},
    };

    verifyPreferredSlmValues<FamilyType>(valuesToTest, rootDeviceEnvironment);
}

View File

@@ -318,3 +318,11 @@ TEST_F(DeviceFactoryOverrideTest, givenFailedProductHelperSetupHardwareInfoWhenP
EXPECT_EQ(false, rc);
EXPECT_EQ(1u, productHelper->setupHardwareInfoCalled);
}
// After preparing the device environments with default debug flags, gtSystemInfo.SLMSizeInKb
// of root device 0 is expected to equal capabilityTable.maxProgrammableSlmSize.
TEST_F(DeviceFactoryOverrideTest, givenDefaultHwInfoWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenSlmSizeInKbEqualsMaxProgrammableSlmSize) {
    DebugManagerStateRestore restore;

    const bool prepared = DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment);
    EXPECT_TRUE(prepared);

    const auto &hardwareInfo = *executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo();
    EXPECT_EQ(hardwareInfo.capabilityTable.maxProgrammableSlmSize, hardwareInfo.gtSystemInfo.SLMSizeInKb);
}