mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
refactor: correct programming of SLM size
Unify reading of the HW-available SLM size: pre-Xe2, SLMSizeInKb stores the total SLM size across all DSS; on Xe2+, SLMSizeInKb stores the SLM size per SS. Apply restrictions for the preferred/programmable SLM size. Related-To: NEO-12949 Signed-off-by: Marcel Skierkowski <marcel.skierkowski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
162f38972f
commit
047f6851f7
@@ -234,6 +234,9 @@ struct EncodeDispatchKernel : public EncodeDispatchKernelBase<GfxFamily> {
|
||||
static void setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
|
||||
const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
|
||||
|
||||
template <typename InterfaceDescriptorType>
|
||||
static void setupProgrammableSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t slmTotalSize, bool heaplessModeEnabled);
|
||||
|
||||
static uint32_t getThreadCountPerSubslice(const HardwareInfo &hwInfo);
|
||||
static uint32_t alignPreferredSlmSize(uint32_t slmSize);
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ template struct NEO::EncodeDispatchKernel<Family>;
|
||||
template void NEO::EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields<Family::DefaultWalkerType>(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setGrfInfo<Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, uint32_t grfCount, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setupPreferredSlmSize<Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setupProgrammableSlmSize<Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t slmTotalSize, bool heaplessModeEnabled);
|
||||
template void NEO::EncodeDispatchKernel<Family>::encodeThreadGroupDispatch<Family::DefaultWalkerType, Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t *threadGroupDimensions, const uint32_t threadGroupCount, const uint32_t requiredThreadGroupDispatchSize, const uint32_t grfCount, const uint32_t threadsPerThreadGroup, Family::DefaultWalkerType &walkerCmd);
|
||||
template void NEO::EncodeDispatchKernel<Family>::encode<Family::DefaultWalkerType>(CommandContainer &container, EncodeDispatchKernelArgs &args);
|
||||
template void NEO::EncodeDispatchKernel<Family>::encodeThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
@@ -120,13 +120,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
|
||||
EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(&idd, kernelDescriptor, args.defaultPipelinedThreadArbitrationPolicy);
|
||||
|
||||
auto releaseHelper = rootDeviceEnvironment.getReleaseHelper();
|
||||
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize(), releaseHelper, heaplessModeEnabled);
|
||||
|
||||
if (debugManager.flags.OverrideSlmAllocationSize.get() != -1) {
|
||||
slmSize = static_cast<uint32_t>(debugManager.flags.OverrideSlmAllocationSize.get());
|
||||
}
|
||||
idd.setSharedLocalMemorySize(slmSize);
|
||||
EncodeDispatchKernel<Family>::setupProgrammableSlmSize(&idd, rootDeviceEnvironment, args.dispatchInterface->getSlmTotalSize(), heaplessModeEnabled);
|
||||
|
||||
auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||
bool sshProgrammingRequired = true;
|
||||
@@ -1042,6 +1036,9 @@ void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType
|
||||
break;
|
||||
}
|
||||
|
||||
uint32_t actualHwSlmSizeKb = rootDeviceEnvironment.getProductHelper().getActualHwSlmSize(rootDeviceEnvironment);
|
||||
slmSize = std::min(slmSize, static_cast<uint32_t>(actualHwSlmSizeKb * MemoryConstants::kiloByte));
|
||||
|
||||
constexpr bool isHeapless = Family::template isInterfaceDescriptorHeaplessMode<InterfaceDescriptorType>();
|
||||
|
||||
auto releaseHelper = rootDeviceEnvironment.getReleaseHelper();
|
||||
@@ -1062,6 +1059,24 @@ void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType
|
||||
pInterfaceDescriptor->setPreferredSlmAllocationSize(static_cast<PREFERRED_SLM_ALLOCATION_SIZE>(programmableIdPreferredSlmSize));
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::setupProgrammableSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t slmTotalSize, bool heaplessModeEnabled) {
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
auto releaseHelper = rootDeviceEnvironment.getReleaseHelper();
|
||||
|
||||
uint32_t actualHwSlmSizeKb = rootDeviceEnvironment.getProductHelper().getActualHwSlmSize(rootDeviceEnvironment);
|
||||
auto maxProgrammableSlmSizeKb = std::min(hwInfo.capabilityTable.maxProgrammableSlmSize, actualHwSlmSizeKb);
|
||||
auto programmableSlmSize = std::min(slmTotalSize, static_cast<uint32_t>(maxProgrammableSlmSizeKb * MemoryConstants::kiloByte));
|
||||
|
||||
auto programmableIDSLMSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, programmableSlmSize, releaseHelper, heaplessModeEnabled);
|
||||
|
||||
if (debugManager.flags.OverrideSlmAllocationSize.get() != -1) {
|
||||
programmableIDSLMSize = static_cast<uint32_t>(debugManager.flags.OverrideSlmAllocationSize.get());
|
||||
}
|
||||
pInterfaceDescriptor->setSharedLocalMemorySize(programmableIDSLMSize);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeStates<Family>::getSshHeapSize() {
|
||||
return 2 * MemoryConstants::megaByte;
|
||||
|
||||
@@ -103,8 +103,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
|
||||
kernelDescriptor,
|
||||
hwInfo);
|
||||
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize(), nullptr, false);
|
||||
idd.setSharedLocalMemorySize(slmSize);
|
||||
|
||||
EncodeDispatchKernel<Family>::setupProgrammableSlmSize(&idd, args.device->getRootDeviceEnvironment(), args.dispatchInterface->getSlmTotalSize(), false);
|
||||
|
||||
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||
uint32_t bindingTablePointer = 0u;
|
||||
@@ -444,6 +444,24 @@ template <typename Family>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
||||
|
||||
template <typename Family>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::setupProgrammableSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t slmTotalSize, bool heaplessModeEnabled) {
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
|
||||
uint32_t actualHwSlmSizeKb = rootDeviceEnvironment.getProductHelper().getActualHwSlmSize(rootDeviceEnvironment);
|
||||
auto maxProgrammableSlmSizeKb = std::min(hwInfo.capabilityTable.maxProgrammableSlmSize, actualHwSlmSizeKb);
|
||||
auto programmableSlmSize = std::min(slmTotalSize, static_cast<uint32_t>(maxProgrammableSlmSizeKb * MemoryConstants::kiloByte));
|
||||
|
||||
auto programmableIDSLMSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, programmableSlmSize, nullptr, heaplessModeEnabled);
|
||||
|
||||
if (debugManager.flags.OverrideSlmAllocationSize.get() != -1) {
|
||||
programmableIDSLMSize = static_cast<uint32_t>(debugManager.flags.OverrideSlmAllocationSize.get());
|
||||
}
|
||||
|
||||
pInterfaceDescriptor->setSharedLocalMemorySize(programmableIDSLMSize);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
inline bool EncodeDispatchKernel<Family>::isDshNeeded(const DeviceInfo &deviceInfo) {
|
||||
return true;
|
||||
|
||||
@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_N;
|
||||
|
||||
#include "shared/source/gen12lp/adln/os_agnostic_product_helper_adln.inl"
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
|
||||
|
||||
template class NEO::ProductHelperHw<gfxProduct>;
|
||||
|
||||
@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_P;
|
||||
|
||||
#include "shared/source/gen12lp/adlp/os_agnostic_product_helper_adlp.inl"
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
|
||||
@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_S;
|
||||
|
||||
#include "shared/source/gen12lp/adls/os_agnostic_product_helper_adls.inl"
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
|
||||
|
||||
template class NEO::ProductHelperHw<gfxProduct>;
|
||||
|
||||
@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_DG1;
|
||||
|
||||
#include "shared/source/gen12lp/dg1/os_agnostic_product_helper_dg1.inl"
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
|
||||
@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_ROCKETLAKE;
|
||||
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/gen12lp/rkl/os_agnostic_product_helper_rkl.inl"
|
||||
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
|
||||
@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_TIGERLAKE_LP;
|
||||
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/gen12lp/tgllp/os_agnostic_product_helper_tgllp.inl"
|
||||
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
||||
@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_N;
|
||||
|
||||
#include "shared/source/gen12lp/adln/os_agnostic_product_helper_adln.inl"
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
|
||||
|
||||
template class NEO::ProductHelperHw<gfxProduct>;
|
||||
|
||||
@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_P;
|
||||
|
||||
#include "shared/source/gen12lp/adlp/os_agnostic_product_helper_adlp.inl"
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
|
||||
|
||||
template class NEO::ProductHelperHw<gfxProduct>;
|
||||
|
||||
@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ALDERLAKE_S;
|
||||
|
||||
#include "shared/source/gen12lp/adls/os_agnostic_product_helper_adls.inl"
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
|
||||
|
||||
template class NEO::ProductHelperHw<gfxProduct>;
|
||||
|
||||
@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_DG1;
|
||||
|
||||
#include "shared/source/gen12lp/dg1/os_agnostic_product_helper_dg1.inl"
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
||||
@@ -14,5 +14,6 @@ constexpr static auto gfxProduct = IGFX_ROCKETLAKE;
|
||||
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/gen12lp/rkl/os_agnostic_product_helper_rkl.inl"
|
||||
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
|
||||
|
||||
template class NEO::ProductHelperHw<gfxProduct>;
|
||||
|
||||
@@ -14,6 +14,7 @@ constexpr static auto gfxProduct = IGFX_TIGERLAKE_LP;
|
||||
|
||||
#include "shared/source/gen12lp/os_agnostic_product_helper_gen12lp.inl"
|
||||
#include "shared/source/gen12lp/tgllp/os_agnostic_product_helper_tgllp.inl"
|
||||
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
||||
@@ -146,6 +146,8 @@ bool DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(ExecutionE
|
||||
|
||||
rootDeviceEnvironment.setRcsExposure();
|
||||
|
||||
hardwareInfo->gtSystemInfo.SLMSizeInKb = hardwareInfo->capabilityTable.maxProgrammableSlmSize;
|
||||
|
||||
if (debugManager.flags.OverrideSlmSize.get() != -1) {
|
||||
hardwareInfo->capabilityTable.maxProgrammableSlmSize = debugManager.flags.OverrideSlmSize.get();
|
||||
hardwareInfo->gtSystemInfo.SLMSizeInKb = debugManager.flags.OverrideSlmSize.get();
|
||||
@@ -194,6 +196,7 @@ static bool initHwDeviceIdResources(ExecutionEnvironment &executionEnvironment,
|
||||
executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo()->platform.usRevId =
|
||||
static_cast<unsigned short>(debugManager.flags.OverrideRevision.get());
|
||||
}
|
||||
|
||||
if (debugManager.flags.OverrideSlmSize.get() != -1) {
|
||||
auto hardwareInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo();
|
||||
hardwareInfo->capabilityTable.maxProgrammableSlmSize = debugManager.flags.OverrideSlmSize.get();
|
||||
|
||||
@@ -110,6 +110,18 @@ if(SUPPORT_PVC_AND_LATER)
|
||||
list(APPEND NEO_CORE_OS_INTERFACE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/product_helper_xe_hpc_and_later.inl)
|
||||
endif()
|
||||
|
||||
# Add the Xe2-and-later DRM SLM product helper to the Linux OS interface sources.
if(SUPPORT_XE2_AND_LATER)
  list(APPEND NEO_CORE_OS_INTERFACE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/product_helper_xe2_and_later_drm_slm.inl)
endif()
|
||||
|
||||
# Add the before-Xe2 DRM SLM product helper to the Linux OS interface sources.
if(SUPPORT_XE_HPC_AND_BEFORE)
  list(APPEND NEO_CORE_OS_INTERFACE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/product_helper_before_xe2_drm_slm.inl)
endif()
|
||||
|
||||
if(DISABLE_WDDM_LINUX)
|
||||
list(APPEND NEO_CORE_OS_INTERFACE_LINUX
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/product_helper_wddm_stub.cpp
|
||||
|
||||
@@ -514,6 +514,10 @@ int Drm::setupHardwareInfo(const DeviceDescriptor *device, bool setupFeatureTabl
|
||||
hwInfo->gtSystemInfo.NumThreadsPerEu = systemInfo->getNumThreadsPerEu();
|
||||
}
|
||||
|
||||
if (hwInfo->gtSystemInfo.SLMSizeInKb == 0) {
|
||||
hwInfo->gtSystemInfo.SLMSizeInKb = hwInfo->capabilityTable.maxProgrammableSlmSize;
|
||||
}
|
||||
|
||||
auto &productHelper = rootDeviceEnvironment.getProductHelper();
|
||||
auto capsReader = productHelper.getDeviceCapsReader(*this);
|
||||
if (capsReader) {
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/os_interface/product_helper_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
template <>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
if (rootDeviceEnvironment.isWddmOnLinux()) {
|
||||
return hwInfo.gtSystemInfo.SLMSizeInKb / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
} else {
|
||||
return hwInfo.gtSystemInfo.SLMSizeInKb;
|
||||
}
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/os_interface/product_helper_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
template <>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
return hwInfo.gtSystemInfo.SLMSizeInKb;
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -280,6 +280,8 @@ class ProductHelper {
|
||||
virtual bool isInterruptSupported() const = 0;
|
||||
virtual bool isCompressionFormatFromGmmRequired() const = 0;
|
||||
|
||||
virtual uint32_t getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
|
||||
|
||||
virtual bool getStorageInfoLocalOnlyFlag(LocalMemAllocationMode usmDeviceAllocationMode, bool defaultValue) const = 0;
|
||||
virtual ~ProductHelper() = default;
|
||||
|
||||
|
||||
@@ -214,6 +214,7 @@ class ProductHelperHw : public ProductHelper {
|
||||
bool isPidFdOrSocketForIpcSupported() const override;
|
||||
bool checkBcsForDirectSubmissionStop() const override;
|
||||
bool shouldRegisterEnqueuedWalkerWithProfiling() const override;
|
||||
uint32_t getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
|
||||
bool isInterruptSupported() const override;
|
||||
bool isCompressionFormatFromGmmRequired() const override;
|
||||
|
||||
|
||||
@@ -123,6 +123,18 @@ set(NEO_CORE_OS_INTERFACE_WDDM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm${BRANCH_DIR_SUFFIX}/wddm_features_extra.cpp
|
||||
)
|
||||
|
||||
# Add the Xe2-and-later WDDM SLM product helper to the WDDM OS interface sources.
if(SUPPORT_XE2_AND_LATER)
  list(APPEND NEO_CORE_OS_INTERFACE_WDDM ${CMAKE_CURRENT_SOURCE_DIR}/product_helper_xe2_and_later_wddm.inl)
endif()
|
||||
|
||||
# Add the before-Xe2 WDDM SLM product helper to the WDDM OS interface sources.
if(SUPPORT_XE_HPC_AND_BEFORE)
  list(APPEND NEO_CORE_OS_INTERFACE_WDDM ${CMAKE_CURRENT_SOURCE_DIR}/product_helper_before_xe2_wddm.inl)
endif()
|
||||
|
||||
if(NOT WIN32 AND NOT DISABLE_WDDM_LINUX)
|
||||
list(APPEND NEO_CORE_OS_INTERFACE_WDDM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm/adapter_factory_create_dxcore.cpp
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/os_interface/product_helper_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
template <>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
return hwInfo.gtSystemInfo.SLMSizeInKb / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
|
||||
namespace NEO {
|
||||
template <>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
return hwInfo.gtSystemInfo.SLMSizeInKb;
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -108,6 +108,10 @@ bool Wddm::init() {
|
||||
hardwareInfo->capabilityTable.instrumentationEnabled =
|
||||
(hardwareInfo->capabilityTable.instrumentationEnabled && instrumentationEnabled);
|
||||
|
||||
if (hardwareInfo->gtSystemInfo.SLMSizeInKb == 0) {
|
||||
hardwareInfo->gtSystemInfo.SLMSizeInKb = hardwareInfo->capabilityTable.maxProgrammableSlmSize;
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF(hardwareInfo->gtSystemInfo.NumThreadsPerEu != hardwareInfo->gtSystemInfo.ThreadCount / hardwareInfo->gtSystemInfo.EUCount);
|
||||
hardwareInfo->gtSystemInfo.NumThreadsPerEu = hardwareInfo->gtSystemInfo.ThreadCount / hardwareInfo->gtSystemInfo.EUCount;
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
constexpr static auto gfxProduct = IGFX_BMG;
|
||||
|
||||
#include "shared/source/os_interface/linux/product_helper_mtl_and_later.inl"
|
||||
#include "shared/source/os_interface/linux/product_helper_xe2_and_later_drm_slm.inl"
|
||||
#include "shared/source/xe2_hpg_core/bmg/os_agnostic_product_helper_bmg.inl"
|
||||
#include "shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl"
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
constexpr static auto gfxProduct = IGFX_LUNARLAKE;
|
||||
|
||||
#include "shared/source/os_interface/linux/product_helper_mtl_and_later.inl"
|
||||
#include "shared/source/os_interface/linux/product_helper_xe2_and_later_drm_slm.inl"
|
||||
#include "shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl"
|
||||
#include "shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl"
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
constexpr static auto gfxProduct = IGFX_BMG;
|
||||
|
||||
#include "shared/source/helpers/windows/product_helper_dg2_and_later_discrete.inl"
|
||||
#include "shared/source/os_interface/windows/product_helper_xe2_and_later_wddm.inl"
|
||||
#include "shared/source/xe2_hpg_core/bmg/os_agnostic_product_helper_bmg.inl"
|
||||
#include "shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl"
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
constexpr static auto gfxProduct = IGFX_LUNARLAKE;
|
||||
|
||||
#include "shared/source/os_interface/windows/product_helper_xe2_and_later_wddm.inl"
|
||||
#include "shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl"
|
||||
#include "shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl"
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
constexpr static auto gfxProduct = IGFX_PTL;
|
||||
|
||||
#include "shared/source/os_interface/linux/product_helper_mtl_and_later.inl"
|
||||
#include "shared/source/os_interface/linux/product_helper_xe2_and_later_drm_slm.inl"
|
||||
#include "shared/source/xe3_core/os_agnostic_product_helper_xe3_core.inl"
|
||||
#include "shared/source/xe3_core/ptl/os_agnostic_product_helper_ptl.inl"
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
constexpr static auto gfxProduct = IGFX_PTL;
|
||||
|
||||
#include "shared/source/os_interface/windows/product_helper_xe2_and_later_wddm.inl"
|
||||
#include "shared/source/xe3_core/os_agnostic_product_helper_xe3_core.inl"
|
||||
#include "shared/source/xe3_core/ptl/os_agnostic_product_helper_ptl.inl"
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ const std::map<std::string, std::pair<uint32_t, uint32_t>> guidUuidOffsetMap = {
|
||||
#include "shared/source/os_interface/linux/product_helper_uuid_xehp_and_later.inl"
|
||||
#include "shared/source/os_interface/linux/product_helper_xe_hpc_and_later.inl"
|
||||
} // namespace NEO
|
||||
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
|
||||
#include "shared/source/xe_hpc_core/os_agnostic_product_helper_xe_hpc_core.inl"
|
||||
#include "shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl"
|
||||
namespace NEO {
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
constexpr static auto gfxProduct = IGFX_PVC;
|
||||
|
||||
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
|
||||
#include "shared/source/xe_hpc_core/os_agnostic_product_helper_xe_hpc_core.inl"
|
||||
#include "shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl"
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
constexpr static auto gfxProduct = IGFX_ARROWLAKE;
|
||||
|
||||
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
|
||||
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"
|
||||
#include "shared/source/xe_hpg_core/xe_lpg/linux/product_helper_xe_lpg_linux.inl"
|
||||
#include "shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl"
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
constexpr static auto gfxProduct = IGFX_DG2;
|
||||
|
||||
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
|
||||
#include "shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl"
|
||||
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
constexpr static auto gfxProduct = IGFX_METEORLAKE;
|
||||
|
||||
#include "shared/source/os_interface/linux/product_helper_before_xe2_drm_slm.inl"
|
||||
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"
|
||||
#include "shared/source/xe_hpg_core/xe_lpg/linux/product_helper_xe_lpg_linux.inl"
|
||||
#include "shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl"
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
constexpr static auto gfxProduct = IGFX_ARROWLAKE;
|
||||
|
||||
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
|
||||
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"
|
||||
#include "shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl"
|
||||
#include "shared/source/xe_hpg_core/xe_lpg/windows/product_helper_xe_lpg_windows.inl"
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
constexpr static auto gfxProduct = IGFX_DG2;
|
||||
|
||||
#include "shared/source/helpers/windows/product_helper_dg2_and_later_discrete.inl"
|
||||
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
|
||||
#include "shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl"
|
||||
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
constexpr static auto gfxProduct = IGFX_METEORLAKE;
|
||||
|
||||
#include "shared/source/os_interface/windows/product_helper_before_xe2_wddm.inl"
|
||||
#include "shared/source/xe_hpg_core/os_agnostic_product_helper_xe_hpg_core.inl"
|
||||
#include "shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl"
|
||||
#include "shared/source/xe_hpg_core/xe_lpg/windows/product_helper_xe_lpg_windows.inl"
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/aub_mem_dump/aub_mem_dump.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/cache_policy.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
@@ -496,6 +497,12 @@ template <>
|
||||
void ProductHelperHw<IGFX_UNKNOWN>::overrideDirectSubmissionTimeouts(uint64_t &timeoutUs, uint64_t &maxTimeoutUs) const {
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getActualHwSlmSize(const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
return hwInfo.gtSystemInfo.SLMSizeInKb;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
#include "shared/source/os_interface/product_helper.inl"
|
||||
|
||||
@@ -51,7 +51,7 @@ void adjustHwInfoForTests(HardwareInfo &hwInfoForTests, uint32_t euPerSubSlice,
|
||||
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);
|
||||
gtSystemInfo.MaxSlicesSupported = std::max(gtSystemInfo.MaxSlicesSupported, gtSystemInfo.SliceCount);
|
||||
gtSystemInfo.MaxSubSlicesSupported = std::max(gtSystemInfo.MaxSubSlicesSupported, gtSystemInfo.SubSliceCount);
|
||||
gtSystemInfo.SLMSizeInKb = 1;
|
||||
gtSystemInfo.SLMSizeInKb = 999;
|
||||
}
|
||||
void adjustCsrType(TestMode testMode) {
|
||||
if (testMode == TestMode::aubTestsWithTbx) {
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
#include "shared/test/unit_test/fixtures/command_container_fixture.h"
|
||||
@@ -187,3 +188,91 @@ HWTEST2_F(CommandEncodeStatesTestDg2AndLater, givenOverridePreferredSlmAllocatio
|
||||
|
||||
EXPECT_EQ(5u, static_cast<uint32_t>(idd.getPreferredSlmAllocationSize()));
|
||||
}
|
||||
|
||||
// Xe2 and later: with SLMSizeInKb set to 32, requested SLM sizes above 32KB are expected to be
// programmed with the 32K preferred-SLM encoding.
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenSlmTotalSizeExceedsHardwareLimitWhenSetPreferredSlmIsCalledThenSlmSizeIsClampedToHardwareLimit, IsAtLeastXe2HpgCore) {
    using InterfaceDescriptorType = typename FamilyType::DefaultWalkerType::InterfaceDescriptorType;
    using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    rootDeviceEnvironment.getMutableHardwareInfo()->gtSystemInfo.SLMSizeInKb = 32;

    // SLMSizeInKb holds the per-subslice value here, so 32KB is the clamp for larger requests.
    constexpr auto clampedEncoding = PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_SLM_ENCODES_32K;

    const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest = {
        {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_SLM_ENCODES_0K},
        {16 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_SLM_ENCODES_16K},
        {32 * MemoryConstants::kiloByte, clampedEncoding},
        {64 * MemoryConstants::kiloByte, clampedEncoding},
        {96 * MemoryConstants::kiloByte, clampedEncoding},
    };

    verifyPreferredSlmValues<FamilyType>(valuesToTest, rootDeviceEnvironment);
}
|
||||
|
||||
// Xe cores (pre-Xe2): with SLMSizeInKb = 32 and two dual-subslices, the expected clamp depends on
// whether the product helper divides SLMSizeInKb by DualSubSliceCount (16KB) or not (32KB).
// NOTE(review): this test uses the same suite and test name as the IsAtLeastXe2HpgCore variant;
// confirm that HWTEST2_F produces distinct test classes for the two matchers.
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenSlmTotalSizeExceedsHardwareLimitWhenSetPreferredSlmIsCalledThenSlmSizeIsClampedToHardwareLimit, IsXeCore) {
    using InterfaceDescriptorType = typename FamilyType::DefaultWalkerType::InterfaceDescriptorType;
    using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    auto &gtSystemInfo = rootDeviceEnvironment.getMutableHardwareInfo()->gtSystemInfo;

    gtSystemInfo.DualSubSliceCount = 2;
    gtSystemInfo.SubSliceCount = 2;
    gtSystemInfo.SLMSizeInKb = 32;

    // Detect which pre-Xe2 method the product helper applies by comparing against the divided value.
    const uint32_t hwSlmSizeKb = rootDeviceEnvironment.getProductHelper().getActualHwSlmSize(rootDeviceEnvironment);
    const bool usesWddmPreXe2Method = (hwSlmSizeKb == gtSystemInfo.SLMSizeInKb / gtSystemInfo.DualSubSliceCount);

    // WDDM pre-Xe2: hardware limit is 32KB / 2 DSS = 16KB, so larger requests clamp to 16KB.
    // Linux DRM pre-Xe2: SLMSizeInKb is used undivided, so larger requests clamp to 32KB.
    const auto clampedEncoding = usesWddmPreXe2Method
                                     ? PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16KB
                                     : PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32KB;

    std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest = {
        {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0KB},
        {16 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16KB},
        {32 * MemoryConstants::kiloByte, clampedEncoding},
        {64 * MemoryConstants::kiloByte, clampedEncoding},
    };

    verifyPreferredSlmValues<FamilyType>(valuesToTest, rootDeviceEnvironment);
}
|
||||
|
||||
// Same clamping check as the generic variant, but with the root device environment flagged as
// WDDM-on-Linux, where the expected ceiling is SLMSizeInKb divided by the DSS count.
HWTEST2_F(CommandEncodeStatesTestDg2AndLater, GivenWddmOnLinuxAndSlmTotalSizeExceedsHardwareLimitWhenSetPreferredSlmIsCalledThenSlmSizeIsClampedToHardwareLimit, IsXeCore) {
    using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
    using INTERFACE_DESCRIPTOR_DATA = typename DefaultWalkerType::InterfaceDescriptorType;
    using PREFERRED_SLM_ALLOCATION_SIZE = typename INTERFACE_DESCRIPTOR_DATA::PREFERRED_SLM_ALLOCATION_SIZE;

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    auto &gtSystemInfo = rootDeviceEnvironment.getMutableHardwareInfo()->gtSystemInfo;

    // Switch the environment to report WDDM-on-Linux before configuring the SLM topology.
    auto *mockRootDeviceEnvironment = reinterpret_cast<MockRootDeviceEnvironment *>(&pDevice->getRootDeviceEnvironmentRef());
    mockRootDeviceEnvironment->isWddmOnLinuxEnable = true;

    gtSystemInfo.DualSubSliceCount = 2;
    gtSystemInfo.SLMSizeInKb = 32;

    // WDDM on Linux pre-Xe2: the available size is 32KB / 2 DSS = 16KB,
    // so every request beyond 16KB is expected to be clamped to the 16KB encoding.
    const std::vector<PreferredSlmTestValues<FamilyType>> valuesToTest = {
        {0, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_0KB},
        {16 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16KB},
        {32 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16KB},
        {64 * MemoryConstants::kiloByte, PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16KB},
    };

    verifyPreferredSlmValues<FamilyType>(valuesToTest, rootDeviceEnvironment);
}
|
||||
|
||||
@@ -318,3 +318,11 @@ TEST_F(DeviceFactoryOverrideTest, givenFailedProductHelperSetupHardwareInfoWhenP
|
||||
EXPECT_EQ(false, rc);
|
||||
EXPECT_EQ(1u, productHelper->setupHardwareInfoCalled);
|
||||
}
|
||||
|
||||
// After preparing device environments for a product-family override, the SLM size stored in
// gtSystemInfo is expected to equal the capability table's maximum programmable SLM size.
TEST_F(DeviceFactoryOverrideTest, givenDefaultHwInfoWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenSlmSizeInKbEqualsMaxProgrammableSlmSize) {
    DebugManagerStateRestore restore;

    EXPECT_TRUE(DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment));

    const auto hardwareInfo = executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo();
    const auto &expectedSlmSize = hardwareInfo->capabilityTable.maxProgrammableSlmSize;
    const auto &reportedSlmSize = hardwareInfo->gtSystemInfo.SLMSizeInKb;
    EXPECT_EQ(expectedSlmSize, reportedSlmSize);
}
|
||||
Reference in New Issue
Block a user