performance: replace virtual calls with native class methods

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
2026-01-04 07:14:10 +08:00 · 2024-09-10 22:26:23 +00:00
parent b0420fbf40
commit 7ce4a8adc2
12 changed files with 172 additions and 212 deletions
--- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp
+++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp
@@ -34,8 +34,8 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(InterfaceDescriptor
    auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
    const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
    const uint32_t workGroupCountPerDss = threadsPerDssCount / threadsPerThreadGroup;
-    auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
-    const uint32_t workgroupSlmSize = gfxCoreHelper.alignSlmSize(slmTotalSize);
+
+    const uint32_t workgroupSlmSize = EncodeDispatchKernel<Family>::alignSlmSize(slmTotalSize);

    uint32_t slmSize = 0u;

@@ -195,6 +195,30 @@ void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32
    }
 }

+// Aligns an SLM size: zero stays zero, anything else is raised to at least 1024 and rounded up to a power of two.
+template <>
+uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
+    if (slmSize != 0u) {
+        slmSize = Math::nextPowerOfTwo(std::max(slmSize, 1024u));
+        // An aligned size above 64 * MemoryConstants::kiloByte trips UNRECOVERABLE_IF.
+        UNRECOVERABLE_IF(slmSize > 64u * MemoryConstants::kiloByte);
+    }
+    return slmSize;
+}
+
+// Encodes an SLM size for this family: the size is raised to at least 1024, rounded up to a power of two,
+// passed through Math::getMinLsbSet and reduced by 9; a zero slmSize always yields 0. hwInfo is not read here.
+template <>
+uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
+    auto slmValue = std::max(slmSize, 1024u);
+    slmValue = Math::nextPowerOfTwo(slmValue);
+    slmValue = Math::getMinLsbSet(slmValue);
+    slmValue -= 9;
+    DEBUG_BREAK_IF(slmValue > 7);
+    slmValue *= !!slmSize;
+    return slmValue;
+}
+
 template <>
 void adjustL3ControlField<Family>(void *l3ControlBuffer) {
    using L3_CONTROL = typename Family::L3_CONTROL;
--- a/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp
+++ b/shared/source/xe_hpg_core/gfx_core_helper_xe_hpg_core.cpp
@@ -107,19 +107,6 @@ bool GfxCoreHelperHw<Family>::isBufferSizeSuitableForCompression(const size_t si
    }
 }

-template <>
-uint32_t GfxCoreHelperHw<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const {
-    using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
-
-    auto slmValue = std::max(slmSize, 1024u);
-    slmValue = Math::nextPowerOfTwo(slmValue);
-    slmValue = Math::getMinLsbSet(slmValue);
-    slmValue = slmValue - 9;
-    DEBUG_BREAK_IF(slmValue > 7);
-    slmValue *= !!slmSize;
-    return slmValue;
-}
-
 template <>
 bool GfxCoreHelperHw<Family>::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const {
    if (debugManager.flags.ExperimentalCopyThroughLock.get() != -1) {