performance: replace virtual calls with native class methods

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2024-09-10 22:26:23 +00:00
committed by Compute-Runtime-Automation
parent b0420fbf40
commit 7ce4a8adc2
12 changed files with 172 additions and 212 deletions

View File

@@ -34,8 +34,8 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(InterfaceDescriptor
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
const uint32_t workGroupCountPerDss = threadsPerDssCount / threadsPerThreadGroup;
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
const uint32_t workgroupSlmSize = gfxCoreHelper.alignSlmSize(slmTotalSize);
const uint32_t workgroupSlmSize = EncodeDispatchKernel<Family>::alignSlmSize(slmTotalSize);
uint32_t slmSize = 0u;
@@ -195,6 +195,30 @@ void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32
}
}
// Aligns a requested SLM size for this family.
// A zero request is returned unchanged. Any other request is raised to at
// least 1024 and then passed through Math::nextPowerOfTwo; a result larger
// than 64 * MemoryConstants::kiloByte trips UNRECOVERABLE_IF.
template <>
uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
    if (slmSize != 0u) {
        const uint32_t alignedSize = Math::nextPowerOfTwo(std::max(slmSize, 1024u));
        UNRECOVERABLE_IF(alignedSize > 64u * MemoryConstants::kiloByte);
        return alignedSize;
    }
    return 0u;
}
// Converts an SLM size into the value returned to the caller for programming.
// The size is raised to at least 1024, passed through Math::nextPowerOfTwo and
// Math::getMinLsbSet, and then reduced by 9. A zero slmSize always yields 0.
// NOTE(review): getMinLsbSet presumably returns the bit index of the power of
// two (so 1024 would map to 1) - confirm against the Math helper.
// hwInfo is not consulted by this implementation.
template <>
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
    auto slmValue = std::max(slmSize, 1024u);
    slmValue = Math::nextPowerOfTwo(slmValue);
    slmValue = Math::getMinLsbSet(slmValue);
    slmValue = slmValue - 9;
    // Values above 7 are only flagged through DEBUG_BREAK_IF; the value is still returned.
    DEBUG_BREAK_IF(slmValue > 7);
    // !!slmSize is 0 for a zero request and 1 otherwise, so this zeroes the result when no SLM is requested.
    slmValue *= !!slmSize;
    return slmValue;
}
template <>
void adjustL3ControlField<Family>(void *l3ControlBuffer) {
using L3_CONTROL = typename Family::L3_CONTROL;

View File

@@ -107,19 +107,6 @@ bool GfxCoreHelperHw<Family>::isBufferSizeSuitableForCompression(const size_t si
}
}
// Converts an SLM size into the value returned to the caller for programming.
// The size is raised to at least 1024, passed through Math::nextPowerOfTwo and
// Math::getMinLsbSet, and then reduced by 9. A zero slmSize always yields 0.
// NOTE(review): getMinLsbSet presumably returns the bit index of the power of
// two (so 1024 would map to 1) - confirm against the Math helper.
// hwInfo is not consulted by this implementation.
template <>
uint32_t GfxCoreHelperHw<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const {
    auto slmValue = std::max(slmSize, 1024u);
    slmValue = Math::nextPowerOfTwo(slmValue);
    slmValue = Math::getMinLsbSet(slmValue);
    slmValue = slmValue - 9;
    // Values above 7 are only flagged through DEBUG_BREAK_IF; the value is still returned.
    DEBUG_BREAK_IF(slmValue > 7);
    // !!slmSize is 0 for a zero request and 1 otherwise, so this zeroes the result when no SLM is requested.
    slmValue *= !!slmSize;
    return slmValue;
}
template <>
bool GfxCoreHelperHw<Family>::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const {
if (debugManager.flags.ExperimentalCopyThroughLock.get() != -1) {