mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 07:14:10 +08:00
performance: replace virtual calls with native class methods
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b0420fbf40
commit
7ce4a8adc2
@@ -34,8 +34,8 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(InterfaceDescriptor
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
const uint32_t workGroupCountPerDss = threadsPerDssCount / threadsPerThreadGroup;
|
||||
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
|
||||
const uint32_t workgroupSlmSize = gfxCoreHelper.alignSlmSize(slmTotalSize);
|
||||
|
||||
const uint32_t workgroupSlmSize = EncodeDispatchKernel<Family>::alignSlmSize(slmTotalSize);
|
||||
|
||||
uint32_t slmSize = 0u;
|
||||
|
||||
@@ -195,6 +195,30 @@ void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
|
||||
if (slmSize == 0u) {
|
||||
return 0u;
|
||||
}
|
||||
slmSize = std::max(slmSize, 1024u);
|
||||
slmSize = Math::nextPowerOfTwo(slmSize);
|
||||
UNRECOVERABLE_IF(slmSize > 64u * MemoryConstants::kiloByte);
|
||||
return slmSize;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
|
||||
using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
|
||||
|
||||
auto slmValue = std::max(slmSize, 1024u);
|
||||
slmValue = Math::nextPowerOfTwo(slmValue);
|
||||
slmValue = Math::getMinLsbSet(slmValue);
|
||||
slmValue = slmValue - 9;
|
||||
DEBUG_BREAK_IF(slmValue > 7);
|
||||
slmValue *= !!slmSize;
|
||||
return slmValue;
|
||||
}
|
||||
|
||||
template <>
|
||||
void adjustL3ControlField<Family>(void *l3ControlBuffer) {
|
||||
using L3_CONTROL = typename Family::L3_CONTROL;
|
||||
|
||||
@@ -107,19 +107,6 @@ bool GfxCoreHelperHw<Family>::isBufferSizeSuitableForCompression(const size_t si
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t GfxCoreHelperHw<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const {
|
||||
using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
|
||||
|
||||
auto slmValue = std::max(slmSize, 1024u);
|
||||
slmValue = Math::nextPowerOfTwo(slmValue);
|
||||
slmValue = Math::getMinLsbSet(slmValue);
|
||||
slmValue = slmValue - 9;
|
||||
DEBUG_BREAK_IF(slmValue > 7);
|
||||
slmValue *= !!slmSize;
|
||||
return slmValue;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool GfxCoreHelperHw<Family>::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const {
|
||||
if (debugManager.flags.ExperimentalCopyThroughLock.get() != -1) {
|
||||
|
||||
Reference in New Issue
Block a user