performance: replace virtual calls with native class methods

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2024-09-10 22:26:23 +00:00 committed by Compute-Runtime-Automation
parent b0420fbf40
commit 7ce4a8adc2
12 changed files with 172 additions and 212 deletions

View File

@ -178,7 +178,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
const auto &hardwareInfo = device.getHardwareInfo();
auto &gfxCoreHelper = device.getGfxCoreHelper();
auto programmableIDSLMSize = static_cast<uint32_t>(gfxCoreHelper.computeSlmValues(hardwareInfo, slmTotalSize));
auto programmableIDSLMSize = EncodeDispatchKernel<GfxFamily>::computeSlmValues(hardwareInfo, slmTotalSize);
if (debugManager.flags.OverrideSlmAllocationSize.get() != -1) {
programmableIDSLMSize = static_cast<uint32_t>(debugManager.flags.OverrideSlmAllocationSize.get());

View File

@ -219,6 +219,11 @@ struct EncodeDispatchKernel {
template <typename WalkerType>
static void forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd);
static uint32_t alignSlmSize(uint32_t slmSize);
static uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize);
static bool singleTileExecImplicitScalingRequired(bool cooperativeKernel);
};
template <typename GfxFamily>

View File

@ -63,7 +63,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
auto pImplicitArgs = args.dispatchInterface->getImplicitArgs();
auto &hwInfo = args.device->getHardwareInfo();
auto &gfxCoreHelper = args.device->getGfxCoreHelper();
auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment();
LinearStream *listCmdBufferStream = container.getCommandStream();
@ -95,8 +94,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
kernelDescriptor.kernelAttributes.barrierCount,
hwInfo);
auto slmSize = static_cast<uint32_t>(
gfxCoreHelper.computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize()));
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
idd.setSharedLocalMemorySize(slmSize);
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
@ -643,6 +641,32 @@ template <typename WalkerType>
void EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd) {
}
// Aligns an SLM size for this family.
// Zero is returned unchanged. Any other size is raised to at least 1024 and then passed
// through Math::nextPowerOfTwo; a result above 64 * MemoryConstants::kiloByte trips
// UNRECOVERABLE_IF.
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
    if (slmSize != 0u) {
        const uint32_t flooredSize = std::max(slmSize, 1024u);
        slmSize = Math::nextPowerOfTwo(flooredSize);
        UNRECOVERABLE_IF(slmSize > 64u * MemoryConstants::kiloByte);
    }
    return slmSize;
}
// Computes the value programmed for the given SLM size.
// The size is raised to at least 1024, passed through Math::nextPowerOfTwo and
// Math::getMinLsbSet, and reduced by 9. A zero slmSize always yields 0.
// hwInfo is not read by this implementation.
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
    const uint32_t paddedSize = Math::nextPowerOfTwo(std::max(slmSize, 1024u));
    const uint32_t lowestSetBit = Math::getMinLsbSet(paddedSize);
    const uint32_t encodedValue = lowestSetBit - 9;
    DEBUG_BREAK_IF(encodedValue > 7);
    return (slmSize != 0u) ? encodedValue : 0u;
}
// Returns the cooperativeKernel flag unchanged: single-tile execution is requested
// exactly when the kernel is cooperative.
template <typename Family>
bool EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(bool cooperativeKernel) {
    const bool restrictToSingleTile = cooperativeKernel;
    return restrictToSingleTile;
}
template <typename Family>
size_t EncodeStates<Family>::getSshHeapSize() {
return 64 * MemoryConstants::kiloByte;

View File

@ -119,9 +119,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(&idd, kernelDescriptor, args.defaultPipelinedThreadArbitrationPolicy);
auto &gfxCoreHelper = args.device->getGfxCoreHelper();
auto slmSize = static_cast<uint32_t>(
gfxCoreHelper.computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize()));
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
if (debugManager.flags.OverrideSlmAllocationSize.get() != -1) {
slmSize = static_cast<uint32_t>(debugManager.flags.OverrideSlmAllocationSize.get());
@ -262,7 +260,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
offsetThreadData = (is64bit ? heap->getHeapGpuStartOffset() : heap->getHeapGpuBase()) + static_cast<uint64_t>(heap->getUsed() - sizeThreadData - args.reserveExtraPayloadSpace);
auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment();
if (pImplicitArgs) {
offsetThreadData -= sizeForImplicitArgsStruct;
pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize;
@ -429,7 +426,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
!(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer
!args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup
args.dcFlushEnable, // dcFlush
gfxCoreHelper.singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile
EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile
args.makeCommandView}; // blockDispatchToCommandBuffer
ImplicitScalingDispatch<Family>::dispatchCommands(*listCmdBufferStream,
@ -990,6 +987,82 @@ void EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite(Walk
}
}
// Rounds slmSize up to the nearest entry of a fixed, ascending table of sizes
// (0, 1, 2, 4, 8, 16, 24, 32, 48, 64, 96 and 128 times MemoryConstants::kiloByte).
// A size larger than the last entry reaches UNRECOVERABLE_IF(true).
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
    const uint32_t slmSizeSteps[] = {
        0u,
        1u * MemoryConstants::kiloByte,
        2u * MemoryConstants::kiloByte,
        4u * MemoryConstants::kiloByte,
        8u * MemoryConstants::kiloByte,
        16u * MemoryConstants::kiloByte,
        24u * MemoryConstants::kiloByte,
        32u * MemoryConstants::kiloByte,
        48u * MemoryConstants::kiloByte,
        64u * MemoryConstants::kiloByte,
        96u * MemoryConstants::kiloByte,
        128u * MemoryConstants::kiloByte,
    };
    const size_t stepCount = sizeof(slmSizeSteps) / sizeof(slmSizeSteps[0]);

    // The table is ascending, so the first entry that is large enough is the answer.
    for (size_t index = 0; index < stepCount; ++index) {
        if (slmSizeSteps[index] >= slmSize) {
            return slmSizeSteps[index];
        }
    }

    UNRECOVERABLE_IF(true);
    return 0;
}
// Translates an SLM size into an enumerator of the family's
// INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE.
// The size is first aligned with alignSlmSize(); an aligned size of zero encodes 0K,
// otherwise the smallest bucket that holds the aligned size is returned.
// A slmSize above 128 * MemoryConstants::kiloByte trips UNRECOVERABLE_IF.
// hwInfo is not read by this implementation.
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
    using SlmEncoding = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;

    const auto alignedSize = EncodeDispatchKernel<Family>::alignSlmSize(slmSize);
    if (alignedSize == 0u) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
    }
    UNRECOVERABLE_IF(slmSize > 128u * MemoryConstants::kiloByte);

    // Buckets are checked from smallest to largest; the first one that fits is returned.
    if (alignedSize <= 1u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K;
    }
    if (alignedSize <= 2u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_2K;
    }
    if (alignedSize <= 4u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_4K;
    }
    if (alignedSize <= 8u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_8K;
    }
    if (alignedSize <= 16u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_16K;
    }
    if (alignedSize <= 24u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_24K;
    }
    if (alignedSize <= 32u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_32K;
    }
    if (alignedSize <= 48u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_48K;
    }
    if (alignedSize <= 64u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_64K;
    }
    if (alignedSize <= 96u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_96K;
    }
    return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_128K;
}
// Returns the cooperativeKernel flag unchanged: single-tile execution is requested
// exactly when the kernel is cooperative.
template <typename Family>
bool EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(bool cooperativeKernel) {
    const bool restrictToSingleTile = cooperativeKernel;
    return restrictToSingleTile;
}
template <typename Family>
size_t EncodeStates<Family>::getSshHeapSize() {
return 2 * MemoryConstants::megaByte;

View File

@ -62,6 +62,27 @@ template <>
void EncodeBatchBufferStartOrEnd<Family>::appendBatchBufferStart(MI_BATCH_BUFFER_START &cmd, bool indirect, bool predicate) {
}
// Lookup table used by computeSlmValues: the index is the SLM size expressed in 4 KB
// units (rounded up and capped at 15). Entry i holds i rounded up to a power of two,
// with 0 staying 0.
static uint32_t slmSizeId[] = {0, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16};
// Aligns an SLM size for this family.
// Zero is returned unchanged. Any other size is raised to at least 4096 and then
// passed through Math::nextPowerOfTwo.
template <>
uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
    if (slmSize != 0u) {
        const uint32_t flooredSize = std::max(slmSize, 4096u);
        slmSize = Math::nextPowerOfTwo(flooredSize);
    }
    return slmSize;
}
// Computes the value programmed for the given SLM size via the slmSizeId table.
// The size is converted to a count of 4 KB units (rounded up), capped at 15, and used
// as the table index. hwInfo is not read by this implementation.
template <>
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
    uint32_t unitCount = slmSize;
    unitCount += (4 * MemoryConstants::kiloByte - 1); // round up to the next 4 KB unit
    unitCount >>= 12;
    const uint32_t tableIndex = std::min(unitCount, 15u);
    return slmSizeId[tableIndex];
}
} // namespace NEO
#include "shared/source/command_container/command_encoder_enablers.inl"

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -17,27 +17,6 @@
namespace NEO {
typedef Gen8Family Family;
// Lookup table used by computeSlmValues: the index is the SLM size expressed in 4 KB
// units (rounded up and capped at 15). Entry i holds i rounded up to a power of two,
// with 0 staying 0.
static uint32_t slmSizeId[] = {0, 1, 2, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16};
// Aligns an SLM size for this family.
// Zero is returned unchanged. Any other size is raised to at least 4096 and then
// passed through Math::nextPowerOfTwo.
template <>
uint32_t GfxCoreHelperHw<Family>::alignSlmSize(uint32_t slmSize) const {
    if (slmSize != 0u) {
        const uint32_t flooredSize = std::max(slmSize, 4096u);
        slmSize = Math::nextPowerOfTwo(flooredSize);
    }
    return slmSize;
}
// Computes the value programmed for the given SLM size via the slmSizeId table.
// The size is converted to a count of 4 KB units (rounded up), capped at 15, and used
// as the table index. hwInfo is not read by this implementation.
template <>
uint32_t GfxCoreHelperHw<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const {
    uint32_t unitCount = slmSize;
    unitCount += (4 * MemoryConstants::kiloByte - 1); // round up to the next 4 KB unit
    unitCount >>= 12;
    const uint32_t tableIndex = std::min(unitCount, 15u);
    return slmSizeId[tableIndex];
}
template <>
size_t GfxCoreHelperHw<Family>::getMaxBarrierRegisterPerSlice() const {
return 16;

View File

@ -402,23 +402,12 @@ uint32_t GfxCoreHelperHw<GfxFamily>::getMetricsLibraryGenId() const {
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::alignSlmSize(uint32_t slmSize) const {
if (slmSize == 0u) {
return 0u;
}
slmSize = std::max(slmSize, 1024u);
slmSize = Math::nextPowerOfTwo(slmSize);
UNRECOVERABLE_IF(slmSize > 64u * MemoryConstants::kiloByte);
return slmSize;
return EncodeDispatchKernel<GfxFamily>::alignSlmSize(slmSize);
}
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const {
auto value = std::max(slmSize, 1024u);
value = Math::nextPowerOfTwo(value);
value = Math::getMinLsbSet(value);
value = value - 9;
DEBUG_BREAK_IF(value > 7);
return value * !!slmSize;
return EncodeDispatchKernel<GfxFamily>::computeSlmValues(hwInfo, slmSize);
}
template <typename GfxFamily>
@ -756,7 +745,7 @@ bool GfxCoreHelperHw<GfxFamily>::is48ResourceNeededForCmdBuffer() const {
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::singleTileExecImplicitScalingRequired(bool cooperativeKernel) const {
return cooperativeKernel;
return EncodeDispatchKernel<GfxFamily>::singleTileExecImplicitScalingRequired(cooperativeKernel);
}
template <typename GfxFamily>

View File

@ -277,77 +277,6 @@ void GfxCoreHelperHw<Family>::setExtraAllocationData(AllocationData &allocationD
}
}
// Rounds slmSize up to the nearest entry of a fixed, ascending table of sizes
// (0, 1, 2, 4, 8, 16, 24, 32, 48, 64, 96 and 128 times MemoryConstants::kiloByte).
// A size larger than the last entry reaches UNRECOVERABLE_IF(true).
template <>
uint32_t GfxCoreHelperHw<Family>::alignSlmSize(uint32_t slmSize) const {
    const uint32_t slmSizeSteps[] = {
        0u,
        1u * MemoryConstants::kiloByte,
        2u * MemoryConstants::kiloByte,
        4u * MemoryConstants::kiloByte,
        8u * MemoryConstants::kiloByte,
        16u * MemoryConstants::kiloByte,
        24u * MemoryConstants::kiloByte,
        32u * MemoryConstants::kiloByte,
        48u * MemoryConstants::kiloByte,
        64u * MemoryConstants::kiloByte,
        96u * MemoryConstants::kiloByte,
        128u * MemoryConstants::kiloByte,
    };
    const size_t stepCount = sizeof(slmSizeSteps) / sizeof(slmSizeSteps[0]);

    // The table is ascending, so the first entry that is large enough is the answer.
    for (size_t index = 0; index < stepCount; ++index) {
        if (slmSizeSteps[index] >= slmSize) {
            return slmSizeSteps[index];
        }
    }

    UNRECOVERABLE_IF(true);
    return 0;
}
// Translates an SLM size into an enumerator of the family's
// INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE.
// The size is first aligned with alignSlmSize(); an aligned size of zero encodes 0K,
// otherwise the smallest bucket that holds the aligned size is returned.
// A slmSize above 128 * MemoryConstants::kiloByte trips UNRECOVERABLE_IF.
// hwInfo is not read by this implementation.
template <>
uint32_t GfxCoreHelperHw<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const {
    using SlmEncoding = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;

    const auto alignedSize = alignSlmSize(slmSize);
    if (alignedSize == 0u) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
    }
    UNRECOVERABLE_IF(slmSize > 128u * MemoryConstants::kiloByte);

    // Buckets are checked from smallest to largest; the first one that fits is returned.
    if (alignedSize <= 1u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K;
    }
    if (alignedSize <= 2u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_2K;
    }
    if (alignedSize <= 4u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_4K;
    }
    if (alignedSize <= 8u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_8K;
    }
    if (alignedSize <= 16u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_16K;
    }
    if (alignedSize <= 24u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_24K;
    }
    if (alignedSize <= 32u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_32K;
    }
    if (alignedSize <= 48u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_48K;
    }
    if (alignedSize <= 64u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_64K;
    }
    if (alignedSize <= 96u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_96K;
    }
    return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_128K;
}
template <>
int32_t GfxCoreHelperHw<Family>::getDefaultThreadArbitrationPolicy() const {
return ThreadArbitrationPolicy::RoundRobinAfterDependency;

View File

@ -193,9 +193,7 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(InterfaceDescriptor
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
const uint32_t workGroupCountPerDss = static_cast<uint32_t>(Math::divideAndRoundUp(threadsPerDssCount, threadsPerThreadGroup));
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
const uint32_t workgroupSlmSize = gfxCoreHelper.alignSlmSize(slmTotalSize);
const uint32_t workgroupSlmSize = EncodeDispatchKernel<Family>::alignSlmSize(slmTotalSize);
uint32_t slmSize = 0u;

View File

@ -248,75 +248,6 @@ void GfxCoreHelperHw<Family>::setExtraAllocationData(AllocationData &allocationD
}
}
// Rounds slmSize up to the nearest entry of a fixed, ascending table of sizes
// (0, 1, 2, 4, 8, 16, 24, 32, 48, 64, 96 and 128 times MemoryConstants::kiloByte).
// A size larger than the last entry reaches UNRECOVERABLE_IF(true).
template <>
uint32_t GfxCoreHelperHw<Family>::alignSlmSize(uint32_t slmSize) const {
    const uint32_t slmSizeSteps[] = {
        0u,
        1u * MemoryConstants::kiloByte,
        2u * MemoryConstants::kiloByte,
        4u * MemoryConstants::kiloByte,
        8u * MemoryConstants::kiloByte,
        16u * MemoryConstants::kiloByte,
        24u * MemoryConstants::kiloByte,
        32u * MemoryConstants::kiloByte,
        48u * MemoryConstants::kiloByte,
        64u * MemoryConstants::kiloByte,
        96u * MemoryConstants::kiloByte,
        128u * MemoryConstants::kiloByte,
    };
    const size_t stepCount = sizeof(slmSizeSteps) / sizeof(slmSizeSteps[0]);

    // The table is ascending, so the first entry that is large enough is the answer.
    for (size_t index = 0; index < stepCount; ++index) {
        if (slmSizeSteps[index] >= slmSize) {
            return slmSizeSteps[index];
        }
    }

    UNRECOVERABLE_IF(true);
    return 0;
}
// Translates an SLM size into an enumerator of the family's
// INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE.
// Zero encodes 0K; otherwise the smallest bucket that holds slmSize is returned.
// This variant works on the raw slmSize, without calling alignSlmSize().
// A slmSize above 128 * MemoryConstants::kiloByte trips UNRECOVERABLE_IF.
// hwInfo is not read by this implementation.
template <>
uint32_t GfxCoreHelperHw<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const {
    using SlmEncoding = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;

    if (slmSize == 0u) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
    }
    UNRECOVERABLE_IF(slmSize > 128u * MemoryConstants::kiloByte);

    // Buckets are checked from smallest to largest; the first one that fits is returned.
    if (slmSize <= 1u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K;
    }
    if (slmSize <= 2u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_2K;
    }
    if (slmSize <= 4u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_4K;
    }
    if (slmSize <= 8u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_8K;
    }
    if (slmSize <= 16u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_16K;
    }
    if (slmSize <= 24u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_24K;
    }
    if (slmSize <= 32u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_32K;
    }
    if (slmSize <= 48u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_48K;
    }
    if (slmSize <= 64u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_64K;
    }
    if (slmSize <= 96u * MemoryConstants::kiloByte) {
        return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_96K;
    }
    return SlmEncoding::SHARED_LOCAL_MEMORY_SIZE_ENCODES_128K;
}
template <>
int32_t GfxCoreHelperHw<Family>::getDefaultThreadArbitrationPolicy() const {
return ThreadArbitrationPolicy::RoundRobinAfterDependency;

View File

@ -34,8 +34,8 @@ void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(InterfaceDescriptor
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
const uint32_t workGroupCountPerDss = threadsPerDssCount / threadsPerThreadGroup;
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
const uint32_t workgroupSlmSize = gfxCoreHelper.alignSlmSize(slmTotalSize);
const uint32_t workgroupSlmSize = EncodeDispatchKernel<Family>::alignSlmSize(slmTotalSize);
uint32_t slmSize = 0u;
@ -195,6 +195,30 @@ void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32
}
}
// Aligns an SLM size for this family.
// Zero is returned unchanged. Any other size is raised to at least 1024 and then passed
// through Math::nextPowerOfTwo; a result above 64 * MemoryConstants::kiloByte trips
// UNRECOVERABLE_IF.
template <>
uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
    if (slmSize != 0u) {
        const uint32_t flooredSize = std::max(slmSize, 1024u);
        slmSize = Math::nextPowerOfTwo(flooredSize);
        UNRECOVERABLE_IF(slmSize > 64u * MemoryConstants::kiloByte);
    }
    return slmSize;
}
// Computes the value programmed for the given SLM size.
// The size is raised to at least 1024, passed through Math::nextPowerOfTwo and
// Math::getMinLsbSet, and reduced by 9. A zero slmSize always yields 0.
// hwInfo is not read by this implementation.
template <>
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
    // The SHARED_LOCAL_MEMORY_SIZE alias that used to be declared here was never
    // referenced; the result is computed arithmetically, not from the enum.
    auto slmValue = std::max(slmSize, 1024u);
    slmValue = Math::nextPowerOfTwo(slmValue);
    slmValue = Math::getMinLsbSet(slmValue);
    slmValue = slmValue - 9;
    DEBUG_BREAK_IF(slmValue > 7);
    slmValue *= !!slmSize; // force 0 when no SLM was requested
    return slmValue;
}
template <>
void adjustL3ControlField<Family>(void *l3ControlBuffer) {
using L3_CONTROL = typename Family::L3_CONTROL;

View File

@ -107,19 +107,6 @@ bool GfxCoreHelperHw<Family>::isBufferSizeSuitableForCompression(const size_t si
}
}
// Computes the value programmed for the given SLM size.
// The size is raised to at least 1024, passed through Math::nextPowerOfTwo and
// Math::getMinLsbSet, and reduced by 9. A zero slmSize always yields 0.
// hwInfo is not read by this implementation.
template <>
uint32_t GfxCoreHelperHw<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const {
    // The SHARED_LOCAL_MEMORY_SIZE alias that used to be declared here was never
    // referenced; the result is computed arithmetically, not from the enum.
    auto slmValue = std::max(slmSize, 1024u);
    slmValue = Math::nextPowerOfTwo(slmValue);
    slmValue = Math::getMinLsbSet(slmValue);
    slmValue = slmValue - 9;
    DEBUG_BREAK_IF(slmValue > 7);
    slmValue *= !!slmSize; // force 0 when no SLM was requested
    return slmValue;
}
template <>
bool GfxCoreHelperHw<Family>::copyThroughLockedPtrEnabled(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const {
if (debugManager.flags.ExperimentalCopyThroughLock.get() != -1) {