refactor: unify programming of preferred slm size 1/n
- rename function to reflect actual task function does Related-To: NEO-12639 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
e5658e5327
commit
49371cb13e
|
@ -162,8 +162,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
|||
EncodeDispatchKernel<GfxFamily>::setGrfInfo(&interfaceDescriptor, kernelDescriptor.kernelAttributes.numGrfRequired,
|
||||
sizeCrossThreadData, sizePerThreadData, device.getRootDeviceEnvironment());
|
||||
|
||||
EncodeDispatchKernel<GfxFamily>::appendAdditionalIDDFields(&interfaceDescriptor, device.getRootDeviceEnvironment(),
|
||||
threadsPerThreadGroup, slmTotalSize, SlmPolicy::slmPolicyNone);
|
||||
EncodeDispatchKernel<GfxFamily>::setupPreferredSlmSize(&interfaceDescriptor, device.getRootDeviceEnvironment(),
|
||||
threadsPerThreadGroup, slmTotalSize, SlmPolicy::slmPolicyNone);
|
||||
|
||||
if constexpr (heaplessModeEnabled == false) {
|
||||
interfaceDescriptor.setBindingTablePointer(static_cast<uint32_t>(bindingTablePointer));
|
||||
|
|
|
@ -129,8 +129,8 @@ struct EncodeDispatchKernel {
|
|||
static void encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
|
||||
template <typename InterfaceDescriptorType>
|
||||
static void appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
|
||||
const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
|
||||
static void setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
|
||||
const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
|
||||
|
||||
template <typename InterfaceDescriptorType>
|
||||
static void encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);
|
||||
|
|
|
@ -411,7 +411,7 @@ inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const Roo
|
|||
|
||||
template <typename Family>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
||||
|
||||
template <typename Family>
|
||||
inline bool EncodeDispatchKernel<Family>::isDshNeeded(const DeviceInfo &deviceInfo) {
|
||||
|
|
|
@ -15,7 +15,7 @@ template void NEO::EncodeDispatchKernel<Family>::adjustTimestampPacket<Family::D
|
|||
template void NEO::EncodeDispatchKernel<Family>::setupPostSyncForRegularEvent<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setupPostSyncForInOrderExec<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setGrfInfo<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t grfCount, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
template void NEO::EncodeDispatchKernel<Family>::appendAdditionalIDDFields<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setupPreferredSlmSize<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
|
||||
template void NEO::EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData<Family::DefaultWalkerType, Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t grfCount, Family::DefaultWalkerType &walkerCmd);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setupPostSyncMocs<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush);
|
||||
template void NEO::EncodeDispatchKernel<Family>::encode<Family::DefaultWalkerType>(CommandContainer &container, EncodeDispatchKernelArgs &args);
|
||||
|
|
|
@ -397,9 +397,9 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
idd.getThreadGroupDispatchSize());
|
||||
}
|
||||
|
||||
EncodeDispatchKernel<Family>::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup,
|
||||
args.dispatchInterface->getSlmTotalSize(),
|
||||
args.dispatchInterface->getSlmPolicy());
|
||||
EncodeDispatchKernel<Family>::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsPerThreadGroup,
|
||||
args.dispatchInterface->getSlmTotalSize(),
|
||||
args.dispatchInterface->getSlmPolicy());
|
||||
|
||||
EncodeWalkerArgs walkerArgs{
|
||||
args.isCooperative ? KernelExecutionType::concurrent : KernelExecutionType::defaultType,
|
||||
|
|
|
@ -255,7 +255,7 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDevice
|
|||
|
||||
template <>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
|
|
|
@ -187,7 +187,7 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDevice
|
|||
|
||||
template <>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
|
|
|
@ -29,7 +29,7 @@ namespace NEO {
|
|||
|
||||
template <>
|
||||
template <typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
void EncodeDispatchKernel<Family>::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
|
||||
using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE;
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -44,11 +44,11 @@ void verifyPreferredSlmValues(std::vector<PreferredSlmTestValues<FamilyType>> va
|
|||
? valueToTest.preferredSlmAllocationSizePerDss
|
||||
: valueToTest.preferredSlmAllocationSizePerDss / localWorkGroupsPerDssCount;
|
||||
|
||||
NEO::EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd,
|
||||
rootDeviceEnvironment,
|
||||
threadsPerThreadGroup,
|
||||
slmTotalSize,
|
||||
slmPolicy);
|
||||
NEO::EncodeDispatchKernel<FamilyType>::setupPreferredSlmSize(&idd,
|
||||
rootDeviceEnvironment,
|
||||
threadsPerThreadGroup,
|
||||
slmTotalSize,
|
||||
slmPolicy);
|
||||
|
||||
EXPECT_EQ(valueToTest.expectedValueInIdd, idd.getPreferredSlmAllocationSize());
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -50,7 +50,7 @@ PVCTEST_F(CommandEncodeStatesPvcTest, GivenSmallSlmTotalSizesWhenSetAdditionalIn
|
|||
hwInfo.platform.usDeviceID = deviceId;
|
||||
hwInfo.platform.usRevId = revisionToTest.revisionId;
|
||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsCount, slmTotalSize, SlmPolicy::slmPolicyNone);
|
||||
EncodeDispatchKernel<FamilyType>::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsCount, slmTotalSize, SlmPolicy::slmPolicyNone);
|
||||
if (revisionToTest.isWaRequired) {
|
||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K, idd.getPreferredSlmAllocationSize());
|
||||
} else {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -41,28 +41,28 @@ DG2TEST_F(CommandEncodeStatesDg2Test, givenNoWorkaroundNeededWhenSelectingPrefer
|
|||
const uint32_t threadsPerThreadGroup = 7; // 18 groups will fit in one DSS
|
||||
const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte;
|
||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm);
|
||||
EncodeDispatchKernel<FamilyType>::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm);
|
||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K, idd.getPreferredSlmAllocationSize());
|
||||
}
|
||||
{
|
||||
const uint32_t threadsPerThreadGroup = 8; // 16 groups will fit in one DSS
|
||||
const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte;
|
||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm);
|
||||
EncodeDispatchKernel<FamilyType>::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm);
|
||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize());
|
||||
}
|
||||
{
|
||||
const uint32_t threadsPerThreadGroup = 9; // 14 groups will fit in one DSS
|
||||
const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte;
|
||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm);
|
||||
EncodeDispatchKernel<FamilyType>::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm);
|
||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize());
|
||||
}
|
||||
{
|
||||
const uint32_t threadsPerThreadGroup = 50; // 2 groups will fit in one DSS
|
||||
const uint32_t slmSizePerThreadGroup = 16 * MemoryConstants::kiloByte;
|
||||
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
|
||||
EncodeDispatchKernel<FamilyType>::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm);
|
||||
EncodeDispatchKernel<FamilyType>::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm);
|
||||
EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue