diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index 8470ea9e39..ff88b8e0ce 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -162,8 +162,8 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( EncodeDispatchKernel::setGrfInfo(&interfaceDescriptor, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData, sizePerThreadData, device.getRootDeviceEnvironment()); - EncodeDispatchKernel::appendAdditionalIDDFields(&interfaceDescriptor, device.getRootDeviceEnvironment(), - threadsPerThreadGroup, slmTotalSize, SlmPolicy::slmPolicyNone); + EncodeDispatchKernel::setupPreferredSlmSize(&interfaceDescriptor, device.getRootDeviceEnvironment(), + threadsPerThreadGroup, slmTotalSize, SlmPolicy::slmPolicyNone); if constexpr (heaplessModeEnabled == false) { interfaceDescriptor.setBindingTablePointer(static_cast(bindingTablePointer)); diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 2d746d2899..b630ec7d97 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -129,8 +129,8 @@ struct EncodeDispatchKernel { static void encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs); template - static void appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, - const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); + static void setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, + const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); template static void encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy); diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index ba17913ac2..0efba9dbb3 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -411,7 +411,7 @@ inline void EncodeDispatchKernel::encodeAdditionalWalkerFields(const Roo template template -void EncodeDispatchKernel::appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {} +void EncodeDispatchKernel::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {} template inline bool EncodeDispatchKernel::isDshNeeded(const DeviceInfo &deviceInfo) { diff --git a/shared/source/command_container/command_encoder_enablers.inl b/shared/source/command_container/command_encoder_enablers.inl index aaae61aaa7..f280f1ee6d 100644 --- a/shared/source/command_container/command_encoder_enablers.inl +++ b/shared/source/command_container/command_encoder_enablers.inl @@ -15,7 +15,7 @@ template void NEO::EncodeDispatchKernel::adjustTimestampPacket::setupPostSyncForRegularEvent(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); template void NEO::EncodeDispatchKernel::setupPostSyncForInOrderExec(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args); template void NEO::EncodeDispatchKernel::setGrfInfo(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t grfCount, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const RootDeviceEnvironment &rootDeviceEnvironment); -template void NEO::EncodeDispatchKernel::appendAdditionalIDDFields(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); +template void NEO::EncodeDispatchKernel::setupPreferredSlmSize(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy); template void NEO::EncodeDispatchKernel::adjustInterfaceDescriptorData(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t grfCount, Family::DefaultWalkerType &walkerCmd); template void NEO::EncodeDispatchKernel::setupPostSyncMocs(Family::DefaultWalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush); template void NEO::EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernelArgs &args); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index a69f2264e6..0b431cad32 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -397,9 +397,9 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis idd.getThreadGroupDispatchSize()); } - EncodeDispatchKernel::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup, - args.dispatchInterface->getSlmTotalSize(), - args.dispatchInterface->getSlmPolicy()); + EncodeDispatchKernel::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsPerThreadGroup, + args.dispatchInterface->getSlmTotalSize(), + args.dispatchInterface->getSlmPolicy()); EncodeWalkerArgs walkerArgs{ args.isCooperative ? KernelExecutionType::concurrent : KernelExecutionType::defaultType, diff --git a/shared/source/xe2_hpg_core/command_encoder_xe2_hpg_core.cpp b/shared/source/xe2_hpg_core/command_encoder_xe2_hpg_core.cpp index a7eb35541a..b4456b41e8 100644 --- a/shared/source/xe2_hpg_core/command_encoder_xe2_hpg_core.cpp +++ b/shared/source/xe2_hpg_core/command_encoder_xe2_hpg_core.cpp @@ -255,7 +255,7 @@ void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDevice template <> template -void EncodeDispatchKernel::appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) { +void EncodeDispatchKernel::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) { using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE; auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index 01a38f85ec..ccb93ac938 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -187,7 +187,7 @@ void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDevice template <> template -void EncodeDispatchKernel::appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) { +void EncodeDispatchKernel::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) { using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE; auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount; diff --git a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp index be7bf0ff02..fff6980a4a 100644 --- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp @@ -29,7 +29,7 @@ namespace NEO { template <> template -void EncodeDispatchKernel::appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) { +void EncodeDispatchKernel::setupPreferredSlmSize(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) { using PREFERRED_SLM_ALLOCATION_SIZE = typename InterfaceDescriptorType::PREFERRED_SLM_ALLOCATION_SIZE; auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); const uint32_t threadsPerDssCount = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.DualSubSliceCount; diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.h b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.h index 5d63025262..e5e40e1593 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.h +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_dg2_and_later.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -44,11 +44,11 @@ void verifyPreferredSlmValues(std::vector> va ? valueToTest.preferredSlmAllocationSizePerDss : valueToTest.preferredSlmAllocationSizePerDss / localWorkGroupsPerDssCount; - NEO::EncodeDispatchKernel::appendAdditionalIDDFields(&idd, - rootDeviceEnvironment, - threadsPerThreadGroup, - slmTotalSize, - slmPolicy); + NEO::EncodeDispatchKernel::setupPreferredSlmSize(&idd, + rootDeviceEnvironment, + threadsPerThreadGroup, + slmTotalSize, + slmPolicy); EXPECT_EQ(valueToTest.expectedValueInIdd, idd.getPreferredSlmAllocationSize()); } diff --git a/shared/test/unit_test/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp index 3cd788f6b4..b31c3db4d5 100644 --- a/shared/test/unit_test/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp +++ b/shared/test/unit_test/xe_hpc_core/pvc/test_encode_dispatch_kernel_pvc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -50,7 +50,7 @@ PVCTEST_F(CommandEncodeStatesPvcTest, GivenSmallSlmTotalSizesWhenSetAdditionalIn hwInfo.platform.usDeviceID = deviceId; hwInfo.platform.usRevId = revisionToTest.revisionId; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; - EncodeDispatchKernel::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsCount, slmTotalSize, SlmPolicy::slmPolicyNone); + EncodeDispatchKernel::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsCount, slmTotalSize, SlmPolicy::slmPolicyNone); if (revisionToTest.isWaRequired) { EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_16K, idd.getPreferredSlmAllocationSize()); } else { diff --git a/shared/test/unit_test/xe_hpg_core/dg2/test_encode_dispatch_kernel_dg2.cpp b/shared/test/unit_test/xe_hpg_core/dg2/test_encode_dispatch_kernel_dg2.cpp index 4900d10b9e..68a3193761 100644 --- a/shared/test/unit_test/xe_hpg_core/dg2/test_encode_dispatch_kernel_dg2.cpp +++ b/shared/test/unit_test/xe_hpg_core/dg2/test_encode_dispatch_kernel_dg2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -41,28 +41,28 @@ DG2TEST_F(CommandEncodeStatesDg2Test, givenNoWorkaroundNeededWhenSelectingPrefer const uint32_t threadsPerThreadGroup = 7; // 18 groups will fit in one DSS const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; - EncodeDispatchKernel::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm); + EncodeDispatchKernel::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm); EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_64K, idd.getPreferredSlmAllocationSize()); } { const uint32_t threadsPerThreadGroup = 8; // 16 groups will fit in one DSS const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; - EncodeDispatchKernel::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm); + EncodeDispatchKernel::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm); EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize()); } { const uint32_t threadsPerThreadGroup = 9; // 14 groups will fit in one DSS const uint32_t slmSizePerThreadGroup = 2 * MemoryConstants::kiloByte; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; - EncodeDispatchKernel::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm); + EncodeDispatchKernel::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm); EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize()); } { const uint32_t threadsPerThreadGroup = 50; // 2 groups will fit in one DSS const uint32_t slmSizePerThreadGroup = 16 * MemoryConstants::kiloByte; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; - EncodeDispatchKernel::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm); + EncodeDispatchKernel::setupPreferredSlmSize(&idd, rootDeviceEnvironment, threadsPerThreadGroup, slmSizePerThreadGroup, SlmPolicy::slmPolicyLargeSlm); EXPECT_EQ(PREFERRED_SLM_ALLOCATION_SIZE::PREFERRED_SLM_ALLOCATION_SIZE_32K, idd.getPreferredSlmAllocationSize()); } }