From 1eafbc30f8568f132a71d51f652d2c88ebbf09a9 Mon Sep 17 00:00:00 2001 From: "Vysochyn, Illia" Date: Tue, 25 Feb 2025 14:11:01 +0000 Subject: [PATCH] feature: Enable eu thread scheduling mode override via IDD Enables eu thread scheduling mode override via the INTERFACE_DESCRIPTOR_DATA and COMPUTE_WALKER. Defines encodeEuSchedulingPolicy operating on INTERFACE_DESCRIPTOR_DATA for Xe3 platform. Adds tests verifying that thread scheduling mode can be overridden via COMPUTE_WALKER. Related-To: NEO-13771, HSD-18041256338, HSD-14013056398 Signed-off-by: Vysochyn, Illia --- ...nd_encoder_from_xe_hpg_core_to_xe2_hpg.inl | 7 +++- ...d_encoder_from_xe_hpg_core_to_xe3_core.inl | 5 --- .../xe3_core/command_encoder_xe3_core.cpp | 30 +++++++++++++++ .../command_encoder_tests.cpp | 15 +++++++- .../command_encoder_xe3_core_tests.cpp | 37 +++++++++++++++++++ 5 files changed, 87 insertions(+), 7 deletions(-) diff --git a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe2_hpg.inl b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe2_hpg.inl index 3f7c6f450d..8b3cd3211c 100644 --- a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe2_hpg.inl +++ b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe2_hpg.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -22,4 +22,9 @@ bool EncodeEnableRayTracing::is48bResourceNeededForRayTracing() { return true; } +template +template +void EncodeDispatchKernel::encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy) { +} + } // namespace NEO diff --git a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl index 3e4e7dc1ac..9f7b310715 100644 --- 
a/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl +++ b/shared/source/command_container/command_encoder_from_xe_hpg_core_to_xe3_core.inl @@ -37,11 +37,6 @@ template void EncodeDispatchKernel::setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr) { } -template -template -void EncodeDispatchKernel::encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy) { -} - template bool EncodeDispatchKernel::singleTileExecImplicitScalingRequired(bool cooperativeKernel) { return cooperativeKernel; diff --git a/shared/source/xe3_core/command_encoder_xe3_core.cpp b/shared/source/xe3_core/command_encoder_xe3_core.cpp index 1a2d06f534..ef34deb16b 100644 --- a/shared/source/xe3_core/command_encoder_xe3_core.cpp +++ b/shared/source/xe3_core/command_encoder_xe3_core.cpp @@ -18,6 +18,8 @@ #include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/constants.h" #include "shared/source/kernel/grf_config.h" +#include "shared/source/kernel/kernel_descriptor.h" +#include "shared/source/os_interface/product_helper.h" #include "shared/source/release_helper/release_helper.h" #include "shared/source/xe3_core/hw_cmds_base.h" @@ -128,6 +130,34 @@ void EncodeDispatchKernel::encodeAdditionalWalkerFields(const RootDevice } } +template +template +void EncodeDispatchKernel::encodeEuSchedulingPolicy(InterfaceDescriptorType *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy) { + using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA; + if constexpr (std::is_same_v) { + + auto pipelinedThreadArbitrationPolicy = kernelDesc.kernelAttributes.threadArbitrationPolicy; + + if (pipelinedThreadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) { + 
pipelinedThreadArbitrationPolicy = static_cast(defaultPipelinedThreadArbitrationPolicy); + } + + switch (pipelinedThreadArbitrationPolicy) { + case ThreadArbitrationPolicy::RoundRobin: + pInterfaceDescriptor->setEuThreadSchedulingModeOverride(INTERFACE_DESCRIPTOR_DATA::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN); + break; + case ThreadArbitrationPolicy::AgeBased: + pInterfaceDescriptor->setEuThreadSchedulingModeOverride(INTERFACE_DESCRIPTOR_DATA::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST); + break; + case ThreadArbitrationPolicy::RoundRobinAfterDependency: + pInterfaceDescriptor->setEuThreadSchedulingModeOverride(INTERFACE_DESCRIPTOR_DATA::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALL_BASED_ROUND_ROBIN); + break; + default: + pInterfaceDescriptor->setEuThreadSchedulingModeOverride(INTERFACE_DESCRIPTOR_DATA::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_HW_DEFAULT); + } + } +} + template <> bool EncodeEnableRayTracing::is48bResourceNeededForRayTracing() { if (debugManager.flags.Enable64bAddressingForRayTracing.get() != -1) { diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index c7db1b97c8..0981bddd76 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -893,7 +893,20 @@ HWTEST2_F(CommandEncoderTests, whenAskingForImplicitScalingValuesThenAlwaysRetur EXPECT_FALSE(ImplicitScalingDispatch::platformSupportsImplicitScaling(*rootExecEnv)); } -HWTEST2_F(CommandEncoderTests, givenInterfaceDescriptorWhenEncodeEuSchedulingPolicyIsCalledThenNothingIsChanged, HeapfulSupportedMatch) { +HWTEST2_F(CommandEncoderTests, givenInterfaceDescriptorWhenEncodeEuSchedulingPolicyIsCalledThenChanged, IsAtLeastXe3Core) { + using INTERFACE_DESCRIPTOR_DATA = 
typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + + INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; + + KernelDescriptor kernelDescriptor; + kernelDescriptor.kernelAttributes.threadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased; + int32_t defaultPipelinedThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; + EncodeDispatchKernel::encodeEuSchedulingPolicy(&idd, kernelDescriptor, defaultPipelinedThreadArbitrationPolicy); + + EXPECT_EQ(idd.getEuThreadSchedulingModeOverride(), INTERFACE_DESCRIPTOR_DATA::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST); +} + +HWTEST2_F(CommandEncoderTests, givenInterfaceDescriptorWhenEncodeEuSchedulingPolicyIsCalledThenNothingIsChanged, IsAtMostXe2HpgCore) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; diff --git a/shared/test/unit_test/xe3_core/command_encoder_xe3_core_tests.cpp b/shared/test/unit_test/xe3_core/command_encoder_xe3_core_tests.cpp index cfb7731f67..9a3bce6997 100644 --- a/shared/test/unit_test/xe3_core/command_encoder_xe3_core_tests.cpp +++ b/shared/test/unit_test/xe3_core/command_encoder_xe3_core_tests.cpp @@ -195,3 +195,40 @@ XE3_CORETEST_F(CommandEncodeStatesXe3Test, givenHeapSharingEnabledWhenRetrieving itorCmd = find<_3DSTATE_BINDING_TABLE_POOL_ALLOC *>(commands.begin(), commands.end()); EXPECT_EQ(commands.end(), itorCmd); } + +XE3_CORETEST_F(Xe3CoreCommandEncoderTest, givenPipelinedEuThreadArbitrationPolicyWhenEncodeEuSchedulingPolicyIsCalledThenIddContainsCorrectEuSchedulingPolicy) { + using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + + INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; + KernelDescriptor kernelDescriptor; + int32_t defaultPipelinedThreadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; + + { + kernelDescriptor.kernelAttributes.threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; + 
EncodeDispatchKernel::encodeEuSchedulingPolicy(&idd, kernelDescriptor, defaultPipelinedThreadArbitrationPolicy); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_HW_DEFAULT, idd.getEuThreadSchedulingModeOverride()); + } + + defaultPipelinedThreadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; + + { + kernelDescriptor.kernelAttributes.threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent; + EncodeDispatchKernel::encodeEuSchedulingPolicy(&idd, kernelDescriptor, defaultPipelinedThreadArbitrationPolicy); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN, idd.getEuThreadSchedulingModeOverride()); + } + { + kernelDescriptor.kernelAttributes.threadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased; + EncodeDispatchKernel::encodeEuSchedulingPolicy(&idd, kernelDescriptor, defaultPipelinedThreadArbitrationPolicy); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, idd.getEuThreadSchedulingModeOverride()); + } + { + kernelDescriptor.kernelAttributes.threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin; + EncodeDispatchKernel::encodeEuSchedulingPolicy(&idd, kernelDescriptor, defaultPipelinedThreadArbitrationPolicy); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN, idd.getEuThreadSchedulingModeOverride()); + } + { + kernelDescriptor.kernelAttributes.threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobinAfterDependency; + EncodeDispatchKernel::encodeEuSchedulingPolicy(&idd, kernelDescriptor, defaultPipelinedThreadArbitrationPolicy); + EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALL_BASED_ROUND_ROBIN, idd.getEuThreadSchedulingModeOverride()); + } +}