From 06a4d2cc02ef4ba3b7a2f4e16584baadb996c30c Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Thu, 24 Feb 2022 04:17:23 +0000 Subject: [PATCH] Refactor support for L0 scheduling hints (XE_HP and later) Make sure STATE_COMPUTE_MODE is updated when passing the scheduling hint for a kernel. Signed-off-by: Jaime Arteaga --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 8 +++--- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 7 +++-- level_zero/core/source/kernel/kernel_imp.cpp | 10 +++++-- level_zero/core/source/kernel/kernel_imp.h | 2 +- .../test_cmdlist_append_launch_kernel_1.cpp | 26 ++++++++++++++++++- 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index ee509be56e..a2eaa6ff90 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -74,7 +74,7 @@ template void CommandListCoreFamily::programThreadArbitrationPolicy(Device *device) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto &hwHelper = NEO::HwHelper::get(device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily); - auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); + threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { threadArbitrationPolicy = static_cast(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get()); } @@ -2236,7 +2236,6 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; auto &hwInfo = device->getHwInfo(); - auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo); @@ -2257,11 +2256,10 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes; auto &neoDevice = *device->getNEODevice(); - auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); - finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy); + finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, this->threadArbitrationPolicy); if (finalStreamState.stateComputeMode.isDirty()) { - clearComputeModePropertiesIfNeeded(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy); + clearComputeModePropertiesIfNeeded(false, kernelAttributes.numGrfRequired, this->threadArbitrationPolicy); NEO::EncodeWA::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), true); NEO::EncodeComputeMode::programComputeModeCommand(*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, hwInfo); NEO::EncodeWA::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), false); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 59458b434b..9574bbd5fc 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -210,13 +210,16 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z } } + KernelImp *kernelImp = static_cast(kernel); + if (kernelImp->getSchedulingHintExp() != std::numeric_limits::max()) { + this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp(); + } + auto isMultiOsContextCapable = (this->partitionCount > 1) && !isCooperative; updateStreamProperties(*kernel, isMultiOsContextCapable, isCooperative); - KernelImp *kernelImp = static_cast(kernel); this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); - this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp(); NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ eventAddress, //eventAddress diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index fbd16b7f8d..556ef250bf 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -754,7 +754,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto &kernelDescriptor = kernelImmData->getDescriptor(); - this->schedulingHintExpFlag = hwHelper.getDefaultThreadArbitrationPolicy(); UNRECOVERABLE_IF(!this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap); if (isaAllocation->getAllocationType() == NEO::AllocationType::KERNEL_ISA_INTERNAL) { @@ -1003,7 +1002,14 @@ NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const { } ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) { - this->schedulingHintExpFlag = pHint->flags; + + if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST) { + this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::AgeBased; + } else if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN) { + this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::RoundRobin; + } else { + this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency; + } return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 1d14e676f1..2b33ba0c5f 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -210,7 +210,7 @@ struct KernelImp : Kernel { bool kernelHasIndirectAccess = true; - uint32_t schedulingHintExpFlag = 0u; + uint32_t schedulingHintExpFlag = std::numeric_limits::max(); std::unique_ptr pImplicitArgs; }; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index 32dabd5db9..f6b719241e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -57,6 +57,30 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAll ASSERT_FALSE(commandList->hasIndirectAllocationsAllowed()); } +HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithOldestFirstThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) { + createKernel(); + ze_scheduling_hint_exp_desc_t pHint{}; + pHint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST; + kernel->setSchedulingHintExp(&pHint); + ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::AgeBased); +} + +HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithRRThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) { + createKernel(); + ze_scheduling_hint_exp_desc_t pHint{}; + pHint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN; + kernel->setSchedulingHintExp(&pHint); + ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::RoundRobin); +} + +HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithStallRRThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) { + createKernel(); + ze_scheduling_hint_exp_desc_t pHint{}; + pHint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN; + kernel->setSchedulingHintExp(&pHint); + ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency); +} + HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionTheSameFlagIsUsedToSetCmdListThreadArbitrationPolicy) { createKernel(); ze_scheduling_hint_exp_desc_t *pHint = new ze_scheduling_hint_exp_desc_t; @@ -70,7 +94,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - ASSERT_EQ(commandList->threadArbitrationPolicy, pHint->flags); + ASSERT_EQ(commandList->threadArbitrationPolicy, NEO::ThreadArbitrationPolicy::RoundRobin); delete (pHint); }