Refactor support for L0 scheduling hints (XE_HP and later)

Make sure STATE_COMPUTE_MODE is updated when passing the
scheduling hint for a kernel.

Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
Jaime Arteaga
2022-02-24 04:17:23 +00:00
committed by Compute-Runtime-Automation
parent 3b7fbef826
commit 06a4d2cc02
5 changed files with 42 additions and 11 deletions

View File

@@ -74,7 +74,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::programThreadArbitrationPolicy(Device *device) { void CommandListCoreFamily<gfxCoreFamily>::programThreadArbitrationPolicy(Device *device) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily; using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto &hwHelper = NEO::HwHelper::get(device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily); auto &hwHelper = NEO::HwHelper::get(device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily);
auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
threadArbitrationPolicy = static_cast<uint32_t>(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get()); threadArbitrationPolicy = static_cast<uint32_t>(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get());
} }
@@ -2236,7 +2236,6 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
auto &hwInfo = device->getHwInfo(); auto &hwInfo = device->getHwInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo); auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo);
@@ -2257,11 +2256,10 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes; auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
auto &neoDevice = *device->getNEODevice(); auto &neoDevice = *device->getNEODevice();
auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, this->threadArbitrationPolicy);
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy);
if (finalStreamState.stateComputeMode.isDirty()) { if (finalStreamState.stateComputeMode.isDirty()) {
clearComputeModePropertiesIfNeeded(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy); clearComputeModePropertiesIfNeeded(false, kernelAttributes.numGrfRequired, this->threadArbitrationPolicy);
NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), true); NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), true);
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommand(*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, hwInfo); NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommand(*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, hwInfo);
NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), false); NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), false);

View File

@@ -210,13 +210,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
} }
} }
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
if (kernelImp->getSchedulingHintExp() != std::numeric_limits<uint32_t>::max()) {
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
}
auto isMultiOsContextCapable = (this->partitionCount > 1) && !isCooperative; auto isMultiOsContextCapable = (this->partitionCount > 1) && !isCooperative;
updateStreamProperties(*kernel, isMultiOsContextCapable, isCooperative); updateStreamProperties(*kernel, isMultiOsContextCapable, isCooperative);
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
eventAddress, //eventAddress eventAddress, //eventAddress

View File

@@ -754,7 +754,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto &kernelDescriptor = kernelImmData->getDescriptor(); auto &kernelDescriptor = kernelImmData->getDescriptor();
this->schedulingHintExpFlag = hwHelper.getDefaultThreadArbitrationPolicy();
UNRECOVERABLE_IF(!this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap); UNRECOVERABLE_IF(!this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap);
if (isaAllocation->getAllocationType() == NEO::AllocationType::KERNEL_ISA_INTERNAL) { if (isaAllocation->getAllocationType() == NEO::AllocationType::KERNEL_ISA_INTERNAL) {
@@ -1003,7 +1002,14 @@ NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const {
} }
ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) { ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) {
this->schedulingHintExpFlag = pHint->flags;
if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST) {
this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::AgeBased;
} else if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN) {
this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::RoundRobin;
} else {
this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
}
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }

View File

@@ -210,7 +210,7 @@ struct KernelImp : Kernel {
bool kernelHasIndirectAccess = true; bool kernelHasIndirectAccess = true;
uint32_t schedulingHintExpFlag = 0u; uint32_t schedulingHintExpFlag = std::numeric_limits<uint32_t>::max();
std::unique_ptr<NEO::ImplicitArgs> pImplicitArgs; std::unique_ptr<NEO::ImplicitArgs> pImplicitArgs;
}; };

View File

@@ -57,6 +57,30 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAll
ASSERT_FALSE(commandList->hasIndirectAllocationsAllowed()); ASSERT_FALSE(commandList->hasIndirectAllocationsAllowed());
} }
// Sets the OLDEST_FIRST scheduling hint on a freshly created kernel and checks
// that getSchedulingHintExp() reports NEO::ThreadArbitrationPolicy::AgeBased.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithOldestFirstThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) {
    createKernel();

    // Value-initialize the descriptor so only `flags` carries a non-zero value.
    ze_scheduling_hint_exp_desc_t hintDesc = {};
    hintDesc.flags = ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST;
    kernel->setSchedulingHintExp(&hintDesc);

    const auto reportedPolicy = kernel->getSchedulingHintExp();
    ASSERT_EQ(reportedPolicy, NEO::ThreadArbitrationPolicy::AgeBased);
}
// Sets the ROUND_ROBIN scheduling hint on a freshly created kernel and checks
// that getSchedulingHintExp() reports NEO::ThreadArbitrationPolicy::RoundRobin.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithRRThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) {
    createKernel();

    // Value-initialize the descriptor so only `flags` carries a non-zero value.
    ze_scheduling_hint_exp_desc_t hintDesc = {};
    hintDesc.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;
    kernel->setSchedulingHintExp(&hintDesc);

    const auto reportedPolicy = kernel->getSchedulingHintExp();
    ASSERT_EQ(reportedPolicy, NEO::ThreadArbitrationPolicy::RoundRobin);
}
// Sets the STALL_BASED_ROUND_ROBIN scheduling hint on a freshly created kernel
// and checks that getSchedulingHintExp() reports
// NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithStallRRThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) {
    createKernel();

    // Value-initialize the descriptor so only `flags` carries a non-zero value.
    ze_scheduling_hint_exp_desc_t hintDesc = {};
    hintDesc.flags = ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN;
    kernel->setSchedulingHintExp(&hintDesc);

    const auto reportedPolicy = kernel->getSchedulingHintExp();
    ASSERT_EQ(reportedPolicy, NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency);
}
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionTheSameFlagIsUsedToSetCmdListThreadArbitrationPolicy) { HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionTheSameFlagIsUsedToSetCmdListThreadArbitrationPolicy) {
createKernel(); createKernel();
ze_scheduling_hint_exp_desc_t *pHint = new ze_scheduling_hint_exp_desc_t; ze_scheduling_hint_exp_desc_t *pHint = new ze_scheduling_hint_exp_desc_t;
@@ -70,7 +94,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_EQ(commandList->threadArbitrationPolicy, pHint->flags); ASSERT_EQ(commandList->threadArbitrationPolicy, NEO::ThreadArbitrationPolicy::RoundRobin);
delete (pHint); delete (pHint);
} }