Refactor support for L0 scheduling hints (XE_HP and later)

Make sure STATE_COMPUTE_MODE is updated when passing the
scheduling hint for a kernel.

Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
Jaime Arteaga
2022-02-24 04:17:23 +00:00
committed by Compute-Runtime-Automation
parent 3b7fbef826
commit 06a4d2cc02
5 changed files with 42 additions and 11 deletions

View File

@ -74,7 +74,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::programThreadArbitrationPolicy(Device *device) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto &hwHelper = NEO::HwHelper::get(device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily);
auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
threadArbitrationPolicy = static_cast<uint32_t>(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get());
}
@ -2236,7 +2236,6 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
auto &hwInfo = device->getHwInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo);
@ -2257,11 +2256,10 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
auto &neoDevice = *device->getNEODevice();
auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy);
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, this->threadArbitrationPolicy);
if (finalStreamState.stateComputeMode.isDirty()) {
clearComputeModePropertiesIfNeeded(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy);
clearComputeModePropertiesIfNeeded(false, kernelAttributes.numGrfRequired, this->threadArbitrationPolicy);
NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), true);
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommand(*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, hwInfo);
NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), false);

View File

@ -210,13 +210,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
}
}
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
if (kernelImp->getSchedulingHintExp() != std::numeric_limits<uint32_t>::max()) {
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
}
auto isMultiOsContextCapable = (this->partitionCount > 1) && !isCooperative;
updateStreamProperties(*kernel, isMultiOsContextCapable, isCooperative);
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
eventAddress, //eventAddress

View File

@ -754,7 +754,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto &kernelDescriptor = kernelImmData->getDescriptor();
this->schedulingHintExpFlag = hwHelper.getDefaultThreadArbitrationPolicy();
UNRECOVERABLE_IF(!this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap);
if (isaAllocation->getAllocationType() == NEO::AllocationType::KERNEL_ISA_INTERNAL) {
@ -1003,7 +1002,14 @@ NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const {
}
// Translates the Level Zero scheduling-hint flag carried by pHint into the
// internal NEO::ThreadArbitrationPolicy value and stores it on the kernel in
// schedulingHintExpFlag.
//
// pHint: descriptor whose `flags` field selects the policy; it is dereferenced
//        unconditionally, so it must not be null.
// Returns ZE_RESULT_SUCCESS in every case.
ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) {
    // The raw flag value is never stored: every path below assigns the
    // translated policy, so an up-front copy of pHint->flags would be dead.
    switch (pHint->flags) {
    case ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST:
        this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::AgeBased;
        break;
    case ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN:
        this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::RoundRobin;
        break;
    default:
        // Any value other than the two exact matches above lands here,
        // including ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN.
        this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
        break;
    }
    return ZE_RESULT_SUCCESS;
}

View File

@ -210,7 +210,7 @@ struct KernelImp : Kernel {
bool kernelHasIndirectAccess = true;
uint32_t schedulingHintExpFlag = 0u;
uint32_t schedulingHintExpFlag = std::numeric_limits<uint32_t>::max();
std::unique_ptr<NEO::ImplicitArgs> pImplicitArgs;
};

View File

@ -57,6 +57,30 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAll
ASSERT_FALSE(commandList->hasIndirectAllocationsAllowed());
}
// Setting the OLDEST_FIRST scheduling hint on a kernel must make the kernel
// report the internal AgeBased thread arbitration policy.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithOldestFirstThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) {
    createKernel();

    // Zero-initialized descriptor with only the flag under test populated.
    ze_scheduling_hint_exp_desc_t hintDesc = {};
    hintDesc.flags = ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST;

    kernel->setSchedulingHintExp(&hintDesc);

    ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::AgeBased);
}
// Setting the ROUND_ROBIN scheduling hint on a kernel must make the kernel
// report the internal RoundRobin thread arbitration policy.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithRRThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) {
    createKernel();

    // Zero-initialized descriptor with only the flag under test populated.
    ze_scheduling_hint_exp_desc_t hintDesc = {};
    hintDesc.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;

    kernel->setSchedulingHintExp(&hintDesc);

    ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::RoundRobin);
}
// Setting the STALL_BASED_ROUND_ROBIN scheduling hint on a kernel must make
// the kernel report the internal RoundRobinAfterDependency policy.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithStallRRThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) {
    createKernel();

    // Zero-initialized descriptor with only the flag under test populated.
    ze_scheduling_hint_exp_desc_t hintDesc = {};
    hintDesc.flags = ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN;

    kernel->setSchedulingHintExp(&hintDesc);

    ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency);
}
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionTheSameFlagIsUsedToSetCmdListThreadArbitrationPolicy) {
createKernel();
ze_scheduling_hint_exp_desc_t *pHint = new ze_scheduling_hint_exp_desc_t;
@ -70,7 +94,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
ASSERT_EQ(commandList->threadArbitrationPolicy, pHint->flags);
ASSERT_EQ(commandList->threadArbitrationPolicy, NEO::ThreadArbitrationPolicy::RoundRobin);
delete (pHint);
}