Add support for new thread arbitration policies via zeKernelSchedulingHintExp

Related-To: LOCI-2319

Signed-off-by: Vinod Tipparaju <vinod.tipparaju@intel.com>
This commit is contained in:
Vinod Tipparaju 2021-08-02 16:28:38 +05:30 committed by Compute-Runtime-Automation
parent c54152bbbc
commit 37670aeb91
7 changed files with 74 additions and 4 deletions

View File

@ -227,3 +227,10 @@ zeKernelSetCacheConfig(
ze_cache_config_flags_t flags) {
return L0::Kernel::fromHandle(hKernel)->setCacheConfig(flags);
}
// API entry point for the experimental kernel scheduling hint.
// Resolves the kernel object behind hKernel and forwards pHint to its
// setSchedulingHintExp() implementation, returning whatever that call returns.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeKernelSchedulingHintExp(
    ze_kernel_handle_t hKernel,
    ze_scheduling_hint_exp_desc_t *pHint) {
    auto *kernel = L0::Kernel::fromHandle(hKernel);
    return kernel->setSchedulingHintExp(pHint);
}

View File

@ -141,6 +141,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
NEO::Device *neoDevice = device->getNEODevice();
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
if (NEO::DebugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::KernelNameTag>(
*commandContainer.getCommandStream(),

View File

@ -209,6 +209,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
uint32_t partitionCount = 0;
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer,
reinterpret_cast<const void *>(pThreadGroupDimensions),

View File

@ -142,6 +142,8 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {
virtual NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() = 0;
virtual ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) = 0;
Kernel() = default;
Kernel(const Kernel &) = delete;
Kernel(Kernel &&) = delete;

View File

@ -721,11 +721,15 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
}
auto isaAllocation = this->kernelImmData->getIsaGraphicsAllocation();
auto neoDevice = module->getDevice()->getNEODevice();
auto &hwInfo = neoDevice->getHardwareInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
this->schedulingHintExpFlag = hwHelper.getDefaultThreadArbitrationPolicy();
if (this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap != nullptr &&
isaAllocation->getAllocationType() == NEO::GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL) {
auto neoDevice = module->getDevice()->getNEODevice();
auto &hwInfo = neoDevice->getHardwareInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *isaAllocation),
*neoDevice,
isaAllocation,
@ -803,7 +807,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
residencyContainer.resize(this->kernelArgHandlers.size(), nullptr);
auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
auto neoDevice = module->getDevice()->getNEODevice();
if ((kernelAttributes.perHwThreadPrivateMemorySize != 0U) && (false == module->shouldAllocatePrivateMemoryPerDispatch())) {
this->privateMemoryGraphicsAllocation = allocatePrivateMemoryGraphicsAllocation();
this->patchCrossthreadDataWithPrivateAllocation(this->privateMemoryGraphicsAllocation);
@ -949,4 +952,16 @@ NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const {
return getImmutableData()->getIsaGraphicsAllocation();
}
// Stores the scheduling hint flags from pHint on this kernel.
//
// pHint: descriptor supplied by the caller; only its `flags` member is read.
// Returns ZE_RESULT_ERROR_INVALID_NULL_POINTER when pHint is null,
// ZE_RESULT_SUCCESS otherwise.
//
// NOTE(review): the raw API flag bits are stored in the same member that is
// also seeded from the HW helper's default thread arbitration policy and from
// the OverrideThreadArbitrationPolicy debug flag - confirm that all three
// sources share one encoding.
ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) {
    // Guard against a null descriptor instead of dereferencing it blindly.
    if (pHint == nullptr) {
        return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
    }
    this->schedulingHintExpFlag = pHint->flags;
    return ZE_RESULT_SUCCESS;
}
// Returns the scheduling hint value to use for this kernel.
//
// When the OverrideThreadArbitrationPolicy debug flag is set (i.e. not -1),
// its value takes precedence and is returned. Otherwise the value stored on
// the kernel is returned.
//
// The override is applied to the returned value only; the stored member is
// left untouched so that the kernel's own hint is not lost once the debug
// flag is cleared again.
uint32_t KernelImp::getSchedulingHintExp() {
    // Read the debug flag once so the check and the returned value agree.
    const auto overridePolicy = NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get();
    if (overridePolicy != -1) {
        return static_cast<uint32_t>(overridePolicy);
    }
    return this->schedulingHintExpFlag;
}
} // namespace L0

View File

@ -150,6 +150,9 @@ struct KernelImp : Kernel {
return privateMemoryGraphicsAllocation;
}
ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) override;
uint32_t getSchedulingHintExp();
protected:
KernelImp() = default;
@ -203,6 +206,8 @@ struct KernelImp : Kernel {
ze_cache_config_flags_t cacheConfigFlags = 0u;
bool kernelHasIndirectAccess = true;
uint32_t schedulingHintExpFlag = 0u;
};
} // namespace L0

View File

@ -121,6 +121,43 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAll
ASSERT_FALSE(commandList->hasIndirectAllocationsAllowed());
}
// Verifies that a scheduling hint set on the kernel is what the command list
// records as its thread arbitration policy after appendLaunchKernel().
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionTheSameFlagIsUsedToSetCmdListThreadArbitrationPolicy) {
    createKernel();

    // Descriptor lives on the stack: ASSERT_* returns from the test body on
    // failure, which would skip a manual delete and leak a heap allocation.
    // Value-initialization also avoids leaving any member indeterminate.
    ze_scheduling_hint_exp_desc_t hint = {};
    hint.pNext = nullptr;
    hint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;
    kernel->setSchedulingHintExp(&hint);

    ze_group_count_t groupCount{1, 1, 1};
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_EQ(commandList->threadArbitrationPolicy, hint.flags);
}
// Verifies that the OverrideThreadArbitrationPolicy debug flag wins over a
// scheduling hint set on the kernel when the command list records its thread
// arbitration policy.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionAndOverrideThreadArbitrationPolicyThenTheLatterIsUsedToSetCmdListThreadArbitrationPolicy) {
    createKernel();

    // Descriptor lives on the stack: ASSERT_* returns from the test body on
    // failure, which would skip a manual delete and leak a heap allocation.
    // Value-initialization also avoids leaving any member indeterminate.
    ze_scheduling_hint_exp_desc_t hint = {};
    hint.pNext = nullptr;
    hint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;
    kernel->setSchedulingHintExp(&hint);

    // Restores the debug flags when the test scope ends.
    DebugManagerStateRestore restorer;
    DebugManager.flags.OverrideThreadArbitrationPolicy.set(0);

    ze_group_count_t groupCount{1, 1, 1};
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    ASSERT_EQ(commandList->threadArbitrationPolicy, 0u);
}
HWTEST_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAppendingKernelThenBbEndIsAddedAndNewCmdBufferAllocated) {
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
createKernel();