diff --git a/level_zero/api/core/ze_module.cpp b/level_zero/api/core/ze_module.cpp
index 01a09878c4..95ca1e9af2 100644
--- a/level_zero/api/core/ze_module.cpp
+++ b/level_zero/api/core/ze_module.cpp
@@ -227,3 +227,10 @@ zeKernelSetCacheConfig(
     ze_cache_config_flags_t flags) {
     return L0::Kernel::fromHandle(hKernel)->setCacheConfig(flags);
 }
+
+ZE_APIEXPORT ze_result_t ZE_APICALL
+zeKernelSchedulingHintExp(
+    ze_kernel_handle_t hKernel,
+    ze_scheduling_hint_exp_desc_t *pHint) {
+    return L0::Kernel::fromHandle(hKernel)->setSchedulingHintExp(pHint);
+}
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl
index 966101b184..e296157314 100644
--- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl
+++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl
@@ -141,6 +141,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
 
     NEO::Device *neoDevice = device->getNEODevice();
 
+    this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
+
     if (NEO::DebugManager.flags.EnableSWTags.get()) {
         neoDevice->getRootDeviceEnvironment().tagsManager->insertTag(
             *commandContainer.getCommandStream(),
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl
index c10f566b82..8edea52137 100644
--- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl
+++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl
@@ -209,6 +209,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
     KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
     this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
 
+    this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
+
     uint32_t partitionCount = 0;
     NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer,
                                                  reinterpret_cast<const void *>(pThreadGroupDimensions),
diff --git a/level_zero/core/source/kernel/kernel.h b/level_zero/core/source/kernel/kernel.h
index a83003a503..0bbdd97f99 100644
--- a/level_zero/core/source/kernel/kernel.h
+++ b/level_zero/core/source/kernel/kernel.h
@@ -142,6 +142,8 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {
 
     virtual NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() = 0;
 
+    virtual ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) = 0;
+
     Kernel() = default;
     Kernel(const Kernel &) = delete;
     Kernel(Kernel &&) = delete;
diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp
index cb6b7263b9..06eced7170 100644
--- a/level_zero/core/source/kernel/kernel_imp.cpp
+++ b/level_zero/core/source/kernel/kernel_imp.cpp
@@ -721,11 +721,18 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
     }
 
     auto isaAllocation = this->kernelImmData->getIsaGraphicsAllocation();
+
+    auto neoDevice = module->getDevice()->getNEODevice();
+    auto &hwInfo = neoDevice->getHardwareInfo();
+    auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
+
+    // Value used for kernels on which setSchedulingHintExp() is never called.
+    // NOTE(review): this default comes from HwHelper, while setSchedulingHintExp() stores
+    // ze_scheduling_hint_exp_flags_t bits verbatim - confirm both use the same encoding.
+    this->schedulingHintExpFlag = hwHelper.getDefaultThreadArbitrationPolicy();
+
     if (this->kernelImmData->getKernelInfo()->heapInfo.pKernelHeap != nullptr &&
         isaAllocation->getAllocationType() == NEO::GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL) {
-        auto neoDevice = module->getDevice()->getNEODevice();
-        auto &hwInfo = neoDevice->getHardwareInfo();
-        auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
         NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *isaAllocation),
                                                               *neoDevice,
                                                               isaAllocation,
@@ -803,7 +810,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
     residencyContainer.resize(this->kernelArgHandlers.size(), nullptr);
 
     auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
-    auto neoDevice = module->getDevice()->getNEODevice();
     if ((kernelAttributes.perHwThreadPrivateMemorySize != 0U) && (false == module->shouldAllocatePrivateMemoryPerDispatch())) {
         this->privateMemoryGraphicsAllocation = allocatePrivateMemoryGraphicsAllocation();
         this->patchCrossthreadDataWithPrivateAllocation(this->privateMemoryGraphicsAllocation);
@@ -949,4 +955,25 @@ NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const {
     return getImmutableData()->getIsaGraphicsAllocation();
 }
 
+// Stores the hint passed to zeKernelSchedulingHintExp. The stored value is read through
+// getSchedulingHintExp() by CommandListCoreFamily::appendLaunchKernelWithParams.
+ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) {
+    if (pHint == nullptr) {
+        return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+    }
+    this->schedulingHintExpFlag = pHint->flags;
+    return ZE_RESULT_SUCCESS;
+}
+
+// Returns the hint to use for this kernel. When the OverrideThreadArbitrationPolicy debug
+// flag is set (!= -1) its value is returned instead, without overwriting the stored hint,
+// so the kernel's own setting is still in place once the flag is cleared.
+uint32_t KernelImp::getSchedulingHintExp() const {
+    const auto overridePolicy = NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get();
+    if (overridePolicy != -1) {
+        return static_cast<uint32_t>(overridePolicy);
+    }
+    return this->schedulingHintExpFlag;
+}
+
 } // namespace L0
diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h
index e7255b0478..79ffb53444 100644
--- a/level_zero/core/source/kernel/kernel_imp.h
+++ b/level_zero/core/source/kernel/kernel_imp.h
@@ -150,6 +150,9 @@ struct KernelImp : Kernel {
         return privateMemoryGraphicsAllocation;
     }
 
+    ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) override;
+    uint32_t getSchedulingHintExp() const;
+
   protected:
     KernelImp() = default;
 
@@ -203,6 +206,8 @@ struct KernelImp : Kernel {
     ze_cache_config_flags_t cacheConfigFlags = 0u;
 
     bool kernelHasIndirectAccess = true;
+
+    uint32_t schedulingHintExpFlag = 0u;
 };
 
 } // namespace L0
diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp
index 9b335c1f47..3b4bdbb165 100644
--- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp
+++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp
@@ -121,6 +121,46 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithIndirectAllocationsNotAll
     ASSERT_FALSE(commandList->hasIndirectAllocationsAllowed());
 }
 
+HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionTheSameFlagIsUsedToSetCmdListThreadArbitrationPolicy) {
+    createKernel();
+    ze_scheduling_hint_exp_desc_t hint = {};
+    hint.stype = ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_DESC;
+    hint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;
+    ASSERT_EQ(ZE_RESULT_SUCCESS, kernel->setSchedulingHintExp(&hint));
+
+    ze_group_count_t groupCount{1, 1, 1};
+    ze_result_t returnValue;
+    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
+    auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
+
+    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
+    ASSERT_EQ(commandList->threadArbitrationPolicy, hint.flags);
+}
+
+HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionAndOverrideThreadArbitrationPolicyThenTheLatterIsUsedToSetCmdListThreadArbitrationPolicy) {
+    createKernel();
+    ze_scheduling_hint_exp_desc_t hint = {};
+    hint.stype = ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_DESC;
+    hint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;
+    ASSERT_EQ(ZE_RESULT_SUCCESS, kernel->setSchedulingHintExp(&hint));
+
+    DebugManagerStateRestore restorer;
+    DebugManager.flags.OverrideThreadArbitrationPolicy.set(0);
+
+    ze_group_count_t groupCount{1, 1, 1};
+    ze_result_t returnValue;
+    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
+    auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
+
+    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
+    ASSERT_EQ(commandList->threadArbitrationPolicy, 0u);
+}
+
+HWTEST_F(CommandListAppendLaunchKernel, givenNullSchedulingHintDescWhenSettingSchedulingHintExpThenInvalidNullPointerIsReturned) {
+    createKernel();
+    ASSERT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, kernel->setSchedulingHintExp(nullptr));
+}
+
 HWTEST_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAppendingKernelThenBbEndIsAddedAndNewCmdBufferAllocated) {
     using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
     createKernel();