diff --git a/level_zero/core/source/kernel/kernel.h b/level_zero/core/source/kernel/kernel.h index bf99627e7f..2ba4801698 100644 --- a/level_zero/core/source/kernel/kernel.h +++ b/level_zero/core/source/kernel/kernel.h @@ -152,7 +152,6 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI { virtual NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() = 0; virtual ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) = 0; - virtual int32_t getSchedulingHintExp() = 0; Kernel() = default; Kernel(const Kernel &) = delete; diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 7cdfa266aa..1f60cbb212 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -1056,7 +1056,7 @@ ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint return ZE_RESULT_SUCCESS; } -int32_t KernelImp::getSchedulingHintExp() { +int32_t KernelImp::getSchedulingHintExp() const { return this->schedulingHintExpFlag; } } // namespace L0 diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 806d259fb2..efdc2b1e83 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -164,7 +164,7 @@ struct KernelImp : Kernel { } ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) override; - int32_t getSchedulingHintExp() override; + int32_t getSchedulingHintExp() const override; NEO::ImplicitArgs *getImplicitArgs() const override { return pImplicitArgs.get(); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 5831a7eb07..e87b8e9ce9 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -70,12 +70,13 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus int expectedDisableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(*defaultHwInfo); bool expectedIsCoherencyRequired = hwHelper.forceNonGpuCoherencyWA(true); int expectedLargeGrfMode = hwInfoConfig.isGrfNumReportedWithScm() ? 1 : -1; + int expectedThreadArbitrationPolicy = hwInfoConfig.isThreadArbitrationPolicyReportedWithScm() ? NEO::ThreadArbitrationPolicy::RoundRobin : -1; EXPECT_EQ(1, currentCsrStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(1, currentCsrStreamProperties.frontEndState.disableEUFusion.value); EXPECT_EQ(expectedDisableOverdispatch, currentCsrStreamProperties.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedIsCoherencyRequired, currentCsrStreamProperties.stateComputeMode.isCoherencyRequired.value); EXPECT_EQ(expectedLargeGrfMode, currentCsrStreamProperties.stateComputeMode.largeGrfMode.value); - EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, currentCsrStreamProperties.stateComputeMode.threadArbitrationPolicy.value); + EXPECT_EQ(expectedThreadArbitrationPolicy, currentCsrStreamProperties.stateComputeMode.threadArbitrationPolicy.value); commandListImmediate.requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value = 0; commandListImmediate.requiredStreamState.frontEndState.disableEUFusion.value = 0; @@ -86,12 +87,13 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus commandListImmediate.executeCommandListImmediateWithFlushTask(false); expectedLargeGrfMode = hwInfoConfig.isGrfNumReportedWithScm() ? 0 : -1; + expectedThreadArbitrationPolicy = hwInfoConfig.isThreadArbitrationPolicyReportedWithScm() ? NEO::ThreadArbitrationPolicy::AgeBased : -1; EXPECT_EQ(0, currentCsrStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(0, currentCsrStreamProperties.frontEndState.disableEUFusion.value); EXPECT_EQ(0, currentCsrStreamProperties.frontEndState.disableOverdispatch.value); EXPECT_EQ(0, currentCsrStreamProperties.stateComputeMode.isCoherencyRequired.value); EXPECT_EQ(expectedLargeGrfMode, currentCsrStreamProperties.stateComputeMode.largeGrfMode.value); - EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, currentCsrStreamProperties.stateComputeMode.threadArbitrationPolicy.value); + EXPECT_EQ(expectedThreadArbitrationPolicy, currentCsrStreamProperties.stateComputeMode.threadArbitrationPolicy.value); } HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlushTaskThenContainsAnyKernelFlagIsReset, IsAtLeastSkl) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index 80f2a2f705..89300022e3 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -84,6 +84,9 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithStallRRThreadArbitrationP } HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionTheSameFlagIsUsedToSetCmdListThreadArbitrationPolicy) { + DebugManagerStateRestore restorer; + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); + createKernel(); ze_scheduling_hint_exp_desc_t *pHint = new ze_scheduling_hint_exp_desc_t; pHint->pNext = nullptr; @@ -109,6 +112,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe DebugManagerStateRestore restorer; DebugManager.flags.OverrideThreadArbitrationPolicy.set(0); + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); ze_group_count_t groupCount{1, 1, 1}; ze_result_t returnValue; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index fcf4ebaca6..46831ef5c9 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -1240,6 +1240,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernel HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamPropertiesIsCalledThenCorrectThreadArbitrationPolicyIsSet, IsAtLeastSkl) { DebugManagerStateRestore restorer; + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily); auto defaultThreadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index a97b79e7c3..f0f6e6db16 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -141,6 +141,10 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( EncodeDispatchKernel::setGrfInfo(&interfaceDescriptor, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData, sizePerThreadData, hardwareInfo); + auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); + hwInfoConfig.updateIddCommand(&interfaceDescriptor, kernelDescriptor.kernelAttributes.numGrfRequired, + kernel.getThreadArbitrationPolicy()); + EncodeDispatchKernel::appendAdditionalIDDFields(&interfaceDescriptor, hardwareInfo, threadsPerThreadGroup, slmTotalSize, SlmPolicy::SlmPolicyNone); diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index 5448c9f04f..c2e3dba808 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -592,6 +592,7 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenClSetKernelExecInfoAlreadySe } DebugManagerStateRestore stateRestore; DebugManager.flags.AUBDumpSubCaptureMode.set(static_cast(AubSubCaptureManager::SubCaptureMode::Filter)); + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); MockKernelWithInternals kernelInternals(*pClDevice, context); Kernel *kernel = kernelInternals.mockKernel; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 0ad14ca7d6..6b38f58b1d 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -729,6 +729,9 @@ HWTEST_F(EnqueueKernelTests, whenEnqueueingKernelThenCsrCorrectlySetsRequiredThr using CommandStreamReceiverHw::streamProperties; }; + DebugManagerStateRestore restorer; + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); + cl_uint workDim = 1; size_t globalWorkOffset[3] = {0, 0, 0}; size_t globalWorkSize[3] = {1, 1, 1}; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp index 71d72e7605..2651db55d2 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp @@ -1302,6 +1302,9 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelRequiringDCFlush } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDispatchFlagsWhenCallFlushTaskThenThreadArbitrationPolicyIsSetProperly) { + DebugManagerStateRestore restorer; + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); + auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 62be0e6a8a..7c3fe2518e 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -405,6 +405,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDefaultCommandStreamReceiverThenRoundRobinPolicyIsSelected) { + DebugManagerStateRestore restorer; + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); + auto pCommandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(pCommandStreamReceiver); EXPECT_EQ(ThreadArbitrationPolicy::NotPresent, pCommandStreamReceiver->peekThreadArbitrationPolicy()); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index 7e9a662b39..0aa50e35b1 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -708,6 +708,9 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStaticPartitioningEnabledWhen } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnqueueWithoutArbitrationPolicyWhenPolicyIsAlreadyProgrammedThenReuse) { + DebugManagerStateRestore restorer; + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); auto &csrThreadArbitrationPolicy = commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value; diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 16ff91e1d8..04a7f2e106 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -39,9 +39,6 @@ template void EncodeDispatchKernel::setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, const HardwareInfo &hwInfo) { - - auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); - hwInfoConfig.updateIddCommand(pInterfaceDescriptor, numGrf); } template @@ -79,6 +76,10 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDispatchKernel::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData, sizePerThreadData, hwInfo); + auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); + hwInfoConfig.updateIddCommand(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, + args.dispatchInterface->getSchedulingHintExp()); + bool localIdsGenerationByRuntime = args.dispatchInterface->requiresGenerationOfLocalIdsByRuntime(); auto requiredWorkgroupOrder = args.dispatchInterface->getRequiredWorkgroupOrder(); bool inlineDataProgramming = EncodeDispatchKernel::inlineDataProgrammingRequired(kernelDescriptor); diff --git a/shared/source/command_stream/definitions/stream_properties.inl b/shared/source/command_stream/definitions/stream_properties.inl index 65fbf96393..0851f20415 100644 --- a/shared/source/command_stream/definitions/stream_properties.inl +++ b/shared/source/command_stream/definitions/stream_properties.inl @@ -24,7 +24,7 @@ struct StateComputeModeProperties { void clearIsDirty(); bool isDirtyExtra() const; - void setPropertiesExtra(bool reportNumGrf); + void setPropertiesExtra(bool reportNumGrf, bool reportThreadArbitrationPolicy); void setPropertiesExtra(const StateComputeModeProperties &properties); void clearIsDirtyExtra(); }; diff --git a/shared/source/command_stream/stream_properties.cpp b/shared/source/command_stream/stream_properties.cpp index 63f4f5f771..e8140ba4c6 100644 --- a/shared/source/command_stream/stream_properties.cpp +++ b/shared/source/command_stream/stream_properties.cpp @@ -51,9 +51,12 @@ void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { threadArbitrationPolicy = DebugManager.flags.OverrideThreadArbitrationPolicy.get(); } - this->threadArbitrationPolicy.set(threadArbitrationPolicy); + bool reportThreadArbitrationPolicy = hwInfoConfig.isThreadArbitrationPolicyReportedWithScm(); + if (reportThreadArbitrationPolicy) { + this->threadArbitrationPolicy.set(threadArbitrationPolicy); + } - setPropertiesExtra(reportNumGrf); + setPropertiesExtra(reportNumGrf, reportThreadArbitrationPolicy); } void StateComputeModeProperties::setProperties(const StateComputeModeProperties &properties) { diff --git a/shared/source/command_stream/stream_properties_extra.cpp b/shared/source/command_stream/stream_properties_extra.cpp index 3cb4d37bb0..73ca857e2c 100644 --- a/shared/source/command_stream/stream_properties_extra.cpp +++ b/shared/source/command_stream/stream_properties_extra.cpp @@ -9,7 +9,7 @@ using namespace NEO; -void StateComputeModeProperties::setPropertiesExtra(bool reportNumGrf) { +void StateComputeModeProperties::setPropertiesExtra(bool reportNumGrf, bool reportThreadArbitrationPolicy) { } void StateComputeModeProperties::setPropertiesExtra(const StateComputeModeProperties &properties) { } diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index c164cd9693..bc5cc0790f 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -161,6 +161,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideL1CacheControlInSurfaceState, -1, "-1: f DECLARE_DEBUG_VARIABLE(int32_t, OverrideL1CacheControlInSurfaceStateForScratchSpace, -1, "-1: feature inactive, >=0 : following L1 cache control value will be programmed in render surface state for scratch space") DECLARE_DEBUG_VARIABLE(int32_t, OverridePreferredSlmAllocationSizePerDss, -1, "-1: default, >=0: program value for preferred SLM allocation size per dss") DECLARE_DEBUG_VARIABLE(int32_t, ForceGrfNumProgrammingWithScm, -1, "-1: default, 0: do not program grf num with SCM, 1: program grf num with SCM") +DECLARE_DEBUG_VARIABLE(int32_t, ForceThreadArbitrationPolicyProgrammingWithScm, -1, "-1: default, 0: do not program thread arbitration policy with SCM, 1: program thread arbitration policy with SCM") DECLARE_DEBUG_VARIABLE(int32_t, ForceL3PrefetchForComputeWalker, -1, "-1: default, 0: disable, 1: enable") DECLARE_DEBUG_VARIABLE(int32_t, ForceZPassAsyncComputeThreadLimit, -1, "-1: default, >0: Limit value in STATE_COMPUTE_MODE") DECLARE_DEBUG_VARIABLE(int32_t, ForcePixelAsyncComputeThreadLimit, -1, "-1: default, >0: Limit value in STATE_COMPUTE_MODE") diff --git a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl index 1600a16e48..ab50901e93 100644 --- a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl +++ b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl @@ -5172,7 +5172,8 @@ typedef struct tagINTERFACE_DESCRIPTOR_DATA { // DWORD 1 uint64_t Reserved_32 : BITFIELD_RANGE(32, 63); // DWORD 2 - uint32_t Reserved_64 : BITFIELD_RANGE(0, 6); + uint32_t Reserved_64 : BITFIELD_RANGE(0, 1); + uint32_t Reserved_66 : BITFIELD_RANGE(2, 6); uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7); uint32_t Reserved_72 : BITFIELD_RANGE(8, 10); uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11); diff --git a/shared/source/kernel/dispatch_kernel_encoder_interface.h b/shared/source/kernel/dispatch_kernel_encoder_interface.h index 79bf50fc89..c9eb4f3896 100644 --- a/shared/source/kernel/dispatch_kernel_encoder_interface.h +++ b/shared/source/kernel/dispatch_kernel_encoder_interface.h @@ -46,6 +46,8 @@ struct DispatchKernelEncoderI { virtual uint32_t getRequiredWorkgroupOrder() const = 0; virtual bool requiresGenerationOfLocalIdsByRuntime() const = 0; + virtual int32_t getSchedulingHintExp() const = 0; + virtual ImplicitArgs *getImplicitArgs() const = 0; }; } // namespace NEO diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index b9b619047f..5b92e10d4c 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -53,7 +53,7 @@ class HwInfoConfig { virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const = 0; virtual void setForceNonCoherent(void *const commandPtr, const StateComputeModeProperties &properties) = 0; virtual void updateScmCommand(void *const commandPtr, const StateComputeModeProperties &properties) = 0; - virtual void updateIddCommand(void *const commandPtr, uint32_t numGrf) = 0; + virtual void updateIddCommand(void *const commandPtr, uint32_t numGrf, int32_t threadArbitrationPolicy) = 0; virtual bool obtainBlitterPreference(const HardwareInfo &hwInfo) const = 0; virtual bool isBlitterFullySupported(const HardwareInfo &hwInfo) const = 0; virtual bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const = 0; @@ -104,6 +104,7 @@ class HwInfoConfig { virtual bool useChannelRedForUnusedShaderChannels() const = 0; virtual bool isIpSamplingSupported(const NEO::HardwareInfo &hwInfo) const = 0; virtual bool isGrfNumReportedWithScm() const = 0; + virtual bool isThreadArbitrationPolicyReportedWithScm() const = 0; virtual bool isCooperativeEngineSupported(const HardwareInfo &hwInfo) const = 0; virtual bool isTimestampWaitSupportedForEvents() const = 0; virtual bool isTilePlacementResourceWaRequired(const HardwareInfo &hwInfo) const = 0; @@ -146,7 +147,7 @@ class HwInfoConfigHw : public HwInfoConfig { uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override; void setForceNonCoherent(void *const commandPtr, const StateComputeModeProperties &properties) override; void updateScmCommand(void *const commandPtr, const StateComputeModeProperties &properties) override; - void updateIddCommand(void *const commandPtr, uint32_t numGrf) override; + void updateIddCommand(void *const commandPtr, uint32_t numGrf, int32_t threadArbitrationPolicy) override; bool obtainBlitterPreference(const HardwareInfo &hwInfo) const override; bool isBlitterFullySupported(const HardwareInfo &hwInfo) const override; bool isPageTableManagerSupported(const HardwareInfo &hwInfo) const override; @@ -197,6 +198,7 @@ class HwInfoConfigHw : public HwInfoConfig { bool useChannelRedForUnusedShaderChannels() const override; bool isIpSamplingSupported(const NEO::HardwareInfo &hwInfo) const override; bool isGrfNumReportedWithScm() const override; + bool isThreadArbitrationPolicyReportedWithScm() const override; bool isCooperativeEngineSupported(const HardwareInfo &hwInfo) const override; bool isTimestampWaitSupportedForEvents() const override; bool isTilePlacementResourceWaRequired(const HardwareInfo &hwInfo) const override; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index 53d53044e3..a5bf05acee 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -120,7 +120,7 @@ template void HwInfoConfigHw::updateScmCommand(void *const commandPtr, const StateComputeModeProperties &properties) {} template -void HwInfoConfigHw::updateIddCommand(void *const commandPtr, uint32_t numGrf) {} +void HwInfoConfigHw::updateIddCommand(void *const commandPtr, uint32_t numGrf, int32_t threadArbitrationPolicy) {} template bool HwInfoConfigHw::isPageTableManagerSupported(const HardwareInfo &hwInfo) const { @@ -359,6 +359,14 @@ bool HwInfoConfigHw::isGrfNumReportedWithScm() const { return true; } +template +bool HwInfoConfigHw::isThreadArbitrationPolicyReportedWithScm() const { + if (DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.get() != -1) { + return DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.get(); + } + return true; +} + template bool HwInfoConfigHw::isCooperativeEngineSupported(const HardwareInfo &hwInfo) const { return false; diff --git a/shared/test/common/mocks/mock_dispatch_kernel_encoder_interface.h b/shared/test/common/mocks/mock_dispatch_kernel_encoder_interface.h index c5c391ab94..10c37fc636 100644 --- a/shared/test/common/mocks/mock_dispatch_kernel_encoder_interface.h +++ b/shared/test/common/mocks/mock_dispatch_kernel_encoder_interface.h @@ -52,6 +52,7 @@ struct MockDispatchKernelEncoder : public DispatchKernelEncoderI { ADDMETHOD_CONST_NOBASE(getIsaAllocation, GraphicsAllocation *, &mockAllocation, ()); ADDMETHOD_CONST_NOBASE(getDynamicStateHeapData, const uint8_t *, nullptr, ()); ADDMETHOD_CONST_NOBASE(requiresGenerationOfLocalIdsByRuntime, bool, true, ()); + ADDMETHOD_CONST_NOBASE(getSchedulingHintExp, int32_t, ThreadArbitrationPolicy::NotPresent, ()); ADDMETHOD_CONST_NOBASE(getSlmPolicy, SlmPolicy, SlmPolicy::SlmPolicyNone, ()); }; } // namespace NEO diff --git a/shared/test/common/mocks/mock_hw_info_config.cpp b/shared/test/common/mocks/mock_hw_info_config.cpp index 8349c22097..30165234c9 100644 --- a/shared/test/common/mocks/mock_hw_info_config.cpp +++ b/shared/test/common/mocks/mock_hw_info_config.cpp @@ -353,7 +353,7 @@ void HwInfoConfigHw::updateScmCommand(void *const commandPtr, cons } template <> -void HwInfoConfigHw::updateIddCommand(void *const commandPtr, uint32_t numGrf) { +void HwInfoConfigHw::updateIddCommand(void *const commandPtr, uint32_t numGrf, int32_t threadArbitrationPolicy) { } template <> diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 4fea9c7336..6843e0b7d8 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -353,6 +353,7 @@ OverrideL1CacheControlInSurfaceState = -1 OverrideL1CacheControlInSurfaceStateForScratchSpace = -1 OverridePreferredSlmAllocationSizePerDss = -1 ForceGrfNumProgrammingWithScm = -1 +ForceThreadArbitrationPolicyProgrammingWithScm = -1 ForceL3PrefetchForComputeWalker = -1 ForceZPassAsyncComputeThreadLimit = -1 ForcePixelAsyncComputeThreadLimit = -1 diff --git a/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp b/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp index f6b764a569..e480532aa5 100644 --- a/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp +++ b/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp @@ -62,6 +62,7 @@ TEST(StreamPropertiesTests, whenSettingCooperativeKernelPropertiesThenCorrectVal TEST(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrectValuesAreSet) { DebugManagerStateRestore restorer; DebugManager.flags.ForceGrfNumProgrammingWithScm.set(1); + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); int32_t threadArbitrationPolicyValues[] = { ThreadArbitrationPolicy::AgeBased, ThreadArbitrationPolicy::RoundRobin, diff --git a/shared/test/unit_test/helpers/test_hw_info_config.cpp b/shared/test/unit_test/helpers/test_hw_info_config.cpp index 6af2a67684..55d240adf8 100644 --- a/shared/test/unit_test/helpers/test_hw_info_config.cpp +++ b/shared/test/unit_test/helpers/test_hw_info_config.cpp @@ -70,6 +70,22 @@ HWTEST_F(HwInfoConfigTest, givenForceGrfNumProgrammingWithScmFlagSetWhenIsGrfNum EXPECT_TRUE(hwInfoConfig.isGrfNumReportedWithScm()); } +HWTEST_F(HwInfoConfigTest, whenIsThreadArbitrationPolicyReportedWithScmIsQueriedThenTrueIsReturned) { + const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + EXPECT_TRUE(hwInfoConfig.isThreadArbitrationPolicyReportedWithScm()); +} + +HWTEST_F(HwInfoConfigTest, givenForceThreadArbitrationPolicyProgrammingWithScmFlagSetWhenIsThreadArbitrationPolicyReportedWithScmIsQueriedThenCorrectValueIsReturned) { + DebugManagerStateRestore restorer; + const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(0); + EXPECT_FALSE(hwInfoConfig.isThreadArbitrationPolicyReportedWithScm()); + + DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); + EXPECT_TRUE(hwInfoConfig.isThreadArbitrationPolicyReportedWithScm()); +} + HWTEST2_F(HwInfoConfigTest, givenHwInfoConfigWhenIsImplicitScalingSupportedThenExpectFalse, isNotXeHpOrXeHpcCore) { const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); EXPECT_FALSE(hwInfoConfig.isImplicitScalingSupported(*defaultHwInfo)); diff --git a/shared/test/unit_test/xe_hpc_core/compute_mode_tests_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/compute_mode_tests_xe_hpc_core.cpp index fff24e0209..5d5ce4def9 100644 --- a/shared/test/unit_test/xe_hpc_core/compute_mode_tests_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/compute_mode_tests_xe_hpc_core.cpp @@ -154,6 +154,7 @@ HWTEST2_F(XeHpcComputeModeRequirements, giventhreadArbitrationPolicyWithoutShare SetUpImpl(); using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); auto startOffset = getCsrHw()->commandStream.getUsed(); @@ -192,13 +193,13 @@ HWTEST2_F(XeHpcComputeModeRequirements, giventhreadArbitrationPolicyWithoutShare flags.threadArbitrationPolicy, *defaultHwInfo); flushTask(true); - findCmd(true); // first time + findCmd(hwInfoConfig.isThreadArbitrationPolicyReportedWithScm()); // first time flushTask(false); findCmd(false); // not changed flushTask(true); - findCmd(true); // changed + findCmd(hwInfoConfig.isThreadArbitrationPolicyReportedWithScm()); // changed csr->getMemoryManager()->freeGraphicsMemory(graphicAlloc); } diff --git a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp index 9f3b66989d..d67bdc062e 100644 --- a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp @@ -250,24 +250,37 @@ XE_HPC_CORETEST_F(CommandEncodeXeHpcCoreTest, whenAdjustComputeModeIsCalledThenC uint8_t buffer[64]{}; StreamProperties properties{}; + auto &hwInfoConfig = *NEO::HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); auto pLinearStream = std::make_unique(buffer, sizeof(buffer)); properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::AgeBased, *defaultHwInfo); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo); auto pScm = reinterpret_cast(pLinearStream->getCpuBase()); - EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, pScm->getEuThreadSchedulingModeOverride()); + if (hwInfoConfig.isThreadArbitrationPolicyReportedWithScm()) { + EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, pScm->getEuThreadSchedulingModeOverride()); + } else { + EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_HW_DEFAULT, pScm->getEuThreadSchedulingModeOverride()); + } pLinearStream = std::make_unique(buffer, sizeof(buffer)); properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::RoundRobin, *defaultHwInfo); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo); pScm = reinterpret_cast(pLinearStream->getCpuBase()); - EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN, pScm->getEuThreadSchedulingModeOverride()); + if (hwInfoConfig.isThreadArbitrationPolicyReportedWithScm()) { + EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN, pScm->getEuThreadSchedulingModeOverride()); + } else { + EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_HW_DEFAULT, pScm->getEuThreadSchedulingModeOverride()); + } pLinearStream = std::make_unique(buffer, sizeof(buffer)); properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::RoundRobinAfterDependency, *defaultHwInfo); EncodeComputeMode::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo); pScm = reinterpret_cast(pLinearStream->getCpuBase()); - EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALL_BASED_ROUND_ROBIN, pScm->getEuThreadSchedulingModeOverride()); + if (hwInfoConfig.isThreadArbitrationPolicyReportedWithScm()) { + EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALL_BASED_ROUND_ROBIN, pScm->getEuThreadSchedulingModeOverride()); + } else { + EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_HW_DEFAULT, pScm->getEuThreadSchedulingModeOverride()); + } pLinearStream = std::make_unique(buffer, sizeof(buffer)); properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::NotPresent, *defaultHwInfo);