mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Unify logic determining thread arbitration policy value
Related-To: NEO-6728
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d5fedf90c5
commit
dd01cff879
@@ -260,23 +260,13 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode,
|
||||
hwInfo);
|
||||
|
||||
if (dispatchFlags.threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) {
|
||||
if (this->streamProperties.stateComputeMode.threadArbitrationPolicy.value != -1) {
|
||||
// Reuse previous programming
|
||||
dispatchFlags.threadArbitrationPolicy = this->streamProperties.stateComputeMode.threadArbitrationPolicy.value;
|
||||
} else {
|
||||
// Pick default if this is first submit
|
||||
dispatchFlags.threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
|
||||
}
|
||||
}
|
||||
|
||||
if (dispatchFlags.numGrfRequired == GrfConfig::NotApplicable) {
|
||||
dispatchFlags.numGrfRequired = lastSentNumGrfRequired;
|
||||
}
|
||||
|
||||
auto requiresCoherency = hwHelper.forceNonGpuCoherencyWA(dispatchFlags.requiresCoherency);
|
||||
this->streamProperties.stateComputeMode.setProperties(requiresCoherency, dispatchFlags.numGrfRequired,
|
||||
dispatchFlags.threadArbitrationPolicy);
|
||||
dispatchFlags.threadArbitrationPolicy, hwInfo);
|
||||
|
||||
csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config;
|
||||
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;
|
||||
|
||||
@@ -16,7 +16,7 @@ struct StateComputeModeProperties {
|
||||
StreamProperty pixelAsyncComputeThreadLimit{};
|
||||
StreamProperty threadArbitrationPolicy{};
|
||||
|
||||
void setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy);
|
||||
void setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, const HardwareInfo &hwInfo);
|
||||
void setProperties(const StateComputeModeProperties &properties);
|
||||
bool isDirty() const;
|
||||
void clearIsDirty();
|
||||
|
||||
@@ -7,11 +7,13 @@
|
||||
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
|
||||
#include "shared/source/command_stream/thread_arbitration_policy.h"
|
||||
#include "shared/source/kernel/grf_config.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy) {
|
||||
void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy,
|
||||
const HardwareInfo &hwInfo) {
|
||||
clearIsDirty();
|
||||
|
||||
int32_t isCoherencyRequired = (requiresCoherency ? 1 : 0);
|
||||
@@ -32,8 +34,15 @@ void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t
|
||||
}
|
||||
this->pixelAsyncComputeThreadLimit.set(pixelAsyncComputeThreadLimit);
|
||||
|
||||
bool setDefaultThreadArbitrationPolicy = (threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) &&
|
||||
(NEO::DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.get() ||
|
||||
(this->threadArbitrationPolicy.value == ThreadArbitrationPolicy::NotPresent));
|
||||
if (setDefaultThreadArbitrationPolicy) {
|
||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
|
||||
}
|
||||
if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
|
||||
threadArbitrationPolicy = static_cast<uint32_t>(DebugManager.flags.OverrideThreadArbitrationPolicy.get());
|
||||
threadArbitrationPolicy = DebugManager.flags.OverrideThreadArbitrationPolicy.get();
|
||||
}
|
||||
this->threadArbitrationPolicy.set(threadArbitrationPolicy);
|
||||
}
|
||||
|
||||
@@ -409,6 +409,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableDeepBind, false, "Disable passing RTLD_DEEPB
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseUmKmDataTranslator, false, "Use helper library for UMD<->KMD (WDDM) struct layout compatibility")
|
||||
DECLARE_DEBUG_VARIABLE(bool, SkipFlushingEventsOnGetStatusCalls, false, "When set to 1, events are not causing internal flush when querying for CL_EVENT_COMMAND_EXECUTION_STATUS")
|
||||
DECLARE_DEBUG_VARIABLE(bool, AllowUnrestrictedSize, false, "Allow allocating memory with greater size than MAX_MEM_ALLOC_SIZE")
|
||||
DECLARE_DEBUG_VARIABLE(bool, ForceDefaultThreadArbitrationPolicyIfNotSpecified, false, "When executing kernel without thread arbitration hint specified, ensure the default setting is used")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ProgramExtendedPipeControlPriorToNonPipelinedStateCommand, -1, "-1: default, 0: disable, 1: enable, Program additional extended version of PIPE CONTROL command before non pipelined state command")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideDrmRegion, -1, "-1: disable, 0+: override to given memory region for all allocations")
|
||||
DECLARE_DEBUG_VARIABLE(bool, ForceAllResourcesUncached, false, "When set, all memory operations for all resources are forced to UC. This overrides all caching-related debug variables and globally disables all caches")
|
||||
|
||||
@@ -32,7 +32,7 @@ GEN12LPTEST_F(CommandEncoderTest, WhenAdjustComputeModeIsCalledThenStateComputeM
|
||||
|
||||
// Adjust the State Compute Mode which sets FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT
|
||||
StreamProperties properties{};
|
||||
properties.stateComputeMode.setProperties(false, cmdContainer.lastSentNumGrfRequired, 0);
|
||||
properties.stateComputeMode.setProperties(false, cmdContainer.lastSentNumGrfRequired, 0, *defaultHwInfo);
|
||||
NEO::EncodeComputeMode<FamilyType>::programComputeModeCommand(*cmdContainer.getCommandStream(),
|
||||
properties.stateComputeMode, *defaultHwInfo);
|
||||
|
||||
|
||||
@@ -273,25 +273,25 @@ XE_HPC_CORETEST_F(CommandEncodeXeHpcCoreTest, whenAdjustComputeModeIsCalledThenC
|
||||
StreamProperties properties{};
|
||||
|
||||
auto pLinearStream = std::make_unique<LinearStream>(buffer, sizeof(buffer));
|
||||
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::AgeBased);
|
||||
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::AgeBased, *defaultHwInfo);
|
||||
EncodeComputeMode<FamilyType>::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo);
|
||||
auto pScm = reinterpret_cast<STATE_COMPUTE_MODE *>(pLinearStream->getCpuBase());
|
||||
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, pScm->getEuThreadSchedulingModeOverride());
|
||||
|
||||
pLinearStream = std::make_unique<LinearStream>(buffer, sizeof(buffer));
|
||||
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::RoundRobin);
|
||||
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::RoundRobin, *defaultHwInfo);
|
||||
EncodeComputeMode<FamilyType>::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo);
|
||||
pScm = reinterpret_cast<STATE_COMPUTE_MODE *>(pLinearStream->getCpuBase());
|
||||
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN, pScm->getEuThreadSchedulingModeOverride());
|
||||
|
||||
pLinearStream = std::make_unique<LinearStream>(buffer, sizeof(buffer));
|
||||
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::RoundRobinAfterDependency);
|
||||
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::RoundRobinAfterDependency, *defaultHwInfo);
|
||||
EncodeComputeMode<FamilyType>::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo);
|
||||
pScm = reinterpret_cast<STATE_COMPUTE_MODE *>(pLinearStream->getCpuBase());
|
||||
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALL_BASED_ROUND_ROBIN, pScm->getEuThreadSchedulingModeOverride());
|
||||
|
||||
pLinearStream = std::make_unique<LinearStream>(buffer, sizeof(buffer));
|
||||
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::NotPresent);
|
||||
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::NotPresent, *defaultHwInfo);
|
||||
EncodeComputeMode<FamilyType>::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo);
|
||||
pScm = reinterpret_cast<STATE_COMPUTE_MODE *>(pLinearStream->getCpuBase());
|
||||
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_HW_DEFAULT, pScm->getEuThreadSchedulingModeOverride());
|
||||
|
||||
@@ -69,7 +69,7 @@ TEST(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrectValu
|
||||
for (auto requiresCoherency : ::testing::Bool()) {
|
||||
for (auto largeGrf : ::testing::Bool()) {
|
||||
for (auto threadArbitrationPolicy : threadArbitrationPolicyValues) {
|
||||
properties.stateComputeMode.setProperties(requiresCoherency, largeGrf ? 256 : 128, threadArbitrationPolicy);
|
||||
properties.stateComputeMode.setProperties(requiresCoherency, largeGrf ? 256 : 128, threadArbitrationPolicy, *defaultHwInfo);
|
||||
EXPECT_EQ(largeGrf, properties.stateComputeMode.largeGrfMode.value);
|
||||
EXPECT_EQ(requiresCoherency, properties.stateComputeMode.isCoherencyRequired.value);
|
||||
EXPECT_EQ(-1, properties.stateComputeMode.zPassAsyncComputeThreadLimit.value);
|
||||
@@ -81,19 +81,19 @@ TEST(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrectValu
|
||||
|
||||
for (auto forceZPassAsyncComputeThreadLimit : ::testing::Bool()) {
|
||||
DebugManager.flags.ForceZPassAsyncComputeThreadLimit.set(forceZPassAsyncComputeThreadLimit);
|
||||
properties.stateComputeMode.setProperties(false, 0u, 0u);
|
||||
properties.stateComputeMode.setProperties(false, 0u, 0u, *defaultHwInfo);
|
||||
EXPECT_EQ(forceZPassAsyncComputeThreadLimit, properties.stateComputeMode.zPassAsyncComputeThreadLimit.value);
|
||||
}
|
||||
|
||||
for (auto forcePixelAsyncComputeThreadLimit : ::testing::Bool()) {
|
||||
DebugManager.flags.ForcePixelAsyncComputeThreadLimit.set(forcePixelAsyncComputeThreadLimit);
|
||||
properties.stateComputeMode.setProperties(false, 0u, 0u);
|
||||
properties.stateComputeMode.setProperties(false, 0u, 0u, *defaultHwInfo);
|
||||
EXPECT_EQ(forcePixelAsyncComputeThreadLimit, properties.stateComputeMode.pixelAsyncComputeThreadLimit.value);
|
||||
}
|
||||
|
||||
for (auto threadArbitrationPolicy : threadArbitrationPolicyValues) {
|
||||
DebugManager.flags.OverrideThreadArbitrationPolicy.set(threadArbitrationPolicy);
|
||||
properties.stateComputeMode.setProperties(false, 0u, 0u);
|
||||
properties.stateComputeMode.setProperties(false, 0u, 0u, *defaultHwInfo);
|
||||
EXPECT_EQ(threadArbitrationPolicy, properties.stateComputeMode.threadArbitrationPolicy.value);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,7 +73,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTestPvcAndLater, givenCommandCon
|
||||
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
|
||||
cmdContainer->lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
StreamProperties streamProperties{};
|
||||
streamProperties.stateComputeMode.setProperties(false, GrfConfig::LargeGrfNumber, 0u);
|
||||
streamProperties.stateComputeMode.setProperties(false, GrfConfig::LargeGrfNumber, 0u, *defaultHwInfo);
|
||||
EncodeComputeMode<FamilyType>::programComputeModeCommand(*cmdContainer->getCommandStream(), streamProperties.stateComputeMode, *defaultHwInfo);
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
|
||||
|
||||
@@ -439,11 +439,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDispatchInterfaceWhenNumRequiredGrfIsNotDefaultThenStateComputeModeCommandAdded) {
|
||||
StreamProperties streamProperties{};
|
||||
streamProperties.stateComputeMode.setProperties(false, 128, 0u);
|
||||
streamProperties.stateComputeMode.setProperties(false, 128, 0u);
|
||||
streamProperties.stateComputeMode.setProperties(false, 128, 0u, *defaultHwInfo);
|
||||
streamProperties.stateComputeMode.setProperties(false, 128, 0u, *defaultHwInfo);
|
||||
EXPECT_FALSE(streamProperties.stateComputeMode.isDirty());
|
||||
|
||||
streamProperties.stateComputeMode.setProperties(false, 256, 0u);
|
||||
streamProperties.stateComputeMode.setProperties(false, 256, 0u, *defaultHwInfo);
|
||||
EXPECT_TRUE(streamProperties.stateComputeMode.isDirty());
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenCommandContainerWhenN
|
||||
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
|
||||
cmdContainer->lastSentNumGrfRequired = 0;
|
||||
StreamProperties streamProperties{};
|
||||
streamProperties.stateComputeMode.setProperties(false, cmdContainer->lastSentNumGrfRequired + 1, 0u);
|
||||
streamProperties.stateComputeMode.setProperties(false, cmdContainer->lastSentNumGrfRequired + 1, 0u, *defaultHwInfo);
|
||||
EncodeComputeMode<FamilyType>::programComputeModeCommand(*cmdContainer->getCommandStream(), streamProperties.stateComputeMode, *defaultHwInfo);
|
||||
|
||||
GenCmdList commands;
|
||||
@@ -58,7 +58,7 @@ HWTEST2_F(CommandEncodeStatesTest, givenLargeGrfModeProgrammedThenExpectedComman
|
||||
|
||||
NEO::EncodeComputeMode<GfxFamily>::adjustPipelineSelect(*cmdContainer, descriptor);
|
||||
StreamProperties streamProperties{};
|
||||
streamProperties.stateComputeMode.setProperties(false, 256u, 0u);
|
||||
streamProperties.stateComputeMode.setProperties(false, 256u, 0u, *defaultHwInfo);
|
||||
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommand(*cmdContainer->getCommandStream(), streamProperties.stateComputeMode, *defaultHwInfo);
|
||||
|
||||
auto usedSpaceAfter = cmdContainer->getCommandStream()->getUsed();
|
||||
|
||||
@@ -191,7 +191,7 @@ PVCTEST_F(PvcComputeModeRequirements, giventhreadArbitrationPolicyWithoutSharedH
|
||||
};
|
||||
|
||||
getCsrHw<FamilyType>()->streamProperties.stateComputeMode.setProperties(flags.requiresCoherency, flags.numGrfRequired,
|
||||
flags.threadArbitrationPolicy);
|
||||
flags.threadArbitrationPolicy, *defaultHwInfo);
|
||||
|
||||
flushTask(true);
|
||||
findCmd(true); // first time
|
||||
|
||||
@@ -62,7 +62,7 @@ XE_HPG_CORETEST_F(ComputeModeRequirementsXeHpgCore, GivenVariousSettingsWhenComp
|
||||
DebugManager.flags.ForcePixelAsyncComputeThreadLimit.set(testValue.pixelThreadLimit);
|
||||
|
||||
pCsr->streamProperties.stateComputeMode = {};
|
||||
pCsr->streamProperties.stateComputeMode.setProperties(false, 0u, 0u);
|
||||
pCsr->streamProperties.stateComputeMode.setProperties(false, 0u, 0u, *defaultHwInfo);
|
||||
LinearStream stream(buff, 1024);
|
||||
pCsr->programComputeMode(stream, flags, *defaultHwInfo);
|
||||
EXPECT_EQ(cmdsSize, stream.getUsed());
|
||||
@@ -83,7 +83,7 @@ XE_HPG_CORETEST_F(ComputeModeRequirementsXeHpgCore, GivenVariousSettingsWhenComp
|
||||
DebugManager.flags.ForcePixelAsyncComputeThreadLimit.set(-1);
|
||||
|
||||
pCsr->streamProperties.stateComputeMode = {};
|
||||
pCsr->streamProperties.stateComputeMode.setProperties(false, 0u, 0u);
|
||||
pCsr->streamProperties.stateComputeMode.setProperties(false, 0u, 0u, *defaultHwInfo);
|
||||
LinearStream stream(buff, 1024);
|
||||
pCsr->programComputeMode(stream, flags, *defaultHwInfo);
|
||||
EXPECT_EQ(cmdsSize, stream.getUsed());
|
||||
|
||||
Reference in New Issue
Block a user