Unify logic determining thread arbitration policy value

Related-To: NEO-6728

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2022-03-07 20:29:02 +00:00
committed by Compute-Runtime-Automation
parent d5fedf90c5
commit dd01cff879
27 changed files with 109 additions and 89 deletions

View File

@@ -260,23 +260,13 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode,
hwInfo);
if (dispatchFlags.threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) {
if (this->streamProperties.stateComputeMode.threadArbitrationPolicy.value != -1) {
// Reuse previous programming
dispatchFlags.threadArbitrationPolicy = this->streamProperties.stateComputeMode.threadArbitrationPolicy.value;
} else {
// Pick default if this is first submit
dispatchFlags.threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
}
}
if (dispatchFlags.numGrfRequired == GrfConfig::NotApplicable) {
dispatchFlags.numGrfRequired = lastSentNumGrfRequired;
}
auto requiresCoherency = hwHelper.forceNonGpuCoherencyWA(dispatchFlags.requiresCoherency);
this->streamProperties.stateComputeMode.setProperties(requiresCoherency, dispatchFlags.numGrfRequired,
dispatchFlags.threadArbitrationPolicy);
dispatchFlags.threadArbitrationPolicy, hwInfo);
csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config;
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;

View File

@@ -16,7 +16,7 @@ struct StateComputeModeProperties {
StreamProperty pixelAsyncComputeThreadLimit{};
StreamProperty threadArbitrationPolicy{};
void setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy);
void setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, const HardwareInfo &hwInfo);
void setProperties(const StateComputeModeProperties &properties);
bool isDirty() const;
void clearIsDirty();

View File

@@ -7,11 +7,13 @@
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/command_stream/thread_arbitration_policy.h"
#include "shared/source/kernel/grf_config.h"
using namespace NEO;
void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy) {
void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy,
const HardwareInfo &hwInfo) {
clearIsDirty();
int32_t isCoherencyRequired = (requiresCoherency ? 1 : 0);
@@ -32,8 +34,15 @@ void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t
}
this->pixelAsyncComputeThreadLimit.set(pixelAsyncComputeThreadLimit);
bool setDefaultThreadArbitrationPolicy = (threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) &&
(NEO::DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.get() ||
(this->threadArbitrationPolicy.value == ThreadArbitrationPolicy::NotPresent));
if (setDefaultThreadArbitrationPolicy) {
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
}
if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
threadArbitrationPolicy = static_cast<uint32_t>(DebugManager.flags.OverrideThreadArbitrationPolicy.get());
threadArbitrationPolicy = DebugManager.flags.OverrideThreadArbitrationPolicy.get();
}
this->threadArbitrationPolicy.set(threadArbitrationPolicy);
}

View File

@@ -409,6 +409,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableDeepBind, false, "Disable passing RTLD_DEEPB
DECLARE_DEBUG_VARIABLE(bool, UseUmKmDataTranslator, false, "Use helper library for UMD<->KMD (WDDM) struct layout compatibility")
DECLARE_DEBUG_VARIABLE(bool, SkipFlushingEventsOnGetStatusCalls, false, "When set to 1, events are not causing internal flush when querying for CL_EVENT_COMMAND_EXECUTION_STATUS")
DECLARE_DEBUG_VARIABLE(bool, AllowUnrestrictedSize, false, "Allow allocating memory with greater size than MAX_MEM_ALLOC_SIZE")
DECLARE_DEBUG_VARIABLE(bool, ForceDefaultThreadArbitrationPolicyIfNotSpecified, false, "When executing kernel without thread arbitration hint specified, ensure the default setting is used")
DECLARE_DEBUG_VARIABLE(int32_t, ProgramExtendedPipeControlPriorToNonPipelinedStateCommand, -1, "-1: default, 0: disable, 1: enable, Program additional extended version of PIPE CONTROL command before non pipelined state command")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideDrmRegion, -1, "-1: disable, 0+: override to given memory region for all allocations")
DECLARE_DEBUG_VARIABLE(bool, ForceAllResourcesUncached, false, "When set, all memory operations for all resources are forced to UC. This overrides all caching-related debug variables and globally disables all caches")

View File

@@ -32,7 +32,7 @@ GEN12LPTEST_F(CommandEncoderTest, WhenAdjustComputeModeIsCalledThenStateComputeM
// Adjust the State Compute Mode which sets FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT
StreamProperties properties{};
properties.stateComputeMode.setProperties(false, cmdContainer.lastSentNumGrfRequired, 0);
properties.stateComputeMode.setProperties(false, cmdContainer.lastSentNumGrfRequired, 0, *defaultHwInfo);
NEO::EncodeComputeMode<FamilyType>::programComputeModeCommand(*cmdContainer.getCommandStream(),
properties.stateComputeMode, *defaultHwInfo);

View File

@@ -273,25 +273,25 @@ XE_HPC_CORETEST_F(CommandEncodeXeHpcCoreTest, whenAdjustComputeModeIsCalledThenC
StreamProperties properties{};
auto pLinearStream = std::make_unique<LinearStream>(buffer, sizeof(buffer));
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::AgeBased);
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::AgeBased, *defaultHwInfo);
EncodeComputeMode<FamilyType>::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo);
auto pScm = reinterpret_cast<STATE_COMPUTE_MODE *>(pLinearStream->getCpuBase());
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, pScm->getEuThreadSchedulingModeOverride());
pLinearStream = std::make_unique<LinearStream>(buffer, sizeof(buffer));
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::RoundRobin);
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::RoundRobin, *defaultHwInfo);
EncodeComputeMode<FamilyType>::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo);
pScm = reinterpret_cast<STATE_COMPUTE_MODE *>(pLinearStream->getCpuBase());
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN, pScm->getEuThreadSchedulingModeOverride());
pLinearStream = std::make_unique<LinearStream>(buffer, sizeof(buffer));
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::RoundRobinAfterDependency);
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::RoundRobinAfterDependency, *defaultHwInfo);
EncodeComputeMode<FamilyType>::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo);
pScm = reinterpret_cast<STATE_COMPUTE_MODE *>(pLinearStream->getCpuBase());
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALL_BASED_ROUND_ROBIN, pScm->getEuThreadSchedulingModeOverride());
pLinearStream = std::make_unique<LinearStream>(buffer, sizeof(buffer));
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::NotPresent);
properties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::NotPresent, *defaultHwInfo);
EncodeComputeMode<FamilyType>::programComputeModeCommand(*pLinearStream, properties.stateComputeMode, *defaultHwInfo);
pScm = reinterpret_cast<STATE_COMPUTE_MODE *>(pLinearStream->getCpuBase());
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_HW_DEFAULT, pScm->getEuThreadSchedulingModeOverride());

View File

@@ -69,7 +69,7 @@ TEST(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrectValu
for (auto requiresCoherency : ::testing::Bool()) {
for (auto largeGrf : ::testing::Bool()) {
for (auto threadArbitrationPolicy : threadArbitrationPolicyValues) {
properties.stateComputeMode.setProperties(requiresCoherency, largeGrf ? 256 : 128, threadArbitrationPolicy);
properties.stateComputeMode.setProperties(requiresCoherency, largeGrf ? 256 : 128, threadArbitrationPolicy, *defaultHwInfo);
EXPECT_EQ(largeGrf, properties.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(requiresCoherency, properties.stateComputeMode.isCoherencyRequired.value);
EXPECT_EQ(-1, properties.stateComputeMode.zPassAsyncComputeThreadLimit.value);
@@ -81,19 +81,19 @@ TEST(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrectValu
for (auto forceZPassAsyncComputeThreadLimit : ::testing::Bool()) {
DebugManager.flags.ForceZPassAsyncComputeThreadLimit.set(forceZPassAsyncComputeThreadLimit);
properties.stateComputeMode.setProperties(false, 0u, 0u);
properties.stateComputeMode.setProperties(false, 0u, 0u, *defaultHwInfo);
EXPECT_EQ(forceZPassAsyncComputeThreadLimit, properties.stateComputeMode.zPassAsyncComputeThreadLimit.value);
}
for (auto forcePixelAsyncComputeThreadLimit : ::testing::Bool()) {
DebugManager.flags.ForcePixelAsyncComputeThreadLimit.set(forcePixelAsyncComputeThreadLimit);
properties.stateComputeMode.setProperties(false, 0u, 0u);
properties.stateComputeMode.setProperties(false, 0u, 0u, *defaultHwInfo);
EXPECT_EQ(forcePixelAsyncComputeThreadLimit, properties.stateComputeMode.pixelAsyncComputeThreadLimit.value);
}
for (auto threadArbitrationPolicy : threadArbitrationPolicyValues) {
DebugManager.flags.OverrideThreadArbitrationPolicy.set(threadArbitrationPolicy);
properties.stateComputeMode.setProperties(false, 0u, 0u);
properties.stateComputeMode.setProperties(false, 0u, 0u, *defaultHwInfo);
EXPECT_EQ(threadArbitrationPolicy, properties.stateComputeMode.threadArbitrationPolicy.value);
}
}

View File

@@ -73,7 +73,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTestPvcAndLater, givenCommandCon
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
cmdContainer->lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber;
StreamProperties streamProperties{};
streamProperties.stateComputeMode.setProperties(false, GrfConfig::LargeGrfNumber, 0u);
streamProperties.stateComputeMode.setProperties(false, GrfConfig::LargeGrfNumber, 0u, *defaultHwInfo);
EncodeComputeMode<FamilyType>::programComputeModeCommand(*cmdContainer->getCommandStream(), streamProperties.stateComputeMode, *defaultHwInfo);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());

View File

@@ -439,11 +439,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDispatchInterfaceWhenNumRequiredGrfIsNotDefaultThenStateComputeModeCommandAdded) {
StreamProperties streamProperties{};
streamProperties.stateComputeMode.setProperties(false, 128, 0u);
streamProperties.stateComputeMode.setProperties(false, 128, 0u);
streamProperties.stateComputeMode.setProperties(false, 128, 0u, *defaultHwInfo);
streamProperties.stateComputeMode.setProperties(false, 128, 0u, *defaultHwInfo);
EXPECT_FALSE(streamProperties.stateComputeMode.isDirty());
streamProperties.stateComputeMode.setProperties(false, 256, 0u);
streamProperties.stateComputeMode.setProperties(false, 256, 0u, *defaultHwInfo);
EXPECT_TRUE(streamProperties.stateComputeMode.isDirty());
}

View File

@@ -22,7 +22,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenCommandContainerWhenN
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
cmdContainer->lastSentNumGrfRequired = 0;
StreamProperties streamProperties{};
streamProperties.stateComputeMode.setProperties(false, cmdContainer->lastSentNumGrfRequired + 1, 0u);
streamProperties.stateComputeMode.setProperties(false, cmdContainer->lastSentNumGrfRequired + 1, 0u, *defaultHwInfo);
EncodeComputeMode<FamilyType>::programComputeModeCommand(*cmdContainer->getCommandStream(), streamProperties.stateComputeMode, *defaultHwInfo);
GenCmdList commands;
@@ -58,7 +58,7 @@ HWTEST2_F(CommandEncodeStatesTest, givenLargeGrfModeProgrammedThenExpectedComman
NEO::EncodeComputeMode<GfxFamily>::adjustPipelineSelect(*cmdContainer, descriptor);
StreamProperties streamProperties{};
streamProperties.stateComputeMode.setProperties(false, 256u, 0u);
streamProperties.stateComputeMode.setProperties(false, 256u, 0u, *defaultHwInfo);
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommand(*cmdContainer->getCommandStream(), streamProperties.stateComputeMode, *defaultHwInfo);
auto usedSpaceAfter = cmdContainer->getCommandStream()->getUsed();

View File

@@ -191,7 +191,7 @@ PVCTEST_F(PvcComputeModeRequirements, giventhreadArbitrationPolicyWithoutSharedH
};
getCsrHw<FamilyType>()->streamProperties.stateComputeMode.setProperties(flags.requiresCoherency, flags.numGrfRequired,
flags.threadArbitrationPolicy);
flags.threadArbitrationPolicy, *defaultHwInfo);
flushTask(true);
findCmd(true); // first time

View File

@@ -62,7 +62,7 @@ XE_HPG_CORETEST_F(ComputeModeRequirementsXeHpgCore, GivenVariousSettingsWhenComp
DebugManager.flags.ForcePixelAsyncComputeThreadLimit.set(testValue.pixelThreadLimit);
pCsr->streamProperties.stateComputeMode = {};
pCsr->streamProperties.stateComputeMode.setProperties(false, 0u, 0u);
pCsr->streamProperties.stateComputeMode.setProperties(false, 0u, 0u, *defaultHwInfo);
LinearStream stream(buff, 1024);
pCsr->programComputeMode(stream, flags, *defaultHwInfo);
EXPECT_EQ(cmdsSize, stream.getUsed());
@@ -83,7 +83,7 @@ XE_HPG_CORETEST_F(ComputeModeRequirementsXeHpgCore, GivenVariousSettingsWhenComp
DebugManager.flags.ForcePixelAsyncComputeThreadLimit.set(-1);
pCsr->streamProperties.stateComputeMode = {};
pCsr->streamProperties.stateComputeMode.setProperties(false, 0u, 0u);
pCsr->streamProperties.stateComputeMode.setProperties(false, 0u, 0u, *defaultHwInfo);
LinearStream stream(buff, 1024);
pCsr->programComputeMode(stream, flags, *defaultHwInfo);
EXPECT_EQ(cmdsSize, stream.getUsed());