Unify program thread arbitration logic for gen9 and gen11

Use single call for programming thread arbitration logic in flushTask.

Related-To: NEO-5995

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski 2021-12-13 18:29:58 +00:00 committed by Compute-Runtime-Automation
parent 5700619f24
commit e182aa6055
11 changed files with 156 additions and 50 deletions

View File

@ -16,6 +16,8 @@
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
#include "test.h"
#include "test_traits_common.h"
using namespace NEO;
using MultiRootDeviceCommandStreamReceiverBufferTests = MultiRootDeviceFixture;
@ -662,6 +664,41 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStaticPartitioningEnabledWhen
EXPECT_TRUE(found);
}
// Test matcher: accepts a product family only when it supports the GEN8 command set and the
// TestTraits of its core family report implementsPreambleThreadArbitration.
struct PreambleThreadArbitrationMatcher {
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        // Products without the GEN8 command set are rejected without looking up TestTraits.
        if constexpr (!HwMapper<productFamily>::GfxProduct::supportsCmdSet(IGFX_GEN8_CORE)) {
            return false;
        } else {
            return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::implementsPreambleThreadArbitration;
        }
    }
};
// Flushes a task for every combination of "preamble not yet sent" and "last sent thread
// arbitration policy differs from the required one", and checks how many MI_LOAD_REGISTER_IMM
// commands each flush adds to the CSR command stream.
HWTEST2_F(CommandStreamReceiverFlushTaskTests, givenVariousInputWhenFlushingTaskThenProgramThreadArbitrationPolicyWhenNeeded, PreambleThreadArbitrationMatcher) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    auto &hwHelper = HwHelper::get(pDevice->getHardwareInfo().platform.eRenderCoreFamily);
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // Initial flush with the default policy; later parsing starts past what it wrote.
    csr.requiredThreadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
    flushTask(csr);
    size_t streamOffset = csr.commandStream.getUsed();

    for (auto policyChanged : ::testing::Bool()) {
        // Set once per outer iteration; the flushes below are left to update it afterwards.
        csr.lastSentThreadArbitrationPolicy = policyChanged ? ThreadArbitrationPolicy::NotPresent
                                                            : hwHelper.getDefaultThreadArbitrationPolicy();

        for (auto preambleNeeded : ::testing::Bool()) {
            csr.isPreambleSent = !preambleNeeded;
            flushTask(csr);

            // Parse only the commands produced by the flush above.
            HardwareParse parser;
            parser.parseCommands<FamilyType>(csr.commandStream, streamOffset);
            auto lriCommands = findAll<MI_LOAD_REGISTER_IMM *>(parser.cmdList.begin(), parser.cmdList.end());
            auto actualLriCount = lriCommands.size();

            // Expected: two LRIs when the preamble is needed, one when only the policy changed,
            // none otherwise.
            size_t expectedLriCount = 0;
            if (preambleNeeded) {
                expectedLriCount = 2;
            } else if (policyChanged) {
                expectedLriCount = 1;
            }
            EXPECT_EQ(expectedLriCount, actualLriCount);

            streamOffset = csr.commandStream.getUsed();
        }
    }
}
namespace CpuIntrinsicsTests {
extern volatile uint32_t *pauseAddress;
extern uint32_t pauseValue;

View File

@ -140,35 +140,49 @@ HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetAndSourceLevelDeb
pDevice->setDebuggerActive(false);
}
HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyChangedWhenEstimatingPreambleCmdSizeThenResultDependsOnPolicyProgrammingCmdSize) {
HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyChangedWhenEstimatingFlushTaskSizeThenResultDependsOnPolicyProgrammingCmdSize) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = true;
commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy;
auto policyNotChanged = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
auto policyNotChangedPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
auto policyNotChangedFlush = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice);
commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy + 1;
auto policyChanged = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
auto policyChangedPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
auto policyChangedFlush = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice);
auto actualDifference = policyChanged - policyNotChanged;
auto expectedDifference = PreambleHelper<FamilyType>::getThreadArbitrationCommandsSize();
EXPECT_EQ(expectedDifference, actualDifference);
auto actualDifferenceForPreamble = policyChangedPreamble - policyNotChangedPreamble;
auto actualDifferenceForFlush = policyChangedFlush - policyNotChangedFlush;
auto expectedDifference = PreambleHelper<FamilyType>::getThreadArbitrationCommandsSize() +
commandStreamReceiver.getCmdSizeForComputeMode();
EXPECT_EQ(0u, actualDifferenceForPreamble);
EXPECT_EQ(expectedDifference, actualDifferenceForFlush);
}
HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentWhenEstimatingPreambleCmdSizeThenResultDependsOnPolicyProgrammingAndAdditionalCmdsSize) {
HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentWhenEstimatingFlushTaskSizeThenResultDependsOnPolicyProgrammingAndAdditionalCmdsSize) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.requiredThreadArbitrationPolicy = commandStreamReceiver.lastSentThreadArbitrationPolicy;
commandStreamReceiver.isPreambleSent = false;
auto preambleNotSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
auto preambleNotSentPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
auto preambleNotSentFlush = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice);
commandStreamReceiver.isPreambleSent = true;
auto preambleSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
auto preambleSentPreamble = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
auto preambleSentFlush = commandStreamReceiver.getRequiredCmdStreamSize(flushTaskFlags, *pDevice);
auto actualDifference = preambleNotSent - preambleSent;
auto expectedDifference = PreambleHelper<FamilyType>::getThreadArbitrationCommandsSize() + PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice);
auto actualDifferenceForPreamble = preambleNotSentPreamble - preambleSentPreamble;
auto actualDifferenceForFlush = preambleNotSentFlush - preambleSentFlush;
EXPECT_EQ(expectedDifference, actualDifference);
commandStreamReceiver.isPreambleSent = false;
auto expectedDifferenceForPreamble = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice);
auto expectedDifferenceForFlush = expectedDifferenceForPreamble + PreambleHelper<FamilyType>::getThreadArbitrationCommandsSize() +
commandStreamReceiver.getCmdSizeForL3Config() +
PreambleHelper<FamilyType>::getCmdSizeForPipelineSelect(pDevice->getHardwareInfo());
EXPECT_EQ(expectedDifferenceForPreamble, actualDifferenceForPreamble);
EXPECT_EQ(expectedDifferenceForFlush, actualDifferenceForFlush);
}
HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenMediaVfeStateDirtyEstimatingPreambleCmdSizeThenResultDependsVfeStateProgrammingCmdSize) {
@ -218,12 +232,12 @@ HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndForceSemaphoreDelayBe
auto preambleSent = commandStreamReceiver.getRequiredCmdSizeForPreamble(*pDevice);
auto actualDifferenceWhenSemaphoreDelayNotReprogrammed = preambleNotSentAndSemaphoreDelayNotReprogrammed - preambleSent;
auto expectedDifference = PreambleHelper<FamilyType>::getThreadArbitrationCommandsSize() + PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice);
auto expectedDifference = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice);
EXPECT_EQ(expectedDifference, actualDifferenceWhenSemaphoreDelayNotReprogrammed);
auto actualDifferenceWhenSemaphoreDelayReprogrammed = preambleNotSentAndSemaphoreDelayReprogrammed - preambleSent;
expectedDifference = PreambleHelper<FamilyType>::getThreadArbitrationCommandsSize() + PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice) + PreambleHelper<FamilyType>::getSemaphoreDelayCommandSize();
expectedDifference = PreambleHelper<FamilyType>::getAdditionalCommandsSize(*pDevice) + PreambleHelper<FamilyType>::getSemaphoreDelayCommandSize();
EXPECT_EQ(expectedDifference, actualDifferenceWhenSemaphoreDelayReprogrammed);
}

View File

@ -129,9 +129,6 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdSizeForPreamble(
if (!this->isPreambleSent) {
size += PreambleHelper<GfxFamily>::getAdditionalCommandsSize(device);
}
if (!this->isPreambleSent || this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) {
size += PreambleHelper<GfxFamily>::getThreadArbitrationCommandsSize();
}
if (!this->isPreambleSent) {
if (DebugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) {
size += PreambleHelper<GfxFamily>::getSemaphoreDelayCommandSize();
@ -333,6 +330,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
pageTableManagerInitialized = pageTableManager->initPageTableManagerRegisters(this);
}
bool isPreambleNeeded = !this->isPreambleSent;
programHardwareContext(commandStreamCSR);
programComputeMode(commandStreamCSR, dispatchFlags, device.getHardwareInfo());
programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs);
@ -342,7 +340,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
addPipeControlBefore3dState(commandStreamCSR, dispatchFlags);
programPerDssBackedBuffer(commandStreamCSR, device, dispatchFlags);
if (this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) {
if (this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy || isPreambleNeeded) {
PreambleHelper<GfxFamily>::programThreadArbitration(&commandStreamCSR, this->requiredThreadArbitrationPolicy);
this->lastSentThreadArbitrationPolicy = this->requiredThreadArbitrationPolicy;
}
@ -824,6 +822,10 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
size += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(dispatchFlags.csrDependencies);
size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(dispatchFlags.csrDependencies);
if (!this->isPreambleSent || this->lastSentThreadArbitrationPolicy != this->requiredThreadArbitrationPolicy) {
size += PreambleHelper<GfxFamily>::getThreadArbitrationCommandsSize();
}
if (stallingCommandsOnNextFlushRequired) {
size += getCmdSizeForStallingCommands(dispatchFlags);
}

View File

@ -77,7 +77,6 @@ template <typename GfxFamily>
void PreambleHelper<GfxFamily>::programPreamble(LinearStream *pCommandStream, Device &device, uint32_t l3Config,
uint32_t requiredThreadArbitrationPolicy, GraphicsAllocation *preemptionCsr) {
programL3(pCommandStream, l3Config);
programThreadArbitration(pCommandStream, requiredThreadArbitrationPolicy);
programPreemption(pCommandStream, device, preemptionCsr);
if (device.isDebuggerActive()) {
programKernelDebugging(pCommandStream);

View File

@ -115,7 +115,7 @@ GEN11TEST_F(PreemptionWatermarkGen11, WhenPreambleIsCreatedThenWorkAroundsIsNotP
}
typedef PreambleFixture ThreadArbitrationGen11;
GEN11TEST_F(ThreadArbitrationGen11, givenPreambleWhenItIsProgrammedThenThreadArbitrationIsSet) {
GEN11TEST_F(ThreadArbitrationGen11, givenPreambleWhenItIsProgrammedThenThreadArbitrationIsNotSet) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.ForcePreemptionMode.set(static_cast<int32_t>(PreemptionMode::Disabled));
typedef ICLFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
@ -129,6 +129,28 @@ GEN11TEST_F(ThreadArbitrationGen11, givenPreambleWhenItIsProgrammedThenThreadArb
parseCommands<FamilyType>(cs);
auto ppC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(cmdList.end(), ppC);
auto cmd = findMmioCmd<FamilyType>(cmdList.begin(), cmdList.end(), RowChickenReg4::address);
ASSERT_EQ(nullptr, cmd);
MockDevice device;
EXPECT_EQ(0u, PreambleHelper<ICLFamily>::getAdditionalCommandsSize(device));
EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL), PreambleHelper<ICLFamily>::getThreadArbitrationCommandsSize());
}
GEN11TEST_F(ThreadArbitrationGen11, whenThreadArbitrationPolicyIsProgrammedThenCorrectValuesAreSet) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.ForcePreemptionMode.set(static_cast<int32_t>(PreemptionMode::Disabled));
typedef ICLFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
typedef ICLFamily::PIPE_CONTROL PIPE_CONTROL;
LinearStream &cs = linearStream;
MockDevice mockDevice;
PreambleHelper<FamilyType>::programThreadArbitration(&linearStream, ThreadArbitrationPolicy::RoundRobin);
parseCommands<FamilyType>(cs);
auto ppC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(ppC, cmdList.end());
@ -186,4 +208,4 @@ GEN11TEST_F(PreambleFixtureGen11, whenKernelDebuggingCommandsAreProgrammedThenCo
pCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*it);
EXPECT_EQ(0xe400u, pCmd->getRegisterOffset());
EXPECT_EQ((1u << 7) | (1u << 4), pCmd->getDataDword());
}
}

View File

@ -16,4 +16,5 @@ struct TestTraits<IGFX_GEN11_CORE> {
static constexpr bool auxTranslationSupported = false;
static constexpr bool isUsingNonDefaultIoctls = false;
static constexpr bool deviceEnqueueSupport = true;
static constexpr bool implementsPreambleThreadArbitration = true;
};

View File

@ -16,4 +16,5 @@ struct TestTraits<IGFX_GEN12LP_CORE> {
static constexpr bool auxTranslationSupported = true;
static constexpr bool isUsingNonDefaultIoctls = false;
static constexpr bool deviceEnqueueSupport = false;
static constexpr bool implementsPreambleThreadArbitration = false;
};

View File

@ -15,4 +15,5 @@ struct TestTraits<IGFX_GEN8_CORE> {
static constexpr bool iohInSbaSupported = true;
static constexpr bool isUsingNonDefaultIoctls = false;
static constexpr bool deviceEnqueueSupport = false;
static constexpr bool implementsPreambleThreadArbitration = false;
};

View File

@ -6,6 +6,7 @@
*/
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/preamble/preamble_fixture.h"
#include "shared/test/unit_test/source_level_debugger/source_level_debugger_preamble_test.h"
#include "gtest/gtest.h"
@ -61,3 +62,59 @@ GEN9TEST_F(PreambleTestGen9, givenGen9ThenL3IsProgrammed) {
EXPECT_EQ(l3ConfigDifference, isL3Programmable);
}
using ThreadArbitrationGen9 = PreambleFixture;

// Programs the preamble and verifies that it carries no thread arbitration programming:
// no PIPE_CONTROL is emitted and none of the MI_LOAD_REGISTER_IMM commands targets 0xE404,
// the register offset the programThreadArbitration test in this file expects to be written.
GEN9TEST_F(ThreadArbitrationGen9, givenPreambleWhenItIsProgrammedThenThreadArbitrationIsNotSet) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.ForcePreemptionMode.set(static_cast<int32_t>(PreemptionMode::Disabled));
    typedef SKLFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
    typedef SKLFamily::PIPE_CONTROL PIPE_CONTROL;
    LinearStream &cs = linearStream;
    uint32_t l3Config = PreambleHelper<FamilyType>::getL3Config(*defaultHwInfo, true);
    MockDevice mockDevice;
    PreambleHelper<SKLFamily>::programPreamble(&linearStream, mockDevice, l3Config,
                                               ThreadArbitrationPolicy::RoundRobin,
                                               nullptr);

    parseCommands<SKLFamily>(cs);

    auto ppC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(cmdList.end(), ppC);

    // The preamble is still expected to emit at least one LRI; this keeps the scan below
    // from passing trivially on an empty command list.
    auto itorLRI = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorLRI);

    // Inspect every LRI, not only the last one: an arbitration write followed by any other
    // register write must still be detected. Only the register offset identifies the write;
    // the data dword of unrelated registers is not constrained.
    while (itorLRI != cmdList.end()) {
        const auto &lri = *reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*itorLRI);
        EXPECT_NE(0xE404u, lri.getRegisterOffset());
        itorLRI = find<MI_LOAD_REGISTER_IMM *>(++itorLRI, cmdList.end());
    }

    EXPECT_EQ(0u, PreambleHelper<SKLFamily>::getAdditionalCommandsSize(mockDevice));
    EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL), PreambleHelper<SKLFamily>::getThreadArbitrationCommandsSize());
}
// Programs only the thread arbitration policy (RoundRobin) into the stream and checks the result:
// a PIPE_CONTROL is present and the last MI_LOAD_REGISTER_IMM writes 0x100 to offset 0xE404.
GEN9TEST_F(ThreadArbitrationGen9, whenThreadArbitrationPolicyIsProgrammedThenCorrectValuesAreSet) {
    DebugManagerStateRestore dbgRestore;
    DebugManager.flags.ForcePreemptionMode.set(static_cast<int32_t>(PreemptionMode::Disabled));

    using MI_LOAD_REGISTER_IMM = SKLFamily::MI_LOAD_REGISTER_IMM;
    using PIPE_CONTROL = SKLFamily::PIPE_CONTROL;

    // Not referenced by the rest of this test.
    MockDevice mockDevice;

    PreambleHelper<FamilyType>::programThreadArbitration(&linearStream, ThreadArbitrationPolicy::RoundRobin);
    parseCommands<SKLFamily>(linearStream);

    auto pipeControlIt = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(pipeControlIt, cmdList.end());

    // Search from the back so the most recently programmed register write is examined.
    auto lastLriIt = reverse_find<MI_LOAD_REGISTER_IMM *>(cmdList.rbegin(), cmdList.rend());
    ASSERT_NE(cmdList.rend(), lastLriIt);

    const auto *lastLri = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*lastLriIt);
    EXPECT_EQ(0xE404u, lastLri->getRegisterOffset());
    EXPECT_EQ(0x100u, lastLri->getDataDword());

    // Size queries use their own device instance.
    MockDevice sizeQueryDevice;
    EXPECT_EQ(0u, PreambleHelper<SKLFamily>::getAdditionalCommandsSize(sizeQueryDevice));

    const auto arbitrationCmdsSize = sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL);
    EXPECT_EQ(arbitrationCmdsSize, PreambleHelper<SKLFamily>::getThreadArbitrationCommandsSize());
}

View File

@ -71,35 +71,6 @@ SKLTEST_F(Gen9L3Config, GivenSlmWhenProgrammingL3ThenProgrammingIsCorrect) {
}
typedef PreambleFixture ThreadArbitration;
SKLTEST_F(ThreadArbitration, givenPreambleWhenItIsProgrammedThenThreadArbitrationIsSetToRoundRobin) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.ForcePreemptionMode.set(static_cast<int32_t>(PreemptionMode::Disabled));
typedef SKLFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
typedef SKLFamily::PIPE_CONTROL PIPE_CONTROL;
LinearStream &cs = linearStream;
uint32_t l3Config = PreambleHelper<FamilyType>::getL3Config(*defaultHwInfo, true);
MockDevice mockDevice;
PreambleHelper<SKLFamily>::programPreamble(&linearStream, mockDevice, l3Config,
ThreadArbitrationPolicy::RoundRobin,
nullptr);
parseCommands<SKLFamily>(cs);
auto ppC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(ppC, cmdList.end());
auto itorLRI = reverse_find<MI_LOAD_REGISTER_IMM *>(cmdList.rbegin(), cmdList.rend());
ASSERT_NE(cmdList.rend(), itorLRI);
const auto &lri = *reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*itorLRI);
EXPECT_EQ(0xE404u, lri.getRegisterOffset());
EXPECT_EQ(0x100u, lri.getDataDword());
MockDevice device;
EXPECT_EQ(0u, PreambleHelper<SKLFamily>::getAdditionalCommandsSize(device));
EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL), PreambleHelper<SKLFamily>::getThreadArbitrationCommandsSize());
}
SKLTEST_F(ThreadArbitration, GivenDefaultWhenProgrammingPreambleThenArbitrationPolicyIsRoundRobin) {
EXPECT_EQ(ThreadArbitrationPolicy::RoundRobin, HwHelperHw<SKLFamily>::get().getDefaultThreadArbitrationPolicy());
}

View File

@ -15,4 +15,5 @@ struct TestTraits<IGFX_GEN9_CORE> {
static constexpr bool iohInSbaSupported = true;
static constexpr bool isUsingNonDefaultIoctls = false;
static constexpr bool deviceEnqueueSupport = true;
static constexpr bool implementsPreambleThreadArbitration = true;
};