Share pipeline select state updates between regular and immediate command lists

Related-To: NEO-5019

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-09-22 01:44:06 +00:00
committed by Compute-Runtime-Automation
parent 842003f827
commit a95ab1d16b
14 changed files with 370 additions and 14 deletions

View File

@@ -48,6 +48,11 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::updateDispatchFlagsWithRequi
dispatchFlags.numGrfRequired = (requiredStateComputeMode.largeGrfMode.value == 1) ? GrfConfig::LargeGrfNumber
: GrfConfig::DefaultGrfNumber;
dispatchFlags.threadArbitrationPolicy = requiredStateComputeMode.threadArbitrationPolicy.value;
const auto &requiredPipelineSelect = this->requiredStreamState.pipelineSelect;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = requiredPipelineSelect.systolicMode.value != -1
? !!requiredPipelineSelect.systolicMode.value
: false;
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -92,6 +92,12 @@ struct CmdListPipelineSelectStateFixture : public ModuleMutableCommandListFixtur
template <typename FamilyType>
void testBody();
template <typename FamilyType>
void testBodyShareStateRegularImmediate();
template <typename FamilyType>
void testBodyShareStateImmediateRegular();
DebugManagerStateRestore restorer;
};

View File

@@ -6,6 +6,7 @@
*/
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
@@ -365,5 +366,215 @@ void CmdListPipelineSelectStateFixture::testBody() {
}
}
// Test body: a regular command list is appended and executed first, then an immediate
// command list launches the same kernel. The test asserts that both share one CSR and
// checks that a single PIPELINE_SELECT (with the systolic flag set) is found in the queue
// stream during the regular execution, and none is found for the immediate launch.
template <typename FamilyType>
void CmdListPipelineSelectStateFixture::testBodyShareStateRegularImmediate() {
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
const ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
void *currentBuffer = nullptr;
auto &regularCmdlistRequiredState = commandList->getRequiredStreamState();
auto &regularCmdListFinalState = commandList->getFinalStreamState();
auto &csrState = commandQueue->csr->getStreamProperties();
auto commandListHandle = commandList->toHandle();
auto &regularCommandListStream = *commandList->commandContainer.getCommandStream();
auto &cmdQueueStream = commandQueue->commandStream;
GenCmdList cmdList;
std::vector<GenCmdList::iterator> pipelineSelectList;
size_t sizeBefore = 0;
size_t sizeAfter = 0;
auto result = ZE_RESULT_SUCCESS;
// Flag the mock kernel as using systolic pipeline select mode.
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
// Step 1: append the kernel to the regular command list; only the bytes added by this
// append (sizeBefore..sizeAfter) are parsed below.
sizeBefore = regularCommandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = regularCommandListStream.getUsed();
EXPECT_EQ(1, regularCmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, regularCmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(regularCommandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
// The regular command list stream itself is expected to contain no PIPELINE_SELECT.
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, pipelineSelectList.size());
cmdList.clear();
pipelineSelectList.clear();
// Step 2: close and execute the regular command list; inspect what the queue stream received.
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
// Exactly one PIPELINE_SELECT with the systolic flag set is expected in the queue stream.
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, pipelineSelectList.size());
auto pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
cmdList.clear();
pipelineSelectList.clear();
// Step 3: launch the same kernel through the immediate command list, which must use the
// same CSR as the command queue.
auto &immediateCmdListStream = *commandListImmediate->commandContainer.getCommandStream();
EXPECT_EQ(commandQueue->csr, commandListImmediate->csr);
auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
auto &csrStream = ultCsr.commandStream;
size_t csrUsedBefore = csrStream.getUsed();
sizeBefore = immediateCmdListStream.getUsed();
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = immediateCmdListStream.getUsed();
size_t csrUsedAfter = csrStream.getUsed();
auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState();
auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState();
EXPECT_EQ(1, immediateCmdListRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, immediateCmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(immediateCmdListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
// No PIPELINE_SELECT is expected in the immediate command list stream ...
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, pipelineSelectList.size());
cmdList.clear();
pipelineSelectList.clear();
EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(csrStream.getCpuBase(), csrUsedBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(csrUsedAfter - csrUsedBefore)));
// ... and none in the CSR stream either: the CSR state already holds systolic mode 1.
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, pipelineSelectList.size());
}
// Test body: an immediate command list launches a systolic kernel first, then a regular
// command list with the same kernel is executed on the command queue. The test asserts that
// both share one CSR and checks that a single PIPELINE_SELECT (with the systolic flag set)
// is found in the CSR stream for the immediate launch, and none is found in the queue
// stream for the later regular execution.
template <typename FamilyType>
void CmdListPipelineSelectStateFixture::testBodyShareStateImmediateRegular() {
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
const ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
void *currentBuffer = nullptr;
auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState();
auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState();
auto &immediateCmdListStream = *commandListImmediate->commandContainer.getCommandStream();
auto &csrState = commandQueue->csr->getStreamProperties();
// The command queue and the immediate command list must use the same CSR.
EXPECT_EQ(commandQueue->csr, commandListImmediate->csr);
auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
auto &csrStream = ultCsr.commandStream;
GenCmdList cmdList;
std::vector<GenCmdList::iterator> pipelineSelectList;
size_t sizeBefore = 0;
size_t sizeAfter = 0;
auto result = ZE_RESULT_SUCCESS;
// Flag the mock kernel as using systolic pipeline select mode.
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
// Step 1: launch through the immediate command list; record usage of both the immediate
// stream and the CSR stream so only the newly added bytes are parsed.
size_t csrUsedBefore = csrStream.getUsed();
sizeBefore = immediateCmdListStream.getUsed();
result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = immediateCmdListStream.getUsed();
size_t csrUsedAfter = csrStream.getUsed();
EXPECT_EQ(1, immediateCmdListRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, immediateCmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(immediateCmdListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
// The immediate command list stream itself is expected to contain no PIPELINE_SELECT.
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, pipelineSelectList.size());
cmdList.clear();
pipelineSelectList.clear();
EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(csrStream.getCpuBase(), csrUsedBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(csrUsedAfter - csrUsedBefore)));
// Exactly one PIPELINE_SELECT with the systolic flag set is expected in the CSR stream.
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, pipelineSelectList.size());
auto pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
cmdList.clear();
pipelineSelectList.clear();
// Step 2: append the same kernel to the regular command list.
auto &regularCmdlistRequiredState = commandList->getRequiredStreamState();
auto &regularCmdListFinalState = commandList->getFinalStreamState();
auto commandListHandle = commandList->toHandle();
auto &regularCommandListStream = *commandList->commandContainer.getCommandStream();
auto &cmdQueueStream = commandQueue->commandStream;
sizeBefore = regularCommandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = regularCommandListStream.getUsed();
EXPECT_EQ(1, regularCmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, regularCmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(regularCommandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
// The regular command list stream is expected to contain no PIPELINE_SELECT.
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, pipelineSelectList.size());
cmdList.clear();
pipelineSelectList.clear();
// Step 3: close and execute the regular command list on the queue.
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
// No PIPELINE_SELECT is expected in the queue stream: the CSR state already holds systolic mode 1.
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, pipelineSelectList.size());
}
} // namespace ult
} // namespace L0

View File

@@ -1414,6 +1414,8 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr
EXPECT_EQ(-1, regularCmdListFinalState.frontEndState.disableEUFusion.value);
}
commandList->close();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
@@ -1597,6 +1599,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr
cmdList.clear();
feStateCmds.clear();
commandList->close();
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,

View File

@@ -536,10 +536,21 @@ HWTEST_F(PauseOnGpuTests, givenPauseModeSetToBeforeAndAfterWhenDispatchingThenIn
using CmdListPipelineSelectStateTest = Test<CmdListPipelineSelectStateFixture>;
using SystolicSupport = IsAnyProducts<IGFX_ALDERLAKE_P, IGFX_XE_HP_SDV, IGFX_DG2, IGFX_PVC>;
// Runs the fixture's testBody() for the hardware family under test; limited to the products
// matched by SystolicSupport.
HWTEST2_F(CmdListPipelineSelectStateTest,
givenAppendSystolicKernelToCommandListWhenExecutingCommandListThenPipelineSelectStateIsTrackedCorrectly, SystolicSupport) {
testBody<FamilyType>();
}
// Runs the fixture's testBodyShareStateRegularImmediate() (regular command list first, then
// immediate) for the hardware family under test; limited to SystolicSupport products.
HWTEST2_F(CmdListPipelineSelectStateTest,
givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingSystolicKernelOnBothRegularFirstThenPipelineSelectStateIsNotChanged, SystolicSupport) {
testBodyShareStateRegularImmediate<FamilyType>();
}
// Runs the fixture's testBodyShareStateImmediateRegular() (immediate command list first, then
// regular) for the hardware family under test; limited to SystolicSupport products.
HWTEST2_F(CmdListPipelineSelectStateTest,
givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingSystolicKernelOnBothImmediateFirstThenPipelineSelectStateIsNotChanged, SystolicSupport) {
testBodyShareStateImmediateRegular<FamilyType>();
}
} // namespace ult
} // namespace L0

View File

@@ -724,6 +724,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenPreambleSentAndMediaSamplerRe
}
HWTEST2_F(CommandStreamReceiverFlushTaskTests, GivenPreambleSentAndMediaSamplerRequirementChangedWhenFlushingTaskThenPipelineSelectIsSent, IsAtMostXeHpcCore) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true;
commandStreamReceiver.isPreambleSent = true;
commandStreamReceiver.lastMediaSamplerConfig = 1;
flushTask(commandStreamReceiver);

View File

@@ -647,13 +647,15 @@ HWTEST2_F(CommandStreamReceiverHwTest, whenProgramVFEStateIsCalledThenCorrectCom
{
mockCsr->getStreamProperties().frontEndState = {};
auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
flags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
LinearStream commandStream{&memory, sizeof(memory)};
mockCsr->mediaVfeStateDirty = true;
mockCsr->programVFEState(commandStream, flags, 10);
auto pCommand = reinterpret_cast<CFE_STATE *>(&memory);
auto cfeState = reinterpret_cast<CFE_STATE *>(&memory);
auto expectedDisableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(*pHwInfo);
EXPECT_EQ(expectedDisableOverdispatch, pCommand->getComputeOverdispatchDisable());
EXPECT_EQ(expectedDisableOverdispatch, cfeState->getComputeOverdispatchDisable());
}
{
auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
@@ -661,8 +663,9 @@ HWTEST2_F(CommandStreamReceiverHwTest, whenProgramVFEStateIsCalledThenCorrectCom
LinearStream commandStream{&memory, sizeof(memory)};
mockCsr->mediaVfeStateDirty = true;
mockCsr->programVFEState(commandStream, flags, 10);
auto pCommand = reinterpret_cast<CFE_STATE *>(&memory);
EXPECT_FALSE(pCommand->getComputeOverdispatchDisable());
auto cfeState = reinterpret_cast<CFE_STATE *>(&memory);
EXPECT_FALSE(cfeState->getComputeOverdispatchDisable());
}
}
}
@@ -1644,3 +1647,97 @@ HWTEST_F(UltCommandStreamReceiverTest, givenFrontEndStateInitedWhenTransitionFro
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
commandStreamReceiver.setMediaVFEStateDirty(false);
}
// Exercises handlePipelineSelectStateTransition() without assigning any value to
// streamProperties.pipelineSelect in the test body, so the outcome is driven by the
// lastMediaSamplerConfig / lastSystolicPipelineSelectMode members, the support flags and the
// dispatch flags. NOTE(review): the initial CSR state comes from the fixture, which is not
// visible here - confirm it leaves the tracked pipeline select properties unset.
HWTEST_F(UltCommandStreamReceiverTest, givenPipelineSelectStateNotInitedWhenTransitionPipelineSelectPropertiesThenExpectCorrectValuesStored) {
auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Media sampler supported, systolic unsupported, media sampler not required: a change is
// expected (presumably because the initial lastMediaSamplerConfig differs from 0 - verify).
commandStreamReceiver.pipelineSupportFlags.systolicMode = false;
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Media sampler support switched off: no change is expected even though last (-1) differs
// from the requested value.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false;
commandStreamReceiver.lastMediaSamplerConfig = -1;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Supported again, last config 0 and not required: no change expected.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true;
commandStreamReceiver.lastMediaSamplerConfig = 0;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Media sampler now required: change expected.
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Systolic supported, last mode false, requested true: systolic change expected.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false;
commandStreamReceiver.pipelineSupportFlags.systolicMode = true;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Same request with systolic support switched off: no change expected.
commandStreamReceiver.pipelineSupportFlags.systolicMode = false;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Systolic supported, last mode and requested mode both false: no change expected.
commandStreamReceiver.pipelineSupportFlags.systolicMode = true;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
}
// Exercises handlePipelineSelectStateTransition() with streamProperties.pipelineSelect
// explicitly set to 0 or 1 before each call. In every scenario the expectation matches a
// comparison of the dispatch flags against the stream property value rather than against the
// last* member the test assigns just before the call.
HWTEST_F(UltCommandStreamReceiverTest,
givenPipelineSelectStateInitedWhenTransitionPipelineSelectPropertiesThenExpectCorrectValuesStored) {
auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Media sampler supported; stream property 1 vs. not required: change expected.
commandStreamReceiver.pipelineSupportFlags.systolicMode = false;
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true;
commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 1;
commandStreamReceiver.lastMediaSamplerConfig = -1;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Stream property 0 vs. required: change expected.
commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 0;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Stream property 0 vs. not required: no change expected, although lastMediaSamplerConfig
// was set to 1 beforehand.
commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 0;
commandStreamReceiver.lastMediaSamplerConfig = 1;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Systolic supported; stream property 1 vs. requested false: change expected, although
// lastSystolicPipelineSelectMode was set to false beforehand.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false;
commandStreamReceiver.pipelineSupportFlags.systolicMode = true;
commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 1;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Stream property 0 vs. requested true: change expected.
commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 0;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Stream property 0 vs. requested false: no change expected, although
// lastSystolicPipelineSelectMode was set to true beforehand.
commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 0;
commandStreamReceiver.lastSystolicPipelineSelectMode = true;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
}

View File

@@ -151,6 +151,7 @@ struct UltCommandStreamReceiverTest
commandStreamReceiver.lastMediaSamplerConfig = 0;
commandStreamReceiver.lastSentUseGlobalAtomics = false;
commandStreamReceiver.streamProperties.pipelineSelect.setProperties(true, false, false, *defaultHwInfo);
commandStreamReceiver.streamProperties.stateComputeMode.setProperties(0, GrfConfig::DefaultGrfNumber,
hwHelper.getDefaultThreadArbitrationPolicy(), pDevice->getPreemptionMode(), *defaultHwInfo);
commandStreamReceiver.streamProperties.frontEndState.setProperties(false, false, false, -1, *defaultHwInfo);

View File

@@ -86,14 +86,19 @@ PVCTEST_F(PvcCommandStreamReceiverFlushTaskTests, givenRevisionBAndAboveWhenLast
auto hwInfoConfig = HwInfoConfig::get(hwInfo->platform.eProductFamily);
for (auto &testInput : testInputs) {
hwInfo->platform.usRevId = testInput.revId;
commandStreamReceiver.systolicModeConfigurable = hwInfoConfig->isSystolicModeConfigurable(*hwInfo);
hwInfoConfig->fillPipelineSelectPropertiesSupportStructure(commandStreamReceiver.pipelineSupportFlags, *hwInfo);
commandStreamReceiver.isPreambleSent = true;
commandStreamReceiver.lastMediaSamplerConfig = false;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = -1;
flushTask(commandStreamReceiver);
EXPECT_EQ(testInput.expectedValue, commandStreamReceiver.lastSystolicPipelineSelectMode);
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
if (testInput.expectedValue) {
EXPECT_TRUE(commandStreamReceiver.lastSystolicPipelineSelectMode);
} else {
EXPECT_FALSE(commandStreamReceiver.lastSystolicPipelineSelectMode);
}
}
}

View File

@@ -75,8 +75,8 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi
}
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
this->systolicModeConfigurable = hwInfoConfig->isSystolicModeConfigurable(hwInfo);
hwInfoConfig->fillFrontEndPropertiesSupportStructure(feSupportFlags, hwInfo);
hwInfoConfig->fillPipelineSelectPropertiesSupportStructure(pipelineSupportFlags, hwInfo);
}
CommandStreamReceiver::~CommandStreamReceiver() {

View File

@@ -393,6 +393,7 @@ class CommandStreamReceiver {
LinearStream commandStream;
StreamProperties streamProperties{};
FrontEndPropertiesSupport feSupportFlags{};
PipelineSelectPropertiesSupport pipelineSupportFlags{};
// offset for debug state is 1kbyte, tag writes can use multiple offsets for multiple partitions and each offset can vary per platform
const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte;
@@ -476,7 +477,6 @@ class CommandStreamReceiver {
bool useGpuIdleImplicitFlush = false;
bool lastSentUseGlobalAtomics = false;
bool useNotifyEnableForPostSync = false;
bool systolicModeConfigurable = false;
};
typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(bool withAubDump,

View File

@@ -178,6 +178,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
constexpr bool isGlobalAtomicsProgrammingRequired(bool currentValue) const;
void createKernelArgsBufferAllocation() override;
void handleFrontEndStateTransition(DispatchFlags &dispatchFlags);
void handlePipelineSelectStateTransition(DispatchFlags &dispatchFlags);
HeapDirtyState dshState;
HeapDirtyState iohState;

View File

@@ -262,8 +262,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
auto newL3Config = PreambleHelper<GfxFamily>::getL3Config(hwInfo, dispatchFlags.useSLM);
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeConfigurable;
auto isSystolicPipelineSelectModeChanged = (this->lastSystolicPipelineSelectMode != dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode) && this->systolicModeConfigurable;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectSupport = this->pipelineSupportFlags.systolicMode;
handlePipelineSelectStateTransition(dispatchFlags);
auto requiresCoherency = hwHelper.forceNonGpuCoherencyWA(dispatchFlags.requiresCoherency);
this->streamProperties.stateComputeMode.setProperties(requiresCoherency, dispatchFlags.numGrfRequired,
@@ -271,8 +271,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config;
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;
csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast<int8_t>(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
csrSizeRequestFlags.systolicPipelineSelectMode = isSystolicPipelineSelectModeChanged;
csrSizeRequestFlags.activePartitionsChanged = isProgramActivePartitionConfigRequired();
@@ -1496,4 +1494,19 @@ void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(DispatchF
}
}
// Synchronizes the cached "last" pipeline select values with the tracked stream properties
// and records in csrSizeRequestFlags whether the state requested through dispatchFlags
// differs from them. A difference is reported only for properties that are enabled in
// pipelineSupportFlags.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handlePipelineSelectStateTransition(DispatchFlags &dispatchFlags) {
    const auto &trackedPipelineSelect = streamProperties.pipelineSelect;
    const auto &requestedPipelineSelect = dispatchFlags.pipelineSelectArgs;

    // A tracked value of -1 leaves the previously cached value untouched.
    if (trackedPipelineSelect.mediaSamplerDopClockGate.value != -1) {
        this->lastMediaSamplerConfig = static_cast<int8_t>(trackedPipelineSelect.mediaSamplerDopClockGate.value);
    }
    if (trackedPipelineSelect.systolicMode.value != -1) {
        this->lastSystolicPipelineSelectMode = !!trackedPipelineSelect.systolicMode.value;
    }

    const bool mediaSamplerDiffers =
        this->lastMediaSamplerConfig != static_cast<int8_t>(requestedPipelineSelect.mediaSamplerRequired);
    const bool systolicModeDiffers =
        this->lastSystolicPipelineSelectMode != !!requestedPipelineSelect.systolicPipelineSelectMode;

    csrSizeRequestFlags.mediaSamplerConfigChanged =
        mediaSamplerDiffers && this->pipelineSupportFlags.mediaSamplerDopClockGate;
    csrSizeRequestFlags.systolicPipelineSelectMode =
        systolicModeDiffers && this->pipelineSupportFlags.systolicMode;
}
} // namespace NEO

View File

@@ -33,12 +33,14 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::checkPlatformSupportsGpuIdleImplicitFlush;
using BaseClass::checkPlatformSupportsNewResourceImplicitFlush;
using BaseClass::createKernelArgsBufferAllocation;
using BaseClass::csrSizeRequestFlags;
using BaseClass::directSubmission;
using BaseClass::dshState;
using BaseClass::getCmdSizeForPrologue;
using BaseClass::getScratchPatchAddress;
using BaseClass::getScratchSpaceController;
using BaseClass::handleFrontEndStateTransition;
using BaseClass::handlePipelineSelectStateTransition;
using BaseClass::indirectHeap;
using BaseClass::iohState;
using BaseClass::isBlitterDirectSubmissionEnabled;
@@ -110,6 +112,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::osContext;
using BaseClass::CommandStreamReceiver::ownershipMutex;
using BaseClass::CommandStreamReceiver::perfCounterAllocator;
using BaseClass::CommandStreamReceiver::pipelineSupportFlags;
using BaseClass::CommandStreamReceiver::postSyncWriteOffset;
using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator;
using BaseClass::CommandStreamReceiver::requiredPrivateScratchSize;
@@ -118,7 +121,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::scratchSpaceController;
using BaseClass::CommandStreamReceiver::stallingCommandsOnNextFlushRequired;
using BaseClass::CommandStreamReceiver::submissionAggregator;
using BaseClass::CommandStreamReceiver::systolicModeConfigurable;
using BaseClass::CommandStreamReceiver::tagAddress;
using BaseClass::CommandStreamReceiver::taskCount;
using BaseClass::CommandStreamReceiver::taskLevel;