diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index af54148820..17efb4e062 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -48,6 +48,11 @@ void CommandListCoreFamilyImmediate::updateDispatchFlagsWithRequi dispatchFlags.numGrfRequired = (requiredStateComputeMode.largeGrfMode.value == 1) ? GrfConfig::LargeGrfNumber : GrfConfig::DefaultGrfNumber; dispatchFlags.threadArbitrationPolicy = requiredStateComputeMode.threadArbitrationPolicy.value; + + const auto &requiredPipelineSelect = this->requiredStreamState.pipelineSelect; + dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = requiredPipelineSelect.systolicMode.value != -1 + ? !!requiredPipelineSelect.systolicMode.value + : false; } template diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 742222da43..3e66cef892 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -92,6 +92,12 @@ struct CmdListPipelineSelectStateFixture : public ModuleMutableCommandListFixtur template void testBody(); + template + void testBodyShareStateRegularImmediate(); + + template + void testBodyShareStateImmediateRegular(); + DebugManagerStateRestore restorer; }; diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl index 4eadeabe8d..214f2c3dbc 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl @@ -6,6 +6,7 @@ */ #include "shared/test/common/helpers/unit_test_helper.h" +#include "shared/test/common/libult/ult_command_stream_receiver.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" @@ -365,5 +366,215 @@ 
void CmdListPipelineSelectStateFixture::testBody() { } } +template +void CmdListPipelineSelectStateFixture::testBodyShareStateRegularImmediate() { + using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; + + const ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + + void *currentBuffer = nullptr; + + auto &regularCmdlistRequiredState = commandList->getRequiredStreamState(); + auto &regularCmdListFinalState = commandList->getFinalStreamState(); + auto &csrState = commandQueue->csr->getStreamProperties(); + + auto commandListHandle = commandList->toHandle(); + + auto &regularCommandListStream = *commandList->commandContainer.getCommandStream(); + auto &cmdQueueStream = commandQueue->commandStream; + + GenCmdList cmdList; + std::vector pipelineSelectList; + size_t sizeBefore = 0; + size_t sizeAfter = 0; + auto result = ZE_RESULT_SUCCESS; + + mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1; + + sizeBefore = regularCommandListStream.getUsed(); + result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + sizeAfter = regularCommandListStream.getUsed(); + + EXPECT_EQ(1, regularCmdlistRequiredState.pipelineSelect.systolicMode.value); + EXPECT_EQ(1, regularCmdListFinalState.pipelineSelect.systolicMode.value); + + currentBuffer = ptrOffset(regularCommandListStream.getCpuBase(), sizeBefore); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + currentBuffer, + (sizeAfter - sizeBefore))); + pipelineSelectList = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, pipelineSelectList.size()); + + cmdList.clear(); + pipelineSelectList.clear(); + commandList->close(); + + sizeBefore = cmdQueueStream.getUsed(); + result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + sizeAfter = cmdQueueStream.getUsed(); + + EXPECT_EQ(1, 
csrState.pipelineSelect.systolicMode.value); + + currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + currentBuffer, + (sizeAfter - sizeBefore))); + pipelineSelectList = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(1u, pipelineSelectList.size()); + + auto pipelineSelectCmd = genCmdCast(*pipelineSelectList[0]); + EXPECT_TRUE(NEO::UnitTestHelper::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd)); + + cmdList.clear(); + pipelineSelectList.clear(); + + auto &immediateCmdListStream = *commandListImmediate->commandContainer.getCommandStream(); + EXPECT_EQ(commandQueue->csr, commandListImmediate->csr); + + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + auto &csrStream = ultCsr.commandStream; + + size_t csrUsedBefore = csrStream.getUsed(); + sizeBefore = immediateCmdListStream.getUsed(); + result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + sizeAfter = immediateCmdListStream.getUsed(); + size_t csrUsedAfter = csrStream.getUsed(); + + auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState(); + auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState(); + + EXPECT_EQ(1, immediateCmdListRequiredState.pipelineSelect.systolicMode.value); + EXPECT_EQ(1, immediateCmdListFinalState.pipelineSelect.systolicMode.value); + + currentBuffer = ptrOffset(immediateCmdListStream.getCpuBase(), sizeBefore); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + currentBuffer, + (sizeAfter - sizeBefore))); + pipelineSelectList = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, pipelineSelectList.size()); + + cmdList.clear(); + pipelineSelectList.clear(); + + EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value); + + currentBuffer = ptrOffset(csrStream.getCpuBase(), csrUsedBefore); + 
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + currentBuffer, + (csrUsedAfter - csrUsedBefore))); + pipelineSelectList = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, pipelineSelectList.size()); +} + +template +void CmdListPipelineSelectStateFixture::testBodyShareStateImmediateRegular() { + using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; + + const ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + + void *currentBuffer = nullptr; + + auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState(); + auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState(); + + auto &immediateCmdListStream = *commandListImmediate->commandContainer.getCommandStream(); + + auto &csrState = commandQueue->csr->getStreamProperties(); + + EXPECT_EQ(commandQueue->csr, commandListImmediate->csr); + + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + auto &csrStream = ultCsr.commandStream; + + GenCmdList cmdList; + std::vector pipelineSelectList; + size_t sizeBefore = 0; + size_t sizeAfter = 0; + auto result = ZE_RESULT_SUCCESS; + + mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1; + + size_t csrUsedBefore = csrStream.getUsed(); + sizeBefore = immediateCmdListStream.getUsed(); + result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + sizeAfter = immediateCmdListStream.getUsed(); + size_t csrUsedAfter = csrStream.getUsed(); + + EXPECT_EQ(1, immediateCmdListRequiredState.pipelineSelect.systolicMode.value); + EXPECT_EQ(1, immediateCmdListFinalState.pipelineSelect.systolicMode.value); + + currentBuffer = ptrOffset(immediateCmdListStream.getCpuBase(), sizeBefore); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + currentBuffer, + (sizeAfter - sizeBefore))); + pipelineSelectList = findAll(cmdList.begin(), 
cmdList.end()); + EXPECT_EQ(0u, pipelineSelectList.size()); + + cmdList.clear(); + pipelineSelectList.clear(); + + EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value); + + currentBuffer = ptrOffset(csrStream.getCpuBase(), csrUsedBefore); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + currentBuffer, + (csrUsedAfter - csrUsedBefore))); + pipelineSelectList = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(1u, pipelineSelectList.size()); + + auto pipelineSelectCmd = genCmdCast(*pipelineSelectList[0]); + EXPECT_TRUE(NEO::UnitTestHelper::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd)); + + cmdList.clear(); + pipelineSelectList.clear(); + + auto &regularCmdlistRequiredState = commandList->getRequiredStreamState(); + auto &regularCmdListFinalState = commandList->getFinalStreamState(); + + auto commandListHandle = commandList->toHandle(); + + auto &regularCommandListStream = *commandList->commandContainer.getCommandStream(); + auto &cmdQueueStream = commandQueue->commandStream; + + sizeBefore = regularCommandListStream.getUsed(); + result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + sizeAfter = regularCommandListStream.getUsed(); + + EXPECT_EQ(1, regularCmdlistRequiredState.pipelineSelect.systolicMode.value); + EXPECT_EQ(1, regularCmdListFinalState.pipelineSelect.systolicMode.value); + + currentBuffer = ptrOffset(regularCommandListStream.getCpuBase(), sizeBefore); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + currentBuffer, + (sizeAfter - sizeBefore))); + pipelineSelectList = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, pipelineSelectList.size()); + + cmdList.clear(); + pipelineSelectList.clear(); + commandList->close(); + + sizeBefore = cmdQueueStream.getUsed(); + result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + sizeAfter = 
cmdQueueStream.getUsed(); + + EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value); + + currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + currentBuffer, + (sizeAfter - sizeBefore))); + pipelineSelectList = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, pipelineSelectList.size()); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index 201560d1d3..29f013ac16 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -1414,6 +1414,8 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr EXPECT_EQ(-1, regularCmdListFinalState.frontEndState.disableEUFusion.value); } + commandList->close(); + GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, @@ -1597,6 +1599,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr cmdList.clear(); feStateCmds.clear(); + commandList->close(); ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp index 08947b2602..ff0c191f6e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp @@ -536,10 +536,21 @@ HWTEST_F(PauseOnGpuTests, givenPauseModeSetToBeforeAndAfterWhenDispatchingThenIn using CmdListPipelineSelectStateTest = Test; using SystolicSupport = IsAnyProducts; + HWTEST2_F(CmdListPipelineSelectStateTest, 
givenAppendSystolicKernelToCommandListWhenExecutingCommandListThenPipelineSelectStateIsTrackedCorrectly, SystolicSupport) { testBody(); } +HWTEST2_F(CmdListPipelineSelectStateTest, + givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingSystolicKernelOnBothRegularFirstThenPipelineSelectStateIsNotChanged, SystolicSupport) { + testBodyShareStateRegularImmediate(); +} + +HWTEST2_F(CmdListPipelineSelectStateTest, + givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingSystolicKernelOnBothImmediateFirstThenPipelineSelectStateIsNotChanged, SystolicSupport) { + testBodyShareStateImmediateRegular(); +} + } // namespace ult } // namespace L0 diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp index aaf3216143..b73ad4f497 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp @@ -724,6 +724,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenPreambleSentAndMediaSamplerRe } HWTEST2_F(CommandStreamReceiverFlushTaskTests, GivenPreambleSentAndMediaSamplerRequirementChangedWhenFlushingTaskThenPipelineSelectIsSent, IsAtMostXeHpcCore) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true; commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastMediaSamplerConfig = 1; flushTask(commandStreamReceiver); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index 874a1eccf3..6291537200 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -647,13 +647,15 @@ 
HWTEST2_F(CommandStreamReceiverHwTest, whenProgramVFEStateIsCalledThenCorrectCom { mockCsr->getStreamProperties().frontEndState = {}; auto flags = DispatchFlagsHelper::createDefaultDispatchFlags(); + flags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch; + LinearStream commandStream{&memory, sizeof(memory)}; mockCsr->mediaVfeStateDirty = true; mockCsr->programVFEState(commandStream, flags, 10); - auto pCommand = reinterpret_cast(&memory); + auto cfeState = reinterpret_cast(&memory); auto expectedDisableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(*pHwInfo); - EXPECT_EQ(expectedDisableOverdispatch, pCommand->getComputeOverdispatchDisable()); + EXPECT_EQ(expectedDisableOverdispatch, cfeState->getComputeOverdispatchDisable()); } { auto flags = DispatchFlagsHelper::createDefaultDispatchFlags(); @@ -661,8 +663,9 @@ HWTEST2_F(CommandStreamReceiverHwTest, whenProgramVFEStateIsCalledThenCorrectCom LinearStream commandStream{&memory, sizeof(memory)}; mockCsr->mediaVfeStateDirty = true; mockCsr->programVFEState(commandStream, flags, 10); - auto pCommand = reinterpret_cast(&memory); - EXPECT_FALSE(pCommand->getComputeOverdispatchDisable()); + auto cfeState = reinterpret_cast(&memory); + + EXPECT_FALSE(cfeState->getComputeOverdispatchDisable()); } } } @@ -1644,3 +1647,97 @@ HWTEST_F(UltCommandStreamReceiverTest, givenFrontEndStateInitedWhenTransitionFro EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty()); commandStreamReceiver.setMediaVFEStateDirty(false); } + +HWTEST_F(UltCommandStreamReceiverTest, givenPipelineSelectStateNotInitedWhenTransitionPipelineSelectPropertiesThenExpectCorrectValuesStored) { + auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + + commandStreamReceiver.pipelineSupportFlags.systolicMode = false; + commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true; + + 
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged); + + commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false; + commandStreamReceiver.lastMediaSamplerConfig = -1; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged); + + commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true; + commandStreamReceiver.lastMediaSamplerConfig = 0; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged); + + dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = true; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged); + + commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false; + commandStreamReceiver.pipelineSupportFlags.systolicMode = true; + + commandStreamReceiver.lastSystolicPipelineSelectMode = false; + dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode); + + commandStreamReceiver.pipelineSupportFlags.systolicMode = false; + commandStreamReceiver.lastSystolicPipelineSelectMode = false; + dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode); + + commandStreamReceiver.pipelineSupportFlags.systolicMode = true; + commandStreamReceiver.lastSystolicPipelineSelectMode = false; + 
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode); +} + +HWTEST_F(UltCommandStreamReceiverTest, + givenPipelineSelectStateInitedWhenTransitionPipelineSelectPropertiesThenExpectCorrectValuesStored) { + auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + + commandStreamReceiver.pipelineSupportFlags.systolicMode = false; + commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true; + + commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 1; + commandStreamReceiver.lastMediaSamplerConfig = -1; + dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged); + + commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 0; + dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = true; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged); + + commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 0; + commandStreamReceiver.lastMediaSamplerConfig = 1; + dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged); + + commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false; + commandStreamReceiver.pipelineSupportFlags.systolicMode = true; + + commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 1; + 
commandStreamReceiver.lastSystolicPipelineSelectMode = false; + dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode); + + commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 0; + dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode); + + commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 0; + commandStreamReceiver.lastSystolicPipelineSelectMode = true; + dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false; + commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode); +} diff --git a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h index d3e032cfbd..1a6cada359 100644 --- a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h +++ b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h @@ -151,6 +151,7 @@ struct UltCommandStreamReceiverTest commandStreamReceiver.lastMediaSamplerConfig = 0; commandStreamReceiver.lastSentUseGlobalAtomics = false; + commandStreamReceiver.streamProperties.pipelineSelect.setProperties(true, false, false, *defaultHwInfo); commandStreamReceiver.streamProperties.stateComputeMode.setProperties(0, GrfConfig::DefaultGrfNumber, hwHelper.getDefaultThreadArbitrationPolicy(), pDevice->getPreemptionMode(), *defaultHwInfo); commandStreamReceiver.streamProperties.frontEndState.setProperties(false, false, false, -1, *defaultHwInfo); diff --git a/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp 
b/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp index 0ddbe54357..f59684d410 100644 --- a/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp +++ b/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp @@ -86,14 +86,19 @@ PVCTEST_F(PvcCommandStreamReceiverFlushTaskTests, givenRevisionBAndAboveWhenLast auto hwInfoConfig = HwInfoConfig::get(hwInfo->platform.eProductFamily); for (auto &testInput : testInputs) { hwInfo->platform.usRevId = testInput.revId; - commandStreamReceiver.systolicModeConfigurable = hwInfoConfig->isSystolicModeConfigurable(*hwInfo); + hwInfoConfig->fillPipelineSelectPropertiesSupportStructure(commandStreamReceiver.pipelineSupportFlags, *hwInfo); commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.lastMediaSamplerConfig = false; + commandStreamReceiver.lastSystolicPipelineSelectMode = false; + commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = -1; flushTask(commandStreamReceiver); - EXPECT_EQ(testInput.expectedValue, commandStreamReceiver.lastSystolicPipelineSelectMode); - commandStreamReceiver.lastSystolicPipelineSelectMode = false; + if (testInput.expectedValue) { + EXPECT_TRUE(commandStreamReceiver.lastSystolicPipelineSelectMode); + } else { + EXPECT_FALSE(commandStreamReceiver.lastSystolicPipelineSelectMode); + } } } diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 32a5532558..176d589aed 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -75,8 +75,8 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi } auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); - this->systolicModeConfigurable = hwInfoConfig->isSystolicModeConfigurable(hwInfo); hwInfoConfig->fillFrontEndPropertiesSupportStructure(feSupportFlags, 
hwInfo); + hwInfoConfig->fillPipelineSelectPropertiesSupportStructure(pipelineSupportFlags, hwInfo); } CommandStreamReceiver::~CommandStreamReceiver() { diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index ea67b5def3..bb34fce71e 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -393,6 +393,7 @@ class CommandStreamReceiver { LinearStream commandStream; StreamProperties streamProperties{}; FrontEndPropertiesSupport feSupportFlags{}; + PipelineSelectPropertiesSupport pipelineSupportFlags{}; // offset for debug state is 1kbyte, tag writes can use multiple offsets for multiple partitions and each offset can vary per platform const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte; @@ -476,7 +477,6 @@ class CommandStreamReceiver { bool useGpuIdleImplicitFlush = false; bool lastSentUseGlobalAtomics = false; bool useNotifyEnableForPostSync = false; - bool systolicModeConfigurable = false; }; typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(bool withAubDump, diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 0a587eefe3..ac3bb81c42 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -178,6 +178,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { constexpr bool isGlobalAtomicsProgrammingRequired(bool currentValue) const; void createKernelArgsBufferAllocation() override; void handleFrontEndStateTransition(DispatchFlags &dispatchFlags); + void handlePipelineSelectStateTransition(DispatchFlags &dispatchFlags); HeapDirtyState dshState; HeapDirtyState iohState; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl 
index 79e692e2a5..d2a7aeffb8 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -262,8 +262,8 @@ CompletionStamp CommandStreamReceiverHw::flushTask( auto newL3Config = PreambleHelper::getL3Config(hwInfo, dispatchFlags.useSLM); - dispatchFlags.pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeConfigurable; - auto isSystolicPipelineSelectModeChanged = (this->lastSystolicPipelineSelectMode != dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode) && this->systolicModeConfigurable; + dispatchFlags.pipelineSelectArgs.systolicPipelineSelectSupport = this->pipelineSupportFlags.systolicMode; + handlePipelineSelectStateTransition(dispatchFlags); auto requiresCoherency = hwHelper.forceNonGpuCoherencyWA(dispatchFlags.requiresCoherency); this->streamProperties.stateComputeMode.setProperties(requiresCoherency, dispatchFlags.numGrfRequired, @@ -271,8 +271,6 @@ CompletionStamp CommandStreamReceiverHw::flushTask( csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config; csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode; - csrSizeRequestFlags.mediaSamplerConfigChanged = this->lastMediaSamplerConfig != static_cast(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired); - csrSizeRequestFlags.systolicPipelineSelectMode = isSystolicPipelineSelectModeChanged; csrSizeRequestFlags.activePartitionsChanged = isProgramActivePartitionConfigRequired(); @@ -1496,4 +1494,19 @@ void CommandStreamReceiverHw::handleFrontEndStateTransition(DispatchF } } +template +void CommandStreamReceiverHw::handlePipelineSelectStateTransition(DispatchFlags &dispatchFlags) { + if (streamProperties.pipelineSelect.mediaSamplerDopClockGate.value != -1) { + this->lastMediaSamplerConfig = static_cast(streamProperties.pipelineSelect.mediaSamplerDopClockGate.value); + } + if (streamProperties.pipelineSelect.systolicMode.value 
!= -1) { + this->lastSystolicPipelineSelectMode = !!streamProperties.pipelineSelect.systolicMode.value; + } + + csrSizeRequestFlags.mediaSamplerConfigChanged = + (this->lastMediaSamplerConfig != static_cast(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired)) && this->pipelineSupportFlags.mediaSamplerDopClockGate; + csrSizeRequestFlags.systolicPipelineSelectMode = + (this->lastSystolicPipelineSelectMode != !!dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode) && this->pipelineSupportFlags.systolicMode; +} + } // namespace NEO diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index bbb4023307..936f7a4bb8 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -33,12 +33,14 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::checkPlatformSupportsGpuIdleImplicitFlush; using BaseClass::checkPlatformSupportsNewResourceImplicitFlush; using BaseClass::createKernelArgsBufferAllocation; + using BaseClass::csrSizeRequestFlags; using BaseClass::directSubmission; using BaseClass::dshState; using BaseClass::getCmdSizeForPrologue; using BaseClass::getScratchPatchAddress; using BaseClass::getScratchSpaceController; using BaseClass::handleFrontEndStateTransition; + using BaseClass::handlePipelineSelectStateTransition; using BaseClass::indirectHeap; using BaseClass::iohState; using BaseClass::isBlitterDirectSubmissionEnabled; @@ -110,6 +112,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::osContext; using BaseClass::CommandStreamReceiver::ownershipMutex; using BaseClass::CommandStreamReceiver::perfCounterAllocator; + using BaseClass::CommandStreamReceiver::pipelineSupportFlags; using BaseClass::CommandStreamReceiver::postSyncWriteOffset; using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator; using 
BaseClass::CommandStreamReceiver::requiredPrivateScratchSize; @@ -118,7 +121,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::scratchSpaceController; using BaseClass::CommandStreamReceiver::stallingCommandsOnNextFlushRequired; using BaseClass::CommandStreamReceiver::submissionAggregator; - using BaseClass::CommandStreamReceiver::systolicModeConfigurable; using BaseClass::CommandStreamReceiver::tagAddress; using BaseClass::CommandStreamReceiver::taskCount; using BaseClass::CommandStreamReceiver::taskLevel;