diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index f720338a45..a5d740d9cd 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -653,9 +653,9 @@ size_t CommandQueueHw::estimateLinearStreamSizeComplementary( linearStreamSizeEstimate += estimatePipelineSelectCmdSize(); if (this->stateComputeModeTracking || this->pipelineSelectStateTracking || frontEndTrackingEnabled() || this->stateBaseAddressTracking) { - auto streamPropertiesCopy = csr->getStreamProperties(); + auto streamPropertiesCopy = this->csr->getStreamProperties(); bool frontEndStateDirtyCopy = ctx.frontEndStateDirty; - bool gpgpuEnabledCopy = csr->getPreambleSetFlag(); + bool gpgpuEnabledCopy = this->csr->getPreambleSetFlag(); bool baseAdresStateDirtyCopy = ctx.gsbaStateDirty; for (uint32_t i = 0; i < numCommandLists; i++) { auto cmdList = CommandList::fromHandle(phCommandLists[i]); @@ -1194,11 +1194,10 @@ size_t CommandQueueHw::estimateScmCmdSizeForMultipleCommandLists( size_t estimatedSize = 0; bool isRcs = this->getCsr()->isRcs(); - size_t singleScmCmdSize = NEO::EncodeComputeMode::getCmdSizeForComputeMode(device->getNEODevice()->getRootDeviceEnvironment(), false, isRcs); csrStateCopy.stateComputeMode.setProperties(cmdListRequired.stateComputeMode); if (csrStateCopy.stateComputeMode.isDirty()) { - estimatedSize += singleScmCmdSize; + estimatedSize = NEO::EncodeComputeMode::getCmdSizeForComputeMode(device->getNEODevice()->getRootDeviceEnvironment(), false, isRcs); } csrStateCopy.stateComputeMode.setProperties(cmdListFinal.stateComputeMode); @@ -1227,6 +1226,7 @@ void CommandQueueHw::programRequiredStateComputeModeForCommandLis bool isRcs = this->getCsr()->isRcs(); NEO::EncodeComputeMode::programComputeModeCommandWithSynchronization(commandStream, csrState.stateComputeMode, pipelineSelectArgs, false, device->getNEODevice()->getRootDeviceEnvironment(), isRcs, this->getCsr()->getDcFlushSupport(), nullptr); + this->csr->setStateComputeModeDirty(false); } csrState.stateComputeMode.setProperties(cmdListFinal.stateComputeMode); } diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl index ffb178878e..2d462a8b24 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl @@ -593,6 +593,7 @@ void CmdListThreadArbitrationFixture::testBody() { auto &commandListStream = *commandList->commandContainer.getCommandStream(); auto &cmdQueueStream = commandQueue->commandStream; + auto queueCsr = commandQueue->getCsr(); GenCmdList cmdList; std::vector stateComputeModeList; @@ -622,11 +623,15 @@ void CmdListThreadArbitrationFixture::testBody() { stateComputeModeList.clear(); commandList->close(); + EXPECT_TRUE(queueCsr->getStateComputeModeDirty()); + sizeBefore = cmdQueueStream.getUsed(); result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); sizeAfter = cmdQueueStream.getUsed(); + EXPECT_FALSE(queueCsr->getStateComputeModeDirty()); + EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, csrState.stateComputeMode.threadArbitrationPolicy.value); currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore); @@ -850,6 +855,7 @@ void CmdListLargeGrfFixture::testBody() { auto &cmdlistRequiredState = commandList->getRequiredStreamState(); auto &cmdListFinalState = commandList->getFinalStreamState(); auto &csrState = commandQueue->csr->getStreamProperties(); + auto queueCsr = commandQueue->getCsr(); auto commandListHandle = commandList->toHandle(); @@ -884,11 +890,15 @@ void CmdListLargeGrfFixture::testBody() { stateComputeModeList.clear(); commandList->close(); + EXPECT_TRUE(queueCsr->getStateComputeModeDirty()); + sizeBefore = cmdQueueStream.getUsed(); result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); sizeAfter = cmdQueueStream.getUsed(); + EXPECT_FALSE(queueCsr->getStateComputeModeDirty()); + EXPECT_EQ(0, csrState.stateComputeMode.largeGrfMode.value); currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore); diff --git a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h index 7de4dc1615..523dc9ee36 100644 --- a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h +++ b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h @@ -143,6 +143,7 @@ struct UltCommandStreamReceiverTest commandStreamReceiver.isStateSipSent = true; commandStreamReceiver.lastPreemptionMode = pDevice->getPreemptionMode(); commandStreamReceiver.setMediaVFEStateDirty(false); + commandStreamReceiver.stateComputeModeDirty = false; auto gmmHelper = pDevice->getGmmHelper(); auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); auto mocsIndex = gfxCoreHelper.getMocsIndex(*gmmHelper, true, isL1CacheEnabled); diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 0ed5cd04df..726487ca49 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -534,6 +534,7 @@ void CommandStreamReceiver::initProgrammingFlags() { bindingTableBaseAddressRequired = true; mediaVfeStateDirty = true; lastVmeSubslicesConfig = false; + stateComputeModeDirty = true; lastSentL3Config = 0; lastMediaSamplerConfig = -1; diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 8341e0bcc2..f6b9550f48 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -159,10 +159,13 @@ class CommandStreamReceiver { void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; } void setMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; } - bool getMediaVFEStateDirty() { return mediaVfeStateDirty; } + bool getMediaVFEStateDirty() const { return mediaVfeStateDirty; } void setGSBAStateDirty(bool dirty) { GSBAStateDirty = dirty; } - bool getGSBAStateDirty() { return GSBAStateDirty; } + bool getGSBAStateDirty() const { return GSBAStateDirty; } + + void setStateComputeModeDirty(bool dirty) { stateComputeModeDirty = dirty; } + bool getStateComputeModeDirty() const { return stateComputeModeDirty; } void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize); GraphicsAllocation *getScratchAllocation(); @@ -507,6 +510,7 @@ class CommandStreamReceiver { bool bindingTableBaseAddressRequired = false; bool heapStorageRequiresRecyclingTag = false; bool mediaVfeStateDirty = true; + bool stateComputeModeDirty = true; bool lastVmeSubslicesConfig = false; bool timestampPacketWriteEnabled = false; bool staticWorkPartitioningEnabled = false; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index ceeded3109..e08eae7cc3 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -804,6 +804,7 @@ void CommandStreamReceiverHw::programComputeMode(LinearStream &stream EncodeComputeMode::programComputeModeCommandWithSynchronization( stream, this->streamProperties.stateComputeMode, dispatchFlags.pipelineSelectArgs, hasSharedHandles(), this->peekRootDeviceEnvironment(), isRcs(), this->dcFlushSupport, logicalStateHelper.get()); + this->setStateComputeModeDirty(false); } } diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 1b9bd0cda0..f44455d17e 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -124,6 +124,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired; using BaseClass::CommandStreamReceiver::sbaSupportFlags; using BaseClass::CommandStreamReceiver::scratchSpaceController; + using BaseClass::CommandStreamReceiver::stateComputeModeDirty; using BaseClass::CommandStreamReceiver::submissionAggregator; using BaseClass::CommandStreamReceiver::tagAddress; using BaseClass::CommandStreamReceiver::taskCount; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index fbc91b8f5d..dbc84a46e5 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -198,6 +198,7 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) { EXPECT_FALSE(csr.isPreambleSent); EXPECT_FALSE(csr.GSBAFor32BitProgrammed); EXPECT_TRUE(csr.mediaVfeStateDirty); + EXPECT_TRUE(csr.stateComputeModeDirty); EXPECT_FALSE(csr.lastVmeSubslicesConfig); EXPECT_EQ(0u, csr.lastSentL3Config); EXPECT_EQ(-1, csr.lastMediaSamplerConfig); @@ -2999,3 +3000,28 @@ HWTEST2_F(CommandStreamReceiverHwTest, ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress); EXPECT_EQ(nullptr, hwParserCsr.cmdBindingTableBaseAddress); } + +HWTEST2_F(CommandStreamReceiverHwTest, + givenStateComputeModeDirtyWhenFlushingFirstTimeThenCleanDirtyFlagToDispatchStateComputeMode, + IsAtLeastXeHpCore) { + using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + EXPECT_TRUE(commandStreamReceiver.getStateComputeModeDirty()); + + commandStreamReceiver.flushTask(commandStream, + 0, + &dsh, + &ioh, + &ssh, + taskLevel, + flushTaskFlags, + *pDevice); + EXPECT_FALSE(commandStreamReceiver.getStateComputeModeDirty()); + + HardwareParse hwParserCsr; + hwParserCsr.parseCommands(commandStreamReceiver.commandStream, 0); + hwParserCsr.findHardwareCommands(); + auto scmCmd = hwParserCsr.getCommand(); + EXPECT_NE(nullptr, scmCmd); +}