From 3f7269d401ddf490236b5be914d5a0a4e72af42a Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Thu, 6 Jul 2023 14:12:49 +0000 Subject: [PATCH] fix: make sip state programming once for all level zero command queues Related-To: NEO-7828 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdqueue/cmdqueue_hw.h | 3 +- .../core/source/cmdqueue/cmdqueue_hw.inl | 19 +++--- .../sources/cmdqueue/test_cmdqueue_3.cpp | 3 + .../sources/debugger/test_l0_debugger_1.cpp | 58 +++++++++++++++++++ .../command_stream_receiver_hw_1_tests.cpp | 5 +- .../command_stream/command_stream_receiver.h | 8 +++ .../command_stream_receiver_hw_base.inl | 4 +- .../command_stream_receiver_tests.cpp | 10 ++++ 8 files changed, 96 insertions(+), 14 deletions(-) diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.h b/level_zero/core/source/cmdqueue/cmdqueue_hw.h index fd42ded9b3..b5ce518a85 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.h @@ -68,7 +68,8 @@ struct CommandQueueHw : public CommandQueueImp { Device *device, bool debugEnabled, bool programActivePartitionConfig, - bool performMigration); + bool performMigration, + bool sipSent); inline bool isNEODebuggerActive(Device *device); diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 4fa15d8980..2faddb6867 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -83,7 +83,8 @@ ze_result_t CommandQueueHw::executeCommandLists( device, NEO::Debugger::isDebugEnabled(internalUsage), csr->isProgramActivePartitionConfigRequired(), - performMigration}; + performMigration, + csr->getSipSentFlag()}; ctx.globalInit |= ctx.isDebugEnabled && !this->commandQueueDebugCmdsProgrammed && (neoDevice->getSourceLevelDebugger() || device->getL0Debugger()); this->startingCmdBuffer = &this->commandStream; @@ -498,12 +499,13 @@
CommandQueueHw::CommandListExecutionContext::CommandListExecution Device *device, bool debugEnabled, bool programActivePartitionConfig, - bool performMigration) : preemptionMode{contextPreemptionMode}, - statePreemption{contextPreemptionMode}, - isPreemptionModeInitial{contextPreemptionMode == NEO::PreemptionMode::Initial}, - isDebugEnabled{debugEnabled}, - isProgramActivePartitionConfigRequired{programActivePartitionConfig}, - isMigrationRequested{performMigration} { + bool performMigration, + bool sipSent) : preemptionMode{contextPreemptionMode}, + statePreemption{contextPreemptionMode}, + isPreemptionModeInitial{contextPreemptionMode == NEO::PreemptionMode::Initial}, + isDebugEnabled{debugEnabled}, + isProgramActivePartitionConfigRequired{programActivePartitionConfig}, + isMigrationRequested{performMigration} { constexpr size_t residencyContainerSpaceForPreemption = 2; constexpr size_t residencyContainerSpaceForTagWrite = 1; @@ -514,7 +516,7 @@ CommandQueueHw::CommandListExecutionContext::CommandListExecution this->isDevicePreemptionModeMidThread = device->getDevicePreemptionMode() == NEO::PreemptionMode::MidThread; this->stateSipRequired = (this->isPreemptionModeInitial && this->isDevicePreemptionModeMidThread) || - this->isNEODebuggerActive(device); + (!sipSent && this->isNEODebuggerActive(device)); if (this->isDevicePreemptionModeMidThread) { this->spaceForResidency += residencyContainerSpaceForPreemption; @@ -916,6 +918,7 @@ void CommandQueueHw::programStateSip(bool isStateSipRequired, NEO } NEO::Device *neoDevice = this->device->getNEODevice(); NEO::PreemptionHelper::programStateSip(cmdStream, *neoDevice, this->csr->getLogicalStateHelper(), &this->csr->getOsContext()); + this->csr->setSipSentFlag(true); } template diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp index d38670bf5e..ca1641bb30 100644 --- 
a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp @@ -844,6 +844,7 @@ HWTEST2_F(CommandQueueIndirectAllocations, givenCtxWithIndirectAccessWhenExecuti device, false, csr->isProgramActivePartitionConfigRequired(), + false, false}; ctx.hasIndirectAccess = true; @@ -869,6 +870,7 @@ HWTEST2_F(CommandQueueIndirectAllocations, givenCtxWitNohIndirectAccessWhenExecu device, false, csr->isProgramActivePartitionConfigRequired(), + false, false}; ctx.hasIndirectAccess = false; @@ -895,6 +897,7 @@ HWTEST2_F(CommandQueueIndirectAllocations, givenCommandQueueWhenHandleIndirectAl device, false, csr->isProgramActivePartitionConfigRequired(), + false, false}; std::unique_lock lock; diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp index e3ac0add44..ac38b3fdc8 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp @@ -156,6 +156,64 @@ HWTEST_F(L0DebuggerPerContextAddressSpaceTest, givenDebuggingEnabledWhenCommandL commandQueue->destroy(); } +HWTEST_F(L0DebuggerPerContextAddressSpaceTest, givenDebuggingEnabledWhenTwoCommandQueuesExecuteCommandListThenSipIsDispatchedOncePerContext) { + using STATE_SIP = typename FamilyType::STATE_SIP; + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue; + + auto &defaultEngine = neoDevice->getDefaultEngine(); + + defaultEngine.commandStreamReceiver->setPreemptionMode(NEO::PreemptionMode::ThreadGroup); + + auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, defaultEngine.commandStreamReceiver, &queueDesc, false, false, false, returnValue)); + ASSERT_NE(nullptr, commandQueue); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + + auto commandQueue2 = whiteboxCast(CommandQueue::create(productFamily, device, 
defaultEngine.commandStreamReceiver, &queueDesc, false, false, false, returnValue)); + ASSERT_NE(nullptr, commandQueue2); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + + auto engineGroupType = device->getGfxCoreHelper().getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), neoDevice->getHardwareInfo()); + ze_command_list_handle_t commandLists[] = { + CommandList::create(productFamily, device, engineGroupType, 0u, returnValue)->toHandle()}; + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + + uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); + auto commandList = CommandList::fromHandle(commandLists[0]); + commandList->close(); + + returnValue = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto usedSpaceAfter = commandQueue->commandStream.getUsed(); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + commandQueue->commandStream.getCpuBase(), + usedSpaceAfter)); + + auto stateSipCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(1u, stateSipCmds.size()); + + returnValue = commandQueue2->executeCommandLists(numCommandLists, commandLists, nullptr, true); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto usedSpaceAfter2 = commandQueue2->commandStream.getUsed(); + + cmdList.clear(); + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + commandQueue2->commandStream.getCpuBase(), + usedSpaceAfter2)); + + stateSipCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, stateSipCmds.size()); + + commandList->destroy(); + commandQueue->destroy(); + commandQueue2->destroy(); +} + using Gen12Plus = IsAtLeastGfxCore; HWTEST2_P(L0DebuggerParameterizedTests, givenDebuggerWhenAppendingKernelToCommandListThenBindlessSurfaceStateForDebugSurfaceIsProgrammedAtOffsetZero, Gen12Plus) { diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp 
b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index b26fdc8a62..f3a0d242de 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -131,7 +131,7 @@ HWTEST_F(UltCommandStreamReceiverTest, whenGetCmdSizeForPerDssBackedBufferIsCall } } -HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetAndSourceLevelDebuggerIsActiveWhenGetRequiredStateSipCmdSizeIsCalledThenStateSipCmdSizeIsIncluded) { +HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetAndSourceLevelDebuggerIsActiveWhenGetRequiredStateSipCmdSizeIsCalledThenStateSipCmdSizeIsNotIncluded) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); @@ -147,8 +147,7 @@ HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetAndSourceLevelDeb commandStreamReceiver.isStateSipSent = true; auto sizeWithSourceKernelDebugging = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); - auto sizeForStateSip = PreemptionHelper::getRequiredStateSipCmdSize(*pDevice, commandStreamReceiver.isRcs()); - EXPECT_EQ(sizeForStateSip, sizeWithSourceKernelDebugging - sizeWithoutSourceKernelDebugging - PreambleHelper::getKernelDebuggingCommandsSize(true) - debugger->sbaTrackingSize); + EXPECT_EQ(0u, sizeWithSourceKernelDebugging - sizeWithoutSourceKernelDebugging - PreambleHelper::getKernelDebuggingCommandsSize(true) - debugger->sbaTrackingSize); } HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyChangedWhenEstimatingFlushTaskSizeThenResultDependsOnPolicyProgrammingCmdSize) { diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 4013205850..13d8605225 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ 
b/shared/source/command_stream/command_stream_receiver.h @@ -383,6 +383,14 @@ class CommandStreamReceiver { isPreambleSent = value; } + bool getSipSentFlag() const { + return isStateSipSent; + } + + void setSipSentFlag(bool value) { + isStateSipSent = value; + } + PreemptionMode getPreemptionMode() const { return lastPreemptionMode; } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 565a82e656..6e4c6c5ac7 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -899,7 +899,7 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat if (device.getDebugger()) { size += device.getDebugger()->getSbaTrackingCommandsSize(NEO::Debugger::SbaAddresses::trackedAddressCount); } - if (!this->isStateSipSent || device.getDebugger()) { + if (!getSipSentFlag()) { size += PreemptionHelper::getRequiredStateSipCmdSize(device, isRcs()); } size += MemorySynchronizationCommands::getSizeForSingleBarrier(false); @@ -1004,7 +1004,7 @@ template inline void CommandStreamReceiverHw::programStateSip(LinearStream &cmdStream, Device &device) { if (!this->isStateSipSent) { PreemptionHelper::programStateSip(cmdStream, device, logicalStateHelper.get(), this->osContext); - this->isStateSipSent = true; + setSipSentFlag(true); } } diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 5d4881fd82..61347d3507 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -4140,6 +4140,12 @@ HWTEST2_F(CommandStreamReceiverHwTest, auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); commandStreamReceiver.storeMakeResidentAllocations = true; + 
EXPECT_FALSE(commandStreamReceiver.getSipSentFlag()); + commandStreamReceiver.setSipSentFlag(true); + EXPECT_TRUE(commandStreamReceiver.getSipSentFlag()); + commandStreamReceiver.setSipSentFlag(false); + EXPECT_FALSE(commandStreamReceiver.getSipSentFlag()); + commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), immediateFlushTaskFlags, *pDevice); auto sipAllocation = NEO::SipKernel::getSipKernel(*pDevice).getSipAllocation(); @@ -4151,6 +4157,8 @@ HWTEST2_F(CommandStreamReceiverHwTest, auto stateSipCmd = hwParserCsr.getCommand(); ASSERT_NE(nullptr, stateSipCmd); + EXPECT_TRUE(commandStreamReceiver.getSipSentFlag()); + size_t usedSize = commandStreamReceiver.commandStream.getUsed(); commandStreamReceiver.flushImmediateTask(commandStream, commandStream.getUsed(), @@ -4162,5 +4170,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, stateSipCmd = hwParserCsr.getCommand(); EXPECT_EQ(nullptr, stateSipCmd); + EXPECT_TRUE(commandStreamReceiver.getSipSentFlag()); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(sipAllocation)); }