From 96c668d5955e347c4c3815af41e6d1f7192113bb Mon Sep 17 00:00:00 2001 From: Mateusz Hoppe Date: Thu, 21 Apr 2022 17:57:54 +0000 Subject: [PATCH] L0 Debugger - Support for debugging immediate cmd lists - add debugger support to imm cmd lists - add debugger support to flushTask Related-To: NEO-6845 Signed-off-by: Mateusz Hoppe --- .../source/cmdlist/cmdlist_hw_immediate.inl | 6 ++ .../sources/debugger/test_l0_debugger_2.cpp | 102 +++++++++++++++--- .../command_stream_receiver_hw_1_tests.cpp | 2 +- .../command_stream_receiver_hw_base.inl | 28 +++-- 4 files changed, 114 insertions(+), 24 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index b595376e35..bd3bb6e611 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -114,6 +114,12 @@ ze_result_t CommandListCoreFamilyImmediate::executeCommandListImm ssh = this->device->getNEODevice()->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_SSH); } + if (this->device->getL0Debugger()) { + UNRECOVERABLE_IF(!NEO::Debugger::isDebugEnabled(this->internalUsage)); + this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId())); + this->csr->makeResident(*this->device->getDebugSurface()); + } + auto completionStamp = this->csr->flushTask( *commandStream, commandStreamStart, diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp index ff795edeef..a464e2479c 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_2.cpp @@ -5,7 +5,9 @@ * */ +#include "shared/source/helpers/preamble.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" +#include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/image/image_hw.h" @@ -18,7 +20,7 @@ namespace ult { using L0DebuggerTest = Test; -struct L0DebuggerInternalUsageTest : public L0DebuggerTest { +struct L0DebuggerWithBlitterTest : public L0DebuggerTest { void SetUp() override { VariableBackup backupHwInfo(defaultHwInfo.get()); defaultHwInfo->capabilityTable.blitterOperationsSupported = true; @@ -26,7 +28,7 @@ struct L0DebuggerInternalUsageTest : public L0DebuggerTest { } }; -HWTEST_F(L0DebuggerInternalUsageTest, givenFlushTaskSubmissionEnabledWhenCommandListIsInititalizedOrResetThenCaptureSbaIsNotCalled) { +HWTEST_F(L0DebuggerWithBlitterTest, givenFlushTaskSubmissionEnabledWhenCommandListIsInititalizedOrResetThenCaptureSbaIsNotCalled) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; DebugManagerStateRestore restorer; @@ -55,7 +57,7 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenFlushTaskSubmissionEnabledWhenCommand commandList->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenFlushTaskSubmissionDisabledWhenCommandListIsInititalizedOrResetThenCaptureSbaIsCalled) { +HWTEST_F(L0DebuggerWithBlitterTest, givenFlushTaskSubmissionDisabledWhenCommandListIsInititalizedOrResetThenCaptureSbaIsNotCalled) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; DebugManagerStateRestore restorer; @@ -84,7 +86,7 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenFlushTaskSubmissionDisabledWhenComman commandList->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenDebuggerLogsDisabledWhenCommandListIsSynchronizedThenSbaAddressesAreNotPrinted) { +HWTEST_F(L0DebuggerWithBlitterTest, givenDebuggerLogsDisabledWhenCommandListIsSynchronizedThenSbaAddressesAreNotPrinted) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.DebuggerLogBitmask.set(0); @@ -104,7 +106,10 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenDebuggerLogsDisabledWhenCommandListIs commandList->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendLaunchKernelThenSuccessIsReturned) { +HWTEST_F(L0DebuggerWithBlitterTest, givenImmediateCommandListWhenExecutingWithFlushTaskThenSipIsInstalledAndDebuggerAllocationsAreResident) { + using STATE_SIP = typename FamilyType::STATE_SIP; + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + Mock<::L0::Kernel> kernel; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); @@ -112,15 +117,58 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForIm ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; ze_group_count_t groupCount{1, 1, 1}; + + auto &csr = neoDevice->getUltCommandStreamReceiver(); + csr.storeMakeResidentAllocations = true; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue); + EXPECT_TRUE(commandList->isFlushTaskSubmissionEnabled); + EXPECT_EQ(&csr, commandList->csr); + + csr.lastFlushedCommandStream = nullptr; auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, csr.lastFlushedCommandStream); + + auto sbaBuffer = device->getL0Debugger()->getSbaTrackingBuffer(commandList->csr->getOsContext().getContextId()); + auto sipIsa = NEO::SipKernel::getSipKernel(*neoDevice).getSipAllocation(); + auto debugSurface = device->getDebugSurface(); + + EXPECT_TRUE(csr.isMadeResident(sbaBuffer)); + EXPECT_TRUE(csr.isMadeResident(sipIsa)); + EXPECT_TRUE(csr.isMadeResident(debugSurface)); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, commandList->csr->getCS().getCpuBase(), commandList->csr->getCS().getUsed())); + + const auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); + if (hwHelper.isSipWANeeded(hwInfo)) { + + auto miLoadImm = findAll(cmdList.begin(), cmdList.end()); + + auto globalSipFound = 0u; + for (size_t i = 0; i < miLoadImm.size(); i++) { + MI_LOAD_REGISTER_IMM *miLoad = genCmdCast(*miLoadImm[i]); + ASSERT_NE(nullptr, miLoad); + + if (miLoad->getRegisterOffset() == NEO::GlobalSipRegister::registerOffset) { + globalSipFound++; + } + } + EXPECT_NE(0u, globalSipFound); + } else { + auto sipItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), sipItor); + } commandList->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledWithInternalCommandListForImmediateWhenAppendLaunchKernelThenSuccessIsReturned) { +HWTEST_F(L0DebuggerWithBlitterTest, givenInternalUsageImmediateCommandListWhenExecutingThenDebuggerAllocationsAreNotResident) { + using STATE_SIP = typename FamilyType::STATE_SIP; + Mock<::L0::Kernel> kernel; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); @@ -128,15 +176,37 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledWithI ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; ze_group_count_t groupCount{1, 1, 1}; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + // Internal command list must not have flush task enabled + EXPECT_FALSE(commandList->isFlushTaskSubmissionEnabled); + + auto &csr = reinterpret_cast &>(*commandList->csr); + csr.storeMakeResidentAllocations = true; + auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); + auto sbaBuffer = device->getL0Debugger()->getSbaTrackingBuffer(commandList->csr->getOsContext().getContextId()); + auto sipIsa = NEO::SipKernel::getSipKernel(*neoDevice).getSipAllocation(); + auto debugSurface = device->getDebugSurface(); + + EXPECT_FALSE(csr.isMadeResident(sbaBuffer)); + EXPECT_FALSE(csr.isMadeResident(sipIsa)); + EXPECT_FALSE(csr.isMadeResident(debugSurface)); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, commandList->csr->getCS().getCpuBase(), commandList->csr->getCS().getUsed())); + + auto sipItor = find(cmdList.begin(), cmdList.end()); + ASSERT_EQ(cmdList.end(), sipItor); + commandList->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendLaunchKernelIndirectThenSuccessIsReturned) { +HWTEST_F(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendLaunchKernelIndirectThenSuccessIsReturned) { Mock<::L0::Kernel> kernel; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); @@ -153,7 +223,7 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForIm commandList->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendLaunchKernelIndirectThenSuccessIsReturned) { +HWTEST_F(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendLaunchKernelIndirectThenSuccessIsReturned) { Mock<::L0::Kernel> kernel; DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); @@ -170,7 +240,7 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForI commandList->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryCopyThenSuccessIsReturned) { +HWTEST_F(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryCopyThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); @@ -186,7 +256,7 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForIm commandList->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendMemoryCopyThenSuccessIsReturned) { +HWTEST_F(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendMemoryCopyThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); @@ -202,7 +272,7 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForI commandList->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { +HWTEST_F(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); @@ -223,7 +293,7 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForIm commandList->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForRegularCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { +HWTEST_F(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledForRegularCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); @@ -256,7 +326,7 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForRe commandQueue->destroy(); } -HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { +HWTEST_F(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); @@ -275,7 +345,7 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForI commandList->destroy(); } -HWTEST2_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledCommandListAndAppendMemoryCopyCalledInLoopThenMultipleCommandBufferAreUsedAndSuccessIsReturned, IsAtLeastSkl) { +HWTEST2_F(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionEnabledCommandListAndAppendMemoryCopyCalledInLoopThenMultipleCommandBufferAreUsedAndSuccessIsReturned, IsAtLeastSkl) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); @@ -293,7 +363,7 @@ HWTEST2_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledComm commandList->destroy(); } -HWTEST2_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledCommandListAndAppendMemoryCopyCalledInLoopThenMultipleCommandBufferAreUsedAndSuccessIsReturned, IsAtLeastSkl) { +HWTEST2_F(L0DebuggerWithBlitterTest, givenUseCsrImmediateSubmissionDisabledCommandListAndAppendMemoryCopyCalledInLoopThenMultipleCommandBufferAreUsedAndSuccessIsReturned, IsAtLeastSkl) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); @@ -311,7 +381,7 @@ HWTEST2_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledCom commandList->destroy(); } -HWTEST2_F(L0DebuggerInternalUsageTest, givenDebuggingEnabledWhenInternalCmdQIsUsedThenDebuggerPathsAreNotExecuted, IsAtLeastSkl) { +HWTEST2_F(L0DebuggerWithBlitterTest, givenDebuggingEnabledWhenInternalCmdQIsUsedThenDebuggerPathsAreNotExecuted, IsAtLeastSkl) { ze_command_queue_desc_t queueDesc = {}; std::unique_ptr, Deleter> commandQueue(new MockCommandQueueHw(device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc)); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index bac4ca4ce7..3b3545cb36 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -142,7 +142,7 @@ HWTEST_F(UltCommandStreamReceiverTest, givenSentStateSipFlagSetAndSourceLevelDeb auto sizeWithSourceKernelDebugging = commandStreamReceiver.getRequiredCmdStreamSize(dispatchFlags, *pDevice); auto sizeForStateSip = PreemptionHelper::getRequiredStateSipCmdSize(*pDevice, commandStreamReceiver.isRcs()); - EXPECT_EQ(sizeForStateSip, sizeWithSourceKernelDebugging - sizeWithoutSourceKernelDebugging - PreambleHelper::getKernelDebuggingCommandsSize(true)); + EXPECT_EQ(sizeForStateSip, sizeWithSourceKernelDebugging - sizeWithoutSourceKernelDebugging - PreambleHelper::getKernelDebuggingCommandsSize(true) - debugger->sbaTrackingSize); } HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPolicyChangedWhenEstimatingFlushTaskSizeThenResultDependsOnPolicyProgrammingCmdSize) { diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index bf4b662f10..873a385b69 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -362,6 +362,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( lastSentUseGlobalAtomics = dispatchFlags.useGlobalAtomics; } + bool debuggingEnabled = device.getDebugger() != nullptr; bool sourceLevelDebuggerActive = device.getSourceLevelDebugger() != nullptr ? true : false; auto memoryCompressionState = lastMemoryCompressionState; @@ -414,6 +415,12 @@ CompletionStamp CommandStreamReceiverHw::flushTask( programAdditionalStateBaseAddress(commandStreamCSR, cmd, device); + if (debuggingEnabled && !device.getDebugger()->isLegacy()) { + NEO::Debugger::SbaAddresses sbaAddresses = {}; + NEO::EncodeStateBaseAddress::setSbaAddressesForDebugger(sbaAddresses, cmd); + device.getDebugger()->captureStateBaseAddress(commandStreamCSR, sbaAddresses); + } + if (sshDirty) { bindingTableBaseAddressRequired = true; } @@ -424,14 +431,15 @@ CompletionStamp CommandStreamReceiverHw::flushTask( } EncodeWA::encodeAdditionalPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs, false, hwInfo, isRcs()); - addPipeControlBeforeStateSip(commandStreamCSR, device); - programStateSip(commandStreamCSR, device); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { collectStateBaseAddresPatchInfo(commandStream.getGraphicsAllocation()->getGpuAddress(), stateBaseAddressCmdOffset, dsh, ioh, ssh, newGSHbase); } } + addPipeControlBeforeStateSip(commandStreamCSR, device); + programStateSip(commandStreamCSR, device); + DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskLevel", (uint32_t)this->taskLevel); if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads) { @@ -500,11 +508,12 @@ CompletionStamp CommandStreamReceiverHw::flushTask( makeResident(*preemptionAllocation); } - if (dispatchFlags.preemptionMode == PreemptionMode::MidThread || sourceLevelDebuggerActive) { + if (dispatchFlags.preemptionMode == PreemptionMode::MidThread || debuggingEnabled) { makeResident(*SipKernel::getSipKernel(device).getSipAllocation()); - if (debugSurface) { - makeResident(*debugSurface); - } + } + + if (sourceLevelDebuggerActive && debugSurface) { + makeResident(*debugSurface); } if (experimentalCmdBuffer.get() != nullptr) { @@ -788,6 +797,10 @@ template size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags, Device &device) { size_t size = getRequiredCmdSizeForPreamble(device); size += getRequiredStateBaseAddressSize(device); + + if (device.getDebugger()) { + size += device.getDebugger()->getSbaTrackingCommandsSize(NEO::Debugger::SbaAddresses::trackedAddressCount); + } if (!this->isStateSipSent || device.getDebugger()) { size += PreemptionHelper::getRequiredStateSipCmdSize(device, isRcs()); } @@ -895,7 +908,8 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForPreemption(const template inline void CommandStreamReceiverHw::programStateSip(LinearStream &cmdStream, Device &device) { - if (!this->isStateSipSent || device.isDebuggerActive()) { + bool debuggingEnabled = device.getDebugger() != nullptr; + if (!this->isStateSipSent || debuggingEnabled) { PreemptionHelper::programStateSip(cmdStream, device); this->isStateSipSent = true; }