From d15eed035b1f693dbe20048d76d7e008e230ea19 Mon Sep 17 00:00:00 2001 From: Joshua Santosh Ranjan Date: Wed, 10 Nov 2021 13:26:03 +0000 Subject: [PATCH] Metrics Restore addressOffsetCCSOffset after query programming Related-To: LOCI-2711 Signed-off-by: Joshua Santosh Ranjan --- level_zero/core/source/cmdlist/cmdlist_hw.h | 1 + .../core/source/cmdlist/cmdlist_hw_base.inl | 3 ++ .../cmdlist/cmdlist_hw_xehp_and_later.inl | 11 ++++ level_zero/core/source/cmdlist/cmdlist_imp.h | 1 + .../core/test/unit_tests/mocks/mock_cmdlist.h | 1 + ...cmdlist_append_multipartition_prologue.cpp | 52 +++++++++++++++++++ .../tools/source/metrics/metric_query_imp.cpp | 10 +++- 7 files changed, 77 insertions(+), 2 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 8849939261..62bad2d7ee 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -153,6 +153,7 @@ struct CommandListCoreFamily : CommandListImp { size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; void appendMultiPartitionPrologue(uint32_t partitionDataSize) override; + void appendMultiPartitionEpilogue() override; ze_result_t reserveSpace(size_t size, void **ptr) override; ze_result_t reset() override; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index da1abd0a86..eb6f6bc578 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -178,6 +178,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z template void CommandListCoreFamily::appendMultiPartitionPrologue(uint32_t partitionDataSize) {} +template +void CommandListCoreFamily::appendMultiPartitionEpilogue() {} + template void CommandListCoreFamily::appendComputeBarrierCommand() { size_t estimatedSizeRequired = 
NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index db6a957d5e..fd7dd68643 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -318,6 +318,17 @@ void CommandListCoreFamily::appendMultiPartitionPrologue(uint32_t true); } +template +void CommandListCoreFamily::appendMultiPartitionEpilogue() { + + const size_t estimatedSizeRequired = sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); + increaseCommandStreamSpace(estimatedSizeRequired); + NEO::EncodeSetMMIO::encodeIMM(commandContainer, + NEO::PartitionRegisters::addressOffsetCCSOffset, + CommonConstants::partitionAddressOffset, + true); +} + template void CommandListCoreFamily::appendComputeBarrierCommand() { NEO::PipeControlArgs args = createBarrierFlags(); diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index f89cb7b5a3..c2ae4df116 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -25,6 +25,7 @@ struct CommandListImp : CommandList { ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; virtual void appendMultiPartitionPrologue(uint32_t partitionDataSize) = 0; + virtual void appendMultiPartitionEpilogue() = 0; protected: ~CommandListImp() override = default; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 11dbd6556c..f856b1ecc8 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -327,6 +327,7 @@ struct MockCommandList : public CommandList { ze_command_list_flags_t flags)); 
ADDMETHOD_NOBASE_VOIDRETURN(appendMultiPartitionPrologue, (uint32_t partitionDataSize)); + ADDMETHOD_NOBASE_VOIDRETURN(appendMultiPartitionEpilogue, (void)); uint8_t *batchBuffer = nullptr; NEO::GraphicsAllocation *mockAllocation = nullptr; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_multipartition_prologue.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_multipartition_prologue.cpp index b02c693183..f8ce556d2f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_multipartition_prologue.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_multipartition_prologue.cpp @@ -79,6 +79,58 @@ HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledTh auto result = commandList->close(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } +using MultiPartitionEpilogueTest = Test; +HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledThenCommandListIsUpdated, IsAtLeastXeHpCore) { + + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); + auto &commandContainer = commandList->commandContainer; + + ASSERT_NE(nullptr, commandContainer.getCommandStream()); + auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); + + CommandListImp *cmdListImp = static_cast(commandList.get()); + cmdListImp->appendMultiPartitionEpilogue(); + + auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); + + auto itorPc = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itorPc); + + auto lriCmdPc = genCmdCast(*itorPc); + 
ASSERT_EQ(NEO::PartitionRegisters::addressOffsetCCSOffset, static_cast(lriCmdPc->getRegisterOffset())); + ASSERT_EQ(static_cast(lriCmdPc->getDataDword()), CommonConstants::partitionAddressOffset); + ASSERT_EQ(lriCmdPc->getMmioRemapEnable(), true); + + auto result = commandList->close(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); +} + +HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledThenCommandListIsNotUpdated, IsAtMostGen12lp) { + + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); + auto &commandContainer = commandList->commandContainer; + + ASSERT_NE(nullptr, commandContainer.getCommandStream()); + auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); + + CommandListImp *cmdListImp = static_cast(commandList.get()); + cmdListImp->appendMultiPartitionEpilogue(); + + auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); + ASSERT_EQ(usedSpaceAfter, usedSpaceBefore); + + auto result = commandList->close(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); +} } // namespace ult } // namespace L0 diff --git a/level_zero/tools/source/metrics/metric_query_imp.cpp b/level_zero/tools/source/metrics/metric_query_imp.cpp index 52b4e3782d..df4bdeb3ac 100644 --- a/level_zero/tools/source/metrics/metric_query_imp.cpp +++ b/level_zero/tools/source/metrics/metric_query_imp.cpp @@ -773,6 +773,7 @@ ze_result_t MetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_ const size_t allocationSizeForSubDevice = pool.allocationSize / metricQueriesSize; static_cast(commandList).appendMultiPartitionPrologue(static_cast(allocationSizeForSubDevice)); void *buffer = nullptr; + bool gpuCommandStatus = true; // Revert iteration to be ensured that the last set of gpu commands overwrite the previous written sets of gpu commands, // so only one of the sub-device contexts will be used to append to command list.
@@ -817,10 +818,15 @@ ze_result_t MetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_ // Obtain gpu commands from metrics library for each sub-device to update cpu and gpu addresses for // each query object in metrics library, so that get data works properly. - if (!metricLibrarySubDevice.getGpuCommands(commandBuffer)) { - return ZE_RESULT_ERROR_UNKNOWN; + gpuCommandStatus = metricLibrarySubDevice.getGpuCommands(commandBuffer); + if (!gpuCommandStatus) { + break; } } + static_cast(commandList).appendMultiPartitionEpilogue(); + if (!gpuCommandStatus) { + return ZE_RESULT_ERROR_UNKNOWN; + } // Write gpu commands for sub device index 0. } else {