From 6951ff1a070c718a06c69603ccefd6e0d7c9d32b Mon Sep 17 00:00:00 2001 From: Maciej Plewka Date: Mon, 7 Jul 2025 12:08:43 +0000 Subject: [PATCH] fix: store whole global timestamp before context timestamp Related-To: NEO-15192 Signed-off-by: Maciej Plewka --- level_zero/core/source/cmdlist/cmdlist_hw.h | 5 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 58 ++++--- .../source/cmdlist/cmdlist_hw_dg2_and_pvc.inl | 4 +- .../cmdlist/cmdlist_hw_skl_to_tgllp.inl | 4 +- .../cmdlist/cmdlist_hw_xe2_hpg_and_later.inl | 37 ++--- .../unit_tests/fixtures/cmdlist_fixture.h | 9 ++ .../unit_tests/fixtures/cmdlist_fixture.inl | 145 +++++++++++++++++- .../sources/cmdlist/test_cmdlist_1.cpp | 130 +++++++++++++++- .../test_cmdlist_append_signal_event.cpp | 43 +++--- .../cmdlist/test_cmdlist_xe2_and_later.cpp | 8 +- 10 files changed, 352 insertions(+), 91 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index b7f49e30ce..18ed4d02ee 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -329,7 +329,10 @@ struct CommandListCoreFamily : public CommandListImp { ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t &threadGroupDimensions, size_t &patchIndex); void programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t localRegionSize, size_t &patchIndex); void appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition, bool copyOperation); - void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool workloadPartition, bool copyOperation); + void adjustWriteKernelTimestamp(uint64_t address, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool workloadPartition, bool copyOperation, bool globalTimestamp); + void writeTimestamp(NEO::CommandContainer &container, uint32_t regOffset, uint64_t address, bool maskLsb, bool workloadPartition, void **postSyncCmdBuffer, bool copyOperation); + void pushTimestampPatch(CommandToPatchContainer *container, uint64_t offset, void *pDestination); + void writeKernelTimestamp(uint64_t baseAddr, Event *event, CommandToPatchContainer *outTimeStampSyncCmds, size_t offset, bool maskLsb, bool workloadPartition, bool copyOperation, bool isGlobalTimestamp); void appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation); void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker); void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 9845a74943..6e465ee165 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -3357,49 +3357,47 @@ void CommandListCoreFamily::programRegionGroupBarrier(Kernel &ker template void CommandListCoreFamily::appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition, bool copyOperation) { - constexpr uint32_t mask = 0xfffffffe; - auto baseAddr = event->getPacketAddress(this->device); + writeKernelTimestamp(baseAddr, event, outTimeStampSyncCmds, beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset(), maskLsb, workloadPartition, copyOperation, true); + writeKernelTimestamp(baseAddr, event, outTimeStampSyncCmds, beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset(), maskLsb, workloadPartition, copyOperation, false); +} - auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset(); - auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset(); - - void **globalPostSyncCmdBuffer = nullptr; - void **contextPostSyncCmdBuffer = nullptr; - - void *globalPostSyncCmd = nullptr; - void *contextPostSyncCmd = nullptr; - +template +void CommandListCoreFamily::writeKernelTimestamp(uint64_t baseAddr, Event *event, CommandToPatchContainer *outTimeStampSyncCmds, size_t offset, bool maskLsb, bool workloadPartition, bool copyOperation, bool isGlobalTimestamp) { + void **postSyncCmdBuffer = nullptr; + void *postSyncCmd = nullptr; if (outTimeStampSyncCmds != nullptr) { - globalPostSyncCmdBuffer = &globalPostSyncCmd; - contextPostSyncCmdBuffer = &contextPostSyncCmd; + postSyncCmdBuffer = &postSyncCmd; } + uint64_t address = ptrOffset(baseAddr, offset); - uint64_t globalAddress = ptrOffset(baseAddr, globalOffset); - uint64_t contextAddress = ptrOffset(baseAddr, contextOffset); + uint32_t registerOffset = isGlobalTimestamp ? RegisterOffsets::globalTimestampLdw : RegisterOffsets::gpThreadTimeRegAddressOffsetLow; + writeTimestamp(commandContainer, registerOffset, address, maskLsb, workloadPartition, postSyncCmdBuffer, copyOperation); + pushTimestampPatch(outTimeStampSyncCmds, offset, postSyncCmd); + adjustWriteKernelTimestamp(address, baseAddr, outTimeStampSyncCmds, workloadPartition, copyOperation, isGlobalTimestamp); +} +template +void CommandListCoreFamily::writeTimestamp(NEO::CommandContainer &container, uint32_t regOffset, uint64_t address, bool maskLsb, bool workloadPartition, void **postSyncCmdBuffer, bool copyOperation) { + constexpr uint32_t mask = 0xfffffffe; if (maskLsb) { - NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation); - NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation); + NEO::EncodeMathMMIO::encodeBitwiseAndVal( + container, regOffset, mask, address, workloadPartition, postSyncCmdBuffer, copyOperation); } else { - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation); - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation); + NEO::EncodeStoreMMIO::encode( + *container.getCommandStream(), regOffset, address, workloadPartition, postSyncCmdBuffer, copyOperation); } +} - if (outTimeStampSyncCmds != nullptr) { +template +void CommandListCoreFamily::pushTimestampPatch(CommandToPatchContainer *container, uint64_t offset, void *pDestination) { + if (container) { CommandToPatch ctxCmd; ctxCmd.type = CommandToPatch::TimestampEventPostSyncStoreRegMem; - - ctxCmd.offset = globalOffset; - ctxCmd.pDestination = globalPostSyncCmd; - outTimeStampSyncCmds->push_back(ctxCmd); - - ctxCmd.offset = contextOffset; - ctxCmd.pDestination = contextPostSyncCmd; - outTimeStampSyncCmds->push_back(ctxCmd); + ctxCmd.offset = offset; + ctxCmd.pDestination = pDestination; + container->push_back(ctxCmd); } - - adjustWriteKernelTimestamp(globalAddress, contextAddress, baseAddr, outTimeStampSyncCmds, workloadPartition, copyOperation); } template diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_dg2_and_pvc.inl b/level_zero/core/source/cmdlist/cmdlist_hw_dg2_and_pvc.inl index a5e9a61551..ba40cd042d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_dg2_and_pvc.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_dg2_and_pvc.inl @@ -15,7 +15,7 @@ inline NEO::PreemptionMode CommandListCoreFamily::obtainKernelPre } template -void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, - bool workloadPartition, bool copyOperation) {} +void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, + bool workloadPartition, bool copyOperation, bool globalTimestamp) {} } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index f714406497..8193543579 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -36,8 +36,8 @@ size_t CommandListCoreFamily::getReserveSshSize() { } template -void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, - bool workloadPartition, bool copyOperation) {} +void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, + bool workloadPartition, bool copyOperation, bool globalTimestamp) {} template bool CommandListCoreFamily::isInOrderNonWalkerSignalingRequired(const Event *event) const { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl index 13aa2401d1..e122dad16a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl @@ -21,37 +21,18 @@ inline NEO::PreemptionMode CommandListCoreFamily::obtainKernelPre } template -void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, - bool workloadPartition, bool copyOperation) { - uint64_t globalHighAddress = globalAddress + sizeof(uint32_t); - uint64_t contextHighAddress = contextAddress + sizeof(uint32_t); - - void **globalPostSyncCmdBuffer = nullptr; - void **contextPostSyncCmdBuffer = nullptr; - - void *globalPostSyncCmd = nullptr; - void *contextPostSyncCmd = nullptr; +void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t address, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, + bool workloadPartition, bool copyOperation, bool globalTimestamp) { + uint64_t highAddress = address + sizeof(uint32_t); + void **postSyncCmdBuffer = nullptr; + void *postSyncCmd = nullptr; if (outTimeStampSyncCmds != nullptr) { - globalPostSyncCmdBuffer = &globalPostSyncCmd; - contextPostSyncCmdBuffer = &contextPostSyncCmd; + postSyncCmdBuffer = &postSyncCmd; } - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampUn, globalHighAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation); - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextHighAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation); - - if (outTimeStampSyncCmds != nullptr) { - CommandToPatch ctxCmd; - ctxCmd.type = CommandToPatch::TimestampEventPostSyncStoreRegMem; - - ctxCmd.offset = globalHighAddress - baseAddress; - ctxCmd.pDestination = globalPostSyncCmd; - outTimeStampSyncCmds->push_back(ctxCmd); - - ctxCmd.offset = contextHighAddress - baseAddress; - ctxCmd.pDestination = contextPostSyncCmd; - outTimeStampSyncCmds->push_back(ctxCmd); - } + uint32_t registerOffset = globalTimestamp ? RegisterOffsets::globalTimestampUn : RegisterOffsets::gpThreadTimeRegAddressOffsetHigh; + writeTimestamp(commandContainer, registerOffset, highAddress, false, workloadPartition, postSyncCmdBuffer, copyOperation); + pushTimestampPatch(outTimeStampSyncCmds, highAddress - baseAddress, postSyncCmd); } - } // namespace L0 diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 3d83b84c3b..6093d087a2 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -73,6 +73,15 @@ void validateTimestampRegisters(GenCmdList &cmdList, uint64_t secondStoreRegMemAddress, bool workloadPartition, bool useMask); +template +void validateTimestampLongRegisters(GenCmdList &cmdList, + GenCmdList::iterator &startIt, + uint32_t firstLoadRegisterRegSrcAddress, + uint64_t firstStoreRegMemAddress, + uint32_t secondLoadRegisterRegSrcAddress, + uint64_t secondStoreRegMemAddress, + bool workloadPartition, + bool useMask); struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture { void setUp() { diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl index 7010083562..7b21921ec9 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl @@ -80,7 +80,7 @@ void validateTimestampRegisters(GenCmdList &cmdList, } else { ASSERT_NE(cmdList.end(), itor); auto cmdMem = genCmdCast(*itor); - EXPECT_EQ(RegisterOffsets::globalTimestampUn, cmdMem->getRegisterAddress()); + EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress()); EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress()); if (workloadPartition) { EXPECT_TRUE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); @@ -89,7 +89,7 @@ void validateTimestampRegisters(GenCmdList &cmdList, } } - itor++; + itor = useMask ? find(itor, cmdList.end()) : find(startIt, cmdList.end()); if (useMask) { { ASSERT_NE(cmdList.end(), itor); @@ -128,7 +128,7 @@ void validateTimestampRegisters(GenCmdList &cmdList, } else { ASSERT_NE(cmdList.end(), itor); auto cmdMem = genCmdCast(*itor); - EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, cmdMem->getRegisterAddress()); + EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress()); EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress()); if (workloadPartition) { EXPECT_TRUE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); @@ -141,6 +141,145 @@ void validateTimestampRegisters(GenCmdList &cmdList, startIt = itor; } +template +void validateTimestampLongRegisters(GenCmdList &cmdList, + GenCmdList::iterator &startIt, + uint32_t firstLoadRegisterRegSrcAddress, + uint64_t firstStoreRegMemAddress, + uint32_t secondLoadRegisterRegSrcAddress, + uint64_t secondStoreRegMemAddress, + uint32_t thirdLoadRegisterRegSrcAddress, + uint64_t thirdStoreRegMemAddress, + uint32_t fourthLoadRegisterRegSrcAddress, + uint64_t fourthStoreRegMemAddress, + bool workloadPartition, + bool useMask) { + using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using MI_MATH = typename FamilyType::MI_MATH; + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + + constexpr uint32_t mask = 0xfffffffe; + + auto itor = useMask ? find(startIt, cmdList.end()) : find(startIt, cmdList.end()); + if (useMask) { + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadReg = genCmdCast(*itor); + EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress()); + EXPECT_EQ(RegisterOffsets::csGprR13, cmdLoadReg->getDestinationRegisterAddress()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadImm = genCmdCast(*itor); + EXPECT_EQ(RegisterOffsets::csGprR14, cmdLoadImm->getRegisterOffset()); + EXPECT_EQ(mask, cmdLoadImm->getDataDword()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdMath = genCmdCast(*itor); + EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdMem = genCmdCast(*itor); + EXPECT_EQ(RegisterOffsets::csGprR12, cmdMem->getRegisterAddress()); + EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress()); + if (workloadPartition) { + EXPECT_TRUE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } else { + EXPECT_FALSE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } + } + } else { + ASSERT_NE(cmdList.end(), itor); + auto cmdMem = genCmdCast(*itor); + EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress()); + EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress()); + if (workloadPartition) { + EXPECT_TRUE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } else { + EXPECT_FALSE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } + } + itor++; + ASSERT_NE(cmdList.end(), itor); + auto cmdMem = genCmdCast(*itor); + EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress()); + EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress()); + if (workloadPartition) { + EXPECT_TRUE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } else { + EXPECT_FALSE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } + + itor = useMask ? find(itor, cmdList.end()) : find(startIt, cmdList.end()); + if (useMask) { + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadReg = genCmdCast(*itor); + EXPECT_EQ(thirdLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress()); + EXPECT_EQ(RegisterOffsets::csGprR13, cmdLoadReg->getDestinationRegisterAddress()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadImm = genCmdCast(*itor); + EXPECT_EQ(RegisterOffsets::csGprR14, cmdLoadImm->getRegisterOffset()); + EXPECT_EQ(mask, cmdLoadImm->getDataDword()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdMath = genCmdCast(*itor); + EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + cmdMem = genCmdCast(*itor); + EXPECT_EQ(RegisterOffsets::csGprR12, cmdMem->getRegisterAddress()); + EXPECT_EQ(thirdStoreRegMemAddress, cmdMem->getMemoryAddress()); + if (workloadPartition) { + EXPECT_TRUE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } else { + EXPECT_FALSE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } + } + } else { + ASSERT_NE(cmdList.end(), itor); + cmdMem = genCmdCast(*itor); + EXPECT_EQ(thirdLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress()); + EXPECT_EQ(thirdStoreRegMemAddress, cmdMem->getMemoryAddress()); + if (workloadPartition) { + EXPECT_TRUE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } else { + EXPECT_FALSE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } + } + itor++; + ASSERT_NE(cmdList.end(), itor); + cmdMem = genCmdCast(*itor); + EXPECT_EQ(fourthLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress()); + EXPECT_EQ(fourthStoreRegMemAddress, cmdMem->getMemoryAddress()); + if (workloadPartition) { + EXPECT_TRUE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } else { + EXPECT_FALSE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } + itor++; + startIt = itor; +} + template void CmdListPipelineSelectStateFixture::testBody() { using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 27fc0e4f03..605102079b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -3013,7 +3013,7 @@ HWTEST_F(CommandListCreateTests, givenCommandListWhenAppendingBarrierWithIncorre EXPECT_EQ(returnValue, ZE_RESULT_ERROR_INVALID_ARGUMENT); } -HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets) { +HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtMostXeCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; auto commandList = std::make_unique>>(); @@ -3051,7 +3051,54 @@ HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandF EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); } -HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets) { +HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtLeastXe2HpgCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::copy, 0u); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + auto baseAddr = event->getGpuAddress(device); + auto contextOffset = event->getContextStartOffset(); + auto globalOffset = event->getGlobalStartOffset(); + EXPECT_EQ(baseAddr, event->getPacketAddress(device)); + + commandList->appendEventForProfilingCopyCommand(event.get(), true); + EXPECT_EQ(1u, event->getPacketsInUse()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( + cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampLdw); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset)); + EXPECT_NE(cmdList.end(), ++itor); + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampUn); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr + sizeof(uint32_t), globalOffset)); + EXPECT_NE(cmdList.end(), ++itor); + + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); + EXPECT_NE(cmdList.end(), ++itor); + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetHigh); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr + sizeof(uint32_t), contextOffset)); +} + +HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtMostPVC) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; auto commandList = std::make_unique>>(); @@ -3086,6 +3133,85 @@ HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandFo EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); } +HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtMostXeCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::copy, 0u); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + commandList->appendEventForProfilingCopyCommand(event.get(), false); + + auto contextOffset = event->getContextEndOffset(); + auto globalOffset = event->getGlobalEndOffset(); + auto baseAddr = event->getGpuAddress(device); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( + cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampLdw); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset)); + EXPECT_NE(cmdList.end(), ++itor); + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); +} + +HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtLeastXe2HpgCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::copy, 0u); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + commandList->appendEventForProfilingCopyCommand(event.get(), false); + + auto contextOffset = event->getContextEndOffset(); + auto globalOffset = event->getGlobalEndOffset(); + auto baseAddr = event->getGpuAddress(device); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( + cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampLdw); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset)); + EXPECT_NE(cmdList.end(), ++itor); + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampUn); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr + sizeof(uint32_t), globalOffset)); + EXPECT_NE(cmdList.end(), ++itor); + + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); + EXPECT_NE(cmdList.end(), ++itor); + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetHigh); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr + sizeof(uint32_t), contextOffset)); +} + HWTEST_F(CommandListCreateTests, givenNullEventWhenAppendEventAfterWalkerThenNothingAddedToStream) { auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::copy, 0u); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index 790abe030f..10c2b0ce6b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -857,40 +857,45 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, EXPECT_EQ(timestampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); auto startCmdList = cmdList.begin(); - validateTimestampRegisters(cmdList, - startCmdList, - RegisterOffsets::globalTimestampLdw, globalStartAddress, - RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress, - true, - true); if (UnitTestHelper::timestampRegisterHighAddress()) { uint64_t globalStartAddressHigh = globalStartAddress + sizeof(uint32_t); uint64_t contextStartAddressHigh = contextStartAddress + sizeof(uint32_t); + validateTimestampLongRegisters(cmdList, + startCmdList, + RegisterOffsets::globalTimestampLdw, globalStartAddress, + RegisterOffsets::globalTimestampUn, globalStartAddressHigh, + RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress, + RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextStartAddressHigh, + true, + true); + } else { validateTimestampRegisters(cmdList, startCmdList, - RegisterOffsets::globalTimestampUn, globalStartAddressHigh, - RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextStartAddressHigh, + RegisterOffsets::globalTimestampLdw, globalStartAddress, + RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress, true, - false); + true); } - validateTimestampRegisters(cmdList, - startCmdList, - RegisterOffsets::globalTimestampLdw, globalEndAddress, - RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress, - true, - true); - if (UnitTestHelper::timestampRegisterHighAddress()) { uint64_t globalEndAddressHigh = globalEndAddress + sizeof(uint32_t); uint64_t contextEndAddressHigh = contextEndAddress + sizeof(uint32_t); + validateTimestampLongRegisters(cmdList, + startCmdList, + RegisterOffsets::globalTimestampLdw, globalEndAddress, + RegisterOffsets::globalTimestampUn, globalEndAddressHigh, + RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress, + RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextEndAddressHigh, + true, + true); + } else { validateTimestampRegisters(cmdList, startCmdList, - RegisterOffsets::globalTimestampUn, globalEndAddressHigh, - RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextEndAddressHigh, + RegisterOffsets::globalTimestampLdw, globalEndAddress, + RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress, true, - false); + true); } } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xe2_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xe2_and_later.cpp index bbb1ed33ae..2440393288 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xe2_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xe2_and_later.cpp @@ -105,8 +105,8 @@ struct CommandListXe2AndLaterFixture : public DeviceFixture { ASSERT_EQ(6u, srmCommands.size()); validateSrmCommand(reinterpret_cast(*srmCommands[0]), globalAddress, RegisterOffsets::csGprR12); - validateSrmCommand(reinterpret_cast(*srmCommands[1]), contextAddress, RegisterOffsets::csGprR12); - validateSrmCommand(reinterpret_cast(*srmCommands[2]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn); + validateSrmCommand(reinterpret_cast(*srmCommands[1]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn); + validateSrmCommand(reinterpret_cast(*srmCommands[2]), contextAddress, RegisterOffsets::csGprR12); validateSrmCommand(reinterpret_cast(*srmCommands[3]), contextAddress + sizeof(uint32_t), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh); validateLrrCommand(reinterpret_cast(*srmCommands[4]), RegisterOffsets::globalTimestampLdw); @@ -116,8 +116,8 @@ struct CommandListXe2AndLaterFixture : public DeviceFixture { ASSERT_EQ(4u, srmCommands.size()); validateSrmCommand(reinterpret_cast(*srmCommands[0]), globalAddress, RegisterOffsets::globalTimestampLdw); - validateSrmCommand(reinterpret_cast(*srmCommands[1]), contextAddress, RegisterOffsets::gpThreadTimeRegAddressOffsetLow); - validateSrmCommand(reinterpret_cast(*srmCommands[2]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn); + validateSrmCommand(reinterpret_cast(*srmCommands[1]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn); + validateSrmCommand(reinterpret_cast(*srmCommands[2]), contextAddress, RegisterOffsets::gpThreadTimeRegAddressOffsetLow); validateSrmCommand(reinterpret_cast(*srmCommands[3]), contextAddress + sizeof(uint32_t), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh); } }