fix: store whole global timestamp before context timestamp

Related-To: NEO-15192
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2025-07-07 12:08:43 +00:00
committed by Compute-Runtime-Automation
parent 0842e16412
commit 6951ff1a07
10 changed files with 352 additions and 91 deletions

View File

@@ -329,7 +329,10 @@ struct CommandListCoreFamily : public CommandListImp {
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t &threadGroupDimensions, size_t &patchIndex);
void programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t localRegionSize, size_t &patchIndex);
// Writes the global timestamp and then the context timestamp for `event`;
// `beforeWalker` selects the event's start offsets (true) or end offsets (false).
void appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition, bool copyOperation);
// NOTE(review): two adjustWriteKernelTimestamp signatures are listed back to back. This excerpt looks like a
// diff rendered without +/- markers, so the first one is presumably the pre-change declaration - confirm
// against the repository before relying on either.
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool workloadPartition, bool copyOperation);
void adjustWriteKernelTimestamp(uint64_t address, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool workloadPartition, bool copyOperation, bool globalTimestamp);
// Encodes a store of register `regOffset` to `address` into `container`; with `maskLsb` set the store is
// encoded through EncodeMathMMIO::encodeBitwiseAndVal using mask 0xfffffffe, otherwise through EncodeStoreMMIO::encode.
void writeTimestamp(NEO::CommandContainer &container, uint32_t regOffset, uint64_t address, bool maskLsb, bool workloadPartition, void **postSyncCmdBuffer, bool copyOperation);
// Appends one TimestampEventPostSyncStoreRegMem patch entry (offset, pDestination) to `container`;
// does nothing when `container` is null.
void pushTimestampPatch(CommandToPatchContainer *container, uint64_t offset, void *pDestination);
// Writes a single timestamp (global or context, chosen by `isGlobalTimestamp`) to ptrOffset(baseAddr, offset),
// records its patch entry and then calls adjustWriteKernelTimestamp for the same address.
void writeKernelTimestamp(uint64_t baseAddr, Event *event, CommandToPatchContainer *outTimeStampSyncCmds, size_t offset, bool maskLsb, bool workloadPartition, bool copyOperation, bool isGlobalTimestamp);
void appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation);
void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker);
void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation);

View File

@@ -3357,49 +3357,47 @@ void CommandListCoreFamily<gfxCoreFamily>::programRegionGroupBarrier(Kernel &ker
// Emits the timestamp writes for one profiling point of `event`.
//
// The global timestamp is written first and the context timestamp second, so the
// complete global value is stored before any context value. `beforeWalker` selects
// the event's start offsets (true) or end offsets (false); the remaining arguments
// are forwarded unchanged to writeKernelTimestamp.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition, bool copyOperation) {
    auto baseAddr = event->getPacketAddress(this->device);

    writeKernelTimestamp(baseAddr, event, outTimeStampSyncCmds,
                         beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset(),
                         maskLsb, workloadPartition, copyOperation, true);
    writeKernelTimestamp(baseAddr, event, outTimeStampSyncCmds,
                         beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset(),
                         maskLsb, workloadPartition, copyOperation, false);
}
auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset();
auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset();
void **globalPostSyncCmdBuffer = nullptr;
void **contextPostSyncCmdBuffer = nullptr;
void *globalPostSyncCmd = nullptr;
void *contextPostSyncCmd = nullptr;
// Writes one timestamp of an event packet and records the matching patch entry.
//
// baseAddr             - packet base address; the store targets ptrOffset(baseAddr, offset).
// outTimeStampSyncCmds - optional patch list; when null no command pointer is requested from the encoder.
// offset               - offset of the timestamp field inside the packet, also stored in the patch entry.
// isGlobalTimestamp    - true selects RegisterOffsets::globalTimestampLdw, false selects
//                        RegisterOffsets::gpThreadTimeRegAddressOffsetLow.
// After the store, adjustWriteKernelTimestamp is called for the same address.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::writeKernelTimestamp(uint64_t baseAddr, Event *event, CommandToPatchContainer *outTimeStampSyncCmds, size_t offset, bool maskLsb, bool workloadPartition, bool copyOperation, bool isGlobalTimestamp) {
    void **postSyncCmdBuffer = nullptr;
    void *postSyncCmd = nullptr;

    // Only ask the encoder to report the emitted command when the caller collects patch entries.
    if (outTimeStampSyncCmds != nullptr) {
        postSyncCmdBuffer = &postSyncCmd;
    }

    uint64_t address = ptrOffset(baseAddr, offset);
    uint32_t registerOffset = isGlobalTimestamp ? RegisterOffsets::globalTimestampLdw : RegisterOffsets::gpThreadTimeRegAddressOffsetLow;

    writeTimestamp(commandContainer, registerOffset, address, maskLsb, workloadPartition, postSyncCmdBuffer, copyOperation);
    pushTimestampPatch(outTimeStampSyncCmds, offset, postSyncCmd);
    adjustWriteKernelTimestamp(address, baseAddr, outTimeStampSyncCmds, workloadPartition, copyOperation, isGlobalTimestamp);
}
// Encodes a single store of register `regOffset` to `address` into `container`.
//
// maskLsb           - when true the store goes through EncodeMathMMIO::encodeBitwiseAndVal with
//                     mask 0xfffffffe; otherwise EncodeStoreMMIO::encode is used directly.
// postSyncCmdBuffer - forwarded to the encoder; may be null when the caller does not need the
//                     emitted command.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::writeTimestamp(NEO::CommandContainer &container, uint32_t regOffset, uint64_t address, bool maskLsb, bool workloadPartition, void **postSyncCmdBuffer, bool copyOperation) {
    if (maskLsb) {
        constexpr uint32_t mask = 0xfffffffe;
        NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(
            container, regOffset, mask, address, workloadPartition, postSyncCmdBuffer, copyOperation);
        return;
    }

    NEO::EncodeStoreMMIO<GfxFamily>::encode(
        *container.getCommandStream(), regOffset, address, workloadPartition, postSyncCmdBuffer, copyOperation);
}
if (outTimeStampSyncCmds != nullptr) {
// Appends one TimestampEventPostSyncStoreRegMem patch entry to `container`.
//
// container    - patch list to extend; a null pointer makes the call a no-op.
// offset       - value stored in the entry's `offset` field.
// pDestination - value stored in the entry's `pDestination` field.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::pushTimestampPatch(CommandToPatchContainer *container, uint64_t offset, void *pDestination) {
    if (container == nullptr) {
        return;
    }

    CommandToPatch ctxCmd;
    ctxCmd.type = CommandToPatch::TimestampEventPostSyncStoreRegMem;
    ctxCmd.offset = offset;
    ctxCmd.pDestination = pDestination;
    container->push_back(ctxCmd);
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -15,7 +15,7 @@ inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPre
}
// adjustWriteKernelTimestamp with an empty body: as shown here it emits nothing and records no patch entries.
// NOTE(review): two signatures appear back to back under one template header. This excerpt looks like a diff
// rendered without +/- markers, so the first signature is presumably the pre-change form - confirm against
// the repository.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
bool workloadPartition, bool copyOperation) {}
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
bool workloadPartition, bool copyOperation, bool globalTimestamp) {}
} // namespace L0

View File

@@ -36,8 +36,8 @@ size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
}
// adjustWriteKernelTimestamp with an empty body: as shown here it emits nothing and records no patch entries.
// NOTE(review): two signatures appear back to back under one template header. This excerpt looks like a diff
// rendered without +/- markers, so the first signature is presumably the pre-change form - confirm against
// the repository.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
bool workloadPartition, bool copyOperation) {}
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
bool workloadPartition, bool copyOperation, bool globalTimestamp) {}
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {

View File

@@ -21,37 +21,18 @@ inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPre
}
// adjustWriteKernelTimestamp variant that adds a second store at `address + sizeof(uint32_t)`.
// In the single-`address` form below, the source register is RegisterOffsets::globalTimestampUn when
// `globalTimestamp` is true and RegisterOffsets::gpThreadTimeRegAddressOffsetHigh otherwise; the store is
// emitted via writeTimestamp with maskLsb = false and a patch entry with offset `highAddress - baseAddress`
// is recorded via pushTimestampPatch.
// NOTE(review): this span appears to interleave pre-change and post-change lines (diff rendered without
// +/- markers): it contains two signatures and two sets of locals. The `globalAddress, contextAddress`
// signature and the statements using globalHighAddress/contextHighAddress are presumably the pre-change
// body - confirm against the repository.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
bool workloadPartition, bool copyOperation) {
uint64_t globalHighAddress = globalAddress + sizeof(uint32_t);
uint64_t contextHighAddress = contextAddress + sizeof(uint32_t);
void **globalPostSyncCmdBuffer = nullptr;
void **contextPostSyncCmdBuffer = nullptr;
void *globalPostSyncCmd = nullptr;
void *contextPostSyncCmd = nullptr;
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t address, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
bool workloadPartition, bool copyOperation, bool globalTimestamp) {
// The second store targets the dword that follows `address`.
uint64_t highAddress = address + sizeof(uint32_t);
void **postSyncCmdBuffer = nullptr;
void *postSyncCmd = nullptr;
// A command pointer is only requested when the caller collects patch entries.
if (outTimeStampSyncCmds != nullptr) {
globalPostSyncCmdBuffer = &globalPostSyncCmd;
contextPostSyncCmdBuffer = &contextPostSyncCmd;
postSyncCmdBuffer = &postSyncCmd;
}
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampUn, globalHighAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextHighAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation);
if (outTimeStampSyncCmds != nullptr) {
CommandToPatch ctxCmd;
ctxCmd.type = CommandToPatch::TimestampEventPostSyncStoreRegMem;
ctxCmd.offset = globalHighAddress - baseAddress;
ctxCmd.pDestination = globalPostSyncCmd;
outTimeStampSyncCmds->push_back(ctxCmd);
ctxCmd.offset = contextHighAddress - baseAddress;
ctxCmd.pDestination = contextPostSyncCmd;
outTimeStampSyncCmds->push_back(ctxCmd);
}
uint32_t registerOffset = globalTimestamp ? RegisterOffsets::globalTimestampUn : RegisterOffsets::gpThreadTimeRegAddressOffsetHigh;
// maskLsb is passed as false here, so writeTimestamp takes its plain EncodeStoreMMIO path.
writeTimestamp(commandContainer, registerOffset, highAddress, false, workloadPartition, postSyncCmdBuffer, copyOperation);
// The patch offset is expressed relative to the packet base address.
pushTimestampPatch(outTimeStampSyncCmds, highAddress - baseAddress, postSyncCmd);
}
} // namespace L0

View File

@@ -73,6 +73,15 @@ void validateTimestampRegisters(GenCmdList &cmdList,
uint64_t secondStoreRegMemAddress,
bool workloadPartition,
bool useMask);
template <typename FamilyType>
void validateTimestampLongRegisters(GenCmdList &cmdList,
GenCmdList::iterator &startIt,
uint32_t firstLoadRegisterRegSrcAddress,
uint64_t firstStoreRegMemAddress,
uint32_t secondLoadRegisterRegSrcAddress,
uint64_t secondStoreRegMemAddress,
bool workloadPartition,
bool useMask);
struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture {
void setUp() {

View File

@@ -80,7 +80,7 @@ void validateTimestampRegisters(GenCmdList &cmdList,
} else {
ASSERT_NE(cmdList.end(), itor);
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(RegisterOffsets::globalTimestampUn, cmdMem->getRegisterAddress());
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress());
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
if (workloadPartition) {
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
@@ -89,7 +89,7 @@ void validateTimestampRegisters(GenCmdList &cmdList,
}
}
itor++;
itor = useMask ? find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end()) : find<MI_STORE_REGISTER_MEM *>(startIt, cmdList.end());
if (useMask) {
{
ASSERT_NE(cmdList.end(), itor);
@@ -128,7 +128,7 @@ void validateTimestampRegisters(GenCmdList &cmdList,
} else {
ASSERT_NE(cmdList.end(), itor);
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, cmdMem->getRegisterAddress());
EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress());
EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
if (workloadPartition) {
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
@@ -141,6 +141,145 @@ void validateTimestampRegisters(GenCmdList &cmdList,
startIt = itor;
}
// Validates the command sequence that stores two timestamps, each as a low/high register pair.
//
// Starting at `startIt`, the following is expected for the (first, second) pair and then for the
// (third, fourth) pair:
//   - useMask == true : MI_LOAD_REGISTER_REG (source = low register, destination = csGprR13),
//                       MI_LOAD_REGISTER_IMM (csGprR14 = 0xfffffffe), MI_MATH (DwordLength 3) and
//                       MI_STORE_REGISTER_MEM of csGprR12 to the low address;
//   - useMask == false: MI_STORE_REGISTER_MEM of the low register to the low address;
// directly followed by MI_STORE_REGISTER_MEM of the high register to the high address.
// Every MI_STORE_REGISTER_MEM must have its workload-partition flag equal to `workloadPartition`.
//
// On success `startIt` is moved past the last validated command. When a fatal assertion fires the
// function returns early and `startIt` is left untouched.
template <typename FamilyType>
void validateTimestampLongRegisters(GenCmdList &cmdList,
                                    GenCmdList::iterator &startIt,
                                    uint32_t firstLoadRegisterRegSrcAddress,
                                    uint64_t firstStoreRegMemAddress,
                                    uint32_t secondLoadRegisterRegSrcAddress,
                                    uint64_t secondStoreRegMemAddress,
                                    uint32_t thirdLoadRegisterRegSrcAddress,
                                    uint64_t thirdStoreRegMemAddress,
                                    uint32_t fourthLoadRegisterRegSrcAddress,
                                    uint64_t fourthStoreRegMemAddress,
                                    bool workloadPartition,
                                    bool useMask) {
    using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;

    constexpr uint32_t mask = 0xfffffffe;

    // Checks that `it` points at an MI_STORE_REGISTER_MEM with the expected register, address and partition flag.
    auto validateStoreRegisterMem = [&](GenCmdList::iterator it, uint32_t expectedRegister, uint64_t expectedAddress) {
        ASSERT_NE(cmdList.end(), it);
        auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*it);
        ASSERT_NE(nullptr, cmdMem);
        EXPECT_EQ(expectedRegister, cmdMem->getRegisterAddress());
        EXPECT_EQ(expectedAddress, cmdMem->getMemoryAddress());
        if (workloadPartition) {
            EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
        } else {
            EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
        }
    };

    // Validates one low/high pair, searching forward from `it`, and leaves `it` one past the high store.
    auto validateRegisterPair = [&](GenCmdList::iterator &it,
                                    uint32_t lowRegister, uint64_t lowAddress,
                                    uint32_t highRegister, uint64_t highAddress) {
        if (useMask) {
            it = find<MI_LOAD_REGISTER_REG *>(it, cmdList.end());
            ASSERT_NE(cmdList.end(), it);
            auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*it);
            ASSERT_NE(nullptr, cmdLoadReg);
            EXPECT_EQ(lowRegister, cmdLoadReg->getSourceRegisterAddress());
            EXPECT_EQ(RegisterOffsets::csGprR13, cmdLoadReg->getDestinationRegisterAddress());

            ++it;
            ASSERT_NE(cmdList.end(), it);
            auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
            ASSERT_NE(nullptr, cmdLoadImm);
            EXPECT_EQ(RegisterOffsets::csGprR14, cmdLoadImm->getRegisterOffset());
            EXPECT_EQ(mask, cmdLoadImm->getDataDword());

            ++it;
            ASSERT_NE(cmdList.end(), it);
            auto cmdMath = genCmdCast<MI_MATH *>(*it);
            ASSERT_NE(nullptr, cmdMath);
            EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);

            ++it;
            ASSERT_NO_FATAL_FAILURE(validateStoreRegisterMem(it, RegisterOffsets::csGprR12, lowAddress));
        } else {
            it = find<MI_STORE_REGISTER_MEM *>(it, cmdList.end());
            ASSERT_NO_FATAL_FAILURE(validateStoreRegisterMem(it, lowRegister, lowAddress));
        }

        // The high-register store must directly follow the low store.
        ++it;
        ASSERT_NO_FATAL_FAILURE(validateStoreRegisterMem(it, highRegister, highAddress));
        ++it;
    };

    auto itor = startIt;
    ASSERT_NO_FATAL_FAILURE(validateRegisterPair(itor,
                                                 firstLoadRegisterRegSrcAddress, firstStoreRegMemAddress,
                                                 secondLoadRegisterRegSrcAddress, secondStoreRegMemAddress));
    // The second pair is searched from where the first one ended, never from `startIt` again;
    // otherwise the non-mask path would re-validate the first store.
    ASSERT_NO_FATAL_FAILURE(validateRegisterPair(itor,
                                                 thirdLoadRegisterRegSrcAddress, thirdStoreRegMemAddress,
                                                 fourthLoadRegisterRegSrcAddress, fourthStoreRegMemAddress));
    startIt = itor;
}
template <typename FamilyType>
void CmdListPipelineSelectStateFixture::testBody() {
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;

View File

@@ -3013,7 +3013,7 @@ HWTEST_F(CommandListCreateTests, givenCommandListWhenAppendingBarrierWithIncorre
EXPECT_EQ(returnValue, ZE_RESULT_ERROR_INVALID_ARGUMENT);
}
HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets) {
HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtMostXeCore) {
using GfxFamily = typename NEO::GfxFamilyMapper<FamilyType::gfxCoreFamily>::GfxFamily;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
@@ -3051,7 +3051,54 @@ HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandF
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
}
HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets) {
// Copy-only command list, IsAtLeastXe2HpgCore: profiling before the command is expected to emit four
// consecutive MI_STORE_REGISTER_MEM commands in this order - global low, global high, context low,
// context high - with the high stores targeting the low address plus sizeof(uint32_t).
HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtLeastXe2HpgCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<FamilyType::gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::copy, 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool is used right below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    auto baseAddr = event->getGpuAddress(device);
    auto contextOffset = event->getContextStartOffset();
    auto globalOffset = event->getGlobalStartOffset();
    EXPECT_EQ(baseAddr, event->getPacketAddress(device));

    commandList->appendEventForProfilingCopyCommand(event.get(), true);
    EXPECT_EQ(1u, event->getPacketsInUse());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed()));

    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    // Checks the command at `itor` and advances; assertions are fatal because the command is dereferenced.
    auto expectStoreRegisterMem = [&](auto expectedRegister, auto expectedAddress) {
        ASSERT_NE(cmdList.end(), itor);
        auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
        ASSERT_NE(nullptr, cmd);
        EXPECT_EQ(cmd->getRegisterAddress(), expectedRegister);
        EXPECT_EQ(cmd->getMemoryAddress(), expectedAddress);
        ++itor;
    };

    ASSERT_NO_FATAL_FAILURE(expectStoreRegisterMem(RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampLdw, ptrOffset(baseAddr, globalOffset)));
    ASSERT_NO_FATAL_FAILURE(expectStoreRegisterMem(RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampUn, ptrOffset(baseAddr + sizeof(uint32_t), globalOffset)));
    ASSERT_NO_FATAL_FAILURE(expectStoreRegisterMem(RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow, ptrOffset(baseAddr, contextOffset)));
    ASSERT_NO_FATAL_FAILURE(expectStoreRegisterMem(RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, ptrOffset(baseAddr + sizeof(uint32_t), contextOffset)));
}
HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtMostPVC) {
using GfxFamily = typename NEO::GfxFamilyMapper<FamilyType::gfxCoreFamily>::GfxFamily;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
@@ -3086,6 +3133,85 @@ HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandFo
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
}
// Copy-only command list, IsAtMostXeCore: profiling after the command is expected to emit two
// consecutive MI_STORE_REGISTER_MEM commands - global low, then context low - at the event's end offsets.
HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtMostXeCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<FamilyType::gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::copy, 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool is used right below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    commandList->appendEventForProfilingCopyCommand(event.get(), false);

    auto contextOffset = event->getContextEndOffset();
    auto globalOffset = event->getGlobalEndOffset();
    auto baseAddr = event->getGpuAddress(device);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed()));

    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    // Checks the command at `itor` and advances; assertions are fatal because the command is dereferenced.
    auto expectStoreRegisterMem = [&](auto expectedRegister, auto expectedAddress) {
        ASSERT_NE(cmdList.end(), itor);
        auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
        ASSERT_NE(nullptr, cmd);
        EXPECT_EQ(cmd->getRegisterAddress(), expectedRegister);
        EXPECT_EQ(cmd->getMemoryAddress(), expectedAddress);
        ++itor;
    };

    ASSERT_NO_FATAL_FAILURE(expectStoreRegisterMem(RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampLdw, ptrOffset(baseAddr, globalOffset)));
    ASSERT_NO_FATAL_FAILURE(expectStoreRegisterMem(RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow, ptrOffset(baseAddr, contextOffset)));
}
// Copy-only command list, IsAtLeastXe2HpgCore: profiling after the command is expected to emit four
// consecutive MI_STORE_REGISTER_MEM commands in this order - global low, global high, context low,
// context high - at the event's end offsets, with the high stores at the low address plus sizeof(uint32_t).
HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtLeastXe2HpgCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<FamilyType::gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::copy, 0u);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // Fatal on purpose: the pool is used right below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    commandList->appendEventForProfilingCopyCommand(event.get(), false);

    auto contextOffset = event->getContextEndOffset();
    auto globalOffset = event->getGlobalEndOffset();
    auto baseAddr = event->getGpuAddress(device);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed()));

    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    // Checks the command at `itor` and advances; assertions are fatal because the command is dereferenced.
    auto expectStoreRegisterMem = [&](auto expectedRegister, auto expectedAddress) {
        ASSERT_NE(cmdList.end(), itor);
        auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
        ASSERT_NE(nullptr, cmd);
        EXPECT_EQ(cmd->getRegisterAddress(), expectedRegister);
        EXPECT_EQ(cmd->getMemoryAddress(), expectedAddress);
        ++itor;
    };

    ASSERT_NO_FATAL_FAILURE(expectStoreRegisterMem(RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampLdw, ptrOffset(baseAddr, globalOffset)));
    ASSERT_NO_FATAL_FAILURE(expectStoreRegisterMem(RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampUn, ptrOffset(baseAddr + sizeof(uint32_t), globalOffset)));
    ASSERT_NO_FATAL_FAILURE(expectStoreRegisterMem(RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow, ptrOffset(baseAddr, contextOffset)));
    ASSERT_NO_FATAL_FAILURE(expectStoreRegisterMem(RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, ptrOffset(baseAddr + sizeof(uint32_t), contextOffset)));
}
HWTEST_F(CommandListCreateTests, givenNullEventWhenAppendEventAfterWalkerThenNothingAddedToStream) {
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::copy, 0u);

View File

@@ -857,40 +857,45 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
EXPECT_EQ(timestampAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
auto startCmdList = cmdList.begin();
validateTimestampRegisters<FamilyType>(cmdList,
startCmdList,
RegisterOffsets::globalTimestampLdw, globalStartAddress,
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
true,
true);
if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
uint64_t globalStartAddressHigh = globalStartAddress + sizeof(uint32_t);
uint64_t contextStartAddressHigh = contextStartAddress + sizeof(uint32_t);
validateTimestampLongRegisters<FamilyType>(cmdList,
startCmdList,
RegisterOffsets::globalTimestampLdw, globalStartAddress,
RegisterOffsets::globalTimestampUn, globalStartAddressHigh,
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextStartAddressHigh,
true,
true);
} else {
validateTimestampRegisters<FamilyType>(cmdList,
startCmdList,
RegisterOffsets::globalTimestampUn, globalStartAddressHigh,
RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextStartAddressHigh,
RegisterOffsets::globalTimestampLdw, globalStartAddress,
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
true,
false);
true);
}
validateTimestampRegisters<FamilyType>(cmdList,
startCmdList,
RegisterOffsets::globalTimestampLdw, globalEndAddress,
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
true,
true);
if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
uint64_t globalEndAddressHigh = globalEndAddress + sizeof(uint32_t);
uint64_t contextEndAddressHigh = contextEndAddress + sizeof(uint32_t);
validateTimestampLongRegisters<FamilyType>(cmdList,
startCmdList,
RegisterOffsets::globalTimestampLdw, globalEndAddress,
RegisterOffsets::globalTimestampUn, globalEndAddressHigh,
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextEndAddressHigh,
true,
true);
} else {
validateTimestampRegisters<FamilyType>(cmdList,
startCmdList,
RegisterOffsets::globalTimestampUn, globalEndAddressHigh,
RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextEndAddressHigh,
RegisterOffsets::globalTimestampLdw, globalEndAddress,
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
true,
false);
true);
}
}

View File

@@ -105,8 +105,8 @@ struct CommandListXe2AndLaterFixture : public DeviceFixture {
ASSERT_EQ(6u, srmCommands.size());
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[0]), globalAddress, RegisterOffsets::csGprR12);
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[1]), contextAddress, RegisterOffsets::csGprR12);
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[2]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn);
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[1]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn);
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[2]), contextAddress, RegisterOffsets::csGprR12);
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[3]), contextAddress + sizeof(uint32_t), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh);
validateLrrCommand<FamilyType>(reinterpret_cast<MI_LOAD_REGISTER_REG *>(*srmCommands[4]), RegisterOffsets::globalTimestampLdw);
@@ -116,8 +116,8 @@ struct CommandListXe2AndLaterFixture : public DeviceFixture {
ASSERT_EQ(4u, srmCommands.size());
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[0]), globalAddress, RegisterOffsets::globalTimestampLdw);
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[1]), contextAddress, RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[2]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn);
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[1]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn);
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[2]), contextAddress, RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[3]), contextAddress + sizeof(uint32_t), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh);
}
}