mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 14:33:04 +08:00
fix: store whole global timestamp before context timestamp
Related-To: NEO-15192
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0842e16412
commit
6951ff1a07
@@ -329,7 +329,10 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t &threadGroupDimensions, size_t &patchIndex);
|
||||
void programRegionGroupBarrier(Kernel &kernel, const ze_group_count_t &threadGroupDimensions, size_t localRegionSize, size_t &patchIndex);
|
||||
void appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition, bool copyOperation);
|
||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool workloadPartition, bool copyOperation);
|
||||
void adjustWriteKernelTimestamp(uint64_t address, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool workloadPartition, bool copyOperation, bool globalTimestamp);
|
||||
void writeTimestamp(NEO::CommandContainer &container, uint32_t regOffset, uint64_t address, bool maskLsb, bool workloadPartition, void **postSyncCmdBuffer, bool copyOperation);
|
||||
void pushTimestampPatch(CommandToPatchContainer *container, uint64_t offset, void *pDestination);
|
||||
void writeKernelTimestamp(uint64_t baseAddr, Event *event, CommandToPatchContainer *outTimeStampSyncCmds, size_t offset, bool maskLsb, bool workloadPartition, bool copyOperation, bool isGlobalTimestamp);
|
||||
void appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation);
|
||||
void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker);
|
||||
void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation);
|
||||
|
||||
@@ -3357,49 +3357,47 @@ void CommandListCoreFamily<gfxCoreFamily>::programRegionGroupBarrier(Kernel &ker
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition, bool copyOperation) {
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
|
||||
auto baseAddr = event->getPacketAddress(this->device);
|
||||
writeKernelTimestamp(baseAddr, event, outTimeStampSyncCmds, beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset(), maskLsb, workloadPartition, copyOperation, true);
|
||||
writeKernelTimestamp(baseAddr, event, outTimeStampSyncCmds, beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset(), maskLsb, workloadPartition, copyOperation, false);
|
||||
}
|
||||
|
||||
auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset();
|
||||
auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset();
|
||||
|
||||
void **globalPostSyncCmdBuffer = nullptr;
|
||||
void **contextPostSyncCmdBuffer = nullptr;
|
||||
|
||||
void *globalPostSyncCmd = nullptr;
|
||||
void *contextPostSyncCmd = nullptr;
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::writeKernelTimestamp(uint64_t baseAddr, Event *event, CommandToPatchContainer *outTimeStampSyncCmds, size_t offset, bool maskLsb, bool workloadPartition, bool copyOperation, bool isGlobalTimestamp) {
|
||||
void **postSyncCmdBuffer = nullptr;
|
||||
void *postSyncCmd = nullptr;
|
||||
if (outTimeStampSyncCmds != nullptr) {
|
||||
globalPostSyncCmdBuffer = &globalPostSyncCmd;
|
||||
contextPostSyncCmdBuffer = &contextPostSyncCmd;
|
||||
postSyncCmdBuffer = &postSyncCmd;
|
||||
}
|
||||
uint64_t address = ptrOffset(baseAddr, offset);
|
||||
|
||||
uint64_t globalAddress = ptrOffset(baseAddr, globalOffset);
|
||||
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
|
||||
uint32_t registerOffset = isGlobalTimestamp ? RegisterOffsets::globalTimestampLdw : RegisterOffsets::gpThreadTimeRegAddressOffsetLow;
|
||||
writeTimestamp(commandContainer, registerOffset, address, maskLsb, workloadPartition, postSyncCmdBuffer, copyOperation);
|
||||
pushTimestampPatch(outTimeStampSyncCmds, offset, postSyncCmd);
|
||||
adjustWriteKernelTimestamp(address, baseAddr, outTimeStampSyncCmds, workloadPartition, copyOperation, isGlobalTimestamp);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::writeTimestamp(NEO::CommandContainer &container, uint32_t regOffset, uint64_t address, bool maskLsb, bool workloadPartition, void **postSyncCmdBuffer, bool copyOperation) {
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
if (maskLsb) {
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(
|
||||
container, regOffset, mask, address, workloadPartition, postSyncCmdBuffer, copyOperation);
|
||||
} else {
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(
|
||||
*container.getCommandStream(), regOffset, address, workloadPartition, postSyncCmdBuffer, copyOperation);
|
||||
}
|
||||
}
|
||||
|
||||
if (outTimeStampSyncCmds != nullptr) {
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::pushTimestampPatch(CommandToPatchContainer *container, uint64_t offset, void *pDestination) {
|
||||
if (container) {
|
||||
CommandToPatch ctxCmd;
|
||||
ctxCmd.type = CommandToPatch::TimestampEventPostSyncStoreRegMem;
|
||||
|
||||
ctxCmd.offset = globalOffset;
|
||||
ctxCmd.pDestination = globalPostSyncCmd;
|
||||
outTimeStampSyncCmds->push_back(ctxCmd);
|
||||
|
||||
ctxCmd.offset = contextOffset;
|
||||
ctxCmd.pDestination = contextPostSyncCmd;
|
||||
outTimeStampSyncCmds->push_back(ctxCmd);
|
||||
ctxCmd.offset = offset;
|
||||
ctxCmd.pDestination = pDestination;
|
||||
container->push_back(ctxCmd);
|
||||
}
|
||||
|
||||
adjustWriteKernelTimestamp(globalAddress, contextAddress, baseAddr, outTimeStampSyncCmds, workloadPartition, copyOperation);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
@@ -15,7 +15,7 @@ inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPre
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
|
||||
bool workloadPartition, bool copyOperation) {}
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
|
||||
bool workloadPartition, bool copyOperation, bool globalTimestamp) {}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -36,8 +36,8 @@ size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
|
||||
bool workloadPartition, bool copyOperation) {}
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
|
||||
bool workloadPartition, bool copyOperation, bool globalTimestamp) {}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {
|
||||
|
||||
@@ -21,37 +21,18 @@ inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPre
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
|
||||
bool workloadPartition, bool copyOperation) {
|
||||
uint64_t globalHighAddress = globalAddress + sizeof(uint32_t);
|
||||
uint64_t contextHighAddress = contextAddress + sizeof(uint32_t);
|
||||
|
||||
void **globalPostSyncCmdBuffer = nullptr;
|
||||
void **contextPostSyncCmdBuffer = nullptr;
|
||||
|
||||
void *globalPostSyncCmd = nullptr;
|
||||
void *contextPostSyncCmd = nullptr;
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t address, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds,
|
||||
bool workloadPartition, bool copyOperation, bool globalTimestamp) {
|
||||
uint64_t highAddress = address + sizeof(uint32_t);
|
||||
void **postSyncCmdBuffer = nullptr;
|
||||
void *postSyncCmd = nullptr;
|
||||
|
||||
if (outTimeStampSyncCmds != nullptr) {
|
||||
globalPostSyncCmdBuffer = &globalPostSyncCmd;
|
||||
contextPostSyncCmdBuffer = &contextPostSyncCmd;
|
||||
postSyncCmdBuffer = &postSyncCmd;
|
||||
}
|
||||
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampUn, globalHighAddress, workloadPartition, globalPostSyncCmdBuffer, copyOperation);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextHighAddress, workloadPartition, contextPostSyncCmdBuffer, copyOperation);
|
||||
|
||||
if (outTimeStampSyncCmds != nullptr) {
|
||||
CommandToPatch ctxCmd;
|
||||
ctxCmd.type = CommandToPatch::TimestampEventPostSyncStoreRegMem;
|
||||
|
||||
ctxCmd.offset = globalHighAddress - baseAddress;
|
||||
ctxCmd.pDestination = globalPostSyncCmd;
|
||||
outTimeStampSyncCmds->push_back(ctxCmd);
|
||||
|
||||
ctxCmd.offset = contextHighAddress - baseAddress;
|
||||
ctxCmd.pDestination = contextPostSyncCmd;
|
||||
outTimeStampSyncCmds->push_back(ctxCmd);
|
||||
}
|
||||
uint32_t registerOffset = globalTimestamp ? RegisterOffsets::globalTimestampUn : RegisterOffsets::gpThreadTimeRegAddressOffsetHigh;
|
||||
writeTimestamp(commandContainer, registerOffset, highAddress, false, workloadPartition, postSyncCmdBuffer, copyOperation);
|
||||
pushTimestampPatch(outTimeStampSyncCmds, highAddress - baseAddress, postSyncCmd);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -73,6 +73,15 @@ void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
uint64_t secondStoreRegMemAddress,
|
||||
bool workloadPartition,
|
||||
bool useMask);
|
||||
template <typename FamilyType>
|
||||
void validateTimestampLongRegisters(GenCmdList &cmdList,
|
||||
GenCmdList::iterator &startIt,
|
||||
uint32_t firstLoadRegisterRegSrcAddress,
|
||||
uint64_t firstStoreRegMemAddress,
|
||||
uint32_t secondLoadRegisterRegSrcAddress,
|
||||
uint64_t secondStoreRegMemAddress,
|
||||
bool workloadPartition,
|
||||
bool useMask);
|
||||
|
||||
struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture {
|
||||
void setUp() {
|
||||
|
||||
@@ -80,7 +80,7 @@ void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
} else {
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(RegisterOffsets::globalTimestampUn, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
@@ -89,7 +89,7 @@ void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
}
|
||||
}
|
||||
|
||||
itor++;
|
||||
itor = useMask ? find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end()) : find<MI_STORE_REGISTER_MEM *>(startIt, cmdList.end());
|
||||
if (useMask) {
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
@@ -128,7 +128,7 @@ void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
} else {
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
@@ -141,6 +141,145 @@ void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
startIt = itor;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void validateTimestampLongRegisters(GenCmdList &cmdList,
|
||||
GenCmdList::iterator &startIt,
|
||||
uint32_t firstLoadRegisterRegSrcAddress,
|
||||
uint64_t firstStoreRegMemAddress,
|
||||
uint32_t secondLoadRegisterRegSrcAddress,
|
||||
uint64_t secondStoreRegMemAddress,
|
||||
uint32_t thirdLoadRegisterRegSrcAddress,
|
||||
uint64_t thirdStoreRegMemAddress,
|
||||
uint32_t fourthLoadRegisterRegSrcAddress,
|
||||
uint64_t fourthStoreRegMemAddress,
|
||||
bool workloadPartition,
|
||||
bool useMask) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
|
||||
auto itor = useMask ? find<MI_LOAD_REGISTER_REG *>(startIt, cmdList.end()) : find<MI_STORE_REGISTER_MEM *>(startIt, cmdList.end());
|
||||
if (useMask) {
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR13, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(RegisterOffsets::csGprR14, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(RegisterOffsets::csGprR12, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
}
|
||||
itor++;
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
|
||||
itor = useMask ? find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end()) : find<MI_STORE_REGISTER_MEM *>(startIt, cmdList.end());
|
||||
if (useMask) {
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(thirdLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR13, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(RegisterOffsets::csGprR14, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(RegisterOffsets::csGprR12, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(thirdStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(thirdLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(thirdStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
}
|
||||
itor++;
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(fourthLoadRegisterRegSrcAddress, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(fourthStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
itor++;
|
||||
startIt = itor;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void CmdListPipelineSelectStateFixture::testBody() {
|
||||
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
|
||||
|
||||
@@ -3013,7 +3013,7 @@ HWTEST_F(CommandListCreateTests, givenCommandListWhenAppendingBarrierWithIncorre
|
||||
EXPECT_EQ(returnValue, ZE_RESULT_ERROR_INVALID_ARGUMENT);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets) {
|
||||
HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtMostXeCore) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<FamilyType::gfxCoreFamily>::GfxFamily;
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
@@ -3051,7 +3051,54 @@ HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandF
|
||||
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
|
||||
}
|
||||
|
||||
HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets) {
|
||||
// Copy-only command list, profiling programmed before the command: the stream must contain
// four back-to-back MI_STORE_REGISTER_MEM commands writing, in order, global timestamp low,
// global timestamp high, context timestamp low and context timestamp high (BCS0 registers)
// to the event's start offsets.
HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtLeastXe2HpgCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<FamilyType::gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::copy, 0u);
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // The pool is used right below, so a creation failure must stop the test.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    auto baseAddr = event->getGpuAddress(device);
    auto contextOffset = event->getContextStartOffset();
    auto globalOffset = event->getGlobalStartOffset();
    EXPECT_EQ(baseAddr, event->getPacketAddress(device));

    commandList->appendEventForProfilingCopyCommand(event.get(), true);
    EXPECT_EQ(1u, event->getPacketsInUse());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed()));

    // Iterator and cast checks are fatal: each command is dereferenced right after them.
    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampLdw);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset));

    ASSERT_NE(cmdList.end(), ++itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampUn);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr + sizeof(uint32_t), globalOffset));

    ASSERT_NE(cmdList.end(), ++itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));

    ASSERT_NE(cmdList.end(), ++itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetHigh);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr + sizeof(uint32_t), contextOffset));
}
|
||||
|
||||
HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtMostPVC) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<FamilyType::gfxCoreFamily>::GfxFamily;
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
@@ -3086,6 +3133,85 @@ HWTEST_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandFo
|
||||
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
|
||||
}
|
||||
|
||||
// Copy-only command list, profiling programmed after the command: the stream must contain
// two back-to-back MI_STORE_REGISTER_MEM commands writing the low dword of the global
// timestamp and then the low dword of the context timestamp (BCS0 registers) to the
// event's end offsets.
HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtMostXeCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<FamilyType::gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::copy, 0u);
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // The pool is used right below, so a creation failure must stop the test.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    commandList->appendEventForProfilingCopyCommand(event.get(), false);

    auto contextOffset = event->getContextEndOffset();
    auto globalOffset = event->getGlobalEndOffset();
    auto baseAddr = event->getGpuAddress(device);
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed()));

    // Iterator and cast checks are fatal: each command is dereferenced right after them.
    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampLdw);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset));

    ASSERT_NE(cmdList.end(), ++itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
}
|
||||
|
||||
// Copy-only command list, profiling programmed after the command: the stream must contain
// four back-to-back MI_STORE_REGISTER_MEM commands writing, in order, global timestamp low,
// global timestamp high, context timestamp low and context timestamp high (BCS0 registers)
// to the event's end offsets.
HWTEST2_F(CommandListCreateTests, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, IsAtLeastXe2HpgCore) {
    using GfxFamily = typename NEO::GfxFamilyMapper<FamilyType::gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    commandList->initialize(device, NEO::EngineGroupType::copy, 0u);
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    ze_result_t result = ZE_RESULT_SUCCESS;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
    // The pool is used right below, so a creation failure must stop the test.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));

    commandList->appendEventForProfilingCopyCommand(event.get(), false);

    auto contextOffset = event->getContextEndOffset();
    auto globalOffset = event->getGlobalEndOffset();
    auto baseAddr = event->getGpuAddress(device);
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed()));

    // Iterator and cast checks are fatal: each command is dereferenced right after them.
    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampLdw);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset));

    ASSERT_NE(cmdList.end(), ++itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::globalTimestampUn);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr + sizeof(uint32_t), globalOffset));

    ASSERT_NE(cmdList.end(), ++itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));

    ASSERT_NE(cmdList.end(), ++itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::bcs0Base + RegisterOffsets::gpThreadTimeRegAddressOffsetHigh);
    EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr + sizeof(uint32_t), contextOffset));
}
|
||||
|
||||
HWTEST_F(CommandListCreateTests, givenNullEventWhenAppendEventAfterWalkerThenNothingAddedToStream) {
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::copy, 0u);
|
||||
|
||||
@@ -857,40 +857,45 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
EXPECT_EQ(timestampAddress, NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd));
|
||||
|
||||
auto startCmdList = cmdList.begin();
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
RegisterOffsets::globalTimestampLdw, globalStartAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
|
||||
true,
|
||||
true);
|
||||
|
||||
if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
|
||||
uint64_t globalStartAddressHigh = globalStartAddress + sizeof(uint32_t);
|
||||
uint64_t contextStartAddressHigh = contextStartAddress + sizeof(uint32_t);
|
||||
validateTimestampLongRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
RegisterOffsets::globalTimestampLdw, globalStartAddress,
|
||||
RegisterOffsets::globalTimestampUn, globalStartAddressHigh,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextStartAddressHigh,
|
||||
true,
|
||||
true);
|
||||
} else {
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
RegisterOffsets::globalTimestampUn, globalStartAddressHigh,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextStartAddressHigh,
|
||||
RegisterOffsets::globalTimestampLdw, globalStartAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
|
||||
true,
|
||||
false);
|
||||
true);
|
||||
}
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
RegisterOffsets::globalTimestampLdw, globalEndAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
|
||||
true,
|
||||
true);
|
||||
|
||||
if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
|
||||
uint64_t globalEndAddressHigh = globalEndAddress + sizeof(uint32_t);
|
||||
uint64_t contextEndAddressHigh = contextEndAddress + sizeof(uint32_t);
|
||||
validateTimestampLongRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
RegisterOffsets::globalTimestampLdw, globalEndAddress,
|
||||
RegisterOffsets::globalTimestampUn, globalEndAddressHigh,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextEndAddressHigh,
|
||||
true,
|
||||
true);
|
||||
} else {
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
RegisterOffsets::globalTimestampUn, globalEndAddressHigh,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetHigh, contextEndAddressHigh,
|
||||
RegisterOffsets::globalTimestampLdw, globalEndAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
|
||||
true,
|
||||
false);
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -105,8 +105,8 @@ struct CommandListXe2AndLaterFixture : public DeviceFixture {
|
||||
ASSERT_EQ(6u, srmCommands.size());
|
||||
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[0]), globalAddress, RegisterOffsets::csGprR12);
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[1]), contextAddress, RegisterOffsets::csGprR12);
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[2]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn);
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[1]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn);
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[2]), contextAddress, RegisterOffsets::csGprR12);
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[3]), contextAddress + sizeof(uint32_t), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh);
|
||||
|
||||
validateLrrCommand<FamilyType>(reinterpret_cast<MI_LOAD_REGISTER_REG *>(*srmCommands[4]), RegisterOffsets::globalTimestampLdw);
|
||||
@@ -116,8 +116,8 @@ struct CommandListXe2AndLaterFixture : public DeviceFixture {
|
||||
ASSERT_EQ(4u, srmCommands.size());
|
||||
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[0]), globalAddress, RegisterOffsets::globalTimestampLdw);
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[1]), contextAddress, RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[2]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn);
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[1]), globalAddress + sizeof(uint32_t), RegisterOffsets::globalTimestampUn);
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[2]), contextAddress, RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
validateSrmCommand<FamilyType>(reinterpret_cast<MI_STORE_REGISTER_MEM *>(*srmCommands[3]), contextAddress + sizeof(uint32_t), RegisterOffsets::gpThreadTimeRegAddressOffsetHigh);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user