feature: add debug flags for timestamps

PrintCalculatedTimestamps - print ts in level zero paths
PrintTimestampPacketContents - add logging also to level zero paths
ForceUseOnlyGlobalTimestamps - force using a global ts

Related-To: HSD-14023527252
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2024-11-20 23:48:17 +00:00
committed by Compute-Runtime-Automation
parent 46c345789d
commit 4ad8c17db9
6 changed files with 137 additions and 2 deletions

View File

@@ -189,6 +189,9 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
const std::pair<uint64_t, uint64_t> currentContext(eventCompletion.getContextStartValue(packetId),
eventCompletion.getContextEndValue(packetId));
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintTimestampPacketContents.get(), stdout, "kernel id: %d, packet: %d, globalStartTS: %llu, globalEndTS: %llu, contextStartTS: %llu, contextEndTS: %llu\n",
kernelId, packetId, currentGlobal.first, getEndTS(isGlobalTsOverflowed, currentGlobal, globalEndTS), currentContext.first, getEndTS(isContextTsOverflowed, currentContext, contextEndTS));
globalStartTS = std::min(globalStartTS, currentGlobal.first);
contextStartTS = std::min(contextStartTS, currentContext.first);
globalEndTS = getEndTS(isGlobalTsOverflowed, currentGlobal, globalEndTS);
@@ -780,6 +783,9 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestamp(ze_kernel_timestamp_result_
eventTsSetFunc(globalEndTS, result.context.kernelEnd);
eventTsSetFunc(globalEndTS, result.global.kernelEnd);
}
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintCalculatedTimestamps.get(), stdout, "globalStartTS: %llu, globalEndTS: %llu, contextStartTS: %llu, contextEndTS: %llu\n",
result.global.kernelStart, result.global.kernelEnd, result.context.kernelStart, result.context.kernelEnd);
return ZE_RESULT_SUCCESS;
}

View File

@@ -2465,6 +2465,123 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, TimestampEventCreate, givenEventTimestampsWhenQue
EXPECT_EQ(data.globalEnd, result.global.kernelEnd);
}
HWTEST_F(TimestampEventCreate, givenFlagPrintCalculatedTimestampsWhenCallQueryKernelTimestampThenProperLogIsPrinted) {
debugManager.flags.PrintCalculatedTimestamps.set(1);
typename MockTimestampPackets32::Packet data = {};
data.contextStart = 1u;
data.contextEnd = 2u;
data.globalStart = 3u;
data.globalEnd = 4u;
event->hostAddressFromPool = &data;
ze_kernel_timestamp_result_t result = {};
testing::internal::CaptureStdout();
event->queryKernelTimestamp(&result);
std::string output = testing::internal::GetCapturedStdout();
std::stringstream expected;
expected << "globalStartTS: " << result.global.kernelStart << ", "
<< "globalEndTS: " << result.global.kernelEnd << ", "
<< "contextStartTS: " << result.context.kernelStart << ", "
<< "contextEndTS: " << result.context.kernelEnd << std::endl;
EXPECT_EQ(0, output.compare(expected.str().c_str()));
}
TEST_F(TimestampEventUsedPacketSignalCreate, givenFlagPrintTimestampPacketContentsWhenMultiPacketAndCallQueryKernelTimestampThenProperLogIsPrinted) {
debugManager.flags.PrintTimestampPacketContents.set(1);
typename MockTimestampPackets32::Packet packetData[2];
packetData[0].contextStart = 1u;
packetData[0].contextEnd = 2u;
packetData[0].globalStart = 3u;
packetData[0].globalEnd = 4u;
packetData[1].contextStart = 5u;
packetData[1].contextEnd = 6u;
packetData[1].globalStart = 7u;
packetData[1].globalEnd = 8u;
event->hostAddressFromPool = packetData;
ze_kernel_timestamp_result_t results;
auto packedCount = 2u;
event->setPacketsInUse(packedCount);
testing::internal::CaptureStdout();
event->queryKernelTimestamp(&results);
std::string output = testing::internal::GetCapturedStdout();
std::stringstream expected;
for (uint32_t i = 0; i < packedCount; i++) {
expected << "kernel id: 0, "
<< "packet: " << i << ", "
<< "globalStartTS: " << packetData[i].globalStart << ", "
<< "globalEndTS: " << packetData[i].globalEnd << ", "
<< "contextStartTS: " << packetData[i].contextStart << ", "
<< "contextEndTS: " << packetData[i].contextEnd << std::endl;
}
EXPECT_EQ(0, output.compare(expected.str().c_str()));
}
HWTEST2_F(TimestampEventCreateMultiKernel, givenFlagPrintTimestampPacketContentsWhenMultiKernelsAndMultiPacketsAndCallQueryKernelTimestampThenProperLogIsPrinted, IsAtLeastXeHpCore) {
debugManager.flags.PrintTimestampPacketContents.set(1);
typename MockTimestampPackets32::Packet packetData[4];
event->hostAddressFromPool = packetData;
// 1st kernel 1st packet
packetData[0].contextStart = 1;
packetData[0].contextEnd = 2;
packetData[0].globalStart = 3;
packetData[0].globalEnd = 4;
// 1st kernel 2nd packet
packetData[1].contextStart = 5;
packetData[1].contextEnd = 6;
packetData[1].globalStart = 7;
packetData[1].globalEnd = 8;
// 2nd kernel 1st packet
packetData[2].contextStart = 9;
packetData[2].contextEnd = 10;
packetData[2].globalStart = 11;
packetData[2].globalEnd = 12;
// 2nd kernel 2st packet
packetData[3].contextStart = 13;
packetData[3].contextEnd = 14;
packetData[3].globalStart = 15;
packetData[3].globalEnd = 16;
auto packedCount = 2u;
auto kernelCount = 2u;
// set packet for first kernel
event->setPacketsInUse(packedCount);
event->setKernelCount(kernelCount);
// set packet for second kernel
event->setPacketsInUse(packedCount);
ze_kernel_timestamp_result_t results;
testing::internal::CaptureStdout();
event->queryKernelTimestamp(&results);
std::string output = testing::internal::GetCapturedStdout();
std::stringstream expected;
auto i = 0u;
for (uint32_t kernelId = 0u; kernelId < kernelCount; kernelId++) {
for (uint32_t packet = 0; packet < packedCount; packet++) {
expected << "kernel id: " << kernelId << ", "
<< "packet: " << packet << ", "
<< "globalStartTS: " << packetData[i].globalStart << ", "
<< "globalEndTS: " << packetData[i].globalEnd << ", "
<< "contextStartTS: " << packetData[i].contextStart << ", "
<< "contextEndTS: " << packetData[i].contextEnd << std::endl;
i++;
}
}
EXPECT_EQ(0, output.compare(expected.str().c_str()));
}
TEST_F(TimestampEventUsedPacketSignalCreate, givenEventWhenQueryingTimestampExpThenCorrectDataSet) {
typename MockTimestampPackets32::Packet packetData[2];
event->setPacketsInUse(2u);

View File

@@ -300,6 +300,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampWidth, -1, "-1: default from KM
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdFifoPollInterval, -1, "-1: default , > 0: Fifo will be polled based on input in milliseconds.")
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdInterruptTimeout, -1, "-1: default , > 0: interruptTimeout based on input in milliseconds. Default is 2000 milliseconds")
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdMaxReadWriteRetry, -1, "-1: default , > 0: max pread/pwrite retry attempts in read/writeGpuMemory calls based on input in milliseconds. Default is 3")
DECLARE_DEBUG_VARIABLE(bool, ForceUseOnlyGlobalTimestamps, 0, "0- default disabled, 1: enable use only global timestamp")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
@@ -331,6 +332,7 @@ DECLARE_DEBUG_VARIABLE(bool, PrintDispatchParameters, false, "prints dispatch pa
DECLARE_DEBUG_VARIABLE(bool, PrintProgramBinaryProcessingTime, false, "prints execution time of Program::processGenBinary() method during program building")
DECLARE_DEBUG_VARIABLE(bool, PrintRelocations, false, "prints relocations debug information")
DECLARE_DEBUG_VARIABLE(bool, PrintTimestampPacketContents, false, "prints all timestamps values during profiling data calculation")
DECLARE_DEBUG_VARIABLE(bool, PrintCalculatedTimestamps, false, "prints final l0 timestamps values for profiling data calculation")
DECLARE_DEBUG_VARIABLE(bool, PrintGlobalTimestampInNs, false, "prints host and device timestamp in nanoseconds")
DECLARE_DEBUG_VARIABLE(bool, WddmResidencyLogger, false, "gather Wddm residency statistics to file")
DECLARE_DEBUG_VARIABLE(bool, PrintBOCreateDestroyResult, false, "tracks the result of creation and destruction of BOs")

View File

@@ -550,7 +550,7 @@ int32_t GfxCoreHelperHw<GfxFamily>::getDefaultThreadArbitrationPolicy() const {
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::useOnlyGlobalTimestamps() const {
return false;
return debugManager.flags.ForceUseOnlyGlobalTimestamps.get();
}
template <typename GfxFamily>

View File

@@ -646,4 +646,6 @@ DirectSubmissionControllerBcsTimeoutDivisor = -1
ForceZeroCopyForUseHostPtr = 0
Enable10ThreadsPerEu = -1
OverrideRegionCount = -1
ForceUseOnlyGlobalTimestamps = 0
PrintCalculatedTimestamps = 0
# Please don't edit below this line

View File

@@ -1902,4 +1902,12 @@ HWTEST_F(GfxCoreHelperTest, givenHwHelperWhenThreadGroupCountIsAlignedToDssThenT
uint32_t threadCount = maxThreadCount;
helper.alignThreadGroupCountToDssSize(threadCount, dssCount, threadsPerDss, threadGroupSize);
EXPECT_EQ(dssCount, threadCount);
}
}
HWTEST_F(GfxCoreHelperTest, givenDebugFlagForceUseOnlyGlobalTimestampsSetWhenCallUseOnlyGlobalTimestampsThenTrueIsReturned) {
DebugManagerStateRestore restore;
debugManager.flags.ForceUseOnlyGlobalTimestamps.set(1);
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
EXPECT_TRUE(gfxCoreHelper.useOnlyGlobalTimestamps());
}