Fix kernel timestamp overflow condition

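This patch fixes the profiling data calculation for the case when the kernel or
context end timestamp overflows (wraps around, so the end value is smaller than the start value).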

Related-To: LOCI-3140

Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
Joshua Santosh Ranjan
2022-07-22 18:28:16 +00:00
committed by Compute-Runtime-Automation
parent 996f6e4383
commit a6a91b2cf5
2 changed files with 74 additions and 15 deletions

View File

@@ -62,32 +62,50 @@ NEO::GraphicsAllocation &EventImp<TagSizeT>::getAllocation(Device *device) {
// Aggregates the per-packet timestamps stored in kernelEventCompletionData into the
// event-wide globalStartTS / globalEndTS / contextStartTS / contextEndTS members.
// Always returns ZE_RESULT_SUCCESS.
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
// For kernels flagged in l3FlushAppliedOnKernel, only packets whose index is a multiple
// of this value are taken into account; the others are skipped in the packet loop.
constexpr uint32_t skipL3EventPacketIndex = 2u;
// Seed the running start/end values from packet 0 of kernel 0; the loops below refine them.
globalStartTS = kernelEventCompletionData[0].getGlobalStartValue(0);
globalEndTS = kernelEventCompletionData[0].getGlobalEndValue(0);
contextStartTS = kernelEventCompletionData[0].getContextStartValue(0);
contextEndTS = kernelEventCompletionData[0].getContextEndValue(0);
// Folds one packet's (start, end) timestamp pair into the running end timestamp.
//   isOverflowed - in/out flag; set to true the first time a packet whose end is
//                  smaller than its start (a wrapped counter) is seen.
//   currTs       - {start, end} timestamps of the packet being folded in.
//   end          - running end timestamp accumulated so far.
// Returns the new running end timestamp:
//   * before any wrap was seen, the maximum of `end` and the packet end;
//   * for the first wrapped packet, that packet's end (replacing `end`);
//   * after a wrap was seen, only wrapped packets can raise the result.
auto getEndTS = [](bool &isOverflowed, const std::pair<uint64_t, uint64_t> &currTs, const uint64_t &end) {
    const uint64_t packetStart = currTs.first;
    const uint64_t packetEnd = currTs.second;
    const bool packetWrapped = packetEnd < packetStart;

    if (!packetWrapped) {
        // A non-wrapped packet only contributes while no wrap has been recorded.
        return isOverflowed ? end : std::max(end, packetEnd);
    }
    if (isOverflowed) {
        // Already in the wrapped regime: keep the largest wrapped end.
        return std::max(end, packetEnd);
    }
    // First wrapped packet: its end supersedes everything collected so far.
    isOverflowed = true;
    return packetEnd;
};
// Separate "end < start already seen" flags for the global and the context timestamps;
// each is passed by reference to getEndTS, which sets it on the first such packet.
bool isGlobalTsOverflowed = false;
bool isContextTsOverflowed = false;
for (uint32_t kernelId = 0; kernelId < kernelCount; kernelId++) {
// NOTE(review): this listing looks like a rendered diff with removed and added lines
// interleaved (two packet-loop headers in a row, unbalanced braces). The loop header on
// the next line appears to be the pre-patch version of the one that follows
// eventCompletion — confirm against the actual file.
for (auto packetId = 0u; packetId < kernelEventCompletionData[kernelId].getPacketsUsed(); packetId++) {
const auto &eventCompletion = kernelEventCompletionData[kernelId];
for (auto packetId = 0u; packetId < eventCompletion.getPacketsUsed(); packetId++) {
// When the L3-flush bit is set for this kernel, skip packets whose index is not a
// multiple of skipL3EventPacketIndex.
if (this->l3FlushAppliedOnKernel.test(kernelId) && ((packetId % skipL3EventPacketIndex) != 0)) {
continue;
}
// NOTE(review): the four if-based min/max updates below presumably are the removed
// (pre-patch) lines; they take a plain minimum of starts and maximum of ends.
if (globalStartTS > kernelEventCompletionData[kernelId].getGlobalStartValue(packetId)) {
globalStartTS = kernelEventCompletionData[kernelId].getGlobalStartValue(packetId);
}
if (contextStartTS > kernelEventCompletionData[kernelId].getContextStartValue(packetId)) {
contextStartTS = kernelEventCompletionData[kernelId].getContextStartValue(packetId);
}
if (contextEndTS < kernelEventCompletionData[kernelId].getContextEndValue(packetId)) {
contextEndTS = kernelEventCompletionData[kernelId].getContextEndValue(packetId);
}
if (globalEndTS < kernelEventCompletionData[kernelId].getGlobalEndValue(packetId)) {
globalEndTS = kernelEventCompletionData[kernelId].getGlobalEndValue(packetId);
}
// Pack this packet's {start, end} values so they can be handed to getEndTS as a pair.
const std::pair<uint64_t, uint64_t> currentGlobal(eventCompletion.getGlobalStartValue(packetId),
eventCompletion.getGlobalEndValue(packetId));
const std::pair<uint64_t, uint64_t> currentContext(eventCompletion.getContextStartValue(packetId),
eventCompletion.getContextEndValue(packetId));
// Start values: keep the smallest start seen so far.
globalStartTS = std::min(globalStartTS, currentGlobal.first);
contextStartTS = std::min(contextStartTS, currentContext.first);
// End values: delegate to getEndTS so packets whose end is below their start are handled.
globalEndTS = getEndTS(isGlobalTsOverflowed, currentGlobal, globalEndTS);
contextEndTS = getEndTS(isContextTsOverflowed, currentContext, contextEndTS);
}
}
return ZE_RESULT_SUCCESS;
}

View File

@@ -1518,6 +1518,47 @@ TEST_F(TimestampEventCreate, givenTimeStampEventUsedOnTwoKernelsWhenL3FlushSetOn
EXPECT_EQ(static_cast<uint64_t>(kernelEndValue), results.global.kernelEnd);
}
// Feeds the event three packets (one for the first kernel, two for the second) whose
// end timestamps wrap past the uint32_t maximum, then checks that queryKernelTimestamp
// reports the smallest start and the wrapped end values.
TEST_F(TimestampEventCreate, givenOverflowingTimeStampDataOnTwoKernelsWhenQueryKernelTimestampIsCalledOverflowIsObserved) {
    typename MockTimestampPackets32::Packet packets[4] = {};
    event->hostAddress = packets;

    // All arithmetic on tsMax is done in uint32_t, so "tsMax + k" wraps around to k - 1.
    const uint32_t tsMax = std::numeric_limits<uint32_t>::max();

    // First kernel, first packet: the context end timestamp wraps.
    auto &firstKernelPacket = packets[0];
    firstKernelPacket.contextStart = tsMax - 1;
    firstKernelPacket.contextEnd = tsMax + 1;
    firstKernelPacket.globalStart = tsMax - 2;
    firstKernelPacket.globalEnd = tsMax - 1;

    // Second kernel, first packet: the global end timestamp wraps.
    auto &secondKernelPacket0 = packets[1];
    secondKernelPacket0.contextStart = tsMax - 2;
    secondKernelPacket0.contextEnd = tsMax - 1;
    secondKernelPacket0.globalStart = tsMax - 1;
    secondKernelPacket0.globalEnd = tsMax + 1;

    // Second kernel, second packet: copy of packet 0 with a later, wrapping context range.
    memcpy(&packets[2], &packets[0], sizeof(MockTimestampPackets32::Packet));
    packets[2].contextStart = tsMax;
    packets[2].contextEnd = tsMax + 2;

    EXPECT_EQ(1u, event->getPacketsUsedInLastKernel());
    event->increaseKernelCount();
    event->setPacketsInUse(2u);

    ze_kernel_timestamp_result_t results;
    event->queryKernelTimestamp(&results);

    const uint64_t expectedContextStart = static_cast<uint64_t>(tsMax - 2);
    const uint64_t expectedContextEnd = static_cast<uint64_t>(tsMax + 2);
    const uint64_t expectedGlobalStart = static_cast<uint64_t>(tsMax - 2);
    const uint64_t expectedGlobalEnd = static_cast<uint64_t>(tsMax + 1);

    // Context values are only checked when the platform does not use global timestamps exclusively.
    auto &hwHelper = HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    if (!hwHelper.useOnlyGlobalTimestamps()) {
        EXPECT_EQ(expectedContextStart, results.context.kernelStart);
        EXPECT_EQ(expectedContextEnd, results.context.kernelEnd);
    }
    EXPECT_EQ(expectedGlobalStart, results.global.kernelStart);
    EXPECT_EQ(expectedGlobalEnd, results.global.kernelEnd);
}
HWTEST_EXCLUDE_PRODUCT(TimestampEventCreate, givenEventTimestampsWhenQueryKernelTimestampThenCorrectDataAreSet, IGFX_GEN12LP_CORE);
TEST_F(TimestampEventCreate, givenEventWhenQueryKernelTimestampThenNotReadyReturned) {