fix: calculate event timestamps based on submit time and host time diff

Related-To: HSD-18034426016, HSD-18034426678
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2023-10-23 15:22:11 +00:00
committed by Compute-Runtime-Automation
parent 820f32abe2
commit 7e7eea9995
10 changed files with 197 additions and 206 deletions

View File

@@ -98,18 +98,15 @@ HWTEST_F(EnqueueHandlerTimestampEnabledTest, givenProflingAndTimeStampPacketsEna
CsrDependencies csrDeps;
EnqueueProperties enqueueProperties(false, false, false, true, false, false, nullptr);
EXPECT_EQ(ev->submitTimeStamp.cpuTimeinNS, 0u);
EXPECT_EQ(ev->submitTimeStamp.cpuTimeInNs, 0u);
EXPECT_EQ(ev->submitTimeStamp.gpuTimeInNs, 0u);
EXPECT_EQ(ev->submitTimeStamp.gpuTimeStamp, 0u);
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
eventsRequest, eventBuilder, 0, csrDeps, nullptr, false);
EXPECT_NE(ev->submitTimeStamp.cpuTimeinNS, 0u);
EXPECT_NE(ev->submitTimeStamp.gpuTimeStamp, 0u);
ev->queueTimeStamp.gpuTimeStamp = 1000;
ev->calculateSubmitTimestampData();
EXPECT_NE(ev->submitTimeStamp.cpuTimeInNs, 0u);
EXPECT_NE(ev->submitTimeStamp.gpuTimeInNs, 0u);
EXPECT_NE(ev->submitTimeStamp.gpuTimeStamp, 0u);
delete ev;
@@ -136,18 +133,15 @@ HWTEST_F(EnqueueHandlerTimestampDisabledTest, givenProflingEnabledTimeStampPacke
CsrDependencies csrDeps;
EnqueueProperties enqueueProperties(false, false, false, true, false, false, nullptr);
EXPECT_EQ(ev->submitTimeStamp.cpuTimeinNS, 0u);
EXPECT_EQ(ev->submitTimeStamp.cpuTimeInNs, 0u);
EXPECT_EQ(ev->submitTimeStamp.gpuTimeInNs, 0u);
EXPECT_EQ(ev->submitTimeStamp.gpuTimeStamp, 0u);
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, &mockCmdQ->getCS(0), 0, blocking, enqueueProperties, timestampPacketDependencies,
eventsRequest, eventBuilder, 0, csrDeps, nullptr, false);
EXPECT_NE(ev->submitTimeStamp.cpuTimeinNS, 0u);
EXPECT_NE(ev->submitTimeStamp.gpuTimeStamp, 0u);
ev->queueTimeStamp.gpuTimeStamp = 1000;
ev->calculateSubmitTimestampData();
EXPECT_NE(ev->submitTimeStamp.cpuTimeInNs, 0u);
EXPECT_NE(ev->submitTimeStamp.gpuTimeInNs, 0u);
EXPECT_NE(ev->submitTimeStamp.gpuTimeStamp, 0u);
delete ev;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -75,28 +75,16 @@ struct MyUserEvent : public VirtualEvent {
};
struct MyEvent : public Event {
using Event::completeTimeStamp;
using Event::endTimeStamp;
using Event::queueTimeStamp;
using Event::startTimeStamp;
using Event::submitTimeStamp;
MyEvent(CommandQueue *cmdQueue, cl_command_type cmdType, TaskCountType taskLevel, TaskCountType taskCount)
: Event(cmdQueue, cmdType, taskLevel, taskCount) {
}
TimeStampData getQueueTimeStamp() {
return this->queueTimeStamp;
};
TimeStampData getSubmitTimeStamp() {
return this->submitTimeStamp;
};
uint64_t getStartTimeStamp() {
return this->startTimeStamp;
};
uint64_t getEndTimeStamp() {
return this->endTimeStamp;
};
uint64_t getCompleteTimeStamp() {
return this->completeTimeStamp;
}
uint64_t getGlobalStartTimestamp() const {
return this->globalStartTimestamp;

View File

@@ -40,6 +40,7 @@
#include "event_fixture.h"
#include <cmath>
#include <memory>
#include <type_traits>
@@ -824,10 +825,12 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseNotEnabledWhenCalculateStartT
HwTimeStamps timestamp{};
timestamp.globalStartTS = 2;
event.queueTimeStamp.gpuTimeStamp = 1;
event.queueTimeStamp.cpuTimeinNS = 100;
event.queueTimeStamp.cpuTimeInNs = 100;
event.queueTimeStamp.gpuTimeInNs = 10;
event.queueTimeStamp.gpuTimeStamp = 2;
event.queueTimeStamp.cpuTimeinNS = 200;
event.submitTimeStamp.gpuTimeStamp = 2;
event.submitTimeStamp.cpuTimeInNs = 200;
event.submitTimeStamp.gpuTimeInNs = 50;
TagNode<HwTimeStamps> timestampNode{};
timestampNode.tagForCpuAccess = &timestamp;
@@ -838,7 +841,7 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseNotEnabledWhenCalculateStartT
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
auto &gfxCoreHelper = pClDevice->getGfxCoreHelper();
auto c0 = event.submitTimeStamp.cpuTimeinNS - gfxCoreHelper.getGpuTimeStampInNS(event.submitTimeStamp.gpuTimeStamp, resolution);
auto c0 = event.submitTimeStamp.cpuTimeInNs - gfxCoreHelper.getGpuTimeStampInNS(event.submitTimeStamp.gpuTimeStamp, resolution);
EXPECT_EQ(start, static_cast<uint64_t>(timestamp.globalStartTS * resolution) + c0);
event.timeStampNode = nullptr;
@@ -885,11 +888,13 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
MockCommandQueue cmdQ(mockContext, pClDevice, props, false);
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
HwTimeStamps timestamp{};
timestamp.globalStartTS = 3;
event.queueTimeStamp.gpuTimeStamp = 2;
event.submitTimeStamp.gpuTimeStamp = 4;
event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);
TagNode<HwTimeStamps> timestampNode{};
timestampNode.tagForCpuAccess = &timestamp;
event.timeStampNode = &timestampNode;
@@ -898,8 +903,7 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
auto &gfxCoreHelper = pClDevice->getGfxCoreHelper();
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
auto refStartTime = static_cast<uint64_t>(timestamp.globalStartTS * resolution) + static_cast<uint64_t>((1ULL << gfxCoreHelper.getGlobalTimeStampBits()) * resolution);
auto refStartTime = static_cast<uint64_t>((timestamp.globalStartTS + (1ULL << gfxCoreHelper.getGlobalTimeStampBits())) * resolution);
EXPECT_EQ(start, refStartTime);
event.timeStampNode = nullptr;
@@ -1315,50 +1319,52 @@ TEST_F(EventTest, WhenSettingCpuTimeStampThenCorrectTimeIsSet) {
ev.setProfilingEnabled(true);
ev.setStartTimeStamp();
uint64_t outCPUtimeStamp = ev.getStartTimeStamp();
EXPECT_NE(0ULL, outCPUtimeStamp);
EXPECT_NE(0ULL, ev.startTimeStamp.cpuTimeInNs);
EXPECT_NE(0ULL, ev.startTimeStamp.gpuTimeInNs);
EXPECT_NE(0ULL, ev.startTimeStamp.gpuTimeStamp);
ev.setEndTimeStamp();
outCPUtimeStamp = ev.getEndTimeStamp();
EXPECT_NE(0ULL, outCPUtimeStamp);
outCPUtimeStamp = ev.getCompleteTimeStamp();
EXPECT_NE(0ULL, outCPUtimeStamp);
EXPECT_NE(0ULL, ev.endTimeStamp.cpuTimeInNs);
EXPECT_NE(0ULL, ev.endTimeStamp.gpuTimeInNs);
EXPECT_NE(0ULL, ev.endTimeStamp.gpuTimeStamp);
EXPECT_NE(0ULL, ev.completeTimeStamp.cpuTimeInNs);
EXPECT_NE(0ULL, ev.completeTimeStamp.gpuTimeInNs);
EXPECT_NE(0ULL, ev.completeTimeStamp.gpuTimeStamp);
}
TEST_F(EventTest, whenSettingQueueTimestampThenCorrectTimestampIsSet) {
MyEvent event(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);
TEST_F(EventTest, whenSettingQueueTimestampThenOnlyCpuTimeIsSet) {
MyEvent event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0);
TimeStampData queueTimeStamp = {1234, 5678};
event.setQueueTimeStamp(queueTimeStamp);
auto timeStamp = event.getQueueTimeStamp();
EXPECT_EQ(1234ULL, timeStamp.gpuTimeStamp);
EXPECT_EQ(5678ULL, timeStamp.cpuTimeinNS);
event.setQueueTimeStamp();
EXPECT_EQ(0ull, event.queueTimeStamp.gpuTimeStamp);
EXPECT_NE(0ull, event.queueTimeStamp.cpuTimeInNs);
EXPECT_EQ(0ull, event.queueTimeStamp.gpuTimeInNs);
}
TEST_F(EventTest, whenSettingSubmitTimestampThenCorrectTimestampIsSet) {
MyEvent event(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);
TEST_F(EventTest, whenSettingSubmitTimestampThenQueueAndSubmitTimestampsAreSet) {
MyEvent event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 3, 0);
uint64_t cpuTimeStamp = 1234u;
event.queueTimeStamp.cpuTimeInNs = cpuTimeStamp;
auto resolution = pDevice->getDeviceInfo().profilingTimerResolution;
auto expectedQueueGpuTimeStamp = 1000ul;
auto expectedGpuDiff = 300;
auto expectedCpuDiff = static_cast<uint64_t>(std::ceil(expectedGpuDiff * resolution));
auto expectedSubmitGpuTimeInNs = static_cast<uint64_t>((expectedQueueGpuTimeStamp + expectedGpuDiff) * resolution);
TimeStampData submitTimeStamp{};
submitTimeStamp.cpuTimeinNS = cpuTimeStamp + expectedCpuDiff;
submitTimeStamp.gpuTimeStamp = expectedQueueGpuTimeStamp + expectedGpuDiff;
TimeStampData submitTimeStamp = {1234, 5678};
event.setSubmitTimeStamp(submitTimeStamp);
auto timeStamp = event.getSubmitTimeStamp();
EXPECT_EQ(1234ULL, timeStamp.gpuTimeStamp);
EXPECT_EQ(5678ULL, timeStamp.cpuTimeinNS);
}
TEST_F(EventTest, GivenNoQueueWhenSettingCpuTimeStampThenTimesIsNotSet) {
MyEvent ev(nullptr, CL_COMMAND_COPY_BUFFER, 3, 0);
ev.setStartTimeStamp();
uint64_t outCPUtimeStamp = ev.getStartTimeStamp();
EXPECT_EQ(0ULL, outCPUtimeStamp);
ev.setEndTimeStamp();
outCPUtimeStamp = ev.getEndTimeStamp();
EXPECT_EQ(0ULL, outCPUtimeStamp);
outCPUtimeStamp = ev.getCompleteTimeStamp();
EXPECT_EQ(0ULL, outCPUtimeStamp);
EXPECT_EQ(expectedQueueGpuTimeStamp, event.queueTimeStamp.gpuTimeStamp);
EXPECT_EQ(cpuTimeStamp, event.queueTimeStamp.cpuTimeInNs);
EXPECT_EQ(expectedSubmitGpuTimeInNs - expectedCpuDiff, event.queueTimeStamp.gpuTimeInNs);
EXPECT_EQ(submitTimeStamp.gpuTimeStamp, event.submitTimeStamp.gpuTimeStamp);
EXPECT_EQ(submitTimeStamp.cpuTimeinNS, event.submitTimeStamp.cpuTimeInNs);
EXPECT_EQ(expectedSubmitGpuTimeInNs, event.submitTimeStamp.gpuTimeInNs);
}
HWTEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) {

View File

@@ -150,10 +150,10 @@ GEN12LPTEST_F(ProfilingTestsGen12LP, givenRawTimestampsDebugModeWhenDataIsQuerie
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
cl_event clEvent = &event;
event.queueTimeStamp.cpuTimeinNS = 1;
event.queueTimeStamp.cpuTimeInNs = 1;
event.queueTimeStamp.gpuTimeStamp = 2;
event.submitTimeStamp.cpuTimeinNS = 3;
event.submitTimeStamp.cpuTimeInNs = 3;
event.submitTimeStamp.gpuTimeStamp = 4;
event.setCPUProfilingPath(false);

View File

@@ -36,7 +36,6 @@ struct MockEvent : public BaseEventType {
using BaseEventType::timeStampNode;
using Event::calcProfilingData;
using Event::calculateSubmitTimestampData;
using Event::cmdToSubmit;
using Event::isWaitForTimestampsEnabled;
using Event::magic;

View File

@@ -203,9 +203,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNo
auto mockEvent = static_cast<MockEvent<Event> *>(event);
EXPECT_NE(0u, mockEvent->queueTimeStamp.gpuTimeStamp);
EXPECT_NE(0u, mockEvent->queueTimeStamp.cpuTimeinNS);
EXPECT_LT(mockEvent->queueTimeStamp.cpuTimeinNS, mockEvent->submitTimeStamp.cpuTimeinNS);
EXPECT_NE(0u, mockEvent->submitTimeStamp.gpuTimeStamp);
EXPECT_NE(0u, mockEvent->queueTimeStamp.gpuTimeInNs);
EXPECT_NE(0u, mockEvent->queueTimeStamp.cpuTimeInNs);
EXPECT_LT(mockEvent->queueTimeStamp.cpuTimeInNs, mockEvent->submitTimeStamp.cpuTimeInNs);
EXPECT_LT(mockEvent->queueTimeStamp.gpuTimeInNs, mockEvent->submitTimeStamp.gpuTimeInNs);
EXPECT_LT(mockEvent->queueTimeStamp.gpuTimeStamp, mockEvent->submitTimeStamp.gpuTimeStamp);
clReleaseEvent(event);
}
@@ -599,10 +601,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTests, givenRawTimestampsDebugModeWhen
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
cl_event clEvent = &event;
event.queueTimeStamp.cpuTimeinNS = 1;
event.queueTimeStamp.gpuTimeStamp = 2;
event.submitTimeStamp.cpuTimeinNS = 3;
event.submitTimeStamp.gpuTimeStamp = 4;
event.setCPUProfilingPath(false);
@@ -643,10 +643,12 @@ TEST_F(EventProfilingTests, givenSubmitTimeMuchGreaterThanQueueTimeWhenCalculati
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
cl_event clEvent = &event;
event.queueTimeStamp.cpuTimeinNS = 1;
event.queueTimeStamp.cpuTimeInNs = 1;
event.queueTimeStamp.gpuTimeInNs = 1;
event.queueTimeStamp.gpuTimeStamp = 2;
event.submitTimeStamp.cpuTimeinNS = (1ull << 33) + 3;
event.submitTimeStamp.cpuTimeInNs = (1ull << 33) + 3;
event.submitTimeStamp.gpuTimeInNs = (1ull << 33) + 3;
event.submitTimeStamp.gpuTimeStamp = (1ull << 33) + 4;
event.timeStampNode = &timestampNode;
@@ -695,9 +697,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenRawTimestampsDebugModeWhenS
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
cl_event clEvent = &event;
event.queueTimeStamp.cpuTimeinNS = 83;
event.queueTimeStamp.cpuTimeInNs = 83;
event.queueTimeStamp.gpuTimeStamp = 1;
event.submitTimeStamp.cpuTimeInNs = 83;
event.submitTimeStamp.gpuTimeStamp = 1;
event.setCPUProfilingPath(false);
event.timeStampNode = &timestampNode;
event.calcProfilingData();
@@ -1261,8 +1266,8 @@ TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithOneEleme
ev->calcProfilingData();
EXPECT_EQ(12u, ev->getStartTimeStamp());
EXPECT_EQ(13u, ev->getEndTimeStamp());
EXPECT_EQ(12u, ev->startTimeStamp.gpuTimeStamp);
EXPECT_EQ(13u, ev->endTimeStamp.gpuTimeStamp);
EXPECT_EQ(12u, ev->getGlobalStartTimestamp());
ev->timeStampNode = nullptr;
@@ -1280,8 +1285,8 @@ TEST_F(ProfilingTimestampPacketsTest, givenMultiOsContextCapableSetToTrueWhenCal
csr.multiOsContextCapable = true;
ev->calcProfilingData();
EXPECT_EQ(50u, ev->getStartTimeStamp());
EXPECT_EQ(350u, ev->getEndTimeStamp());
EXPECT_EQ(50u, ev->startTimeStamp.gpuTimeStamp);
EXPECT_EQ(350u, ev->endTimeStamp.gpuTimeStamp);
}
TEST_F(ProfilingTimestampPacketsTest, givenTimestampPacketWithoutProfilingDataWhenCalculatingThenDontUseThatPacket) {
@@ -1304,8 +1309,8 @@ TEST_F(ProfilingTimestampPacketsTest, givenTimestampPacketWithoutProfilingDataWh
ev->timestampPacketContainer->peekNodes()[1]->setProfilingCapable(false);
ev->calcProfilingData();
EXPECT_EQ(static_cast<uint64_t>(globalStart0), ev->getStartTimeStamp());
EXPECT_EQ(static_cast<uint64_t>(globalEnd0), ev->getEndTimeStamp());
EXPECT_EQ(static_cast<uint64_t>(globalStart0), ev->startTimeStamp.gpuTimeStamp);
EXPECT_EQ(static_cast<uint64_t>(globalEnd0), ev->endTimeStamp.gpuTimeStamp);
}
TEST_F(ProfilingTimestampPacketsTest, givenPrintTimestampPacketContentsSetWhenCalcProfilingDataThenTimeStampsArePrinted) {
@@ -1354,8 +1359,8 @@ TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithThreeEle
ev->calcProfilingData();
EXPECT_EQ(2u, ev->getStartTimeStamp());
EXPECT_EQ(13u, ev->getEndTimeStamp());
EXPECT_EQ(2u, ev->startTimeStamp.gpuTimeStamp);
EXPECT_EQ(13u, ev->endTimeStamp.gpuTimeStamp);
EXPECT_EQ(2u, ev->getGlobalStartTimestamp());
}