fix: use submit timestamp in kernel start time calculation
Related-To: NEO-8394 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
parent
b8831e87b0
commit
ad3aeb6eea
|
@ -370,17 +370,17 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
|
||||||
auto &device = this->cmdQueue->getDevice();
|
auto &device = this->cmdQueue->getDevice();
|
||||||
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
||||||
auto frequency = device.getDeviceInfo().profilingTimerResolution;
|
auto frequency = device.getDeviceInfo().profilingTimerResolution;
|
||||||
auto gpuQueueTimeStamp = gfxCoreHelper.getGpuTimeStampInNS(queueTimeStamp.gpuTimeStamp, frequency);
|
auto gpuSubmitTimeStamp = gfxCoreHelper.getGpuTimeStampInNS(submitTimeStamp.gpuTimeStamp, frequency);
|
||||||
|
|
||||||
if (DebugManager.flags.EnableDeviceBasedTimestamps.get()) {
|
if (DebugManager.flags.EnableDeviceBasedTimestamps.get()) {
|
||||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency);
|
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency);
|
||||||
while (startTimeStamp < gpuQueueTimeStamp) {
|
while (startTimeStamp < gpuSubmitTimeStamp) {
|
||||||
startTimeStamp += static_cast<uint64_t>((1ULL << gfxCoreHelper.getGlobalTimeStampBits()) * frequency);
|
startTimeStamp += static_cast<uint64_t>((1ULL << gfxCoreHelper.getGlobalTimeStampBits()) * frequency);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int64_t c0 = queueTimeStamp.cpuTimeinNS - gpuQueueTimeStamp;
|
int64_t c0 = submitTimeStamp.cpuTimeinNS - gpuSubmitTimeStamp;
|
||||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||||
if (startTimeStamp < queueTimeStamp.cpuTimeinNS) {
|
if (startTimeStamp < submitTimeStamp.cpuTimeinNS) {
|
||||||
c0 += static_cast<uint64_t>((1ULL << (gfxCoreHelper.getGlobalTimeStampBits())) * frequency);
|
c0 += static_cast<uint64_t>((1ULL << (gfxCoreHelper.getGlobalTimeStampBits())) * frequency);
|
||||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -825,6 +825,10 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseNotEnabledWhenCalculateStartT
|
||||||
timestamp.globalStartTS = 2;
|
timestamp.globalStartTS = 2;
|
||||||
event.queueTimeStamp.gpuTimeStamp = 1;
|
event.queueTimeStamp.gpuTimeStamp = 1;
|
||||||
event.queueTimeStamp.cpuTimeinNS = 100;
|
event.queueTimeStamp.cpuTimeinNS = 100;
|
||||||
|
|
||||||
|
event.queueTimeStamp.gpuTimeStamp = 2;
|
||||||
|
event.queueTimeStamp.cpuTimeinNS = 200;
|
||||||
|
|
||||||
TagNode<HwTimeStamps> timestampNode{};
|
TagNode<HwTimeStamps> timestampNode{};
|
||||||
timestampNode.tagForCpuAccess = &timestamp;
|
timestampNode.tagForCpuAccess = &timestamp;
|
||||||
event.timeStampNode = &timestampNode;
|
event.timeStampNode = &timestampNode;
|
||||||
|
@ -834,7 +838,7 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseNotEnabledWhenCalculateStartT
|
||||||
|
|
||||||
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
|
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||||
auto &gfxCoreHelper = pClDevice->getGfxCoreHelper();
|
auto &gfxCoreHelper = pClDevice->getGfxCoreHelper();
|
||||||
auto c0 = event.queueTimeStamp.cpuTimeinNS - gfxCoreHelper.getGpuTimeStampInNS(event.queueTimeStamp.gpuTimeStamp, resolution);
|
auto c0 = event.submitTimeStamp.cpuTimeinNS - gfxCoreHelper.getGpuTimeStampInNS(event.submitTimeStamp.gpuTimeStamp, resolution);
|
||||||
EXPECT_EQ(start, static_cast<uint64_t>(timestamp.globalStartTS * resolution) + c0);
|
EXPECT_EQ(start, static_cast<uint64_t>(timestamp.globalStartTS * resolution) + c0);
|
||||||
|
|
||||||
event.timeStampNode = nullptr;
|
event.timeStampNode = nullptr;
|
||||||
|
@ -883,8 +887,9 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
|
||||||
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
|
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
|
||||||
|
|
||||||
HwTimeStamps timestamp{};
|
HwTimeStamps timestamp{};
|
||||||
timestamp.globalStartTS = 1;
|
timestamp.globalStartTS = 3;
|
||||||
event.queueTimeStamp.gpuTimeStamp = 2;
|
event.queueTimeStamp.gpuTimeStamp = 2;
|
||||||
|
event.submitTimeStamp.gpuTimeStamp = 4;
|
||||||
TagNode<HwTimeStamps> timestampNode{};
|
TagNode<HwTimeStamps> timestampNode{};
|
||||||
timestampNode.tagForCpuAccess = &timestamp;
|
timestampNode.tagForCpuAccess = &timestamp;
|
||||||
event.timeStampNode = &timestampNode;
|
event.timeStampNode = &timestampNode;
|
||||||
|
@ -894,7 +899,7 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
|
||||||
|
|
||||||
auto &gfxCoreHelper = pClDevice->getGfxCoreHelper();
|
auto &gfxCoreHelper = pClDevice->getGfxCoreHelper();
|
||||||
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
|
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||||
auto refStartTime = static_cast<uint64_t>(timestamp.globalStartTS * resolution + (1ULL << gfxCoreHelper.getGlobalTimeStampBits()) * resolution);
|
auto refStartTime = static_cast<uint64_t>(timestamp.globalStartTS * resolution) + static_cast<uint64_t>((1ULL << gfxCoreHelper.getGlobalTimeStampBits()) * resolution);
|
||||||
EXPECT_EQ(start, refStartTime);
|
EXPECT_EQ(start, refStartTime);
|
||||||
|
|
||||||
event.timeStampNode = nullptr;
|
event.timeStampNode = nullptr;
|
||||||
|
|
|
@ -625,6 +625,50 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTests, givenRawTimestampsDebugModeWhen
|
||||||
event.timeStampNode = nullptr;
|
event.timeStampNode = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(EventProfilingTests, givenSubmitTimeMuchGreaterThanQueueTimeWhenCalculatingStartTimeThenItIsGreaterThanSubmitTime) {
|
||||||
|
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||||
|
MockContext context(device.get());
|
||||||
|
MockCommandQueue cmdQ(&context, device.get(), nullptr, false);
|
||||||
|
cmdQ.setProfilingEnabled();
|
||||||
|
|
||||||
|
HwTimeStamps timestamp{};
|
||||||
|
timestamp.globalStartTS = 10;
|
||||||
|
timestamp.contextStartTS = 20;
|
||||||
|
timestamp.globalEndTS = 80;
|
||||||
|
timestamp.contextEndTS = 56;
|
||||||
|
|
||||||
|
MockTagNode<HwTimeStamps> timestampNode{};
|
||||||
|
timestampNode.tagForCpuAccess = &timestamp;
|
||||||
|
|
||||||
|
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
|
||||||
|
cl_event clEvent = &event;
|
||||||
|
|
||||||
|
event.queueTimeStamp.cpuTimeinNS = 1;
|
||||||
|
event.queueTimeStamp.gpuTimeStamp = 2;
|
||||||
|
|
||||||
|
event.submitTimeStamp.cpuTimeinNS = (1ull << 33) + 3;
|
||||||
|
event.submitTimeStamp.gpuTimeStamp = (1ull << 33) + 4;
|
||||||
|
|
||||||
|
event.timeStampNode = &timestampNode;
|
||||||
|
|
||||||
|
cl_ulong queued = 0ul;
|
||||||
|
cl_ulong submited = 0ul;
|
||||||
|
cl_ulong start = 0ul;
|
||||||
|
cl_ulong end = 0ul;
|
||||||
|
|
||||||
|
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr);
|
||||||
|
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submited, nullptr);
|
||||||
|
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
|
||||||
|
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr);
|
||||||
|
|
||||||
|
EXPECT_LT(0ull, queued);
|
||||||
|
EXPECT_LT(queued, submited);
|
||||||
|
EXPECT_LT(submited, start);
|
||||||
|
EXPECT_LT(start, end);
|
||||||
|
|
||||||
|
event.timeStampNode = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenRawTimestampsDebugModeWhenStartTimeStampLTQueueTimeStampThenIncreaseStartTimeStamp) {
|
HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenRawTimestampsDebugModeWhenStartTimeStampLTQueueTimeStampThenIncreaseStartTimeStamp) {
|
||||||
DebugManagerStateRestore stateRestore;
|
DebugManagerStateRestore stateRestore;
|
||||||
DebugManager.flags.ReturnRawGpuTimestamps.set(1);
|
DebugManager.flags.ReturnRawGpuTimestamps.set(1);
|
||||||
|
|
Loading…
Reference in New Issue