fix: use submit timestamp in kernel start time calculation
Related-To: NEO-8394
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
parent
b8831e87b0
commit
ad3aeb6eea
|
@ -370,17 +370,17 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con
|
|||
auto &device = this->cmdQueue->getDevice();
|
||||
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
||||
auto frequency = device.getDeviceInfo().profilingTimerResolution;
|
||||
auto gpuQueueTimeStamp = gfxCoreHelper.getGpuTimeStampInNS(queueTimeStamp.gpuTimeStamp, frequency);
|
||||
auto gpuSubmitTimeStamp = gfxCoreHelper.getGpuTimeStampInNS(submitTimeStamp.gpuTimeStamp, frequency);
|
||||
|
||||
if (DebugManager.flags.EnableDeviceBasedTimestamps.get()) {
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency);
|
||||
while (startTimeStamp < gpuQueueTimeStamp) {
|
||||
while (startTimeStamp < gpuSubmitTimeStamp) {
|
||||
startTimeStamp += static_cast<uint64_t>((1ULL << gfxCoreHelper.getGlobalTimeStampBits()) * frequency);
|
||||
}
|
||||
} else {
|
||||
int64_t c0 = queueTimeStamp.cpuTimeinNS - gpuQueueTimeStamp;
|
||||
int64_t c0 = submitTimeStamp.cpuTimeinNS - gpuSubmitTimeStamp;
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
if (startTimeStamp < queueTimeStamp.cpuTimeinNS) {
|
||||
if (startTimeStamp < submitTimeStamp.cpuTimeinNS) {
|
||||
c0 += static_cast<uint64_t>((1ULL << (gfxCoreHelper.getGlobalTimeStampBits())) * frequency);
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
}
|
||||
|
|
|
@ -825,6 +825,10 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseNotEnabledWhenCalculateStartT
|
|||
timestamp.globalStartTS = 2;
|
||||
event.queueTimeStamp.gpuTimeStamp = 1;
|
||||
event.queueTimeStamp.cpuTimeinNS = 100;
|
||||
|
||||
event.queueTimeStamp.gpuTimeStamp = 2;
|
||||
event.queueTimeStamp.cpuTimeinNS = 200;
|
||||
|
||||
TagNode<HwTimeStamps> timestampNode{};
|
||||
timestampNode.tagForCpuAccess = &timestamp;
|
||||
event.timeStampNode = &timestampNode;
|
||||
|
@ -834,7 +838,7 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseNotEnabledWhenCalculateStartT
|
|||
|
||||
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||
auto &gfxCoreHelper = pClDevice->getGfxCoreHelper();
|
||||
auto c0 = event.queueTimeStamp.cpuTimeinNS - gfxCoreHelper.getGpuTimeStampInNS(event.queueTimeStamp.gpuTimeStamp, resolution);
|
||||
auto c0 = event.submitTimeStamp.cpuTimeinNS - gfxCoreHelper.getGpuTimeStampInNS(event.submitTimeStamp.gpuTimeStamp, resolution);
|
||||
EXPECT_EQ(start, static_cast<uint64_t>(timestamp.globalStartTS * resolution) + c0);
|
||||
|
||||
event.timeStampNode = nullptr;
|
||||
|
@ -883,8 +887,9 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
|
|||
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
|
||||
|
||||
HwTimeStamps timestamp{};
|
||||
timestamp.globalStartTS = 1;
|
||||
timestamp.globalStartTS = 3;
|
||||
event.queueTimeStamp.gpuTimeStamp = 2;
|
||||
event.submitTimeStamp.gpuTimeStamp = 4;
|
||||
TagNode<HwTimeStamps> timestampNode{};
|
||||
timestampNode.tagForCpuAccess = &timestamp;
|
||||
event.timeStampNode = &timestampNode;
|
||||
|
@ -894,7 +899,7 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
|
|||
|
||||
auto &gfxCoreHelper = pClDevice->getGfxCoreHelper();
|
||||
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||
auto refStartTime = static_cast<uint64_t>(timestamp.globalStartTS * resolution + (1ULL << gfxCoreHelper.getGlobalTimeStampBits()) * resolution);
|
||||
auto refStartTime = static_cast<uint64_t>(timestamp.globalStartTS * resolution) + static_cast<uint64_t>((1ULL << gfxCoreHelper.getGlobalTimeStampBits()) * resolution);
|
||||
EXPECT_EQ(start, refStartTime);
|
||||
|
||||
event.timeStampNode = nullptr;
|
||||
|
|
|
@ -625,6 +625,50 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTests, givenRawTimestampsDebugModeWhen
|
|||
event.timeStampNode = nullptr;
|
||||
}
|
||||
|
||||
// Checks the ordering of the four OpenCL profiling values reported for an
// event whose submit timestamps are far larger than its queue timestamps:
// 0 < queued < submit < start < end.
TEST_F(EventProfilingTests, givenSubmitTimeMuchGreaterThanQueueTimeWhenCalculatingStartTimeThenItIsGreaterThanSubmitTime) {
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockContext context(device.get());
    MockCommandQueue cmdQ(&context, device.get(), nullptr, false);
    cmdQ.setProfilingEnabled();

    // Raw timestamps stored in the node; all of them are small compared with
    // the submit timestamps set on the event below.
    HwTimeStamps timestamp{};
    timestamp.globalStartTS = 10;
    timestamp.contextStartTS = 20;
    timestamp.globalEndTS = 80;
    timestamp.contextEndTS = 56;

    MockTagNode<HwTimeStamps> timestampNode{};
    timestampNode.tagForCpuAccess = &timestamp;

    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
    cl_event clEvent = &event;

    event.queueTimeStamp.cpuTimeinNS = 1;
    event.queueTimeStamp.gpuTimeStamp = 2;

    // Submit happens more than 2^33 units after the queue time, on both the
    // CPU and the GPU clock.
    event.submitTimeStamp.cpuTimeinNS = (1ull << 33) + 3;
    event.submitTimeStamp.gpuTimeStamp = (1ull << 33) + 4;

    event.timeStampNode = &timestampNode;

    cl_ulong queued = 0ul;
    cl_ulong submitted = 0ul;
    cl_ulong start = 0ul;
    cl_ulong end = 0ul;

    clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr);
    clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submitted, nullptr);
    clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
    clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr);

    // The outputs were initialised to 0, so a query that leaves its output
    // untouched also trips one of these ordering checks.
    EXPECT_LT(0ull, queued);
    EXPECT_LT(queued, submitted);
    EXPECT_LT(submitted, start);
    EXPECT_LT(start, end);

    // NOTE(review): presumably detaches the stack-allocated node so the event
    // does not touch it on destruction — confirm against Event's destructor.
    event.timeStampNode = nullptr;
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenRawTimestampsDebugModeWhenStartTimeStampLTQueueTimeStampThenIncreaseStartTimeStamp) {
|
||||
DebugManagerStateRestore stateRestore;
|
||||
DebugManager.flags.ReturnRawGpuTimestamps.set(1);
|
||||
|
|
Loading…
Reference in New Issue