From 789cb3327bc3862f61a368f28bed6c51032eb405 Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Wed, 12 Sep 2018 11:17:36 +0200 Subject: [PATCH] Add support for returning raw GPU timestamps via registry flag. Change-Id: Id80ef2a95132f1cdc1d14e45d406925b11599db1 --- Jenkinsfile | 2 +- runtime/event/event.cpp | 16 +++++- runtime/os_interface/DebugVariables_base.inl | 1 + unit_tests/mocks/mock_event.h | 2 + unit_tests/profiling/profiling_tests.cpp | 53 ++++++++++++++++++++ unit_tests/test_files/igdrcl.config | 3 +- 6 files changed, 74 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 521f30da90..0655976868 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,4 +1,4 @@ #!groovy neoDependenciesRev='798076-1088' strategy='EQUAL' -allowedCD=272 +allowedCD=270 diff --git a/runtime/event/event.cpp b/runtime/event/event.cpp index ae6b8a080a..e9d02692c1 100644 --- a/runtime/event/event.cpp +++ b/runtime/event/event.cpp @@ -180,11 +180,18 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName, switch (paramName) { case CL_PROFILING_COMMAND_QUEUED: src = &queueTimeStamp.CPUTimeinNS; + if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { + src = &queueTimeStamp.GPUTimeStamp; + } + srcSize = sizeof(cl_ulong); break; case CL_PROFILING_COMMAND_SUBMIT: src = &submitTimeStamp.CPUTimeinNS; + if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { + src = &submitTimeStamp.GPUTimeStamp; + } srcSize = sizeof(cl_ulong); break; @@ -291,8 +298,16 @@ bool Event::calcProfilingData() { cpuDuration = static_cast<uint64_t>(gpuDuration * frequency); cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency); startTimeStamp = static_cast<uint64_t>(((HwTimeStamps *)timeStampNode->tag)->GlobalStartTS * frequency) + c0; + endTimeStamp = startTimeStamp + cpuDuration; completeTimeStamp = startTimeStamp + cpuCompleteDuration; + + if (DebugManager.flags.ReturnRawGpuTimestamps.get()) { + startTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextStartTS; + endTimeStamp = 
((HwTimeStamps *)timeStampNode->tag)->ContextEndTS; + completeTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS; + } + dataCalculated = true; } return dataCalculated; @@ -385,7 +400,6 @@ void Event::unblockEventsBlockedByThis(int32_t transitionStatus) { } auto childEventRef = childEventsToNotify.detachNodes(); - while (childEventRef != nullptr) { auto childEvent = childEventRef->ref; diff --git a/runtime/os_interface/DebugVariables_base.inl b/runtime/os_interface/DebugVariables_base.inl index 8da6643e03..d49a7c91f9 100644 --- a/runtime/os_interface/DebugVariables_base.inl +++ b/runtime/os_interface/DebugVariables_base.inl @@ -110,3 +110,4 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont o DECLARE_DEBUG_VARIABLE(int32_t, OverrideAubDeviceId, -1, "-1 dont override, any other: use this value for AUB generation device id") DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger") DECLARE_DEBUG_VARIABLE(bool, EnableTimestampPacket, false, "Write Timestamp Packet for each set of gpu walkers") +DECLARE_DEBUG_VARIABLE(bool, ReturnRawGpuTimestamps, false, "Driver returns raw GPU tiemstamps instead of calculated ones.") diff --git a/unit_tests/mocks/mock_event.h b/unit_tests/mocks/mock_event.h index 3a6256cbf6..67ffbf5973 100644 --- a/unit_tests/mocks/mock_event.h +++ b/unit_tests/mocks/mock_event.h @@ -47,6 +47,8 @@ struct MockEvent : public BaseEventType { using BaseEventType::timeStampNode; using Event::magic; + using Event::queueTimeStamp; + using Event::submitTimeStamp; }; #undef FORWARD_CONSTRUCTOR diff --git a/unit_tests/profiling/profiling_tests.cpp b/unit_tests/profiling/profiling_tests.cpp index d2d3033b72..56e55c493d 100644 --- a/unit_tests/profiling/profiling_tests.cpp +++ b/unit_tests/profiling/profiling_tests.cpp @@ -31,6 +31,7 @@ #include 
"runtime/utilities/tag_allocator.h" #include "unit_tests/command_queue/command_enqueue_fixture.h" +#include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/fixtures/device_fixture.h" #include "unit_tests/mocks/mock_command_queue.h" #include "unit_tests/mocks/mock_context.h" @@ -462,6 +463,58 @@ TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEn cmdQ.device = nullptr; } +TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawDataIsReturned) { + DebugManagerStateRestore stateRestore; + DebugManager.flags.ReturnRawGpuTimestamps.set(1); + std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)); + MyOSTime::instanceNum = 0; + device->setOSTime(new MyOSTime()); + EXPECT_EQ(1, MyOSTime::instanceNum); + MockContext context; + cl_command_queue_properties props[5] = {0, 0, 0, 0, 0}; + MockCommandQueue cmdQ(&context, device.get(), props); + cmdQ.setProfilingEnabled(); + cmdQ.device = device.get(); + + HwTimeStamps timestamp; + timestamp.GlobalStartTS = 10; + timestamp.ContextStartTS = 20; + timestamp.GlobalEndTS = 80; + timestamp.ContextEndTS = 56; + timestamp.GlobalCompleteTS = 0; + timestamp.ContextCompleteTS = 70; + + MockTagNode<HwTimeStamps> timestampNode; + timestampNode.tag = &timestamp; + + MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0); + cl_event clEvent = &event; + + event.queueTimeStamp.CPUTimeinNS = 1; + event.queueTimeStamp.GPUTimeStamp = 2; + + event.submitTimeStamp.CPUTimeinNS = 3; + event.submitTimeStamp.GPUTimeStamp = 4; + + event.setCPUProfilingPath(false); + event.timeStampNode = &timestampNode; + event.calcProfilingData(); + + cl_ulong queued, submited, start, end, complete; + + clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr); + clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submited, nullptr); + clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr); + 
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr); + clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &complete, nullptr); + + EXPECT_EQ(timestamp.ContextCompleteTS, complete); + EXPECT_EQ(timestamp.ContextEndTS, end); + EXPECT_EQ(timestamp.ContextStartTS, start); + EXPECT_EQ(event.submitTimeStamp.GPUTimeStamp, submited); + EXPECT_EQ(event.queueTimeStamp.GPUTimeStamp, queued); +} + struct ProfilingWithPerfCountersTests : public ProfilingTests, public PerformanceCountersFixture { void SetUp() override { diff --git a/unit_tests/test_files/igdrcl.config b/unit_tests/test_files/igdrcl.config index aa7a68945c..449f2299d5 100644 --- a/unit_tests/test_files/igdrcl.config +++ b/unit_tests/test_files/igdrcl.config @@ -80,4 +80,5 @@ RebuildPrecompiledKernels = false CreateMultipleDevices = 0 EnableExperimentalCommandBuffer = 0 LoopAtPlatformInitialize = false -EnableTimestampPacket = false \ No newline at end of file +EnableTimestampPacket = false +ReturnRawGpuTimestamps = 0 \ No newline at end of file