Add support for returning raw GPU timestamps via registry flag.

Change-Id: Id80ef2a95132f1cdc1d14e45d406925b11599db1
This commit is contained in:
Mrozek, Michal 2018-09-12 11:17:36 +02:00 committed by sys_ocldev
parent 58c34fd72c
commit 789cb3327b
6 changed files with 74 additions and 3 deletions

2
Jenkinsfile vendored
View File

@ -1,4 +1,4 @@
#!groovy
neoDependenciesRev='798076-1088'
strategy='EQUAL'
allowedCD=272
allowedCD=270

View File

@ -180,11 +180,18 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
switch (paramName) {
case CL_PROFILING_COMMAND_QUEUED:
src = &queueTimeStamp.CPUTimeinNS;
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
src = &queueTimeStamp.GPUTimeStamp;
}
srcSize = sizeof(cl_ulong);
break;
case CL_PROFILING_COMMAND_SUBMIT:
src = &submitTimeStamp.CPUTimeinNS;
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
src = &submitTimeStamp.GPUTimeStamp;
}
srcSize = sizeof(cl_ulong);
break;
@ -291,8 +298,16 @@ bool Event::calcProfilingData() {
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
startTimeStamp = static_cast<uint64_t>(((HwTimeStamps *)timeStampNode->tag)->GlobalStartTS * frequency) + c0;
endTimeStamp = startTimeStamp + cpuDuration;
completeTimeStamp = startTimeStamp + cpuCompleteDuration;
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
startTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextStartTS;
endTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextEndTS;
completeTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS;
}
dataCalculated = true;
}
return dataCalculated;
@ -385,7 +400,6 @@ void Event::unblockEventsBlockedByThis(int32_t transitionStatus) {
}
auto childEventRef = childEventsToNotify.detachNodes();
while (childEventRef != nullptr) {
auto childEvent = childEventRef->ref;

View File

@ -110,3 +110,4 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont o
DECLARE_DEBUG_VARIABLE(int32_t, OverrideAubDeviceId, -1, "-1 dont override, any other: use this value for AUB generation device id")
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
DECLARE_DEBUG_VARIABLE(bool, EnableTimestampPacket, false, "Write Timestamp Packet for each set of gpu walkers")
// When enabled, event profiling queries report the raw GPU timestamp values
// instead of the CPU-time values the driver normally derives from them.
DECLARE_DEBUG_VARIABLE(bool, ReturnRawGpuTimestamps, false, "Driver returns raw GPU timestamps instead of calculated ones.")

View File

@ -47,6 +47,8 @@ struct MockEvent : public BaseEventType {
using BaseEventType::timeStampNode;
using Event::magic;
using Event::queueTimeStamp;
using Event::submitTimeStamp;
};
#undef FORWARD_CONSTRUCTOR

View File

@ -31,6 +31,7 @@
#include "runtime/utilities/tag_allocator.h"
#include "unit_tests/command_queue/command_enqueue_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_context.h"
@ -462,6 +463,58 @@ TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEn
cmdQ.device = nullptr;
}
// Verifies that, with the ReturnRawGpuTimestamps debug flag set, all five
// clGetEventProfilingInfo queries hand back the raw GPU-side values
// (GPUTimeStamp for queued/submit, Context*TS for start/end/complete) rather
// than the CPU-time values stored alongside them.
TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawDataIsReturned) {
    // NOTE(review): presumably restores the debug flags on scope exit - confirm.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.ReturnRawGpuTimestamps.set(true);

    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MyOSTime::instanceNum = 0;
    device->setOSTime(new MyOSTime());
    EXPECT_EQ(1, MyOSTime::instanceNum);

    MockContext context;
    cl_command_queue_properties props[5] = {0, 0, 0, 0, 0};
    MockCommandQueue cmdQ(&context, device.get(), props);
    cmdQ.setProfilingEnabled();
    cmdQ.device = device.get();

    // Global and Context values are deliberately different so a result taken
    // from the wrong counter cannot match the expectations below.
    HwTimeStamps timestamp;
    timestamp.GlobalStartTS = 10;
    timestamp.ContextStartTS = 20;
    timestamp.GlobalEndTS = 80;
    timestamp.ContextEndTS = 56;
    timestamp.GlobalCompleteTS = 0;
    timestamp.ContextCompleteTS = 70;

    MockTagNode<HwTimeStamps> timestampNode;
    timestampNode.tag = &timestamp;

    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
    cl_event clEvent = &event;

    // CPU and GPU fields carry distinct values for the same reason.
    event.queueTimeStamp.CPUTimeinNS = 1;
    event.queueTimeStamp.GPUTimeStamp = 2;
    event.submitTimeStamp.CPUTimeinNS = 3;
    event.submitTimeStamp.GPUTimeStamp = 4;

    event.setCPUProfilingPath(false);
    event.timeStampNode = &timestampNode;
    event.calcProfilingData();

    // Zero-initialized so a failed query never leaves an indeterminate value
    // to be compared; every expected value below is non-zero.
    cl_ulong queued = 0;
    cl_ulong submitted = 0;
    cl_ulong start = 0;
    cl_ulong end = 0;
    cl_ulong complete = 0;

    // Each query must succeed; otherwise the value checks would be meaningless.
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr));
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submitted, nullptr));
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr));
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr));
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &complete, nullptr));

    EXPECT_EQ(timestamp.ContextCompleteTS, complete);
    EXPECT_EQ(timestamp.ContextEndTS, end);
    EXPECT_EQ(timestamp.ContextStartTS, start);
    EXPECT_EQ(event.submitTimeStamp.GPUTimeStamp, submitted);
    EXPECT_EQ(event.queueTimeStamp.GPUTimeStamp, queued);
}
struct ProfilingWithPerfCountersTests : public ProfilingTests,
public PerformanceCountersFixture {
void SetUp() override {

View File

@ -80,4 +80,5 @@ RebuildPrecompiledKernels = false
CreateMultipleDevices = 0
EnableExperimentalCommandBuffer = 0
LoopAtPlatformInitialize = false
EnableTimestampPacket = false
EnableTimestampPacket = false
ReturnRawGpuTimestamps = 0