Add support for returning raw GPU timestamps via registry flag.
Change-Id: Id80ef2a95132f1cdc1d14e45d406925b11599db1
This commit is contained in:
parent
58c34fd72c
commit
789cb3327b
|
@ -1,4 +1,4 @@
|
|||
#!groovy
|
||||
neoDependenciesRev='798076-1088'
|
||||
strategy='EQUAL'
|
||||
allowedCD=272
|
||||
allowedCD=270
|
||||
|
|
|
@ -180,11 +180,18 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
|
|||
switch (paramName) {
|
||||
case CL_PROFILING_COMMAND_QUEUED:
|
||||
src = &queueTimeStamp.CPUTimeinNS;
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
src = &queueTimeStamp.GPUTimeStamp;
|
||||
}
|
||||
|
||||
srcSize = sizeof(cl_ulong);
|
||||
break;
|
||||
|
||||
case CL_PROFILING_COMMAND_SUBMIT:
|
||||
src = &submitTimeStamp.CPUTimeinNS;
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
src = &submitTimeStamp.GPUTimeStamp;
|
||||
}
|
||||
srcSize = sizeof(cl_ulong);
|
||||
break;
|
||||
|
||||
|
@ -291,8 +298,16 @@ bool Event::calcProfilingData() {
|
|||
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
|
||||
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
|
||||
startTimeStamp = static_cast<uint64_t>(((HwTimeStamps *)timeStampNode->tag)->GlobalStartTS * frequency) + c0;
|
||||
|
||||
endTimeStamp = startTimeStamp + cpuDuration;
|
||||
completeTimeStamp = startTimeStamp + cpuCompleteDuration;
|
||||
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
startTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextStartTS;
|
||||
endTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextEndTS;
|
||||
completeTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS;
|
||||
}
|
||||
|
||||
dataCalculated = true;
|
||||
}
|
||||
return dataCalculated;
|
||||
|
@ -385,7 +400,6 @@ void Event::unblockEventsBlockedByThis(int32_t transitionStatus) {
|
|||
}
|
||||
|
||||
auto childEventRef = childEventsToNotify.detachNodes();
|
||||
|
||||
while (childEventRef != nullptr) {
|
||||
auto childEvent = childEventRef->ref;
|
||||
|
||||
|
|
|
@ -110,3 +110,4 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont o
|
|||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideAubDeviceId, -1, "-1 dont override, any other: use this value for AUB generation device id")
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableTimestampPacket, false, "Write Timestamp Packet for each set of gpu walkers")
|
||||
// When set, profiling queries report raw GPU timestamp values instead of the calculated ones.
DECLARE_DEBUG_VARIABLE(bool, ReturnRawGpuTimestamps, false, "Driver returns raw GPU timestamps instead of calculated ones.")
|
||||
|
|
|
@ -47,6 +47,8 @@ struct MockEvent : public BaseEventType {
|
|||
|
||||
using BaseEventType::timeStampNode;
|
||||
using Event::magic;
|
||||
using Event::queueTimeStamp;
|
||||
using Event::submitTimeStamp;
|
||||
};
|
||||
|
||||
#undef FORWARD_CONSTRUCTOR
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "runtime/utilities/tag_allocator.h"
|
||||
|
||||
#include "unit_tests/command_queue/command_enqueue_fixture.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "unit_tests/mocks/mock_command_queue.h"
|
||||
#include "unit_tests/mocks/mock_context.h"
|
||||
|
@ -462,6 +463,58 @@ TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEn
|
|||
cmdQ.device = nullptr;
|
||||
}
|
||||
|
||||
// Verifies the ReturnRawGpuTimestamps debug flag: with the flag enabled, every
// profiling query must hand back the raw GPU value stored on the event / HW
// timestamp tag rather than a CPU-nanosecond value derived from it.
TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawDataIsReturned) {
    // Restores all debug flags when the test scope ends, so the override below
    // does not leak into other tests.
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.ReturnRawGpuTimestamps.set(true);

    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MyOSTime::instanceNum = 0;
    device->setOSTime(new MyOSTime());
    EXPECT_EQ(1, MyOSTime::instanceNum);

    MockContext context;
    cl_command_queue_properties props[5] = {0, 0, 0, 0, 0};
    MockCommandQueue cmdQ(&context, device.get(), props);
    cmdQ.setProfilingEnabled();
    cmdQ.device = device.get();

    // Distinct values in every field, so a mix-up between the Global* and
    // Context* counters (or between start/end/complete) is detectable.
    HwTimeStamps timestamp;
    timestamp.GlobalStartTS = 10;
    timestamp.ContextStartTS = 20;
    timestamp.GlobalEndTS = 80;
    timestamp.ContextEndTS = 56;
    timestamp.GlobalCompleteTS = 0;
    timestamp.ContextCompleteTS = 70;

    MockTagNode<HwTimeStamps> timestampNode;
    timestampNode.tag = &timestamp;

    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
    cl_event clEvent = &event;

    // CPU and GPU stamps differ on purpose: the raw mode must pick the GPU one.
    event.queueTimeStamp.CPUTimeinNS = 1;
    event.queueTimeStamp.GPUTimeStamp = 2;

    event.submitTimeStamp.CPUTimeinNS = 3;
    event.submitTimeStamp.GPUTimeStamp = 4;

    event.setCPUProfilingPath(false);
    event.timeStampNode = &timestampNode;
    event.calcProfilingData();

    // Zero-initialized so a failed query cannot leave an indeterminate value
    // that is then compared against the expectation.
    cl_ulong queued = 0;
    cl_ulong submitted = 0;
    cl_ulong start = 0;
    cl_ulong end = 0;
    cl_ulong complete = 0;

    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr));
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submitted, nullptr));
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr));
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr));
    EXPECT_EQ(CL_SUCCESS, clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &complete, nullptr));

    // START/END/COMPLETE come straight from the Context* counters of the tag;
    // QUEUED/SUBMIT come from the GPUTimeStamp recorded on the event.
    EXPECT_EQ(timestamp.ContextCompleteTS, complete);
    EXPECT_EQ(timestamp.ContextEndTS, end);
    EXPECT_EQ(timestamp.ContextStartTS, start);
    EXPECT_EQ(event.submitTimeStamp.GPUTimeStamp, submitted);
    EXPECT_EQ(event.queueTimeStamp.GPUTimeStamp, queued);
}
|
||||
|
||||
struct ProfilingWithPerfCountersTests : public ProfilingTests,
|
||||
public PerformanceCountersFixture {
|
||||
void SetUp() override {
|
||||
|
|
|
@ -80,4 +80,5 @@ RebuildPrecompiledKernels = false
|
|||
CreateMultipleDevices = 0
|
||||
EnableExperimentalCommandBuffer = 0
|
||||
LoopAtPlatformInitialize = false
|
||||
EnableTimestampPacket = false
|
||||
EnableTimestampPacket = false
|
||||
ReturnRawGpuTimestamps = 0
|
Loading…
Reference in New Issue