Add profiling calculation from timestamp packets

Change-Id: Ie7f8c703ca5ea5eb1f5207871ef94cbc7ece18b7
Signed-off-by: Jobczyk, Lukasz <lukasz.jobczyk@intel.com>
This commit is contained in:
Jobczyk, Lukasz 2019-01-21 11:44:56 +01:00 committed by sys_ocldev
parent 3fe78d263b
commit c1cb1f9be6
7 changed files with 175 additions and 47 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -251,49 +251,76 @@ cl_ulong Event::getDelta(cl_ulong startTime,
}
// Calculates the event's profiling timestamps, unless they were already
// calculated or the CPU profiling path is in use.
//
// Raw values come from one of two sources:
//  - the timestamp packet container, when it holds at least one node: the
//    smallest ContextStart, the largest ContextEnd and the smallest
//    GlobalStart found across all packets are used;
//  - otherwise the HwTimeStamps tag of timeStampNode, when a node is attached.
//
// Returns the current value of dataCalculated.
bool Event::calcProfilingData() {
    if (dataCalculated || profilingCpuPath) {
        return dataCalculated;
    }

    if (timestampPacketContainer && timestampPacketContainer->peekNodes().size() > 0) {
        // Bind by reference: the node list is only read here, so copying it is wasted work.
        const auto &timestamps = timestampPacketContainer->peekNodes();

        uint64_t contextStartTS = timestamps[0]->tag->getData(TimestampPacket::DataIndex::ContextStart);
        uint64_t contextEndTS = timestamps[0]->tag->getData(TimestampPacket::DataIndex::ContextEnd);
        uint64_t globalStartTS = timestamps[0]->tag->getData(TimestampPacket::DataIndex::GlobalStart);

        for (const auto &timestamp : timestamps) {
            // Read every field once per packet instead of once for the test and once for the store.
            const uint64_t packetContextStart = timestamp->tag->getData(TimestampPacket::DataIndex::ContextStart);
            const uint64_t packetContextEnd = timestamp->tag->getData(TimestampPacket::DataIndex::ContextEnd);
            const uint64_t packetGlobalStart = timestamp->tag->getData(TimestampPacket::DataIndex::GlobalStart);

            if (packetContextStart < contextStartTS) {
                contextStartTS = packetContextStart;
            }
            if (packetContextEnd > contextEndTS) {
                contextEndTS = packetContextEnd;
            }
            if (packetGlobalStart < globalStartTS) {
                globalStartTS = packetGlobalStart;
            }
        }

        // Packets carry no separate "complete" value, so the end timestamp is
        // passed in that role as well.
        calcProfilingData(contextStartTS, contextEndTS, &contextEndTS, globalStartTS);
    } else if (timeStampNode) {
        auto *hwTimeStamps = reinterpret_cast<HwTimeStamps *>(timeStampNode->tag);
        calcProfilingData(
            hwTimeStamps->ContextStartTS,
            hwTimeStamps->ContextEndTS,
            &hwTimeStamps->ContextCompleteTS,
            hwTimeStamps->GlobalStartTS);
    }

    return dataCalculated;
}
void Event::calcProfilingData(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) {
uint64_t gpuDuration = 0;
uint64_t cpuDuration = 0;
uint64_t gpuCompleteDuration = 0;
uint64_t cpuCompleteDuration = 0;
int64_t c0 = 0;
if (!dataCalculated && timeStampNode && !profilingCpuPath) {
double frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
/* calculation based on equation
CpuTime = GpuTime * scalar + const( == c0)
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
to determine the value of the const we can use one pair of values
const = CpuTimeQueue - GpuTimeQueue * scalar
*/
double frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
int64_t c0 = queueTimeStamp.CPUTimeinNS - static_cast<uint64_t>(queueTimeStamp.GPUTimeStamp * frequency);
/* calculation based on equation
CpuTime = GpuTime * scalar + const( == c0)
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
to determine the value of the const we can use one pair of values
const = CpuTimeQueue - GpuTimeQueue * scalar
*/
//If device enqueue has not updated complete timestamp, assign end timestamp
if (((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS == 0)
((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS = ((HwTimeStamps *)timeStampNode->tag)->ContextEndTS;
c0 = queueTimeStamp.CPUTimeinNS - static_cast<uint64_t>(queueTimeStamp.GPUTimeStamp * frequency);
gpuDuration = getDelta(
((HwTimeStamps *)timeStampNode->tag)->ContextStartTS,
((HwTimeStamps *)timeStampNode->tag)->ContextEndTS);
gpuCompleteDuration = getDelta(
((HwTimeStamps *)timeStampNode->tag)->ContextStartTS,
((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS);
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
startTimeStamp = static_cast<uint64_t>(((HwTimeStamps *)timeStampNode->tag)->GlobalStartTS * frequency) + c0;
endTimeStamp = startTimeStamp + cpuDuration;
completeTimeStamp = startTimeStamp + cpuCompleteDuration;
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
startTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextStartTS;
endTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextEndTS;
completeTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS;
}
dataCalculated = true;
//If device enqueue has not updated complete timestamp, assign end timestamp
gpuDuration = getDelta(contextStartTS, contextEndTS);
if (*contextCompleteTS == 0) {
*contextCompleteTS = contextEndTS;
gpuCompleteDuration = gpuDuration;
} else {
gpuCompleteDuration = getDelta(contextStartTS, *contextCompleteTS);
}
return dataCalculated;
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
endTimeStamp = startTimeStamp + cpuDuration;
completeTimeStamp = startTimeStamp + cpuCompleteDuration;
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
startTimeStamp = contextStartTS;
endTimeStamp = contextEndTS;
completeTimeStamp = *contextCompleteTS;
}
dataCalculated = true;
}
inline bool Event::wait(bool blocking, bool useQuickKmdSleep) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -93,7 +93,6 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
void updateCompletionStamp(uint32_t taskCount, uint32_t tasklevel, FlushStamp flushStamp);
cl_ulong getDelta(cl_ulong startTime,
cl_ulong endTime);
bool calcProfilingData();
void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; }
bool isCPUProfilingPath() const {
return profilingCpuPath;
@ -321,6 +320,9 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
}
}
bool calcProfilingData();
MOCKABLE_VIRTUAL void calcProfilingData(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS);
// executes all callbacks associated with this event
void executeCallbacks(int32_t executionStatus);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018 Intel Corporation
* Copyright (C) 2018-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -56,6 +56,10 @@ class TimestampPacket {
return reinterpret_cast<uint64_t>(&data[index]);
}
// Returns the value currently stored in the data slot selected by operationType.
uint32_t getData(DataIndex operationType) const {
    const auto slot = static_cast<uint32_t>(operationType);
    return data[slot];
}
void initialize() {
for (auto index = 0u; index < data.size(); index++) {
data[index] = 1;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -11,6 +11,7 @@
#include "runtime/event/user_event.h"
#include "unit_tests/api/cl_api_tests.h"
#include "unit_tests/fixtures/device_instrumentation_fixture.h"
#include "unit_tests/mocks/mock_event.h"
#include "unit_tests/os_interface/mock_performance_counters.h"
#include "test.h"
@ -216,7 +217,7 @@ TEST(clGetEventProfilingInfo, givenTimestampThatOverlapWhenGetDeltaIsCalledThenP
}
TEST(clGetEventProfilingInfo, WHENCalcProfilingDataTHENFalse) {
Event *pEvent = new Event(nullptr, 0, 0, 0);
auto *pEvent = new MockEvent<Event>(nullptr, 0, 0, 0);
EXPECT_FALSE(pEvent->calcProfilingData());
delete pEvent;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -99,4 +99,21 @@ struct MyEvent : public Event {
uint64_t getCompleteTimeStamp() {
return this->completeTimeStamp;
}
uint64_t getGlobalStartTimestamp() const {
return this->globalStartTimestamp;
}
bool getDataCalcStatus() const {
return this->dataCalculated;
}
void calcProfilingData(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) override {
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
globalStartTimestamp = globalStartTS;
}
Event::calcProfilingData(contextStartTS, contextEndTS, contextCompleteTS, globalStartTS);
}
// Global start timestamp captured by the calcProfilingData override.
// Zero-initialized because the override assigns it only when the
// ReturnRawGpuTimestamps debug flag is set; without an initializer the getter
// could return an indeterminate value.
uint64_t globalStartTimestamp = 0;
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -31,9 +31,11 @@ struct MockEvent : public BaseEventType {
FORWARD_FUNC(submitCommand, BaseEventType);
using BaseEventType::timeStampNode;
using Event::calcProfilingData;
using Event::magic;
using Event::queueTimeStamp;
using Event::submitTimeStamp;
using Event::timestampPacketContainer;
};
#undef FORWARD_CONSTRUCTOR

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -16,6 +16,7 @@
#include "runtime/utilities/tag_allocator.h"
#include "unit_tests/command_queue/command_enqueue_fixture.h"
#include "unit_tests/event/event_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/mocks/mock_command_queue.h"
@ -24,6 +25,7 @@
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_program.h"
#include "unit_tests/os_interface/mock_performance_counters.h"
#include "unit_tests/utilities/base_object_utils.h"
#include "test.h"
namespace OCLRT {
@ -181,7 +183,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProfolingWHENWa
EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation());
EXPECT_TRUE(static_cast<Event *>(event)->calcProfilingData());
EXPECT_TRUE(static_cast<MockEvent<Event> *>(event)->calcProfilingData());
clReleaseEvent(event);
}
@ -662,7 +664,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
auto itorAfterReportPerf = find<MI_REPORT_PERF_COUNT *>(itorGPGPUWalkerCmd, cmdList.end());
ASSERT_NE(cmdList.end(), itorAfterReportPerf);
EXPECT_TRUE(static_cast<Event *>(event)->calcProfilingData());
EXPECT_TRUE(static_cast<MockEvent<Event> *>(event)->calcProfilingData());
clReleaseEvent(event);
@ -725,7 +727,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
auto itorAfterReportPerf = find<MI_REPORT_PERF_COUNT *>(itorGPGPUWalkerCmd, cmdList.end());
ASSERT_NE(cmdList.end(), itorAfterReportPerf);
EXPECT_TRUE(static_cast<Event *>(event)->calcProfilingData());
EXPECT_TRUE(static_cast<MockEvent<Event> *>(event)->calcProfilingData());
clReleaseEvent(event);
@ -853,4 +855,77 @@ HWTEST_F(ProfilingWithPerfCountersTests,
pCmdQ->setPerfCountersEnabled(false, UINT32_MAX);
}
// Test-only container that owns what is added to it: on destruction every
// stored node is freed together with the tag it points at, and the node list
// is emptied afterwards.
struct MockTimestampPacketContainer : public TimestampPacketContainer {
    ~MockTimestampPacketContainer() override {
        for (const auto &ownedNode : timestampPacketNodes) {
            // Free the tag first; it is reached through the node.
            delete ownedNode->tag;
            delete ownedNode;
        }
        timestampPacketNodes.clear();
    }
};
struct ProfilingTimestampPacketsTest : public ::testing::Test {
void SetUp() override {
DebugManager.flags.ReturnRawGpuTimestamps.set(true);
cmdQ->setProfilingEnabled();
ev->timestampPacketContainer = std::make_unique<MockTimestampPacketContainer>();
}
void addTimestampNode(int contextStart, int contextEnd, int globalStart) {
auto timestampPacket = new TimestampPacket();
*reinterpret_cast<uint32_t *>(timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextStart)) = contextStart;
*reinterpret_cast<uint32_t *>(timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd)) = contextEnd;
*reinterpret_cast<uint32_t *>(timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::GlobalStart)) = globalStart;
auto node = new MockTagNode<TimestampPacket>();
node->tag = timestampPacket;
ev->timestampPacketContainer->add(node);
}
DebugManagerStateRestore restorer;
MockContext context;
cl_command_queue_properties props[5] = {0, 0, 0, 0, 0};
ReleaseableObjectPtr<MockCommandQueue> cmdQ = clUniquePtr(new MockCommandQueue(&context, context.getDevice(0), props));
ReleaseableObjectPtr<MockEvent<MyEvent>> ev = clUniquePtr(new MockEvent<MyEvent>(cmdQ.get(), CL_COMMAND_USER, Event::eventNotReady, Event::eventNotReady));
};
// With both a timestamp packet and a HwTimeStamps node attached, the reported
// values must be the packet's.
TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithOneElementAndTimestampNodeWhenCalculatingProfilingThenTimesAreTakenFromPacket) {
    addTimestampNode(10, 11, 12);

    // HW timestamps with clearly different values, so the expectations below
    // show which source was used.
    HwTimeStamps legacyStamps;
    legacyStamps.ContextStartTS = 100;
    legacyStamps.ContextEndTS = 110;
    legacyStamps.GlobalStartTS = 120;
    MockTagNode<HwTimeStamps> legacyNode;
    legacyNode.tag = &legacyStamps;
    ev->timeStampNode = &legacyNode;

    ev->calcProfilingData();

    EXPECT_EQ(10u, ev->getStartTimeStamp());
    EXPECT_EQ(11u, ev->getEndTimeStamp());
    EXPECT_EQ(12u, ev->getGlobalStartTimestamp());

    // Detach the stack-allocated node before it goes out of scope.
    ev->timeStampNode = nullptr;
}
// With several packets, each reported value is the extreme across all packets
// and may come from a different packet.
TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithThreeElementsWhenCalculatingProfilingThenTimesAreTakenFromProperPacket) {
// Arguments are (contextStart, contextEnd, globalStart).
addTimestampNode(10, 11, 12);
addTimestampNode(1, 21, 22);
addTimestampNode(5, 31, 2);
ev->calcProfilingData();
// Smallest context start: 1, from the second packet.
EXPECT_EQ(1u, ev->getStartTimeStamp());
// Largest context end: 31, from the third packet.
EXPECT_EQ(31u, ev->getEndTimeStamp());
// Smallest global start: 2, from the third packet.
EXPECT_EQ(2u, ev->getGlobalStartTimestamp());
}
// With an empty packet container and no node attached by the test, nothing
// must be calculated.
TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithZeroElementsWhenCalculatingProfilingThenDataIsNotCalculated) {
// Precondition: the container created in SetUp holds no nodes.
EXPECT_EQ(0u, ev->timestampPacketContainer->peekNodes().size());
ev->calcProfilingData();
EXPECT_FALSE(ev->getDataCalcStatus());
}
} // namespace OCLRT