Add profiling calculation from timestamp packets
Change-Id: Ie7f8c703ca5ea5eb1f5207871ef94cbc7ece18b7 Signed-off-by: Jobczyk, Lukasz <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
3fe78d263b
commit
c1cb1f9be6
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -251,49 +251,76 @@ cl_ulong Event::getDelta(cl_ulong startTime,
|
|||
}
|
||||
|
||||
bool Event::calcProfilingData() {
|
||||
if (!dataCalculated && !profilingCpuPath) {
|
||||
if (timestampPacketContainer && timestampPacketContainer->peekNodes().size() > 0) {
|
||||
const auto timestamps = timestampPacketContainer->peekNodes();
|
||||
|
||||
uint64_t contextStartTS = timestamps[0]->tag->getData(TimestampPacket::DataIndex::ContextStart);
|
||||
uint64_t contextEndTS = timestamps[0]->tag->getData(TimestampPacket::DataIndex::ContextEnd);
|
||||
uint64_t globalStartTS = timestamps[0]->tag->getData(TimestampPacket::DataIndex::GlobalStart);
|
||||
|
||||
for (const auto &timestamp : timestamps) {
|
||||
if (timestamp->tag->getData(TimestampPacket::DataIndex::ContextStart) < contextStartTS) {
|
||||
contextStartTS = timestamp->tag->getData(TimestampPacket::DataIndex::ContextStart);
|
||||
}
|
||||
if (timestamp->tag->getData(TimestampPacket::DataIndex::ContextEnd) > contextEndTS) {
|
||||
contextEndTS = timestamp->tag->getData(TimestampPacket::DataIndex::ContextEnd);
|
||||
}
|
||||
if (timestamp->tag->getData(TimestampPacket::DataIndex::GlobalStart) < globalStartTS) {
|
||||
globalStartTS = timestamp->tag->getData(TimestampPacket::DataIndex::GlobalStart);
|
||||
}
|
||||
}
|
||||
calcProfilingData(contextStartTS, contextEndTS, &contextEndTS, globalStartTS);
|
||||
} else if (timeStampNode) {
|
||||
calcProfilingData(
|
||||
(reinterpret_cast<HwTimeStamps *>(timeStampNode->tag))->ContextStartTS,
|
||||
(reinterpret_cast<HwTimeStamps *>(timeStampNode->tag))->ContextEndTS,
|
||||
&(reinterpret_cast<HwTimeStamps *>(timeStampNode->tag))->ContextCompleteTS,
|
||||
(reinterpret_cast<HwTimeStamps *>(timeStampNode->tag))->GlobalStartTS);
|
||||
}
|
||||
}
|
||||
return dataCalculated;
|
||||
}
|
||||
|
||||
void Event::calcProfilingData(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) {
|
||||
|
||||
uint64_t gpuDuration = 0;
|
||||
uint64_t cpuDuration = 0;
|
||||
|
||||
uint64_t gpuCompleteDuration = 0;
|
||||
uint64_t cpuCompleteDuration = 0;
|
||||
|
||||
int64_t c0 = 0;
|
||||
if (!dataCalculated && timeStampNode && !profilingCpuPath) {
|
||||
double frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||
/* calculation based on equation
|
||||
CpuTime = GpuTime * scalar + const( == c0)
|
||||
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
|
||||
to determine the value of the const we can use one pair of values
|
||||
const = CpuTimeQueue - GpuTimeQueue * scalar
|
||||
*/
|
||||
double frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||
int64_t c0 = queueTimeStamp.CPUTimeinNS - static_cast<uint64_t>(queueTimeStamp.GPUTimeStamp * frequency);
|
||||
/* calculation based on equation
|
||||
CpuTime = GpuTime * scalar + const( == c0)
|
||||
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)
|
||||
to determine the value of the const we can use one pair of values
|
||||
const = CpuTimeQueue - GpuTimeQueue * scalar
|
||||
*/
|
||||
|
||||
//If device enqueue has not updated complete timestamp, assign end timestamp
|
||||
if (((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS == 0)
|
||||
((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS = ((HwTimeStamps *)timeStampNode->tag)->ContextEndTS;
|
||||
|
||||
c0 = queueTimeStamp.CPUTimeinNS - static_cast<uint64_t>(queueTimeStamp.GPUTimeStamp * frequency);
|
||||
gpuDuration = getDelta(
|
||||
((HwTimeStamps *)timeStampNode->tag)->ContextStartTS,
|
||||
((HwTimeStamps *)timeStampNode->tag)->ContextEndTS);
|
||||
gpuCompleteDuration = getDelta(
|
||||
((HwTimeStamps *)timeStampNode->tag)->ContextStartTS,
|
||||
((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS);
|
||||
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
|
||||
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
|
||||
startTimeStamp = static_cast<uint64_t>(((HwTimeStamps *)timeStampNode->tag)->GlobalStartTS * frequency) + c0;
|
||||
|
||||
endTimeStamp = startTimeStamp + cpuDuration;
|
||||
completeTimeStamp = startTimeStamp + cpuCompleteDuration;
|
||||
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
startTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextStartTS;
|
||||
endTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextEndTS;
|
||||
completeTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS;
|
||||
}
|
||||
|
||||
dataCalculated = true;
|
||||
//If device enqueue has not updated complete timestamp, assign end timestamp
|
||||
gpuDuration = getDelta(contextStartTS, contextEndTS);
|
||||
if (*contextCompleteTS == 0) {
|
||||
*contextCompleteTS = contextEndTS;
|
||||
gpuCompleteDuration = gpuDuration;
|
||||
} else {
|
||||
gpuCompleteDuration = getDelta(contextStartTS, *contextCompleteTS);
|
||||
}
|
||||
return dataCalculated;
|
||||
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
|
||||
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
|
||||
|
||||
startTimeStamp = static_cast<uint64_t>(globalStartTS * frequency) + c0;
|
||||
endTimeStamp = startTimeStamp + cpuDuration;
|
||||
completeTimeStamp = startTimeStamp + cpuCompleteDuration;
|
||||
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
startTimeStamp = contextStartTS;
|
||||
endTimeStamp = contextEndTS;
|
||||
completeTimeStamp = *contextCompleteTS;
|
||||
}
|
||||
|
||||
dataCalculated = true;
|
||||
}
|
||||
|
||||
inline bool Event::wait(bool blocking, bool useQuickKmdSleep) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -93,7 +93,6 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
|||
void updateCompletionStamp(uint32_t taskCount, uint32_t tasklevel, FlushStamp flushStamp);
|
||||
cl_ulong getDelta(cl_ulong startTime,
|
||||
cl_ulong endTime);
|
||||
bool calcProfilingData();
|
||||
void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; }
|
||||
bool isCPUProfilingPath() const {
|
||||
return profilingCpuPath;
|
||||
|
@ -321,6 +320,9 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
|||
}
|
||||
}
|
||||
|
||||
bool calcProfilingData();
|
||||
MOCKABLE_VIRTUAL void calcProfilingData(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS);
|
||||
|
||||
// executes all callbacks associated with this event
|
||||
void executeCallbacks(int32_t executionStatus);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018 Intel Corporation
|
||||
* Copyright (C) 2018-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -56,6 +56,10 @@ class TimestampPacket {
|
|||
return reinterpret_cast<uint64_t>(&data[index]);
|
||||
}
|
||||
|
||||
uint32_t getData(DataIndex operationType) const {
|
||||
return data[static_cast<uint32_t>(operationType)];
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
for (auto index = 0u; index < data.size(); index++) {
|
||||
data[index] = 1;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -11,6 +11,7 @@
|
|||
#include "runtime/event/user_event.h"
|
||||
#include "unit_tests/api/cl_api_tests.h"
|
||||
#include "unit_tests/fixtures/device_instrumentation_fixture.h"
|
||||
#include "unit_tests/mocks/mock_event.h"
|
||||
#include "unit_tests/os_interface/mock_performance_counters.h"
|
||||
#include "test.h"
|
||||
|
||||
|
@ -216,7 +217,7 @@ TEST(clGetEventProfilingInfo, givenTimestampThatOverlapWhenGetDeltaIsCalledThenP
|
|||
}
|
||||
|
||||
TEST(clGetEventProfilingInfo, WHENCalcProfilingDataTHENFalse) {
|
||||
Event *pEvent = new Event(nullptr, 0, 0, 0);
|
||||
auto *pEvent = new MockEvent<Event>(nullptr, 0, 0, 0);
|
||||
EXPECT_FALSE(pEvent->calcProfilingData());
|
||||
delete pEvent;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -99,4 +99,21 @@ struct MyEvent : public Event {
|
|||
uint64_t getCompleteTimeStamp() {
|
||||
return this->completeTimeStamp;
|
||||
}
|
||||
|
||||
uint64_t getGlobalStartTimestamp() const {
|
||||
return this->globalStartTimestamp;
|
||||
}
|
||||
|
||||
bool getDataCalcStatus() const {
|
||||
return this->dataCalculated;
|
||||
}
|
||||
|
||||
void calcProfilingData(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) override {
|
||||
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
|
||||
globalStartTimestamp = globalStartTS;
|
||||
}
|
||||
Event::calcProfilingData(contextStartTS, contextEndTS, contextCompleteTS, globalStartTS);
|
||||
}
|
||||
|
||||
uint64_t globalStartTimestamp;
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -31,9 +31,11 @@ struct MockEvent : public BaseEventType {
|
|||
FORWARD_FUNC(submitCommand, BaseEventType);
|
||||
|
||||
using BaseEventType::timeStampNode;
|
||||
using Event::calcProfilingData;
|
||||
using Event::magic;
|
||||
using Event::queueTimeStamp;
|
||||
using Event::submitTimeStamp;
|
||||
using Event::timestampPacketContainer;
|
||||
};
|
||||
|
||||
#undef FORWARD_CONSTRUCTOR
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -16,6 +16,7 @@
|
|||
#include "runtime/utilities/tag_allocator.h"
|
||||
|
||||
#include "unit_tests/command_queue/command_enqueue_fixture.h"
|
||||
#include "unit_tests/event/event_fixture.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "unit_tests/mocks/mock_command_queue.h"
|
||||
|
@ -24,6 +25,7 @@
|
|||
#include "unit_tests/mocks/mock_kernel.h"
|
||||
#include "unit_tests/mocks/mock_program.h"
|
||||
#include "unit_tests/os_interface/mock_performance_counters.h"
|
||||
#include "unit_tests/utilities/base_object_utils.h"
|
||||
#include "test.h"
|
||||
|
||||
namespace OCLRT {
|
||||
|
@ -181,7 +183,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GIVENCommandQueueWithProfolingWHENWa
|
|||
|
||||
EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pBeforePC->getPostSyncOperation());
|
||||
|
||||
EXPECT_TRUE(static_cast<Event *>(event)->calcProfilingData());
|
||||
EXPECT_TRUE(static_cast<MockEvent<Event> *>(event)->calcProfilingData());
|
||||
|
||||
clReleaseEvent(event);
|
||||
}
|
||||
|
@ -662,7 +664,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
|
|||
auto itorAfterReportPerf = find<MI_REPORT_PERF_COUNT *>(itorGPGPUWalkerCmd, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itorAfterReportPerf);
|
||||
|
||||
EXPECT_TRUE(static_cast<Event *>(event)->calcProfilingData());
|
||||
EXPECT_TRUE(static_cast<MockEvent<Event> *>(event)->calcProfilingData());
|
||||
|
||||
clReleaseEvent(event);
|
||||
|
||||
|
@ -725,7 +727,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWit
|
|||
auto itorAfterReportPerf = find<MI_REPORT_PERF_COUNT *>(itorGPGPUWalkerCmd, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itorAfterReportPerf);
|
||||
|
||||
EXPECT_TRUE(static_cast<Event *>(event)->calcProfilingData());
|
||||
EXPECT_TRUE(static_cast<MockEvent<Event> *>(event)->calcProfilingData());
|
||||
|
||||
clReleaseEvent(event);
|
||||
|
||||
|
@ -853,4 +855,77 @@ HWTEST_F(ProfilingWithPerfCountersTests,
|
|||
|
||||
pCmdQ->setPerfCountersEnabled(false, UINT32_MAX);
|
||||
}
|
||||
|
||||
struct MockTimestampPacketContainer : public TimestampPacketContainer {
|
||||
~MockTimestampPacketContainer() override {
|
||||
for (const auto &node : timestampPacketNodes) {
|
||||
delete node->tag;
|
||||
delete node;
|
||||
}
|
||||
timestampPacketNodes.clear();
|
||||
}
|
||||
};
|
||||
|
||||
struct ProfilingTimestampPacketsTest : public ::testing::Test {
|
||||
void SetUp() override {
|
||||
DebugManager.flags.ReturnRawGpuTimestamps.set(true);
|
||||
cmdQ->setProfilingEnabled();
|
||||
ev->timestampPacketContainer = std::make_unique<MockTimestampPacketContainer>();
|
||||
}
|
||||
|
||||
void addTimestampNode(int contextStart, int contextEnd, int globalStart) {
|
||||
auto timestampPacket = new TimestampPacket();
|
||||
*reinterpret_cast<uint32_t *>(timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextStart)) = contextStart;
|
||||
*reinterpret_cast<uint32_t *>(timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd)) = contextEnd;
|
||||
*reinterpret_cast<uint32_t *>(timestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::GlobalStart)) = globalStart;
|
||||
auto node = new MockTagNode<TimestampPacket>();
|
||||
node->tag = timestampPacket;
|
||||
ev->timestampPacketContainer->add(node);
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
MockContext context;
|
||||
cl_command_queue_properties props[5] = {0, 0, 0, 0, 0};
|
||||
ReleaseableObjectPtr<MockCommandQueue> cmdQ = clUniquePtr(new MockCommandQueue(&context, context.getDevice(0), props));
|
||||
ReleaseableObjectPtr<MockEvent<MyEvent>> ev = clUniquePtr(new MockEvent<MyEvent>(cmdQ.get(), CL_COMMAND_USER, Event::eventNotReady, Event::eventNotReady));
|
||||
};
|
||||
|
||||
TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithOneElementAndTimestampNodeWhenCalculatingProfilingThenTimesAreTakenFromPacket) {
|
||||
addTimestampNode(10, 11, 12);
|
||||
|
||||
HwTimeStamps hwTimestamps;
|
||||
hwTimestamps.ContextStartTS = 100;
|
||||
hwTimestamps.ContextEndTS = 110;
|
||||
hwTimestamps.GlobalStartTS = 120;
|
||||
MockTagNode<HwTimeStamps> hwTimestampsNode;
|
||||
hwTimestampsNode.tag = &hwTimestamps;
|
||||
ev->timeStampNode = &hwTimestampsNode;
|
||||
|
||||
ev->calcProfilingData();
|
||||
|
||||
EXPECT_EQ(10u, ev->getStartTimeStamp());
|
||||
EXPECT_EQ(11u, ev->getEndTimeStamp());
|
||||
EXPECT_EQ(12u, ev->getGlobalStartTimestamp());
|
||||
|
||||
ev->timeStampNode = nullptr;
|
||||
}
|
||||
|
||||
TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithThreeElementsWhenCalculatingProfilingThenTimesAreTakenFromProperPacket) {
|
||||
addTimestampNode(10, 11, 12);
|
||||
addTimestampNode(1, 21, 22);
|
||||
addTimestampNode(5, 31, 2);
|
||||
|
||||
ev->calcProfilingData();
|
||||
|
||||
EXPECT_EQ(1u, ev->getStartTimeStamp());
|
||||
EXPECT_EQ(31u, ev->getEndTimeStamp());
|
||||
EXPECT_EQ(2u, ev->getGlobalStartTimestamp());
|
||||
}
|
||||
|
||||
TEST_F(ProfilingTimestampPacketsTest, givenTimestampsPacketContainerWithZeroElementsWhenCalculatingProfilingThenDataIsNotCalculated) {
|
||||
EXPECT_EQ(0u, ev->timestampPacketContainer->peekNodes().size());
|
||||
ev->calcProfilingData();
|
||||
|
||||
EXPECT_FALSE(ev->getDataCalcStatus());
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
|
Loading…
Reference in New Issue