performance: Reuse GPU timestamp instead of KMD escape
This can be enabled only if the related debug flag is set. Related-To: NEO-10615 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
parent
c1004b77bf
commit
83e8ae4a20
|
@ -1048,7 +1048,7 @@ ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties)
|
||||||
|
|
||||||
ze_result_t DeviceImp::getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) {
|
ze_result_t DeviceImp::getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) {
|
||||||
NEO::TimeStampData queueTimeStamp;
|
NEO::TimeStampData queueTimeStamp;
|
||||||
bool retVal = this->neoDevice->getOSTime()->getGpuCpuTime(&queueTimeStamp);
|
bool retVal = this->neoDevice->getOSTime()->getGpuCpuTime(&queueTimeStamp, true);
|
||||||
if (!retVal)
|
if (!retVal)
|
||||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
return ZE_RESULT_ERROR_DEVICE_LOST;
|
||||||
|
|
||||||
|
|
|
@ -489,7 +489,7 @@ void Event::setReferenceTs(uint64_t currentCpuTimeStamp) {
|
||||||
const auto recalculate =
|
const auto recalculate =
|
||||||
(currentCpuTimeStamp - referenceTs.cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
|
(currentCpuTimeStamp - referenceTs.cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
|
||||||
if (referenceTs.cpuTimeinNS == 0 || recalculate) {
|
if (referenceTs.cpuTimeinNS == 0 || recalculate) {
|
||||||
device->getNEODevice()->getOSTime()->getGpuCpuTime(&referenceTs);
|
device->getNEODevice()->getOSTime()->getGpuCpuTime(&referenceTs, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -163,12 +163,14 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
|
||||||
// if paramValue is NULL, it is ignored
|
// if paramValue is NULL, it is ignored
|
||||||
switch (paramName) {
|
switch (paramName) {
|
||||||
case CL_PROFILING_COMMAND_QUEUED:
|
case CL_PROFILING_COMMAND_QUEUED:
|
||||||
|
calcProfilingData();
|
||||||
timestamp = getProfilingInfoData(queueTimeStamp);
|
timestamp = getProfilingInfoData(queueTimeStamp);
|
||||||
src = &timestamp;
|
src = &timestamp;
|
||||||
srcSize = sizeof(cl_ulong);
|
srcSize = sizeof(cl_ulong);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_PROFILING_COMMAND_SUBMIT:
|
case CL_PROFILING_COMMAND_SUBMIT:
|
||||||
|
calcProfilingData();
|
||||||
timestamp = getProfilingInfoData(submitTimeStamp);
|
timestamp = getProfilingInfoData(submitTimeStamp);
|
||||||
src = &timestamp;
|
src = &timestamp;
|
||||||
srcSize = sizeof(cl_ulong);
|
srcSize = sizeof(cl_ulong);
|
||||||
|
@ -365,15 +367,55 @@ bool Event::calcProfilingData() {
|
||||||
return dataCalculated;
|
return dataCalculated;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Event::updateTimestamp(ProfilingInfo ×tamp, uint64_t newGpuTimestamp) const {
|
||||||
|
auto &device = this->cmdQueue->getDevice();
|
||||||
|
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
||||||
|
auto resolution = device.getDeviceInfo().profilingTimerResolution;
|
||||||
|
timestamp.gpuTimeStamp = newGpuTimestamp;
|
||||||
|
timestamp.gpuTimeInNs = gfxCoreHelper.getGpuTimeStampInNS(timestamp.gpuTimeStamp, resolution);
|
||||||
|
timestamp.cpuTimeInNs = timestamp.gpuTimeInNs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Timestamp returned from GPU is initially 32 bits. This method performs XOR with
|
||||||
|
* other timestamp that tracks overflows, so passed timestamp will have correct overflow bits
|
||||||
|
*
|
||||||
|
* @param[out] timestamp Overflow bits will be added to this timestamp
|
||||||
|
* @param[in] timestampWithOverflow Timestamp that tracks overflows in remaining 32 most significant bits
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void Event::addOverflowToTimestamp(uint64_t ×tamp, uint64_t timestampWithOverflow) const {
|
||||||
|
auto &device = this->cmdQueue->getDevice();
|
||||||
|
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
||||||
|
timestamp |= timestampWithOverflow & (maxNBitValue(64) - maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits()));
|
||||||
|
}
|
||||||
|
|
||||||
void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) {
|
void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) {
|
||||||
auto &device = this->cmdQueue->getDevice();
|
auto &device = this->cmdQueue->getDevice();
|
||||||
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
||||||
auto resolution = device.getDeviceInfo().profilingTimerResolution;
|
auto resolution = device.getDeviceInfo().profilingTimerResolution;
|
||||||
|
|
||||||
startTimeStamp.gpuTimeStamp = globalStartTS;
|
startTimeStamp.gpuTimeStamp = globalStartTS;
|
||||||
while (startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp) {
|
addOverflowToTimestamp(startTimeStamp.gpuTimeStamp, submitTimeStamp.gpuTimeStamp);
|
||||||
startTimeStamp.gpuTimeStamp += static_cast<uint64_t>(1ULL << gfxCoreHelper.getGlobalTimeStampBits());
|
if (startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp) {
|
||||||
|
auto diff = submitTimeStamp.gpuTimeStamp - startTimeStamp.gpuTimeStamp;
|
||||||
|
auto diffInNS = gfxCoreHelper.getGpuTimeStampInNS(diff, resolution);
|
||||||
|
auto osTime = device.getOSTime();
|
||||||
|
if (diffInNS < osTime->getTimestampRefreshTimeout()) {
|
||||||
|
auto alignedSubmitTimestamp = startTimeStamp.gpuTimeStamp - 1;
|
||||||
|
auto alignedQueueTimestamp = startTimeStamp.gpuTimeStamp - 2;
|
||||||
|
if (startTimeStamp.gpuTimeStamp <= 2) {
|
||||||
|
alignedSubmitTimestamp = 0;
|
||||||
|
alignedQueueTimestamp = 0;
|
||||||
|
}
|
||||||
|
updateTimestamp(submitTimeStamp, alignedSubmitTimestamp);
|
||||||
|
updateTimestamp(queueTimeStamp, alignedQueueTimestamp);
|
||||||
|
osTime->setRefreshTimestampsFlag();
|
||||||
|
} else {
|
||||||
|
startTimeStamp.gpuTimeStamp += static_cast<uint64_t>(1ULL << gfxCoreHelper.getGlobalTimeStampBits());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
UNRECOVERABLE_IF(startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp);
|
||||||
auto gpuTicksDiff = startTimeStamp.gpuTimeStamp - submitTimeStamp.gpuTimeStamp;
|
auto gpuTicksDiff = startTimeStamp.gpuTimeStamp - submitTimeStamp.gpuTimeStamp;
|
||||||
auto timeDiff = static_cast<uint64_t>(gpuTicksDiff * resolution);
|
auto timeDiff = static_cast<uint64_t>(gpuTicksDiff * resolution);
|
||||||
startTimeStamp.cpuTimeInNs = submitTimeStamp.cpuTimeInNs + timeDiff;
|
startTimeStamp.cpuTimeInNs = submitTimeStamp.cpuTimeInNs + timeDiff;
|
||||||
|
@ -614,8 +656,8 @@ void Event::submitCommand(bool abortTasks) {
|
||||||
this->setSubmitTimeStamp();
|
this->setSubmitTimeStamp();
|
||||||
if (profilingCpuPath) {
|
if (profilingCpuPath) {
|
||||||
setStartTimeStamp();
|
setStartTimeStamp();
|
||||||
} else {
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (perfCountersEnabled && perfCounterNode) {
|
if (perfCountersEnabled && perfCounterNode) {
|
||||||
this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*perfCounterNode->getBaseGraphicsAllocation());
|
this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*perfCounterNode->getBaseGraphicsAllocation());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -360,6 +360,9 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||||
bool isWaitForTimestampsEnabled() const;
|
bool isWaitForTimestampsEnabled() const;
|
||||||
bool areTimestampsCompleted();
|
bool areTimestampsCompleted();
|
||||||
|
|
||||||
|
void updateTimestamp(ProfilingInfo &timestamp, uint64_t newGpuTimestamp) const;
|
||||||
|
void addOverflowToTimestamp(uint64_t &timestamp, uint64_t timestampWithOverflow) const;
|
||||||
|
|
||||||
bool currentCmdQVirtualEvent = false;
|
bool currentCmdQVirtualEvent = false;
|
||||||
std::atomic<Command *> cmdToSubmit{nullptr};
|
std::atomic<Command *> cmdToSubmit{nullptr};
|
||||||
std::atomic<Command *> submittedCmd{nullptr};
|
std::atomic<Command *> submittedCmd{nullptr};
|
||||||
|
|
|
@ -25,7 +25,9 @@ TEST(MockOSTime, WhenSleepingThenDeviceAndHostTimerAreIncreased) {
|
||||||
cl_ulong hostTimestamp[2] = {0, 0};
|
cl_ulong hostTimestamp[2] = {0, 0};
|
||||||
|
|
||||||
auto mDev = MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr);
|
auto mDev = MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr);
|
||||||
mDev->setOSTime(new MockOSTime());
|
auto osTime = new MockOSTime();
|
||||||
|
osTime->setDeviceTimerResolution(mDev->getHardwareInfo());
|
||||||
|
mDev->setOSTime(osTime);
|
||||||
|
|
||||||
mDev->getDeviceAndHostTimer(
|
mDev->getDeviceAndHostTimer(
|
||||||
&deviceTimestamp[0],
|
&deviceTimestamp[0],
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -889,11 +889,12 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
|
||||||
MockCommandQueue cmdQ(mockContext, pClDevice, props, false);
|
MockCommandQueue cmdQ(mockContext, pClDevice, props, false);
|
||||||
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
|
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
|
||||||
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
|
auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
|
||||||
|
auto osTime = pClDevice->getDevice().getOSTime();
|
||||||
|
|
||||||
HwTimeStamps timestamp{};
|
HwTimeStamps timestamp{};
|
||||||
timestamp.globalStartTS = 3;
|
timestamp.globalStartTS = 3;
|
||||||
event.queueTimeStamp.gpuTimeStamp = 2;
|
event.queueTimeStamp.gpuTimeStamp = 2;
|
||||||
event.submitTimeStamp.gpuTimeStamp = 4;
|
event.submitTimeStamp.gpuTimeStamp = osTime->getTimestampRefreshTimeout() + 4;
|
||||||
event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);
|
event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);
|
||||||
TagNode<HwTimeStamps> timestampNode{};
|
TagNode<HwTimeStamps> timestampNode{};
|
||||||
timestampNode.tagForCpuAccess = &timestamp;
|
timestampNode.tagForCpuAccess = &timestamp;
|
||||||
|
@ -909,6 +910,76 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
|
||||||
event.timeStampNode = nullptr;
|
event.timeStampNode = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// With EnableReusingGpuTimestamps set, the global start timestamp (3) is smaller than the
// submit timestamp (4). The test expects submit and queue to be reported as start - 1 and
// start - 2 ticks, while start itself is reported unchanged.
TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWithinRecalculationLimitWhenCalculateStartTimestampThenAdjustTimestmaps) {
    DebugManagerStateRestore dbgRestore;
    debugManager.flags.EnableReusingGpuTimestamps.set(true);

    MockContext context{};
    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));

    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    MockCommandQueue cmdQ(&context, mockDevice.get(), props, false);
    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
    auto resolution = mockDevice->getDevice().getDeviceInfo().profilingTimerResolution;

    HwTimeStamps timestamp{};
    timestamp.globalStartTS = 3;
    event.queueTimeStamp.gpuTimeStamp = 2;
    event.submitTimeStamp.gpuTimeStamp = 4;
    event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);

    // The node only borrows the stack-allocated tag; it is detached again at the end of the test.
    TagNode<HwTimeStamps> timestampNode{};
    timestampNode.tagForCpuAccess = &timestamp;
    event.timeStampNode = &timestampNode;

    uint64_t start = 0u;
    uint64_t submit = 0u;
    uint64_t queue = 0u;
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submit, nullptr);
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queue, nullptr);

    EXPECT_EQ(start, static_cast<uint64_t>(timestamp.globalStartTS * resolution));
    EXPECT_EQ(submit, static_cast<uint64_t>((timestamp.globalStartTS - 1) * resolution));
    EXPECT_EQ(queue, static_cast<uint64_t>((timestamp.globalStartTS - 2) * resolution));

    event.timeStampNode = nullptr;
}
|
||||||
|
|
||||||
|
// Same scenario as the re-alignment case, but the global start timestamp is 2, so there is
// no room to place submit and queue one and two ticks before it. The test expects both
// to be reported as 0, while start itself is reported unchanged.
TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWithinRecalculationLimitAndStartTSBelowOneWhenCalculateStartTimestampThenAdjustTimestmaps) {
    DebugManagerStateRestore dbgRestore;
    debugManager.flags.EnableReusingGpuTimestamps.set(true);

    MockContext context{};
    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));

    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
    MockCommandQueue cmdQ(&context, mockDevice.get(), props, false);
    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
    auto resolution = mockDevice->getDevice().getDeviceInfo().profilingTimerResolution;

    HwTimeStamps timestamp{};
    timestamp.globalStartTS = 2;
    event.queueTimeStamp.gpuTimeStamp = 2;
    event.submitTimeStamp.gpuTimeStamp = 4;
    event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);

    // The node only borrows the stack-allocated tag; it is detached again at the end of the test.
    TagNode<HwTimeStamps> timestampNode{};
    timestampNode.tagForCpuAccess = &timestamp;
    event.timeStampNode = &timestampNode;

    uint64_t start = 0u;
    uint64_t submit = 0u;
    uint64_t queue = 0u;
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submit, nullptr);
    event.getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queue, nullptr);

    EXPECT_EQ(start, static_cast<uint64_t>(timestamp.globalStartTS * resolution));
    EXPECT_EQ(submit, 0ul);
    EXPECT_EQ(queue, 0ul);

    event.timeStampNode = nullptr;
}
|
||||||
|
|
||||||
TEST_F(InternalsEventTest, givenGpuHangWhenEventWaitReportsHangThenWaititingIsAbortedAndUnfinishedEventsHaveExecutionStatusEqualsToAbortedDueToGpuHang) {
|
TEST_F(InternalsEventTest, givenGpuHangWhenEventWaitReportsHangThenWaititingIsAbortedAndUnfinishedEventsHaveExecutionStatusEqualsToAbortedDueToGpuHang) {
|
||||||
MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false);
|
MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false);
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2023 Intel Corporation
|
* Copyright (C) 2018-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -61,6 +61,58 @@ struct ProfilingTests : public CommandEnqueueFixture,
|
||||||
uint32_t crossThreadData[32];
|
uint32_t crossThreadData[32];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename TagType>
|
||||||
|
struct MockTagNode : public TagNode<TagType> {
|
||||||
|
public:
|
||||||
|
using TagNode<TagType>::tagForCpuAccess;
|
||||||
|
using TagNode<TagType>::gfxAllocation;
|
||||||
|
MockTagNode() {
|
||||||
|
gfxAllocation = nullptr;
|
||||||
|
tagForCpuAccess = nullptr;
|
||||||
|
}
|
||||||
|
void returnTag() {
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class MyOSDeviceTime : public DeviceTime {
|
||||||
|
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
|
||||||
|
EXPECT_FALSE(true);
|
||||||
|
return 1.0;
|
||||||
|
}
|
||||||
|
uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override {
|
||||||
|
EXPECT_FALSE(true);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *) override {
|
||||||
|
EXPECT_FALSE(true);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class MyOSTime : public OSTime {
|
||||||
|
public:
|
||||||
|
static int instanceNum;
|
||||||
|
MyOSTime() {
|
||||||
|
instanceNum++;
|
||||||
|
this->deviceTime = std::make_unique<MyOSDeviceTime>();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool getCpuTime(uint64_t *timeStamp) override {
|
||||||
|
EXPECT_FALSE(true);
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
double getHostTimerResolution() const override {
|
||||||
|
EXPECT_FALSE(true);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
uint64_t getCpuRawTimestamp() override {
|
||||||
|
EXPECT_FALSE(true);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int MyOSTime::instanceNum = 0;
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithKernelWhenGetCSFromCmdQueueThenEnoughSpaceInCS) {
|
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithKernelWhenGetCSFromCmdQueueThenEnoughSpaceInCS) {
|
||||||
typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM;
|
typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM;
|
||||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||||
|
@ -442,7 +494,7 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenNonBlockedEnqueueThenSetGpuPath)
|
||||||
cl_event event;
|
cl_event event;
|
||||||
pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event);
|
pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event);
|
||||||
auto eventObj = static_cast<Event *>(event);
|
auto eventObj = static_cast<Event *>(event);
|
||||||
EXPECT_TRUE(eventObj->isCPUProfilingPath() == CL_FALSE);
|
EXPECT_FALSE(eventObj->isCPUProfilingPath());
|
||||||
pCmdQ->finish();
|
pCmdQ->finish();
|
||||||
|
|
||||||
uint64_t queued, submit;
|
uint64_t queued, submit;
|
||||||
|
@ -455,6 +507,7 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenNonBlockedEnqueueThenSetGpuPath)
|
||||||
|
|
||||||
EXPECT_LT(0u, queued);
|
EXPECT_LT(0u, queued);
|
||||||
EXPECT_LT(queued, submit);
|
EXPECT_LT(queued, submit);
|
||||||
|
|
||||||
eventObj->release();
|
eventObj->release();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -474,7 +527,17 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenSetGpuPath) {
|
||||||
|
|
||||||
uint64_t queued = 0u, submit = 0u;
|
uint64_t queued = 0u, submit = 0u;
|
||||||
cl_int retVal;
|
cl_int retVal;
|
||||||
|
HwTimeStamps timestamp;
|
||||||
|
timestamp.globalStartTS = 10;
|
||||||
|
timestamp.contextStartTS = 10;
|
||||||
|
timestamp.globalEndTS = 80;
|
||||||
|
timestamp.contextEndTS = 80;
|
||||||
|
MockTagNode<HwTimeStamps> timestampNode;
|
||||||
|
timestampNode.tagForCpuAccess = &timestamp;
|
||||||
|
static_cast<MockEvent<Event> *>(eventObj)->timeStampNode = &timestampNode;
|
||||||
|
if (eventObj->getTimestampPacketNodes()) {
|
||||||
|
eventObj->getTimestampPacketNodes()->releaseNodes();
|
||||||
|
}
|
||||||
retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(uint64_t), &queued, 0);
|
retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(uint64_t), &queued, 0);
|
||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0);
|
retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0);
|
||||||
|
@ -483,60 +546,11 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenSetGpuPath) {
|
||||||
EXPECT_LT(0u, queued);
|
EXPECT_LT(0u, queued);
|
||||||
EXPECT_LT(queued, submit);
|
EXPECT_LT(queued, submit);
|
||||||
|
|
||||||
|
static_cast<MockEvent<Event> *>(eventObj)->timeStampNode = nullptr;
|
||||||
eventObj->release();
|
eventObj->release();
|
||||||
userEventObj->release();
|
userEventObj->release();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename TagType>
|
|
||||||
struct MockTagNode : public TagNode<TagType> {
|
|
||||||
public:
|
|
||||||
using TagNode<TagType>::tagForCpuAccess;
|
|
||||||
using TagNode<TagType>::gfxAllocation;
|
|
||||||
MockTagNode() {
|
|
||||||
gfxAllocation = nullptr;
|
|
||||||
tagForCpuAccess = nullptr;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
class MyOSDeviceTime : public DeviceTime {
|
|
||||||
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
|
|
||||||
EXPECT_FALSE(true);
|
|
||||||
return 1.0;
|
|
||||||
}
|
|
||||||
uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override {
|
|
||||||
EXPECT_FALSE(true);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *) override {
|
|
||||||
EXPECT_FALSE(true);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
class MyOSTime : public OSTime {
|
|
||||||
public:
|
|
||||||
static int instanceNum;
|
|
||||||
MyOSTime() {
|
|
||||||
instanceNum++;
|
|
||||||
this->deviceTime = std::make_unique<MyOSDeviceTime>();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool getCpuTime(uint64_t *timeStamp) override {
|
|
||||||
EXPECT_FALSE(true);
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
double getHostTimerResolution() const override {
|
|
||||||
EXPECT_FALSE(true);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
uint64_t getCpuRawTimestamp() override {
|
|
||||||
EXPECT_FALSE(true);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
int MyOSTime::instanceNum = 0;
|
|
||||||
|
|
||||||
using EventProfilingTest = ProfilingTests;
|
using EventProfilingTest = ProfilingTests;
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {
|
HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {
|
||||||
|
|
|
@ -346,6 +346,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis
|
||||||
DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution")
|
DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
|
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
|
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
|
||||||
|
DECLARE_DEBUG_VARIABLE(bool, EnableReusingGpuTimestamps, false, "When enabled, GPU timestamp will be reused for next device time requests")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object")
|
DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100")
|
DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100")
|
||||||
|
|
|
@ -652,7 +652,7 @@ EngineControl &Device::getEngine(uint32_t index) {
|
||||||
|
|
||||||
bool Device::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const {
|
bool Device::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const {
|
||||||
TimeStampData timeStamp;
|
TimeStampData timeStamp;
|
||||||
auto retVal = getOSTime()->getGpuCpuTime(&timeStamp);
|
auto retVal = getOSTime()->getGpuCpuTime(&timeStamp, true);
|
||||||
if (retVal) {
|
if (retVal) {
|
||||||
*hostTimestamp = timeStamp.cpuTimeinNS;
|
*hostTimestamp = timeStamp.cpuTimeinNS;
|
||||||
if (debugManager.flags.EnableDeviceBasedTimestamps.get()) {
|
if (debugManager.flags.EnableDeviceBasedTimestamps.get()) {
|
||||||
|
|
|
@ -124,6 +124,7 @@ void RootDeviceEnvironment::initGmm() {
|
||||||
void RootDeviceEnvironment::initOsTime() {
|
void RootDeviceEnvironment::initOsTime() {
|
||||||
if (!osTime) {
|
if (!osTime) {
|
||||||
osTime = OSTime::create(osInterface.get());
|
osTime = OSTime::create(osInterface.get());
|
||||||
|
osTime->setDeviceTimerResolution(*hwInfo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
|
|
||||||
#include "shared/source/os_interface/os_time.h"
|
#include "shared/source/os_interface/os_time.h"
|
||||||
|
|
||||||
|
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||||
|
#include "shared/source/helpers/debug_helpers.h"
|
||||||
#include "shared/source/helpers/hw_info.h"
|
#include "shared/source/helpers/hw_info.h"
|
||||||
|
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
@ -17,6 +19,13 @@ double OSTime::getDeviceTimerResolution(HardwareInfo const &hwInfo) {
|
||||||
return hwInfo.capabilityTable.defaultProfilingTimerResolution;
|
return hwInfo.capabilityTable.defaultProfilingTimerResolution;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Reads the EnableReusingGpuTimestamps debug flag once at construction. When the flag is
// set, the refresh timeout starts at 100 ms; otherwise it keeps its in-class default.
DeviceTime::DeviceTime() {
    reusingTimestampsEnabled = debugManager.flags.EnableReusingGpuTimestamps.get();
    if (!reusingTimestampsEnabled) {
        return;
    }
    timestampRefreshTimeoutNS = NSEC_PER_MSEC * 100; // 100ms
}
|
||||||
|
|
||||||
bool DeviceTime::getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) {
|
bool DeviceTime::getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) {
|
||||||
pGpuCpuTime->cpuTimeinNS = 0;
|
pGpuCpuTime->cpuTimeinNS = 0;
|
||||||
pGpuCpuTime->gpuTimeStamp = 0;
|
pGpuCpuTime->gpuTimeStamp = 0;
|
||||||
|
@ -31,8 +40,63 @@ uint64_t DeviceTime::getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) cons
|
||||||
return static_cast<uint64_t>(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo));
|
return static_cast<uint64_t>(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DeviceTime::getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime) {
|
void DeviceTime::setDeviceTimerResolution(HardwareInfo const &hwInfo) {
|
||||||
if (!getGpuCpuTimeImpl(pGpuCpuTime, osTime)) {
|
deviceTimerResolution = getDynamicDeviceTimerResolution(hwInfo);
|
||||||
|
if (debugManager.flags.OverrideProfilingTimerResolution.get() != -1) {
|
||||||
|
deviceTimerResolution = static_cast<double>(debugManager.flags.OverrideProfilingTimerResolution.get());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief If this method is called within interval, GPU timestamp
|
||||||
|
* will be calculated based on CPU timestamp and previous GPU ticks
|
||||||
|
* to reduce amount of internal KMD calls. Interval is selected
|
||||||
|
* adaptively, based on misalignment between calculated ticks and actual ticks.
|
||||||
|
*
|
||||||
|
* @return returns false if internal call to KMD failed. True otherwise.
|
||||||
|
*/
|
||||||
|
bool DeviceTime::getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime, bool forceKmdCall) {
|
||||||
|
uint64_t cpuTimeinNS;
|
||||||
|
osTime->getCpuTime(&cpuTimeinNS);
|
||||||
|
|
||||||
|
auto cpuTimeDiffInNS = cpuTimeinNS - fetchedTimestamps.cpuTimeinNS;
|
||||||
|
if (forceKmdCall || cpuTimeDiffInNS >= timestampRefreshTimeoutNS) {
|
||||||
|
refreshTimestamps = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!reusingTimestampsEnabled || refreshTimestamps) {
|
||||||
|
if (!getGpuCpuTimeImpl(timeStamp, osTime)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!reusingTimestampsEnabled) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (initialGpuTimeStamp) {
|
||||||
|
UNRECOVERABLE_IF(deviceTimerResolution == 0);
|
||||||
|
auto calculatedTimestamp = fetchedTimestamps.gpuTimeStamp + static_cast<uint64_t>(cpuTimeDiffInNS / deviceTimerResolution);
|
||||||
|
auto diff = abs(static_cast<int64_t>(timeStamp->gpuTimeStamp - calculatedTimestamp));
|
||||||
|
auto elapsedTicks = timeStamp->gpuTimeStamp - fetchedTimestamps.gpuTimeStamp;
|
||||||
|
int64_t adaptValue = static_cast<int64_t>(diff * deviceTimerResolution);
|
||||||
|
adaptValue = std::min(adaptValue, static_cast<int64_t>(timestampRefreshMinTimeoutNS));
|
||||||
|
if (diff * 1.0f / elapsedTicks > 0.05) {
|
||||||
|
adaptValue = adaptValue * (-1);
|
||||||
|
}
|
||||||
|
timestampRefreshTimeoutNS += adaptValue;
|
||||||
|
timestampRefreshTimeoutNS = std::max(timestampRefreshMinTimeoutNS, std::min(timestampRefreshMaxTimeoutNS, timestampRefreshTimeoutNS));
|
||||||
|
}
|
||||||
|
fetchedTimestamps = *timeStamp;
|
||||||
|
refreshTimestamps = false;
|
||||||
|
} else {
|
||||||
|
timeStamp->cpuTimeinNS = cpuTimeinNS;
|
||||||
|
UNRECOVERABLE_IF(deviceTimerResolution == 0);
|
||||||
|
timeStamp->gpuTimeStamp = fetchedTimestamps.gpuTimeStamp + static_cast<uint64_t>(cpuTimeDiffInNS / deviceTimerResolution);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DeviceTime::getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime, bool forceKmdCall) {
|
||||||
|
if (!getGpuCpuTimestamps(pGpuCpuTime, osTime, forceKmdCall)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
|
||||||
#define NSEC_PER_SEC (1000000000ULL)
|
#define NSEC_PER_SEC (1000000000ULL)
|
||||||
|
#define NSEC_PER_MSEC (NSEC_PER_SEC / 1000)
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
|
|
||||||
class OSInterface;
|
class OSInterface;
|
||||||
|
@ -25,15 +25,32 @@ class OSTime;
|
||||||
|
|
||||||
class DeviceTime {
|
class DeviceTime {
|
||||||
public:
|
public:
|
||||||
|
DeviceTime();
|
||||||
virtual ~DeviceTime() = default;
|
virtual ~DeviceTime() = default;
|
||||||
bool getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime);
|
bool getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime, bool forceKmdCall);
|
||||||
virtual bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime);
|
virtual bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime);
|
||||||
virtual double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const;
|
virtual double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const;
|
||||||
virtual uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const;
|
virtual uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const;
|
||||||
|
bool getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime, bool forceKmdCall);
|
||||||
|
void setDeviceTimerResolution(HardwareInfo const &hwInfo);
|
||||||
|
void setRefreshTimestampsFlag() {
|
||||||
|
refreshTimestamps = true;
|
||||||
|
}
|
||||||
|
uint64_t getTimestampRefreshTimeout() const {
|
||||||
|
return timestampRefreshTimeoutNS;
|
||||||
|
};
|
||||||
|
|
||||||
std::optional<uint64_t> initialGpuTimeStamp{};
|
std::optional<uint64_t> initialGpuTimeStamp{};
|
||||||
bool waitingForGpuTimeStampOverflow = false;
|
bool waitingForGpuTimeStampOverflow = false;
|
||||||
uint64_t gpuTimeStampOverflowCounter = 0;
|
uint64_t gpuTimeStampOverflowCounter = 0;
|
||||||
|
|
||||||
|
double deviceTimerResolution = 0;
|
||||||
|
const uint64_t timestampRefreshMinTimeoutNS = NSEC_PER_MSEC; // 1ms
|
||||||
|
const uint64_t timestampRefreshMaxTimeoutNS = NSEC_PER_SEC; // 1s
|
||||||
|
uint64_t timestampRefreshTimeoutNS = 0;
|
||||||
|
bool refreshTimestamps = true;
|
||||||
|
bool reusingTimestampsEnabled = false;
|
||||||
|
TimeStampData fetchedTimestamps{};
|
||||||
};
|
};
|
||||||
|
|
||||||
class OSTime {
|
class OSTime {
|
||||||
|
@ -47,8 +64,13 @@ class OSTime {
|
||||||
virtual uint64_t getCpuRawTimestamp();
|
virtual uint64_t getCpuRawTimestamp();
|
||||||
|
|
||||||
static double getDeviceTimerResolution(HardwareInfo const &hwInfo);
|
static double getDeviceTimerResolution(HardwareInfo const &hwInfo);
|
||||||
|
|
||||||
|
bool getGpuCpuTime(TimeStampData *gpuCpuTime, bool forceKmdCall) {
|
||||||
|
return deviceTime->getGpuCpuTime(gpuCpuTime, this, forceKmdCall);
|
||||||
|
}
|
||||||
|
|
||||||
bool getGpuCpuTime(TimeStampData *gpuCpuTime) {
|
bool getGpuCpuTime(TimeStampData *gpuCpuTime) {
|
||||||
return deviceTime->getGpuCpuTime(gpuCpuTime, this);
|
return deviceTime->getGpuCpuTime(gpuCpuTime, this, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const {
|
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const {
|
||||||
|
@ -61,6 +83,18 @@ class OSTime {
|
||||||
|
|
||||||
uint64_t getMaxGpuTimeStamp() const { return maxGpuTimeStamp; }
|
uint64_t getMaxGpuTimeStamp() const { return maxGpuTimeStamp; }
|
||||||
|
|
||||||
|
void setDeviceTimerResolution(HardwareInfo const &hwInfo) const {
|
||||||
|
deviceTime->setDeviceTimerResolution(hwInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
void setRefreshTimestampsFlag() const {
|
||||||
|
deviceTime->setRefreshTimestampsFlag();
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t getTimestampRefreshTimeout() const {
|
||||||
|
return deviceTime->getTimestampRefreshTimeout();
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
OSTime() = default;
|
OSTime() = default;
|
||||||
OSInterface *osInterface = nullptr;
|
OSInterface *osInterface = nullptr;
|
||||||
|
|
|
@ -18,15 +18,28 @@ class MockDeviceTimeDrm : public DeviceTimeDrm {
|
||||||
using DeviceTimeDrm::pDrm;
|
using DeviceTimeDrm::pDrm;
|
||||||
|
|
||||||
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
|
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
|
||||||
|
getGpuCpuTimeImplCalled++;
|
||||||
if (callBaseGetGpuCpuTimeImpl) {
|
if (callBaseGetGpuCpuTimeImpl) {
|
||||||
return DeviceTimeDrm::getGpuCpuTimeImpl(pGpuCpuTime, osTime);
|
return DeviceTimeDrm::getGpuCpuTimeImpl(pGpuCpuTime, osTime);
|
||||||
}
|
}
|
||||||
*pGpuCpuTime = gpuCpuTimeValue;
|
*pGpuCpuTime = gpuCpuTimeValue;
|
||||||
return getGpuCpuTimeImplResult;
|
return getGpuCpuTimeImplResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
|
||||||
|
if (callGetDynamicDeviceTimerResolution) {
|
||||||
|
return DeviceTimeDrm::getDynamicDeviceTimerResolution(hwInfo);
|
||||||
|
}
|
||||||
|
return dynamicDeviceTimerResolutionValue;
|
||||||
|
}
|
||||||
|
|
||||||
bool callBaseGetGpuCpuTimeImpl = true;
|
bool callBaseGetGpuCpuTimeImpl = true;
|
||||||
bool getGpuCpuTimeImplResult = true;
|
bool getGpuCpuTimeImplResult = true;
|
||||||
TimeStampData gpuCpuTimeValue{};
|
TimeStampData gpuCpuTimeValue{};
|
||||||
|
uint32_t getGpuCpuTimeImplCalled = 0;
|
||||||
|
|
||||||
|
bool callGetDynamicDeviceTimerResolution = false;
|
||||||
|
double dynamicDeviceTimerResolutionValue = 1.0;
|
||||||
};
|
};
|
||||||
|
|
||||||
class MockOSTimeLinux : public OSTimeLinux {
|
class MockOSTimeLinux : public OSTimeLinux {
|
||||||
|
|
|
@ -50,11 +50,11 @@ const char *MockDevice::getProductAbbrev() const {
|
||||||
MockDevice::MockDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex)
|
MockDevice::MockDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex)
|
||||||
: RootDevice(executionEnvironment, rootDeviceIndex) {
|
: RootDevice(executionEnvironment, rootDeviceIndex) {
|
||||||
UltDeviceFactory::initializeMemoryManager(*executionEnvironment);
|
UltDeviceFactory::initializeMemoryManager(*executionEnvironment);
|
||||||
|
auto &hwInfo = getHardwareInfo();
|
||||||
if (!getOSTime()) {
|
if (!getOSTime()) {
|
||||||
getRootDeviceEnvironmentRef().osTime = MockOSTime::create();
|
getRootDeviceEnvironmentRef().osTime = MockOSTime::create();
|
||||||
|
getRootDeviceEnvironmentRef().osTime->setDeviceTimerResolution(hwInfo);
|
||||||
}
|
}
|
||||||
auto &hwInfo = getHardwareInfo();
|
|
||||||
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfoAndInitHelpers(&hwInfo);
|
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfoAndInitHelpers(&hwInfo);
|
||||||
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->initGmm();
|
executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->initGmm();
|
||||||
if (!executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface) {
|
if (!executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface) {
|
||||||
|
|
|
@ -600,4 +600,5 @@ ForceSynchronizedDispatchMode = -1
|
||||||
DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus = -1
|
DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus = -1
|
||||||
ReadOnlyAllocationsTypeMask = 0
|
ReadOnlyAllocationsTypeMask = 0
|
||||||
EnableLogLevel = 6
|
EnableLogLevel = 6
|
||||||
|
EnableReusingGpuTimestamps = 0
|
||||||
# Please don't edit below this line
|
# Please don't edit below this line
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "shared/source/os_interface/linux/ioctl_helper.h"
|
#include "shared/source/os_interface/linux/ioctl_helper.h"
|
||||||
#include "shared/source/os_interface/linux/os_time_linux.h"
|
#include "shared/source/os_interface/linux/os_time_linux.h"
|
||||||
#include "shared/source/os_interface/os_interface.h"
|
#include "shared/source/os_interface/os_interface.h"
|
||||||
|
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||||
#include "shared/test/common/mocks/linux/mock_os_time_linux.h"
|
#include "shared/test/common/mocks/linux/mock_os_time_linux.h"
|
||||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||||
#include "shared/test/common/os_interface/linux/device_command_stream_fixture.h"
|
#include "shared/test/common/os_interface/linux/device_command_stream_fixture.h"
|
||||||
|
@ -18,7 +19,7 @@
|
||||||
|
|
||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
|
|
||||||
static int actualTime = 0;
|
static uint64_t actualTime = 0;
|
||||||
|
|
||||||
int getTimeFuncFalse(clockid_t clkId, struct timespec *tp) throw() {
|
int getTimeFuncFalse(clockid_t clkId, struct timespec *tp) throw() {
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -48,6 +49,8 @@ struct DrmTimeTest : public ::testing::Test {
|
||||||
osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
|
osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
|
||||||
osTime->setResolutionFunc(resolutionFuncTrue);
|
osTime->setResolutionFunc(resolutionFuncTrue);
|
||||||
osTime->setGetTimeFunc(getTimeFuncTrue);
|
osTime->setGetTimeFunc(getTimeFuncTrue);
|
||||||
|
auto hwInfo = rootDeviceEnvironment.getMutableHardwareInfo();
|
||||||
|
osTime->setDeviceTimerResolution(*hwInfo);
|
||||||
deviceTime = osTime->getDeviceTime();
|
deviceTime = osTime->getDeviceTime();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -202,7 +205,7 @@ TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlFailsThenDefaultRes
|
||||||
|
|
||||||
drm->getParamRetValue = 0;
|
drm->getParamRetValue = 0;
|
||||||
drm->ioctlRes = -1;
|
drm->ioctlRes = -1;
|
||||||
|
deviceTime->callGetDynamicDeviceTimerResolution = true;
|
||||||
auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo);
|
auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo);
|
||||||
EXPECT_DOUBLE_EQ(result, defaultResolution);
|
EXPECT_DOUBLE_EQ(result, defaultResolution);
|
||||||
}
|
}
|
||||||
|
@ -239,7 +242,7 @@ TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlSuccedsThenCorrectR
|
||||||
// 19200000 is frequency yelding 52.083ns resolution
|
// 19200000 is frequency yelding 52.083ns resolution
|
||||||
drm->getParamRetValue = 19200000;
|
drm->getParamRetValue = 19200000;
|
||||||
drm->ioctlRes = 0;
|
drm->ioctlRes = 0;
|
||||||
|
deviceTime->callGetDynamicDeviceTimerResolution = true;
|
||||||
auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo);
|
auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo);
|
||||||
EXPECT_DOUBLE_EQ(result, 52.08333333333333);
|
EXPECT_DOUBLE_EQ(result, 52.08333333333333);
|
||||||
}
|
}
|
||||||
|
@ -282,3 +285,118 @@ TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsReturne
|
||||||
EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp());
|
EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(DrmTimeTest, whenGettingGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) {
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
|
debugManager.flags.EnableReusingGpuTimestamps.set(true);
|
||||||
|
|
||||||
|
// Recreate mock to apply debug flag
|
||||||
|
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||||
|
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
|
||||||
|
osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
|
||||||
|
osTime->setResolutionFunc(resolutionFuncTrue);
|
||||||
|
osTime->setGetTimeFunc(getTimeFuncTrue);
|
||||||
|
osTime->setDeviceTimerResolution(*hwInfo);
|
||||||
|
auto deviceTime = osTime->getDeviceTime();
|
||||||
|
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||||
|
TimeStampData gpuCpuTime;
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||||
|
|
||||||
|
auto gpuTimestampBefore = gpuCpuTime.gpuTimeStamp;
|
||||||
|
auto cpuTimeBefore = actualTime;
|
||||||
|
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||||
|
|
||||||
|
auto gpuTimestampAfter = gpuCpuTime.gpuTimeStamp;
|
||||||
|
auto cpuTimeAfter = actualTime;
|
||||||
|
|
||||||
|
auto cpuTimeDiff = cpuTimeAfter - cpuTimeBefore;
|
||||||
|
auto deviceTimerResolution = deviceTime->getDynamicDeviceTimerResolution(*hwInfo);
|
||||||
|
auto gpuTimestampDiff = static_cast<uint64_t>(cpuTimeDiff / deviceTimerResolution);
|
||||||
|
EXPECT_EQ(gpuTimestampAfter, gpuTimestampBefore + gpuTimestampDiff);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DrmTimeTest, whenGettingGpuTimeStampValueAfterIntervalThenCallToKmdAndAdaptTimeout) {
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
|
debugManager.flags.EnableReusingGpuTimestamps.set(true);
|
||||||
|
|
||||||
|
// Recreate mock to apply debug flag
|
||||||
|
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||||
|
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
|
||||||
|
osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
|
||||||
|
osTime->setResolutionFunc(resolutionFuncTrue);
|
||||||
|
osTime->setGetTimeFunc(getTimeFuncTrue);
|
||||||
|
osTime->setDeviceTimerResolution(*hwInfo);
|
||||||
|
auto deviceTime = osTime->getDeviceTime();
|
||||||
|
deviceTime->callBaseGetGpuCpuTimeImpl = false;
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||||
|
|
||||||
|
const auto initialExpectedTimeoutNS = NSEC_PER_MSEC * 100;
|
||||||
|
EXPECT_EQ(initialExpectedTimeoutNS, osTime->getTimestampRefreshTimeout());
|
||||||
|
|
||||||
|
auto setTimestamps = [&](uint64_t cpuTimeNS, uint64_t cpuTimeFromKmdNS, uint64_t gpuTimestamp) {
|
||||||
|
actualTime = cpuTimeNS;
|
||||||
|
deviceTime->gpuCpuTimeValue.cpuTimeinNS = cpuTimeFromKmdNS;
|
||||||
|
deviceTime->gpuCpuTimeValue.gpuTimeStamp = gpuTimestamp;
|
||||||
|
};
|
||||||
|
setTimestamps(0, 0ull, 0ull);
|
||||||
|
|
||||||
|
TimeStampData gpuCpuTime;
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||||
|
|
||||||
|
// Error is smaller than 5%, timeout can be increased
|
||||||
|
auto newTimeAfterInterval = actualTime + osTime->getTimestampRefreshTimeout();
|
||||||
|
setTimestamps(newTimeAfterInterval, newTimeAfterInterval + 10, newTimeAfterInterval + 10);
|
||||||
|
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u);
|
||||||
|
|
||||||
|
auto diff = (gpuCpuTime.gpuTimeStamp - actualTime);
|
||||||
|
EXPECT_EQ(initialExpectedTimeoutNS + diff, osTime->getTimestampRefreshTimeout());
|
||||||
|
EXPECT_GT(initialExpectedTimeoutNS + diff, initialExpectedTimeoutNS);
|
||||||
|
|
||||||
|
// Error is larger than 5%, timeout should be decreased
|
||||||
|
newTimeAfterInterval = actualTime + osTime->getTimestampRefreshTimeout() + 10;
|
||||||
|
setTimestamps(newTimeAfterInterval, newTimeAfterInterval * 2, newTimeAfterInterval * 2);
|
||||||
|
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 3u);
|
||||||
|
|
||||||
|
EXPECT_LT(osTime->getTimestampRefreshTimeout(), initialExpectedTimeoutNS);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueAfterFlagSetThenCallToKmd) {
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||||
|
TimeStampData gpuCpuTime;
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||||
|
|
||||||
|
osTime->setRefreshTimestampsFlag();
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueWhenForceFlagSetThenCallToKmd) {
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||||
|
TimeStampData gpuCpuTime;
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||||
|
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime, true);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DrmTimeTest, givenReusingTimestampsDisabledWhenGetTimestampRefreshTimeoutThenReturnCorrectValue) {
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
|
debugManager.flags.EnableReusingGpuTimestamps.set(0);
|
||||||
|
// Recreate mock to apply debug flag
|
||||||
|
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||||
|
osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
|
||||||
|
osTime->setResolutionFunc(resolutionFuncTrue);
|
||||||
|
osTime->setGetTimeFunc(getTimeFuncTrue);
|
||||||
|
EXPECT_EQ(0ul, osTime->getTimestampRefreshTimeout());
|
||||||
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
#include "shared/source/execution_environment/root_device_environment.h"
|
#include "shared/source/execution_environment/root_device_environment.h"
|
||||||
#include "shared/source/os_interface/os_interface.h"
|
#include "shared/source/os_interface/os_interface.h"
|
||||||
|
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||||
#include "shared/test/common/mocks/mock_ostime.h"
|
#include "shared/test/common/mocks/mock_ostime.h"
|
||||||
#include "shared/test/common/mocks/windows/mock_os_time_win.h"
|
#include "shared/test/common/mocks/windows/mock_os_time_win.h"
|
||||||
|
@ -29,10 +30,19 @@ BOOL WINAPI queryPerformanceCounterMock(
|
||||||
class MockDeviceTimeWin : public MockDeviceTime {
|
class MockDeviceTimeWin : public MockDeviceTime {
|
||||||
public:
|
public:
|
||||||
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
|
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
|
||||||
|
getGpuCpuTimeImplCalled++;
|
||||||
*pGpuCpuTime = gpuCpuTimeValue;
|
*pGpuCpuTime = gpuCpuTimeValue;
|
||||||
return true;
|
return getGpuCpuTimeImplResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
|
||||||
|
return deviceTimerResolution;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool getGpuCpuTimeImplResult = true;
|
||||||
TimeStampData gpuCpuTimeValue{};
|
TimeStampData gpuCpuTimeValue{};
|
||||||
|
uint32_t getGpuCpuTimeImplCalled = 0;
|
||||||
|
double deviceTimerResolution = 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct OSTimeWinTest : public ::testing::Test {
|
struct OSTimeWinTest : public ::testing::Test {
|
||||||
|
@ -196,3 +206,156 @@ TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsRetur
|
||||||
EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp());
|
EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) {
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
|
debugManager.flags.EnableReusingGpuTimestamps.set(true);
|
||||||
|
osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock);
|
||||||
|
LARGE_INTEGER frequency = {};
|
||||||
|
frequency.QuadPart = NSEC_PER_SEC;
|
||||||
|
osTime->setFrequency(frequency);
|
||||||
|
|
||||||
|
auto deviceTime = new MockDeviceTimeWin();
|
||||||
|
osTime->deviceTime.reset(deviceTime);
|
||||||
|
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||||
|
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
|
||||||
|
osTime->setDeviceTimerResolution(*hwInfo);
|
||||||
|
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||||
|
TimeStampData gpuCpuTime;
|
||||||
|
deviceTime->gpuCpuTimeValue = {1u, 1u};
|
||||||
|
valueToSet.QuadPart = 1;
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||||
|
|
||||||
|
auto gpuTimestampBefore = gpuCpuTime.gpuTimeStamp;
|
||||||
|
auto cpuTimeBefore = gpuCpuTime.cpuTimeinNS;
|
||||||
|
valueToSet.QuadPart = 5;
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||||
|
|
||||||
|
auto gpuTimestampAfter = gpuCpuTime.gpuTimeStamp;
|
||||||
|
auto cpuTimeAfter = gpuCpuTime.cpuTimeinNS;
|
||||||
|
|
||||||
|
auto cpuTimeDiff = cpuTimeAfter - cpuTimeBefore;
|
||||||
|
|
||||||
|
auto deviceTimerResolution = deviceTime->getDynamicDeviceTimerResolution(*hwInfo);
|
||||||
|
auto gpuTimestampDiff = static_cast<uint64_t>(cpuTimeDiff / deviceTimerResolution);
|
||||||
|
EXPECT_EQ(gpuTimestampAfter, gpuTimestampBefore + gpuTimestampDiff);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(OSTimeWinTest, whenGettingGpuTimeStampValueAfterIntervalThenCallToKmdAndAdaptTimeout) {
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
|
debugManager.flags.EnableReusingGpuTimestamps.set(true);
|
||||||
|
osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock);
|
||||||
|
LARGE_INTEGER frequency = {};
|
||||||
|
frequency.QuadPart = NSEC_PER_SEC;
|
||||||
|
osTime->setFrequency(frequency);
|
||||||
|
|
||||||
|
// Recreate mock to apply debug flag
|
||||||
|
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||||
|
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
|
||||||
|
auto deviceTime = new MockDeviceTimeWin();
|
||||||
|
osTime->deviceTime.reset(deviceTime);
|
||||||
|
osTime->setDeviceTimerResolution(*hwInfo);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||||
|
|
||||||
|
const auto initialExpectedTimeoutNS = NSEC_PER_MSEC * 100;
|
||||||
|
EXPECT_EQ(initialExpectedTimeoutNS, osTime->getTimestampRefreshTimeout());
|
||||||
|
|
||||||
|
auto setTimestamps = [&](uint64_t cpuTimeNS, uint64_t cpuTimeFromKmdNS, uint64_t gpuTimestamp) {
|
||||||
|
valueToSet.QuadPart = cpuTimeNS;
|
||||||
|
deviceTime->gpuCpuTimeValue.cpuTimeinNS = cpuTimeFromKmdNS;
|
||||||
|
deviceTime->gpuCpuTimeValue.gpuTimeStamp = gpuTimestamp;
|
||||||
|
};
|
||||||
|
setTimestamps(0, 0ull, 0ull);
|
||||||
|
|
||||||
|
TimeStampData gpuCpuTime;
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||||
|
|
||||||
|
// Error is smaller than 5%, timeout can be increased
|
||||||
|
auto newTimeAfterInterval = valueToSet.QuadPart + osTime->getTimestampRefreshTimeout();
|
||||||
|
setTimestamps(newTimeAfterInterval, newTimeAfterInterval + 10, newTimeAfterInterval + 10);
|
||||||
|
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u);
|
||||||
|
|
||||||
|
auto diff = (gpuCpuTime.gpuTimeStamp - valueToSet.QuadPart);
|
||||||
|
EXPECT_EQ(initialExpectedTimeoutNS + diff, osTime->getTimestampRefreshTimeout());
|
||||||
|
EXPECT_GT(initialExpectedTimeoutNS + diff, initialExpectedTimeoutNS);
|
||||||
|
|
||||||
|
// Error is larger than 5%, timeout should be decreased
|
||||||
|
newTimeAfterInterval = valueToSet.QuadPart + osTime->getTimestampRefreshTimeout() + 10;
|
||||||
|
setTimestamps(newTimeAfterInterval, newTimeAfterInterval * 2, newTimeAfterInterval * 2);
|
||||||
|
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 3u);
|
||||||
|
|
||||||
|
EXPECT_LT(osTime->getTimestampRefreshTimeout(), initialExpectedTimeoutNS);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(OSTimeWinTest, whenGetGpuCpuTimeFailedThenReturnFalse) {
|
||||||
|
TimeStampData gpuCpuTime;
|
||||||
|
auto deviceTime = new MockDeviceTimeWin();
|
||||||
|
osTime->deviceTime.reset(deviceTime);
|
||||||
|
deviceTime->getGpuCpuTimeImplResult = false;
|
||||||
|
EXPECT_FALSE(osTime->getGpuCpuTime(&gpuCpuTime));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueAfterFlagSetThenCallToKmd) {
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
|
debugManager.flags.EnableReusingGpuTimestamps.set(true);
|
||||||
|
TimeStampData gpuCpuTime;
|
||||||
|
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||||
|
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
|
||||||
|
auto deviceTime = new MockDeviceTimeWin();
|
||||||
|
osTime->deviceTime.reset(deviceTime);
|
||||||
|
osTime->setDeviceTimerResolution(*hwInfo);
|
||||||
|
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||||
|
deviceTime->gpuCpuTimeValue = {1u, 1u};
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||||
|
|
||||||
|
osTime->setRefreshTimestampsFlag();
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWhenForceFlagSetThenCallToKmd) {
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
|
debugManager.flags.EnableReusingGpuTimestamps.set(true);
|
||||||
|
osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock);
|
||||||
|
LARGE_INTEGER frequency = {};
|
||||||
|
frequency.QuadPart = NSEC_PER_SEC;
|
||||||
|
osTime->setFrequency(frequency);
|
||||||
|
|
||||||
|
auto deviceTime = new MockDeviceTimeWin();
|
||||||
|
osTime->deviceTime.reset(deviceTime);
|
||||||
|
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||||
|
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
|
||||||
|
osTime->setDeviceTimerResolution(*hwInfo);
|
||||||
|
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
|
||||||
|
TimeStampData gpuCpuTime;
|
||||||
|
deviceTime->gpuCpuTimeValue = {1u, 1u};
|
||||||
|
valueToSet.QuadPart = 1;
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u);
|
||||||
|
|
||||||
|
valueToSet.QuadPart = 5;
|
||||||
|
osTime->getGpuCpuTime(&gpuCpuTime, true);
|
||||||
|
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(OSTimeWinTest, givenReusingTimestampsDisabledWhenGetTimestampRefreshTimeoutThenReturnCorrectValue) {
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
|
debugManager.flags.EnableReusingGpuTimestamps.set(0);
|
||||||
|
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||||
|
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
|
||||||
|
auto deviceTime = new MockDeviceTimeWin();
|
||||||
|
osTime->deviceTime.reset(deviceTime);
|
||||||
|
osTime->setDeviceTimerResolution(*hwInfo);
|
||||||
|
EXPECT_EQ(0ul, osTime->getTimestampRefreshTimeout());
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue