diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 7165ad3ce8..1985211583 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1048,7 +1048,7 @@ ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties) ze_result_t DeviceImp::getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) { NEO::TimeStampData queueTimeStamp; - bool retVal = this->neoDevice->getOSTime()->getGpuCpuTime(&queueTimeStamp); + bool retVal = this->neoDevice->getOSTime()->getGpuCpuTime(&queueTimeStamp, true); if (!retVal) return ZE_RESULT_ERROR_DEVICE_LOST; diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 39f7cc9320..16866d9144 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -489,7 +489,7 @@ void Event::setReferenceTs(uint64_t currentCpuTimeStamp) { const auto recalculate = (currentCpuTimeStamp - referenceTs.cpuTimeinNS) > timestampRefreshIntervalInNanoSec; if (referenceTs.cpuTimeinNS == 0 || recalculate) { - device->getNEODevice()->getOSTime()->getGpuCpuTime(&referenceTs); + device->getNEODevice()->getOSTime()->getGpuCpuTime(&referenceTs, true); } } diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index 5f8b266660..f3e73d761f 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -163,12 +163,14 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName, // if paramValue is NULL, it is ignored switch (paramName) { case CL_PROFILING_COMMAND_QUEUED: + calcProfilingData(); timestamp = getProfilingInfoData(queueTimeStamp); src = ×tamp; srcSize = sizeof(cl_ulong); break; case CL_PROFILING_COMMAND_SUBMIT: + calcProfilingData(); timestamp = getProfilingInfoData(submitTimeStamp); src = ×tamp; srcSize = sizeof(cl_ulong); @@ -365,15 +367,55 @@ bool Event::calcProfilingData() { return 
dataCalculated; } +void Event::updateTimestamp(ProfilingInfo ×tamp, uint64_t newGpuTimestamp) const { + auto &device = this->cmdQueue->getDevice(); + auto &gfxCoreHelper = device.getGfxCoreHelper(); + auto resolution = device.getDeviceInfo().profilingTimerResolution; + timestamp.gpuTimeStamp = newGpuTimestamp; + timestamp.gpuTimeInNs = gfxCoreHelper.getGpuTimeStampInNS(timestamp.gpuTimeStamp, resolution); + timestamp.cpuTimeInNs = timestamp.gpuTimeInNs; +} + +/** + * @brief Timestamp returned from GPU is initially 32 bits. This method performs a bitwise OR with + * the upper bits of another timestamp that tracks overflows, so the passed timestamp will have the correct overflow bits + * + * @param[out] timestamp Overflow bits will be added to this timestamp + * @param[in] timestampWithOverflow Timestamp that tracks overflows in remaining 32 most significant bits + * + */ +void Event::addOverflowToTimestamp(uint64_t ×tamp, uint64_t timestampWithOverflow) const { + auto &device = this->cmdQueue->getDevice(); + auto &gfxCoreHelper = device.getGfxCoreHelper(); + timestamp |= timestampWithOverflow & (maxNBitValue(64) - maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits())); +} + void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) { auto &device = this->cmdQueue->getDevice(); auto &gfxCoreHelper = device.getGfxCoreHelper(); auto resolution = device.getDeviceInfo().profilingTimerResolution; startTimeStamp.gpuTimeStamp = globalStartTS; - while (startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp) { - startTimeStamp.gpuTimeStamp += static_cast(1ULL << gfxCoreHelper.getGlobalTimeStampBits()); + addOverflowToTimestamp(startTimeStamp.gpuTimeStamp, submitTimeStamp.gpuTimeStamp); + if (startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp) { + auto diff = submitTimeStamp.gpuTimeStamp - startTimeStamp.gpuTimeStamp; + auto diffInNS = gfxCoreHelper.getGpuTimeStampInNS(diff, resolution); + auto osTime = 
device.getOSTime(); + if (diffInNS < osTime->getTimestampRefreshTimeout()) { + auto alignedSubmitTimestamp = startTimeStamp.gpuTimeStamp - 1; + auto alignedQueueTimestamp = startTimeStamp.gpuTimeStamp - 2; + if (startTimeStamp.gpuTimeStamp <= 2) { + alignedSubmitTimestamp = 0; + alignedQueueTimestamp = 0; + } + updateTimestamp(submitTimeStamp, alignedSubmitTimestamp); + updateTimestamp(queueTimeStamp, alignedQueueTimestamp); + osTime->setRefreshTimestampsFlag(); + } else { + startTimeStamp.gpuTimeStamp += static_cast(1ULL << gfxCoreHelper.getGlobalTimeStampBits()); + } } + UNRECOVERABLE_IF(startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp); auto gpuTicksDiff = startTimeStamp.gpuTimeStamp - submitTimeStamp.gpuTimeStamp; auto timeDiff = static_cast(gpuTicksDiff * resolution); startTimeStamp.cpuTimeInNs = submitTimeStamp.cpuTimeInNs + timeDiff; @@ -614,8 +656,8 @@ void Event::submitCommand(bool abortTasks) { this->setSubmitTimeStamp(); if (profilingCpuPath) { setStartTimeStamp(); - } else { } + if (perfCountersEnabled && perfCounterNode) { this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*perfCounterNode->getBaseGraphicsAllocation()); } diff --git a/opencl/source/event/event.h b/opencl/source/event/event.h index 3fe75d4eb3..385125d320 100644 --- a/opencl/source/event/event.h +++ b/opencl/source/event/event.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -360,6 +360,9 @@ class Event : public BaseObject<_cl_event>, public IDNode { bool isWaitForTimestampsEnabled() const; bool areTimestampsCompleted(); + void updateTimestamp(ProfilingInfo ×tamp, uint64_t newGpuTimestamp) const; + void addOverflowToTimestamp(uint64_t ×tamp, uint64_t timestampWithOverflow) const; + bool currentCmdQVirtualEvent = false; std::atomic cmdToSubmit{nullptr}; std::atomic submittedCmd{nullptr}; diff --git a/opencl/test/unit_test/device/device_timers_tests.cpp 
b/opencl/test/unit_test/device/device_timers_tests.cpp index 0a0835ba42..04d874c9fb 100644 --- a/opencl/test/unit_test/device/device_timers_tests.cpp +++ b/opencl/test/unit_test/device/device_timers_tests.cpp @@ -25,7 +25,9 @@ TEST(MockOSTime, WhenSleepingThenDeviceAndHostTimerAreIncreased) { cl_ulong hostTimestamp[2] = {0, 0}; auto mDev = MockDevice::createWithNewExecutionEnvironment(nullptr); - mDev->setOSTime(new MockOSTime()); + auto osTime = new MockOSTime(); + osTime->setDeviceTimerResolution(mDev->getHardwareInfo()); + mDev->setOSTime(osTime); mDev->getDeviceAndHostTimer( &deviceTimestamp[0], diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index 2f803ea023..6a9100c47b 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -889,11 +889,12 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle MockCommandQueue cmdQ(mockContext, pClDevice, props, false); MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution; + auto osTime = pClDevice->getDevice().getOSTime(); HwTimeStamps timestamp{}; timestamp.globalStartTS = 3; event.queueTimeStamp.gpuTimeStamp = 2; - event.submitTimeStamp.gpuTimeStamp = 4; + event.submitTimeStamp.gpuTimeStamp = osTime->getTimestampRefreshTimeout() + 4; event.submitTimeStamp.gpuTimeInNs = static_cast(4 * resolution); TagNode timestampNode{}; timestampNode.tagForCpuAccess = ×tamp; @@ -909,6 +910,76 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle event.timeStampNode = nullptr; } +TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWithinRecalculationLimitWhenCalculateStartTimestampThenAdjustTimestmaps) { + 
DebugManagerStateRestore dbgRestore; + debugManager.flags.EnableReusingGpuTimestamps.set(true); + + MockContext context{}; + auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + + const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; + MockCommandQueue cmdQ(&context, mockDevice.get(), props, false); + MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); + auto resolution = mockDevice->getDevice().getDeviceInfo().profilingTimerResolution; + + HwTimeStamps timestamp{}; + timestamp.globalStartTS = 3; + event.queueTimeStamp.gpuTimeStamp = 2; + event.submitTimeStamp.gpuTimeStamp = 4; + event.submitTimeStamp.gpuTimeInNs = static_cast(4 * resolution); + TagNode timestampNode{}; + timestampNode.tagForCpuAccess = ×tamp; + event.timeStampNode = ×tampNode; + + uint64_t start = 0u; + uint64_t submit = 0u; + uint64_t queue = 0u; + event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr); + event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submit, nullptr); + event.getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queue, nullptr); + + EXPECT_EQ(start, static_cast(timestamp.globalStartTS * resolution)); + EXPECT_EQ(submit, static_cast((timestamp.globalStartTS - 1) * resolution)); + EXPECT_EQ(queue, static_cast((timestamp.globalStartTS - 2) * resolution)); + + event.timeStampNode = nullptr; +} + +TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWithinRecalculationLimitAndStartTSBelowOneWhenCalculateStartTimestampThenAdjustTimestmaps) { + DebugManagerStateRestore dbgRestore; + debugManager.flags.EnableReusingGpuTimestamps.set(true); + + MockContext context{}; + auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + + const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; + MockCommandQueue 
cmdQ(&context, mockDevice.get(), props, false); + MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); + auto resolution = mockDevice->getDevice().getDeviceInfo().profilingTimerResolution; + + HwTimeStamps timestamp{}; + timestamp.globalStartTS = 2; + event.queueTimeStamp.gpuTimeStamp = 2; + event.submitTimeStamp.gpuTimeStamp = 4; + event.submitTimeStamp.gpuTimeInNs = static_cast(4 * resolution); + TagNode timestampNode{}; + timestampNode.tagForCpuAccess = ×tamp; + event.timeStampNode = ×tampNode; + + uint64_t start = 0u; + uint64_t submit = 0u; + uint64_t queue = 0u; + event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr); + event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submit, nullptr); + event.getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queue, nullptr); + + EXPECT_EQ(start, static_cast(timestamp.globalStartTS * resolution)); + EXPECT_EQ(submit, 0ul); + EXPECT_EQ(queue, 0ul); + + event.timeStampNode = nullptr; +} + TEST_F(InternalsEventTest, givenGpuHangWhenEventWaitReportsHangThenWaititingIsAbortedAndUnfinishedEventsHaveExecutionStatusEqualsToAbortedDueToGpuHang) { MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false); diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index 03d7ab139f..fe8622d6fd 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -61,6 +61,58 @@ struct ProfilingTests : public CommandEnqueueFixture, uint32_t crossThreadData[32]; }; +template +struct MockTagNode : public TagNode { + public: + using TagNode::tagForCpuAccess; + using TagNode::gfxAllocation; + MockTagNode() { + gfxAllocation = nullptr; + tagForCpuAccess = nullptr; + } + void returnTag() { + } +}; + +class 
MyOSDeviceTime : public DeviceTime { + double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { + EXPECT_FALSE(true); + return 1.0; + } + uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override { + EXPECT_FALSE(true); + return 0; + } + bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *) override { + EXPECT_FALSE(true); + return false; + } +}; + +class MyOSTime : public OSTime { + public: + static int instanceNum; + MyOSTime() { + instanceNum++; + this->deviceTime = std::make_unique(); + } + + bool getCpuTime(uint64_t *timeStamp) override { + EXPECT_FALSE(true); + return false; + }; + double getHostTimerResolution() const override { + EXPECT_FALSE(true); + return 0; + } + uint64_t getCpuRawTimestamp() override { + EXPECT_FALSE(true); + return 0; + } +}; + +int MyOSTime::instanceNum = 0; + HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithKernelWhenGetCSFromCmdQueueThenEnoughSpaceInCS) { typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; @@ -442,7 +494,7 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenNonBlockedEnqueueThenSetGpuPath) cl_event event; pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event); auto eventObj = static_cast(event); - EXPECT_TRUE(eventObj->isCPUProfilingPath() == CL_FALSE); + EXPECT_FALSE(eventObj->isCPUProfilingPath()); pCmdQ->finish(); uint64_t queued, submit; @@ -455,6 +507,7 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenNonBlockedEnqueueThenSetGpuPath) EXPECT_LT(0u, queued); EXPECT_LT(queued, submit); + eventObj->release(); } @@ -474,7 +527,17 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenSetGpuPath) { uint64_t queued = 0u, submit = 0u; cl_int retVal; - + HwTimeStamps timestamp; + timestamp.globalStartTS = 10; + timestamp.contextStartTS = 10; + timestamp.globalEndTS = 80; + timestamp.contextEndTS = 80; + MockTagNode timestampNode; + 
timestampNode.tagForCpuAccess = ×tamp; + static_cast *>(eventObj)->timeStampNode = ×tampNode; + if (eventObj->getTimestampPacketNodes()) { + eventObj->getTimestampPacketNodes()->releaseNodes(); + } retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(uint64_t), &queued, 0); EXPECT_EQ(CL_SUCCESS, retVal); retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0); @@ -483,60 +546,11 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenSetGpuPath) { EXPECT_LT(0u, queued); EXPECT_LT(queued, submit); + static_cast *>(eventObj)->timeStampNode = nullptr; eventObj->release(); userEventObj->release(); } -template -struct MockTagNode : public TagNode { - public: - using TagNode::tagForCpuAccess; - using TagNode::gfxAllocation; - MockTagNode() { - gfxAllocation = nullptr; - tagForCpuAccess = nullptr; - } -}; - -class MyOSDeviceTime : public DeviceTime { - double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { - EXPECT_FALSE(true); - return 1.0; - } - uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override { - EXPECT_FALSE(true); - return 0; - } - bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *) override { - EXPECT_FALSE(true); - return false; - } -}; - -class MyOSTime : public OSTime { - public: - static int instanceNum; - MyOSTime() { - instanceNum++; - this->deviceTime = std::make_unique(); - } - - bool getCpuTime(uint64_t *timeStamp) override { - EXPECT_FALSE(true); - return false; - }; - double getHostTimerResolution() const override { - EXPECT_FALSE(true); - return 0; - } - uint64_t getCpuRawTimestamp() override { - EXPECT_FALSE(true); - return 0; - } -}; - -int MyOSTime::instanceNum = 0; - using EventProfilingTest = ProfilingTests; HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) { diff --git 
a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index ce85b55da4..8f210a2786 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -346,6 +346,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution") DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.") DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.") +DECLARE_DEBUG_VARIABLE(bool, EnableReusingGpuTimestamps, false, "When enabled, GPU timestamp will be reused for next device time requests") DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object") DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled") DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100") diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index aadfdf542f..1a980c7956 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -652,7 +652,7 @@ EngineControl &Device::getEngine(uint32_t index) { bool Device::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const { TimeStampData timeStamp; - auto retVal = getOSTime()->getGpuCpuTime(&timeStamp); + auto retVal = getOSTime()->getGpuCpuTime(&timeStamp, true); if (retVal) { *hostTimestamp = timeStamp.cpuTimeinNS; if (debugManager.flags.EnableDeviceBasedTimestamps.get()) 
{ diff --git a/shared/source/execution_environment/root_device_environment.cpp b/shared/source/execution_environment/root_device_environment.cpp index a43689f729..b29408e4c1 100644 --- a/shared/source/execution_environment/root_device_environment.cpp +++ b/shared/source/execution_environment/root_device_environment.cpp @@ -124,6 +124,7 @@ void RootDeviceEnvironment::initGmm() { void RootDeviceEnvironment::initOsTime() { if (!osTime) { osTime = OSTime::create(osInterface.get()); + osTime->setDeviceTimerResolution(*hwInfo); } } diff --git a/shared/source/os_interface/os_time.cpp b/shared/source/os_interface/os_time.cpp index fa06f5a6db..427d365f1a 100644 --- a/shared/source/os_interface/os_time.cpp +++ b/shared/source/os_interface/os_time.cpp @@ -7,6 +7,8 @@ #include "shared/source/os_interface/os_time.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_info.h" #include @@ -17,6 +19,13 @@ double OSTime::getDeviceTimerResolution(HardwareInfo const &hwInfo) { return hwInfo.capabilityTable.defaultProfilingTimerResolution; }; +DeviceTime::DeviceTime() { + reusingTimestampsEnabled = debugManager.flags.EnableReusingGpuTimestamps.get(); + if (reusingTimestampsEnabled) { + timestampRefreshTimeoutNS = NSEC_PER_MSEC * 100; // 100ms + } +} + bool DeviceTime::getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) { pGpuCpuTime->cpuTimeinNS = 0; pGpuCpuTime->gpuTimeStamp = 0; @@ -31,8 +40,63 @@ uint64_t DeviceTime::getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) cons return static_cast(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo)); } -bool DeviceTime::getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime) { - if (!getGpuCpuTimeImpl(pGpuCpuTime, osTime)) { +void DeviceTime::setDeviceTimerResolution(HardwareInfo const &hwInfo) { + deviceTimerResolution = getDynamicDeviceTimerResolution(hwInfo); + if 
(debugManager.flags.OverrideProfilingTimerResolution.get() != -1) { + deviceTimerResolution = static_cast(debugManager.flags.OverrideProfilingTimerResolution.get()); + } +} + +/** + * @brief If this method is called within interval, GPU timestamp + * will be calculated based on CPU timestamp and previous GPU ticks + * to reduce amount of internal KMD calls. Interval is selected + * adaptively, based on misalignment between calculated ticks and actual ticks. + * + * @return returns false if internal call to KMD failed. True otherwise. + */ +bool DeviceTime::getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime, bool forceKmdCall) { + uint64_t cpuTimeinNS; + osTime->getCpuTime(&cpuTimeinNS); + + auto cpuTimeDiffInNS = cpuTimeinNS - fetchedTimestamps.cpuTimeinNS; + if (forceKmdCall || cpuTimeDiffInNS >= timestampRefreshTimeoutNS) { + refreshTimestamps = true; + } + + if (!reusingTimestampsEnabled || refreshTimestamps) { + if (!getGpuCpuTimeImpl(timeStamp, osTime)) { + return false; + } + if (!reusingTimestampsEnabled) { + return true; + } + if (initialGpuTimeStamp) { + UNRECOVERABLE_IF(deviceTimerResolution == 0); + auto calculatedTimestamp = fetchedTimestamps.gpuTimeStamp + static_cast(cpuTimeDiffInNS / deviceTimerResolution); + auto diff = abs(static_cast(timeStamp->gpuTimeStamp - calculatedTimestamp)); + auto elapsedTicks = timeStamp->gpuTimeStamp - fetchedTimestamps.gpuTimeStamp; + int64_t adaptValue = static_cast(diff * deviceTimerResolution); + adaptValue = std::min(adaptValue, static_cast(timestampRefreshMinTimeoutNS)); + if (diff * 1.0f / elapsedTicks > 0.05) { + adaptValue = adaptValue * (-1); + } + timestampRefreshTimeoutNS += adaptValue; + timestampRefreshTimeoutNS = std::max(timestampRefreshMinTimeoutNS, std::min(timestampRefreshMaxTimeoutNS, timestampRefreshTimeoutNS)); + } + fetchedTimestamps = *timeStamp; + refreshTimestamps = false; + } else { + timeStamp->cpuTimeinNS = cpuTimeinNS; + UNRECOVERABLE_IF(deviceTimerResolution == 0); + 
timeStamp->gpuTimeStamp = fetchedTimestamps.gpuTimeStamp + static_cast(cpuTimeDiffInNS / deviceTimerResolution); + } + + return true; +} + +bool DeviceTime::getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime, bool forceKmdCall) { + if (!getGpuCpuTimestamps(pGpuCpuTime, osTime, forceKmdCall)) { return false; } diff --git a/shared/source/os_interface/os_time.h b/shared/source/os_interface/os_time.h index 2e2e4b2211..6932232d22 100644 --- a/shared/source/os_interface/os_time.h +++ b/shared/source/os_interface/os_time.h @@ -10,7 +10,7 @@ #include #define NSEC_PER_SEC (1000000000ULL) - +#define NSEC_PER_MSEC (NSEC_PER_SEC / 1000) namespace NEO { class OSInterface; @@ -25,15 +25,32 @@ class OSTime; class DeviceTime { public: + DeviceTime(); virtual ~DeviceTime() = default; - bool getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime); + bool getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime, bool forceKmdCall); virtual bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime); virtual double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const; virtual uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const; + bool getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime, bool forceKmdCall); + void setDeviceTimerResolution(HardwareInfo const &hwInfo); + void setRefreshTimestampsFlag() { + refreshTimestamps = true; + } + uint64_t getTimestampRefreshTimeout() const { + return timestampRefreshTimeoutNS; + }; std::optional initialGpuTimeStamp{}; bool waitingForGpuTimeStampOverflow = false; uint64_t gpuTimeStampOverflowCounter = 0; + + double deviceTimerResolution = 0; + const uint64_t timestampRefreshMinTimeoutNS = NSEC_PER_MSEC; // 1ms + const uint64_t timestampRefreshMaxTimeoutNS = NSEC_PER_SEC; // 1s + uint64_t timestampRefreshTimeoutNS = 0; + bool refreshTimestamps = true; + bool reusingTimestampsEnabled = false; + TimeStampData fetchedTimestamps{}; }; class OSTime { @@ -47,8 +64,13 @@ class OSTime { virtual 
uint64_t getCpuRawTimestamp(); static double getDeviceTimerResolution(HardwareInfo const &hwInfo); + + bool getGpuCpuTime(TimeStampData *gpuCpuTime, bool forceKmdCall) { + return deviceTime->getGpuCpuTime(gpuCpuTime, this, forceKmdCall); + } + bool getGpuCpuTime(TimeStampData *gpuCpuTime) { - return deviceTime->getGpuCpuTime(gpuCpuTime, this); + return deviceTime->getGpuCpuTime(gpuCpuTime, this, false); } double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const { @@ -61,6 +83,18 @@ class OSTime { uint64_t getMaxGpuTimeStamp() const { return maxGpuTimeStamp; } + void setDeviceTimerResolution(HardwareInfo const &hwInfo) const { + deviceTime->setDeviceTimerResolution(hwInfo); + } + + void setRefreshTimestampsFlag() const { + deviceTime->setRefreshTimestampsFlag(); + } + + uint64_t getTimestampRefreshTimeout() const { + return deviceTime->getTimestampRefreshTimeout(); + } + protected: OSTime() = default; OSInterface *osInterface = nullptr; diff --git a/shared/test/common/mocks/linux/mock_os_time_linux.h b/shared/test/common/mocks/linux/mock_os_time_linux.h index f778f03228..a73f51e3b0 100644 --- a/shared/test/common/mocks/linux/mock_os_time_linux.h +++ b/shared/test/common/mocks/linux/mock_os_time_linux.h @@ -18,15 +18,28 @@ class MockDeviceTimeDrm : public DeviceTimeDrm { using DeviceTimeDrm::pDrm; bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override { + getGpuCpuTimeImplCalled++; if (callBaseGetGpuCpuTimeImpl) { return DeviceTimeDrm::getGpuCpuTimeImpl(pGpuCpuTime, osTime); } *pGpuCpuTime = gpuCpuTimeValue; return getGpuCpuTimeImplResult; } + + double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { + if (callGetDynamicDeviceTimerResolution) { + return DeviceTimeDrm::getDynamicDeviceTimerResolution(hwInfo); + } + return dynamicDeviceTimerResolutionValue; + } + bool callBaseGetGpuCpuTimeImpl = true; bool getGpuCpuTimeImplResult = true; TimeStampData gpuCpuTimeValue{}; + uint32_t getGpuCpuTimeImplCalled 
= 0; + + bool callGetDynamicDeviceTimerResolution = false; + double dynamicDeviceTimerResolutionValue = 1.0; }; class MockOSTimeLinux : public OSTimeLinux { diff --git a/shared/test/common/mocks/mock_device.cpp b/shared/test/common/mocks/mock_device.cpp index ebf6d0f77c..be85053e9a 100644 --- a/shared/test/common/mocks/mock_device.cpp +++ b/shared/test/common/mocks/mock_device.cpp @@ -50,11 +50,11 @@ const char *MockDevice::getProductAbbrev() const { MockDevice::MockDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex) : RootDevice(executionEnvironment, rootDeviceIndex) { UltDeviceFactory::initializeMemoryManager(*executionEnvironment); - + auto &hwInfo = getHardwareInfo(); if (!getOSTime()) { getRootDeviceEnvironmentRef().osTime = MockOSTime::create(); + getRootDeviceEnvironmentRef().osTime->setDeviceTimerResolution(hwInfo); } - auto &hwInfo = getHardwareInfo(); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfoAndInitHelpers(&hwInfo); executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->initGmm(); if (!executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface) { diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index b3f273e205..04f122e510 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -600,4 +600,5 @@ ForceSynchronizedDispatchMode = -1 DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus = -1 ReadOnlyAllocationsTypeMask = 0 EnableLogLevel = 6 +EnableReusingGpuTimestamps = 0 # Please don't edit below this line diff --git a/shared/test/unit_test/os_interface/linux/os_time_test.cpp b/shared/test/unit_test/os_interface/linux/os_time_test.cpp index b493299d86..e563b5483f 100644 --- a/shared/test/unit_test/os_interface/linux/os_time_test.cpp +++ b/shared/test/unit_test/os_interface/linux/os_time_test.cpp @@ -9,6 +9,7 @@ #include 
"shared/source/os_interface/linux/ioctl_helper.h" #include "shared/source/os_interface/linux/os_time_linux.h" #include "shared/source/os_interface/os_interface.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/linux/mock_os_time_linux.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h" @@ -18,7 +19,7 @@ #include -static int actualTime = 0; +static uint64_t actualTime = 0; int getTimeFuncFalse(clockid_t clkId, struct timespec *tp) throw() { return -1; @@ -48,6 +49,8 @@ struct DrmTimeTest : public ::testing::Test { osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface); osTime->setResolutionFunc(resolutionFuncTrue); osTime->setGetTimeFunc(getTimeFuncTrue); + auto hwInfo = rootDeviceEnvironment.getMutableHardwareInfo(); + osTime->setDeviceTimerResolution(*hwInfo); deviceTime = osTime->getDeviceTime(); } @@ -202,7 +205,7 @@ TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlFailsThenDefaultRes drm->getParamRetValue = 0; drm->ioctlRes = -1; - + deviceTime->callGetDynamicDeviceTimerResolution = true; auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo); EXPECT_DOUBLE_EQ(result, defaultResolution); } @@ -239,7 +242,7 @@ TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlSuccedsThenCorrectR // 19200000 is frequency yelding 52.083ns resolution drm->getParamRetValue = 19200000; drm->ioctlRes = 0; - + deviceTime->callGetDynamicDeviceTimerResolution = true; auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo); EXPECT_DOUBLE_EQ(result, 52.08333333333333); } @@ -282,3 +285,118 @@ TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsReturne EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp()); } } + +TEST_F(DrmTimeTest, whenGettingGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) { + DebugManagerStateRestore restore; + 
debugManager.flags.EnableReusingGpuTimestamps.set(true); + + // Recreate mock to apply debug flag + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface); + osTime->setResolutionFunc(resolutionFuncTrue); + osTime->setGetTimeFunc(getTimeFuncTrue); + osTime->setDeviceTimerResolution(*hwInfo); + auto deviceTime = osTime->getDeviceTime(); + + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); + TimeStampData gpuCpuTime; + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + auto gpuTimestampBefore = gpuCpuTime.gpuTimeStamp; + auto cpuTimeBefore = actualTime; + + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + auto gpuTimestampAfter = gpuCpuTime.gpuTimeStamp; + auto cpuTimeAfter = actualTime; + + auto cpuTimeDiff = cpuTimeAfter - cpuTimeBefore; + auto deviceTimerResolution = deviceTime->getDynamicDeviceTimerResolution(*hwInfo); + auto gpuTimestampDiff = static_cast(cpuTimeDiff / deviceTimerResolution); + EXPECT_EQ(gpuTimestampAfter, gpuTimestampBefore + gpuTimestampDiff); +} + +TEST_F(DrmTimeTest, whenGettingGpuTimeStampValueAfterIntervalThenCallToKmdAndAdaptTimeout) { + DebugManagerStateRestore restore; + debugManager.flags.EnableReusingGpuTimestamps.set(true); + + // Recreate mock to apply debug flag + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface); + osTime->setResolutionFunc(resolutionFuncTrue); + osTime->setGetTimeFunc(getTimeFuncTrue); + osTime->setDeviceTimerResolution(*hwInfo); + auto deviceTime = osTime->getDeviceTime(); + deviceTime->callBaseGetGpuCpuTimeImpl = false; + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); + + const auto initialExpectedTimeoutNS = 
NSEC_PER_MSEC * 100; + EXPECT_EQ(initialExpectedTimeoutNS, osTime->getTimestampRefreshTimeout()); + + auto setTimestamps = [&](uint64_t cpuTimeNS, uint64_t cpuTimeFromKmdNS, uint64_t gpuTimestamp) { + actualTime = cpuTimeNS; + deviceTime->gpuCpuTimeValue.cpuTimeinNS = cpuTimeFromKmdNS; + deviceTime->gpuCpuTimeValue.gpuTimeStamp = gpuTimestamp; + }; + setTimestamps(0, 0ull, 0ull); + + TimeStampData gpuCpuTime; + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + // Error is smaller than 5%, timeout can be increased + auto newTimeAfterInterval = actualTime + osTime->getTimestampRefreshTimeout(); + setTimestamps(newTimeAfterInterval, newTimeAfterInterval + 10, newTimeAfterInterval + 10); + + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); + + auto diff = (gpuCpuTime.gpuTimeStamp - actualTime); + EXPECT_EQ(initialExpectedTimeoutNS + diff, osTime->getTimestampRefreshTimeout()); + EXPECT_GT(initialExpectedTimeoutNS + diff, initialExpectedTimeoutNS); + + // Error is larger than 5%, timeout should be decreased + newTimeAfterInterval = actualTime + osTime->getTimestampRefreshTimeout() + 10; + setTimestamps(newTimeAfterInterval, newTimeAfterInterval * 2, newTimeAfterInterval * 2); + + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 3u); + + EXPECT_LT(osTime->getTimestampRefreshTimeout(), initialExpectedTimeoutNS); +} + +TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueAfterFlagSetThenCallToKmd) { + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); + TimeStampData gpuCpuTime; + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + osTime->setRefreshTimestampsFlag(); + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); +} + +TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueWhenForceFlagSetThenCallToKmd) { + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); + TimeStampData 
gpuCpuTime; + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + osTime->getGpuCpuTime(&gpuCpuTime, true); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); +} + +TEST_F(DrmTimeTest, givenReusingTimestampsDisabledWhenGetTimestampRefreshTimeoutThenReturnCorrectValue) { + DebugManagerStateRestore restore; + debugManager.flags.EnableReusingGpuTimestamps.set(0); + // Recreate mock to apply debug flag + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface); + osTime->setResolutionFunc(resolutionFuncTrue); + osTime->setGetTimeFunc(getTimeFuncTrue); + EXPECT_EQ(0ul, osTime->getTimestampRefreshTimeout()); +} diff --git a/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp b/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp index 8f1464df66..3a9315fe6d 100644 --- a/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp @@ -7,6 +7,7 @@ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/os_interface/os_interface.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_ostime.h" #include "shared/test/common/mocks/windows/mock_os_time_win.h" @@ -29,10 +30,19 @@ BOOL WINAPI queryPerformanceCounterMock( class MockDeviceTimeWin : public MockDeviceTime { public: bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override { + getGpuCpuTimeImplCalled++; *pGpuCpuTime = gpuCpuTimeValue; - return true; + return getGpuCpuTimeImplResult; } + + double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { + return deviceTimerResolution; + } + + bool getGpuCpuTimeImplResult = true; TimeStampData gpuCpuTimeValue{}; + uint32_t 
getGpuCpuTimeImplCalled = 0; + double deviceTimerResolution = 1; }; struct OSTimeWinTest : public ::testing::Test { @@ -196,3 +206,156 @@ TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsRetur EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp()); } } + +TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) { + DebugManagerStateRestore restore; + debugManager.flags.EnableReusingGpuTimestamps.set(true); + osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock); + LARGE_INTEGER frequency = {}; + frequency.QuadPart = NSEC_PER_SEC; + osTime->setFrequency(frequency); + + auto deviceTime = new MockDeviceTimeWin(); + osTime->deviceTime.reset(deviceTime); + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + osTime->setDeviceTimerResolution(*hwInfo); + + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); + TimeStampData gpuCpuTime; + deviceTime->gpuCpuTimeValue = {1u, 1u}; + valueToSet.QuadPart = 1; + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + auto gpuTimestampBefore = gpuCpuTime.gpuTimeStamp; + auto cpuTimeBefore = gpuCpuTime.cpuTimeinNS; + valueToSet.QuadPart = 5; + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + auto gpuTimestampAfter = gpuCpuTime.gpuTimeStamp; + auto cpuTimeAfter = gpuCpuTime.cpuTimeinNS; + + auto cpuTimeDiff = cpuTimeAfter - cpuTimeBefore; + + auto deviceTimerResolution = deviceTime->getDynamicDeviceTimerResolution(*hwInfo); + auto gpuTimestampDiff = static_cast<uint64_t>(cpuTimeDiff / deviceTimerResolution); + EXPECT_EQ(gpuTimestampAfter, gpuTimestampBefore + gpuTimestampDiff); +} + +TEST_F(OSTimeWinTest, whenGettingGpuTimeStampValueAfterIntervalThenCallToKmdAndAdaptTimeout) { + DebugManagerStateRestore restore; + debugManager.flags.EnableReusingGpuTimestamps.set(true); +
osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock); + LARGE_INTEGER frequency = {}; + frequency.QuadPart = NSEC_PER_SEC; + osTime->setFrequency(frequency); + + // Recreate mock to apply debug flag + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + auto deviceTime = new MockDeviceTimeWin(); + osTime->deviceTime.reset(deviceTime); + osTime->setDeviceTimerResolution(*hwInfo); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); + + const auto initialExpectedTimeoutNS = NSEC_PER_MSEC * 100; + EXPECT_EQ(initialExpectedTimeoutNS, osTime->getTimestampRefreshTimeout()); + + auto setTimestamps = [&](uint64_t cpuTimeNS, uint64_t cpuTimeFromKmdNS, uint64_t gpuTimestamp) { + valueToSet.QuadPart = cpuTimeNS; + deviceTime->gpuCpuTimeValue.cpuTimeinNS = cpuTimeFromKmdNS; + deviceTime->gpuCpuTimeValue.gpuTimeStamp = gpuTimestamp; + }; + setTimestamps(0, 0ull, 0ull); + + TimeStampData gpuCpuTime; + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + // Error is smaller than 5%, timeout can be increased + auto newTimeAfterInterval = valueToSet.QuadPart + osTime->getTimestampRefreshTimeout(); + setTimestamps(newTimeAfterInterval, newTimeAfterInterval + 10, newTimeAfterInterval + 10); + + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); + + auto diff = (gpuCpuTime.gpuTimeStamp - valueToSet.QuadPart); + EXPECT_EQ(initialExpectedTimeoutNS + diff, osTime->getTimestampRefreshTimeout()); + EXPECT_GT(initialExpectedTimeoutNS + diff, initialExpectedTimeoutNS); + + // Error is larger than 5%, timeout should be decreased + newTimeAfterInterval = valueToSet.QuadPart + osTime->getTimestampRefreshTimeout() + 10; + setTimestamps(newTimeAfterInterval, newTimeAfterInterval * 2, newTimeAfterInterval * 2); + + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 3u); + + 
EXPECT_LT(osTime->getTimestampRefreshTimeout(), initialExpectedTimeoutNS); +} + +TEST_F(OSTimeWinTest, whenGetGpuCpuTimeFailedThenReturnFalse) { + TimeStampData gpuCpuTime; + auto deviceTime = new MockDeviceTimeWin(); + osTime->deviceTime.reset(deviceTime); + deviceTime->getGpuCpuTimeImplResult = false; + EXPECT_FALSE(osTime->getGpuCpuTime(&gpuCpuTime)); +} + +TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueAfterFlagSetThenCallToKmd) { + DebugManagerStateRestore restore; + debugManager.flags.EnableReusingGpuTimestamps.set(true); + TimeStampData gpuCpuTime; + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + auto deviceTime = new MockDeviceTimeWin(); + osTime->deviceTime.reset(deviceTime); + osTime->setDeviceTimerResolution(*hwInfo); + + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); + deviceTime->gpuCpuTimeValue = {1u, 1u}; + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + osTime->setRefreshTimestampsFlag(); + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); +} + +TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWhenForceFlagSetThenCallToKmd) { + DebugManagerStateRestore restore; + debugManager.flags.EnableReusingGpuTimestamps.set(true); + osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock); + LARGE_INTEGER frequency = {}; + frequency.QuadPart = NSEC_PER_SEC; + osTime->setFrequency(frequency); + + auto deviceTime = new MockDeviceTimeWin(); + osTime->deviceTime.reset(deviceTime); + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + osTime->setDeviceTimerResolution(*hwInfo); + + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); + TimeStampData gpuCpuTime; + deviceTime->gpuCpuTimeValue = {1u, 1u}; + valueToSet.QuadPart = 1; + osTime->getGpuCpuTime(&gpuCpuTime); + 
EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + valueToSet.QuadPart = 5; + osTime->getGpuCpuTime(&gpuCpuTime, true); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); +} + +TEST_F(OSTimeWinTest, givenReusingTimestampsDisabledWhenGetTimestampRefreshTimeoutThenReturnCorrectValue) { + DebugManagerStateRestore restore; + debugManager.flags.EnableReusingGpuTimestamps.set(0); + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + auto deviceTime = new MockDeviceTimeWin(); + osTime->deviceTime.reset(deviceTime); + osTime->setDeviceTimerResolution(*hwInfo); + EXPECT_EQ(0ul, osTime->getTimestampRefreshTimeout()); +}