From 5e92d530deef9972218a9893de61d616bd328c16 Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Tue, 28 May 2024 11:12:18 +0000 Subject: [PATCH] performance: Reuse GPU timestamps by default on Windows Related-To: NEO-10615 Signed-off-by: Szymon Morek --- opencl/source/event/event.cpp | 38 ++++++++++--------- opencl/test/unit_test/event/event_tests.cpp | 20 ++-------- .../debug_settings/debug_variables_base.inl | 2 +- .../os_interface/linux/device_time_drm.cpp | 11 +++++- .../os_interface/linux/device_time_drm.h | 1 + shared/source/os_interface/os_time.cpp | 17 +++++---- shared/source/os_interface/os_time.h | 5 +-- shared/test/common/test_files/igdrcl.config | 2 +- .../execution_environment_tests.cpp | 9 +++++ .../os_interface/linux/os_time_test.cpp | 10 ++++- .../windows/os_time_win_tests.cpp | 34 ++++++++++------- 11 files changed, 87 insertions(+), 62 deletions(-) diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index 735bb3b0a1..7509f9c2d9 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -398,26 +398,30 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con auto &gfxCoreHelper = device.getGfxCoreHelper(); auto resolution = device.getDeviceInfo().profilingTimerResolution; - startTimeStamp.gpuTimeStamp = globalStartTS; - addOverflowToTimestamp(startTimeStamp.gpuTimeStamp, submitTimeStamp.gpuTimeStamp); - if (startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp) { - auto diff = submitTimeStamp.gpuTimeStamp - startTimeStamp.gpuTimeStamp; - auto diffInNS = gfxCoreHelper.getGpuTimeStampInNS(diff, resolution); - auto osTime = device.getOSTime(); - if (diffInNS < osTime->getTimestampRefreshTimeout()) { - auto alignedSubmitTimestamp = startTimeStamp.gpuTimeStamp - 1; - auto alignedQueueTimestamp = startTimeStamp.gpuTimeStamp - 2; - if (startTimeStamp.gpuTimeStamp <= 2) { - alignedSubmitTimestamp = 0; - alignedQueueTimestamp = 0; + // Calculate startTimestamp only if it was not already set on CPU + if (startTimeStamp.cpuTimeInNs == 0) { + startTimeStamp.gpuTimeStamp = globalStartTS; + addOverflowToTimestamp(startTimeStamp.gpuTimeStamp, submitTimeStamp.gpuTimeStamp); + if (startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp) { + auto diff = submitTimeStamp.gpuTimeStamp - startTimeStamp.gpuTimeStamp; + auto diffInNS = gfxCoreHelper.getGpuTimeStampInNS(diff, resolution); + auto osTime = device.getOSTime(); + if (diffInNS < osTime->getTimestampRefreshTimeout()) { + auto alignedSubmitTimestamp = startTimeStamp.gpuTimeStamp - 1; + auto alignedQueueTimestamp = startTimeStamp.gpuTimeStamp - 2; + if (startTimeStamp.gpuTimeStamp <= 2) { + alignedSubmitTimestamp = 0; + alignedQueueTimestamp = 0; + } + updateTimestamp(submitTimeStamp, alignedSubmitTimestamp); + updateTimestamp(queueTimeStamp, alignedQueueTimestamp); + osTime->setRefreshTimestampsFlag(); + } else { + startTimeStamp.gpuTimeStamp += static_cast(1ULL << gfxCoreHelper.getGlobalTimeStampBits()); } - updateTimestamp(submitTimeStamp, alignedSubmitTimestamp); - updateTimestamp(queueTimeStamp, alignedQueueTimestamp); - osTime->setRefreshTimestampsFlag(); - } else { - startTimeStamp.gpuTimeStamp += static_cast(1ULL << gfxCoreHelper.getGlobalTimeStampBits()); } } + UNRECOVERABLE_IF(startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp); auto gpuTicksDiff = startTimeStamp.gpuTimeStamp - submitTimeStamp.gpuTimeStamp; auto timeDiff = static_cast(gpuTicksDiff * resolution); diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index ba315a820f..cc3c5a6b50 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -911,16 +911,10 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle } TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWithinRecalculationLimitWhenCalculateStartTimestampThenAdjustTimestmaps) { - DebugManagerStateRestore dbgRestore; - debugManager.flags.EnableReusingGpuTimestamps.set(true); - - MockContext context{}; - auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; - MockCommandQueue cmdQ(&context, mockDevice.get(), props, false); + MockCommandQueue cmdQ(mockContext, pClDevice, props, false); MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); - auto resolution = mockDevice->getDevice().getDeviceInfo().profilingTimerResolution; + auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution; HwTimeStamps timestamp{}; timestamp.globalStartTS = 3; @@ -946,16 +940,10 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle } TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWithinRecalculationLimitAndStartTSBelowOneWhenCalculateStartTimestampThenAdjustTimestmaps) { - DebugManagerStateRestore dbgRestore; - debugManager.flags.EnableReusingGpuTimestamps.set(true); - - MockContext context{}; - auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; - MockCommandQueue cmdQ(&context, mockDevice.get(), props, false); + MockCommandQueue cmdQ(mockContext, pClDevice, props, false); MockEvent event(&cmdQ, CL_COMPLETE, 0, 0); - auto resolution = mockDevice->getDevice().getDeviceInfo().profilingTimerResolution; + auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution; HwTimeStamps timestamp{}; timestamp.globalStartTS = 2; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index cc898b6aa2..08e45ec112 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -350,7 +350,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution") DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.") DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.") -DECLARE_DEBUG_VARIABLE(bool, EnableReusingGpuTimestamps, false, "When enabled, GPU timestamp will be reused for next device time requests") +DECLARE_DEBUG_VARIABLE(int32_t, EnableReusingGpuTimestamps, -1, "Reuse GPU timestamp for next device time requests. -1: os-specific, 0: disable, 1: enable") DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object") DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled") DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100") diff --git a/shared/source/os_interface/linux/device_time_drm.cpp b/shared/source/os_interface/linux/device_time_drm.cpp index aa644644b3..2a19d0c945 100644 --- a/shared/source/os_interface/linux/device_time_drm.cpp +++ b/shared/source/os_interface/linux/device_time_drm.cpp @@ -7,6 +7,7 @@ #include "shared/source/os_interface/linux/device_time_drm.h" +#include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/drm_wrappers.h" @@ -50,4 +51,12 @@ uint64_t DeviceTimeDrm::getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) c return static_cast(nanosecondsPerSecond / OSTime::getDeviceTimerResolution(hwInfo)); } -} // namespace NEO \ No newline at end of file +bool DeviceTimeDrm::isTimestampsRefreshEnabled() const { + bool timestampsRefreshEnabled = false; + if (debugManager.flags.EnableReusingGpuTimestamps.get() != -1) { + timestampsRefreshEnabled = debugManager.flags.EnableReusingGpuTimestamps.get(); + } + return timestampsRefreshEnabled; +} + +} // namespace NEO diff --git a/shared/source/os_interface/linux/device_time_drm.h b/shared/source/os_interface/linux/device_time_drm.h index 82503f89c3..57c4e304a2 100644 --- a/shared/source/os_interface/linux/device_time_drm.h +++ b/shared/source/os_interface/linux/device_time_drm.h @@ -17,6 +17,7 @@ class DeviceTimeDrm : public DeviceTime { bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override; double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override; uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override; + bool isTimestampsRefreshEnabled() const override; protected: Drm *pDrm = nullptr; diff --git a/shared/source/os_interface/os_time.cpp b/shared/source/os_interface/os_time.cpp index 427d365f1a..472e698d10 100644 --- a/shared/source/os_interface/os_time.cpp +++ b/shared/source/os_interface/os_time.cpp @@ -19,13 +19,6 @@ double OSTime::getDeviceTimerResolution(HardwareInfo const &hwInfo) { return hwInfo.capabilityTable.defaultProfilingTimerResolution; }; -DeviceTime::DeviceTime() { - reusingTimestampsEnabled = debugManager.flags.EnableReusingGpuTimestamps.get(); - if (reusingTimestampsEnabled) { - timestampRefreshTimeoutNS = NSEC_PER_MSEC * 100; // 100ms - } -} - bool DeviceTime::getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) { pGpuCpuTime->cpuTimeinNS = 0; pGpuCpuTime->gpuTimeStamp = 0; @@ -47,6 +40,14 @@ void DeviceTime::setDeviceTimerResolution(HardwareInfo const &hwInfo) { } } +bool DeviceTime::isTimestampsRefreshEnabled() const { + bool timestampsRefreshEnabled = true; + if (debugManager.flags.EnableReusingGpuTimestamps.get() != -1) { + timestampsRefreshEnabled = debugManager.flags.EnableReusingGpuTimestamps.get(); + } + return timestampsRefreshEnabled; +} + /** * @brief If this method is called within interval, GPU timestamp * will be calculated based on CPU timestamp and previous GPU ticks @@ -63,7 +64,7 @@ bool DeviceTime::getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime, b if (forceKmdCall || cpuTimeDiffInNS >= timestampRefreshTimeoutNS) { refreshTimestamps = true; } - + bool reusingTimestampsEnabled = isTimestampsRefreshEnabled(); if (!reusingTimestampsEnabled || refreshTimestamps) { if (!getGpuCpuTimeImpl(timeStamp, osTime)) { return false; diff --git a/shared/source/os_interface/os_time.h b/shared/source/os_interface/os_time.h index 6932232d22..762ce5f40c 100644 --- a/shared/source/os_interface/os_time.h +++ b/shared/source/os_interface/os_time.h @@ -25,12 +25,12 @@ class OSTime; class DeviceTime { public: - DeviceTime(); virtual ~DeviceTime() = default; bool getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime, bool forceKmdCall); virtual bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime); virtual double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const; virtual uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const; + virtual bool isTimestampsRefreshEnabled() const; bool getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime, bool forceKmdCall); void setDeviceTimerResolution(HardwareInfo const &hwInfo); void setRefreshTimestampsFlag() { @@ -47,9 +47,8 @@ class DeviceTime { double deviceTimerResolution = 0; const uint64_t timestampRefreshMinTimeoutNS = NSEC_PER_MSEC; // 1ms const uint64_t timestampRefreshMaxTimeoutNS = NSEC_PER_SEC; // 1s - uint64_t timestampRefreshTimeoutNS = 0; + uint64_t timestampRefreshTimeoutNS = NSEC_PER_MSEC * 100; // 100ms bool refreshTimestamps = true; - bool reusingTimestampsEnabled = false; TimeStampData fetchedTimestamps{}; }; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index b8b6639c7f..e67d8f70e2 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -603,7 +603,7 @@ ForceSynchronizedDispatchMode = -1 DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus = -1 ReadOnlyAllocationsTypeMask = 0 EnableLogLevel = 6 -EnableReusingGpuTimestamps = 0 +EnableReusingGpuTimestamps = -1 ForceCopyOperationOffloadForComputeCmdList = -1 SecondaryContextEngineTypeMask = -1 # Please don't edit below this line diff --git a/shared/test/unit_test/execution_environment/execution_environment_tests.cpp b/shared/test/unit_test/execution_environment/execution_environment_tests.cpp index 6e1fef3826..e86c255747 100644 --- a/shared/test/unit_test/execution_environment/execution_environment_tests.cpp +++ b/shared/test/unit_test/execution_environment/execution_environment_tests.cpp @@ -86,6 +86,7 @@ TEST(RootDeviceEnvironment, givenExecutionEnvironmentWhenInitializeAubCenterIsCa } TEST(RootDeviceEnvironment, whenCreatingRootDeviceEnvironmentThenCreateOsAgnosticOsTime) { + DebugManagerStateRestore dbgRestore; MockExecutionEnvironment executionEnvironment; executionEnvironment.rootDeviceEnvironments[0]->setHwInfoAndInitHelpers(defaultHwInfo.get()); auto profilingTimerResolution = defaultHwInfo->capabilityTable.defaultProfilingTimerResolution; @@ -109,6 +110,14 @@ TEST(RootDeviceEnvironment, whenCreatingRootDeviceEnvironmentThenCreateOsAgnosti EXPECT_EQ(profilingTimerResolution, rootDeviceEnvironment->osTime->getDynamicDeviceTimerResolution(*defaultHwInfo)); EXPECT_EQ(static_cast(1000000000.0 / OSTime::getDeviceTimerResolution(*defaultHwInfo)), rootDeviceEnvironment->osTime->getDynamicDeviceTimerClock(*defaultHwInfo)); + + struct MockOSTime : public OSTime { + using OSTime::deviceTime; + }; + auto deviceTime = static_cast(rootDeviceEnvironment->osTime.get())->deviceTime.get(); + EXPECT_TRUE(deviceTime->isTimestampsRefreshEnabled()); + debugManager.flags.EnableReusingGpuTimestamps.set(0); + EXPECT_FALSE(deviceTime->isTimestampsRefreshEnabled()); } TEST(RootDeviceEnvironment, givenUseAubStreamFalseWhenGetAubManagerIsCalledThenReturnNull) { diff --git a/shared/test/unit_test/os_interface/linux/os_time_test.cpp b/shared/test/unit_test/os_interface/linux/os_time_test.cpp index 7f5d13077e..284f80ff07 100644 --- a/shared/test/unit_test/os_interface/linux/os_time_test.cpp +++ b/shared/test/unit_test/os_interface/linux/os_time_test.cpp @@ -390,7 +390,7 @@ TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueWhenForceFlagSetThenCallToKmd EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); } -TEST_F(DrmTimeTest, givenReusingTimestampsDisabledWhenGetTimestampRefreshTimeoutThenReturnCorrectValue) { +TEST_F(DrmTimeTest, givenReusingTimestampsDisabledWhenGetGpuCpuTimeThenAlwaysCallKmd) { DebugManagerStateRestore restore; debugManager.flags.EnableReusingGpuTimestamps.set(0); // Recreate mock to apply debug flag @@ -398,5 +398,11 @@ TEST_F(DrmTimeTest, givenReusingTimestampsDisabledWhenGetTimestampRefreshTimeout osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface); osTime->setResolutionFunc(resolutionFuncTrue); osTime->setGetTimeFunc(getTimeFuncTrue); - EXPECT_EQ(0ul, osTime->getTimestampRefreshTimeout()); + auto deviceTime = osTime->getDeviceTime(); + TimeStampData gpuCpuTime; + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); } diff --git a/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp b/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp index 3a9315fe6d..549e7c9498 100644 --- a/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp @@ -53,6 +53,8 @@ struct OSTimeWinTest : public ::testing::Test { rootDeviceEnvironment.osInterface = std::make_unique(); rootDeviceEnvironment.osInterface->setDriverModel(std::unique_ptr(wddm)); osTime = std::unique_ptr(new MockOSTimeWin(*rootDeviceEnvironment.osInterface)); + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + osTime->setDeviceTimerResolution(*hwInfo); } void TearDown() override { @@ -62,8 +64,11 @@ struct OSTimeWinTest : public ::testing::Test { }; TEST_F(OSTimeWinTest, given36BitGpuTimeStampWhenGpuTimeStampOverflowThenGpuTimeDoesNotDecrease) { + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); auto deviceTime = new MockDeviceTimeWin(); osTime->deviceTime.reset(deviceTime); + osTime->setDeviceTimerResolution(*hwInfo); TimeStampData gpuCpuTime = {0ull, 0ull}; @@ -95,8 +100,11 @@ TEST_F(OSTimeWinTest, given36BitGpuTimeStampWhenGpuTimeStampOverflowThenGpuTimeD } TEST_F(OSTimeWinTest, given64BitGpuTimeStampWhenGpuTimeStampOverflowThenOverflowsAreNotDetected) { + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); auto deviceTime = new MockDeviceTimeWin(); osTime->deviceTime.reset(deviceTime); + osTime->setDeviceTimerResolution(*hwInfo); TimeStampData gpuCpuTime = {0ull, 0ull}; @@ -183,9 +191,12 @@ TEST(OSTimeWinTests, givenOSInterfaceWhenGetGpuCpuTimeThenReturnsSuccess) { auto wddm = new WddmMock(rootDeviceEnvironment); TimeStampData gpuCpuTime01 = {}; TimeStampData gpuCpuTime02 = {}; - std::unique_ptr osInterface(new OSInterface()); - osInterface->setDriverModel(std::unique_ptr(wddm)); - auto osTime = OSTime::create(osInterface.get()); + rootDeviceEnvironment.osInterface = std::make_unique(); + rootDeviceEnvironment.osInterface->setDriverModel(std::unique_ptr(wddm)); + wddm->init(); + auto osTime = OSTime::create(rootDeviceEnvironment.osInterface.get()); + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + osTime->setDeviceTimerResolution(*hwInfo); auto success = osTime->getGpuCpuTime(&gpuCpuTime01); EXPECT_TRUE(success); EXPECT_NE(0u, gpuCpuTime01.cpuTimeinNS); @@ -208,8 +219,6 @@ TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsRetur } TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) { - DebugManagerStateRestore restore; - debugManager.flags.EnableReusingGpuTimestamps.set(true); osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock); LARGE_INTEGER frequency = {}; frequency.QuadPart = NSEC_PER_SEC; @@ -245,8 +254,6 @@ TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWithinIntervalThenReuseFrom } TEST_F(OSTimeWinTest, whenGettingGpuTimeStampValueAfterIntervalThenCallToKmdAndAdaptTimeout) { - DebugManagerStateRestore restore; - debugManager.flags.EnableReusingGpuTimestamps.set(true); osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock); LARGE_INTEGER frequency = {}; frequency.QuadPart = NSEC_PER_SEC; @@ -304,8 +311,6 @@ TEST_F(OSTimeWinTest, whenGetGpuCpuTimeFailedThenReturnFalse) { } TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueAfterFlagSetThenCallToKmd) { - DebugManagerStateRestore restore; - debugManager.flags.EnableReusingGpuTimestamps.set(true); TimeStampData gpuCpuTime; auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); @@ -324,8 +329,6 @@ TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueAfterFlagSetThenCallToKmd) } TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWhenForceFlagSetThenCallToKmd) { - DebugManagerStateRestore restore; - debugManager.flags.EnableReusingGpuTimestamps.set(true); osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock); LARGE_INTEGER frequency = {}; frequency.QuadPart = NSEC_PER_SEC; @@ -349,7 +352,7 @@ TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWhenForceFlagSetThenCallToK EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); } -TEST_F(OSTimeWinTest, givenReusingTimestampsDisabledWhenGetTimestampRefreshTimeoutThenReturnCorrectValue) { +TEST_F(OSTimeWinTest, givenReusingTimestampsDisabledWhenGetGpuCpuTimeThenAlwaysCallKmd) { DebugManagerStateRestore restore; debugManager.flags.EnableReusingGpuTimestamps.set(0); auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; @@ -357,5 +360,10 @@ TEST_F(OSTimeWinTest, givenReusingTimestampsDisabledWhenGetTimestampRefreshTimeo auto deviceTime = new MockDeviceTimeWin(); osTime->deviceTime.reset(deviceTime); osTime->setDeviceTimerResolution(*hwInfo); - EXPECT_EQ(0ul, osTime->getTimestampRefreshTimeout()); + TimeStampData gpuCpuTime; + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); + + osTime->getGpuCpuTime(&gpuCpuTime); + EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); }