diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp
index 7165ad3ce8..1985211583 100644
--- a/level_zero/core/source/device/device_imp.cpp
+++ b/level_zero/core/source/device/device_imp.cpp
@@ -1048,7 +1048,7 @@ ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties)
 
 ze_result_t DeviceImp::getGlobalTimestamps(uint64_t *hostTimestamp, uint64_t *deviceTimestamp) {
     NEO::TimeStampData queueTimeStamp;
-    bool retVal = this->neoDevice->getOSTime()->getGpuCpuTime(&queueTimeStamp);
+    bool retVal = this->neoDevice->getOSTime()->getGpuCpuTime(&queueTimeStamp, true);
     if (!retVal)
         return ZE_RESULT_ERROR_DEVICE_LOST;
 
diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp
index 39f7cc9320..16866d9144 100644
--- a/level_zero/core/source/event/event.cpp
+++ b/level_zero/core/source/event/event.cpp
@@ -489,7 +489,7 @@ void Event::setReferenceTs(uint64_t currentCpuTimeStamp) {
     const auto recalculate =
         (currentCpuTimeStamp - referenceTs.cpuTimeinNS) > timestampRefreshIntervalInNanoSec;
     if (referenceTs.cpuTimeinNS == 0 || recalculate) {
-        device->getNEODevice()->getOSTime()->getGpuCpuTime(&referenceTs);
+        device->getNEODevice()->getOSTime()->getGpuCpuTime(&referenceTs, true);
     }
 }
 
diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp
index 5f8b266660..f3e73d761f 100644
--- a/opencl/source/event/event.cpp
+++ b/opencl/source/event/event.cpp
@@ -163,12 +163,14 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
     // if paramValue is NULL, it is ignored
     switch (paramName) {
     case CL_PROFILING_COMMAND_QUEUED:
+        calcProfilingData();
         timestamp = getProfilingInfoData(queueTimeStamp);
         src = &timestamp;
         srcSize = sizeof(cl_ulong);
         break;
 
     case CL_PROFILING_COMMAND_SUBMIT:
+        calcProfilingData();
         timestamp = getProfilingInfoData(submitTimeStamp);
         src = &timestamp;
         srcSize = sizeof(cl_ulong);
@@ -365,15 +367,55 @@ bool Event::calcProfilingData() {
     return dataCalculated;
 }
 
+void Event::updateTimestamp(ProfilingInfo &timestamp, uint64_t newGpuTimestamp) const { // sets gpuTimeStamp, gpuTimeInNs and cpuTimeInNs of `timestamp` from a raw GPU tick value
+    auto &device = this->cmdQueue->getDevice();
+    auto &gfxCoreHelper = device.getGfxCoreHelper();
+    auto resolution = device.getDeviceInfo().profilingTimerResolution;
+    timestamp.gpuTimeStamp = newGpuTimestamp;
+    timestamp.gpuTimeInNs = gfxCoreHelper.getGpuTimeStampInNS(timestamp.gpuTimeStamp, resolution); // converted with the device profiling timer resolution
+    timestamp.cpuTimeInNs = timestamp.gpuTimeInNs; // CPU time is copied from the GPU-derived value, not sampled from the host clock
+}
+
+/**
+ * @brief Merges overflow bits into a GPU timestamp. This method performs bitwise OR of the passed timestamp with the
+ * bits above getGlobalTimeStampBits() of other timestamp that tracks overflows, so passed timestamp gets its overflow bits
+ *
+ * @param[in,out] timestamp Overflow bits will be OR-ed into this timestamp; bits that are already set stay set
+ * @param[in] timestampWithOverflow Timestamp that tracks overflows in the bits above getGlobalTimeStampBits()
+ *
+ */
+void Event::addOverflowToTimestamp(uint64_t &timestamp, uint64_t timestampWithOverflow) const {
+    auto &device = this->cmdQueue->getDevice();
+    auto &gfxCoreHelper = device.getGfxCoreHelper();
+    timestamp |= timestampWithOverflow & (maxNBitValue(64) - maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits())); // presumably maxNBitValue(n) == 2^n - 1, so the mask keeps only bits above the timestamp width - confirm
+}
+
 void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS) {
     auto &device = this->cmdQueue->getDevice();
     auto &gfxCoreHelper = device.getGfxCoreHelper();
     auto resolution = device.getDeviceInfo().profilingTimerResolution;
 
     startTimeStamp.gpuTimeStamp = globalStartTS;
-    while (startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp) {
-        startTimeStamp.gpuTimeStamp += static_cast<uint64_t>(1ULL << gfxCoreHelper.getGlobalTimeStampBits());
+    addOverflowToTimestamp(startTimeStamp.gpuTimeStamp, submitTimeStamp.gpuTimeStamp);
+    if (startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp) {
+        auto diff = submitTimeStamp.gpuTimeStamp - startTimeStamp.gpuTimeStamp;
+        auto diffInNS = gfxCoreHelper.getGpuTimeStampInNS(diff, resolution);
+        auto osTime = device.getOSTime();
+        if (diffInNS < osTime->getTimestampRefreshTimeout()) {
+            auto alignedSubmitTimestamp = startTimeStamp.gpuTimeStamp - 1;
+            auto alignedQueueTimestamp = startTimeStamp.gpuTimeStamp - 2;
+            if (startTimeStamp.gpuTimeStamp <= 2) {
+                alignedSubmitTimestamp = 0;
+                alignedQueueTimestamp = 0;
+            }
+            updateTimestamp(submitTimeStamp, alignedSubmitTimestamp);
+            updateTimestamp(queueTimeStamp, alignedQueueTimestamp);
+            osTime->setRefreshTimestampsFlag();
+        } else {
+            startTimeStamp.gpuTimeStamp += static_cast<uint64_t>(1ULL << gfxCoreHelper.getGlobalTimeStampBits());
+        }
     }
+    UNRECOVERABLE_IF(startTimeStamp.gpuTimeStamp < submitTimeStamp.gpuTimeStamp);
     auto gpuTicksDiff = startTimeStamp.gpuTimeStamp - submitTimeStamp.gpuTimeStamp;
     auto timeDiff = static_cast<uint64_t>(gpuTicksDiff * resolution);
     startTimeStamp.cpuTimeInNs = submitTimeStamp.cpuTimeInNs + timeDiff;
@@ -614,8 +656,8 @@ void Event::submitCommand(bool abortTasks) {
             this->setSubmitTimeStamp();
             if (profilingCpuPath) {
                 setStartTimeStamp();
-            } else {
             }
+
             if (perfCountersEnabled && perfCounterNode) {
                 this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*perfCounterNode->getBaseGraphicsAllocation());
             }
diff --git a/opencl/source/event/event.h b/opencl/source/event/event.h
index 3fe75d4eb3..385125d320 100644
--- a/opencl/source/event/event.h
+++ b/opencl/source/event/event.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -360,6 +360,9 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
     bool isWaitForTimestampsEnabled() const;
     bool areTimestampsCompleted();
 
+    void updateTimestamp(ProfilingInfo &timestamp, uint64_t newGpuTimestamp) const;
+    void addOverflowToTimestamp(uint64_t &timestamp, uint64_t timestampWithOverflow) const;
+
     bool currentCmdQVirtualEvent = false;
     std::atomic<Command *> cmdToSubmit{nullptr};
     std::atomic<Command *> submittedCmd{nullptr};
diff --git a/opencl/test/unit_test/device/device_timers_tests.cpp b/opencl/test/unit_test/device/device_timers_tests.cpp
index 0a0835ba42..04d874c9fb 100644
--- a/opencl/test/unit_test/device/device_timers_tests.cpp
+++ b/opencl/test/unit_test/device/device_timers_tests.cpp
@@ -25,7 +25,9 @@ TEST(MockOSTime, WhenSleepingThenDeviceAndHostTimerAreIncreased) {
     cl_ulong hostTimestamp[2] = {0, 0};
 
     auto mDev = MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr);
-    mDev->setOSTime(new MockOSTime());
+    auto osTime = new MockOSTime();
+    osTime->setDeviceTimerResolution(mDev->getHardwareInfo());
+    mDev->setOSTime(osTime);
 
     mDev->getDeviceAndHostTimer(
         &deviceTimestamp[0],
diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp
index 2f803ea023..6a9100c47b 100644
--- a/opencl/test/unit_test/event/event_tests.cpp
+++ b/opencl/test/unit_test/event/event_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -889,11 +889,12 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
     MockCommandQueue cmdQ(mockContext, pClDevice, props, false);
     MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
     auto resolution = pClDevice->getDevice().getDeviceInfo().profilingTimerResolution;
+    auto osTime = pClDevice->getDevice().getOSTime();
 
     HwTimeStamps timestamp{};
     timestamp.globalStartTS = 3;
     event.queueTimeStamp.gpuTimeStamp = 2;
-    event.submitTimeStamp.gpuTimeStamp = 4;
+    event.submitTimeStamp.gpuTimeStamp = osTime->getTimestampRefreshTimeout() + 4;
     event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);
     TagNode<HwTimeStamps> timestampNode{};
     timestampNode.tagForCpuAccess = &timestamp;
@@ -909,6 +910,76 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle
     event.timeStampNode = nullptr;
 }
 
+TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWithinRecalculationLimitWhenCalculateStartTimestampThenAdjustTimestmaps) { // NOTE(review): "Timestmaps" in the test name is a typo for "Timestamps"
+    DebugManagerStateRestore dbgRestore;
+    debugManager.flags.EnableReusingGpuTimestamps.set(true); // flag is set before the mock device below is created
+
+    MockContext context{};
+    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
+
+    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
+    MockCommandQueue cmdQ(&context, mockDevice.get(), props, false);
+    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
+    auto resolution = mockDevice->getDevice().getDeviceInfo().profilingTimerResolution;
+
+    HwTimeStamps timestamp{};
+    timestamp.globalStartTS = 3; // start tick is lower than the submit tick (4) assigned below
+    event.queueTimeStamp.gpuTimeStamp = 2;
+    event.submitTimeStamp.gpuTimeStamp = 4;
+    event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);
+    TagNode<HwTimeStamps> timestampNode{};
+    timestampNode.tagForCpuAccess = &timestamp;
+    event.timeStampNode = &timestampNode;
+
+    uint64_t start = 0u;
+    uint64_t submit = 0u;
+    uint64_t queue = 0u;
+    event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
+    event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submit, nullptr);
+    event.getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queue, nullptr);
+
+    EXPECT_EQ(start, static_cast<uint64_t>(timestamp.globalStartTS * resolution)); // start itself is expected unchanged
+    EXPECT_EQ(submit, static_cast<uint64_t>((timestamp.globalStartTS - 1) * resolution)); // submit is expected one tick before start
+    EXPECT_EQ(queue, static_cast<uint64_t>((timestamp.globalStartTS - 2) * resolution)); // queue is expected two ticks before start
+
+    event.timeStampNode = nullptr; // detach the stack-allocated node from the event
+}
+
+TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmallerThanQueueTSWithinRecalculationLimitAndStartTSBelowOneWhenCalculateStartTimestampThenAdjustTimestmaps) { // NOTE(review): name says "StartTSBelowOne" but globalStartTS is 2 here; "Timestmaps" is a typo for "Timestamps"
+    DebugManagerStateRestore dbgRestore;
+    debugManager.flags.EnableReusingGpuTimestamps.set(true); // flag is set before the mock device below is created
+
+    MockContext context{};
+    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
+
+    const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
+    MockCommandQueue cmdQ(&context, mockDevice.get(), props, false);
+    MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
+    auto resolution = mockDevice->getDevice().getDeviceInfo().profilingTimerResolution;
+
+    HwTimeStamps timestamp{};
+    timestamp.globalStartTS = 2; // start tick is lower than the submit tick (4) and too small to place two distinct ticks before it
+    event.queueTimeStamp.gpuTimeStamp = 2;
+    event.submitTimeStamp.gpuTimeStamp = 4;
+    event.submitTimeStamp.gpuTimeInNs = static_cast<uint64_t>(4 * resolution);
+    TagNode<HwTimeStamps> timestampNode{};
+    timestampNode.tagForCpuAccess = &timestamp;
+    event.timeStampNode = &timestampNode;
+
+    uint64_t start = 0u;
+    uint64_t submit = 0u;
+    uint64_t queue = 0u;
+    event.getEventProfilingInfo(CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
+    event.getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submit, nullptr);
+    event.getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queue, nullptr);
+
+    EXPECT_EQ(start, static_cast<uint64_t>(timestamp.globalStartTS * resolution)); // start itself is expected unchanged
+    EXPECT_EQ(submit, 0ul); // submit is expected to be reported as 0
+    EXPECT_EQ(queue, 0ul); // queue is expected to be reported as 0
+
+    event.timeStampNode = nullptr; // detach the stack-allocated node from the event
+}
+
 TEST_F(InternalsEventTest, givenGpuHangWhenEventWaitReportsHangThenWaititingIsAbortedAndUnfinishedEventsHaveExecutionStatusEqualsToAbortedDueToGpuHang) {
     MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false);
 
diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp
index 03d7ab139f..fe8622d6fd 100644
--- a/opencl/test/unit_test/profiling/profiling_tests.cpp
+++ b/opencl/test/unit_test/profiling/profiling_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,6 +61,58 @@ struct ProfilingTests : public CommandEnqueueFixture,
     uint32_t crossThreadData[32];
 };
 
+template <typename TagType>
+struct MockTagNode : public TagNode<TagType> { // test helper: re-exports two TagNode members as public so tests can assign them directly
+  public:
+    using TagNode<TagType>::tagForCpuAccess;
+    using TagNode<TagType>::gfxAllocation;
+    MockTagNode() {
+        gfxAllocation = nullptr; // both pointers start null; tests assign tagForCpuAccess themselves
+        tagForCpuAccess = nullptr;
+    }
+    void returnTag() { // deliberately empty; NOTE(review): presumably replaces TagNode::returnTag so releasing the node is a no-op - confirm and consider marking `override`
+    }
+};
+
+class MyOSDeviceTime : public DeviceTime { // DeviceTime stub whose every override records a test failure when called
+    double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
+        EXPECT_FALSE(true); // always fails: this method must not be reached by the test
+        return 1.0;
+    }
+    uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override {
+        EXPECT_FALSE(true); // always fails: this method must not be reached by the test
+        return 0;
+    }
+    bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *) override {
+        EXPECT_FALSE(true); // always fails: this method must not be reached by the test
+        return false;
+    }
+};
+
+class MyOSTime : public OSTime { // OSTime stub that counts constructions and records a test failure on any timer query
+  public:
+    static int instanceNum; // incremented by every constructor call; defined after the class
+    MyOSTime() {
+        instanceNum++;
+        this->deviceTime = std::make_unique<MyOSDeviceTime>(); // pair with the equally strict device-time stub
+    }
+
+    bool getCpuTime(uint64_t *timeStamp) override {
+        EXPECT_FALSE(true); // always fails: this method must not be reached by the test
+        return false;
+    };
+    double getHostTimerResolution() const override {
+        EXPECT_FALSE(true); // always fails: this method must not be reached by the test
+        return 0;
+    }
+    uint64_t getCpuRawTimestamp() override {
+        EXPECT_FALSE(true); // always fails: this method must not be reached by the test
+        return 0;
+    }
+};
+
+int MyOSTime::instanceNum = 0;
+
 HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithKernelWhenGetCSFromCmdQueueThenEnoughSpaceInCS) {
     typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM;
     typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
@@ -442,7 +494,7 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenNonBlockedEnqueueThenSetGpuPath)
     cl_event event;
     pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event);
     auto eventObj = static_cast<Event *>(event);
-    EXPECT_TRUE(eventObj->isCPUProfilingPath() == CL_FALSE);
+    EXPECT_FALSE(eventObj->isCPUProfilingPath());
     pCmdQ->finish();
 
     uint64_t queued, submit;
@@ -455,6 +507,7 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenNonBlockedEnqueueThenSetGpuPath)
 
     EXPECT_LT(0u, queued);
     EXPECT_LT(queued, submit);
+
     eventObj->release();
 }
 
@@ -474,7 +527,17 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenSetGpuPath) {
 
     uint64_t queued = 0u, submit = 0u;
     cl_int retVal;
-
+    HwTimeStamps timestamp;
+    timestamp.globalStartTS = 10;
+    timestamp.contextStartTS = 10;
+    timestamp.globalEndTS = 80;
+    timestamp.contextEndTS = 80;
+    MockTagNode<HwTimeStamps> timestampNode;
+    timestampNode.tagForCpuAccess = &timestamp;
+    static_cast<MockEvent<Event> *>(eventObj)->timeStampNode = &timestampNode;
+    if (eventObj->getTimestampPacketNodes()) {
+        eventObj->getTimestampPacketNodes()->releaseNodes();
+    }
     retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_QUEUED, sizeof(uint64_t), &queued, 0);
     EXPECT_EQ(CL_SUCCESS, retVal);
     retVal = eventObj->getEventProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, sizeof(uint64_t), &submit, 0);
@@ -483,60 +546,11 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenBlockedEnqueueThenSetGpuPath) {
     EXPECT_LT(0u, queued);
     EXPECT_LT(queued, submit);
 
+    static_cast<MockEvent<Event> *>(eventObj)->timeStampNode = nullptr;
     eventObj->release();
     userEventObj->release();
 }
 
-template <typename TagType>
-struct MockTagNode : public TagNode<TagType> {
-  public:
-    using TagNode<TagType>::tagForCpuAccess;
-    using TagNode<TagType>::gfxAllocation;
-    MockTagNode() {
-        gfxAllocation = nullptr;
-        tagForCpuAccess = nullptr;
-    }
-};
-
-class MyOSDeviceTime : public DeviceTime {
-    double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
-        EXPECT_FALSE(true);
-        return 1.0;
-    }
-    uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override {
-        EXPECT_FALSE(true);
-        return 0;
-    }
-    bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *) override {
-        EXPECT_FALSE(true);
-        return false;
-    }
-};
-
-class MyOSTime : public OSTime {
-  public:
-    static int instanceNum;
-    MyOSTime() {
-        instanceNum++;
-        this->deviceTime = std::make_unique<MyOSDeviceTime>();
-    }
-
-    bool getCpuTime(uint64_t *timeStamp) override {
-        EXPECT_FALSE(true);
-        return false;
-    };
-    double getHostTimerResolution() const override {
-        EXPECT_FALSE(true);
-        return 0;
-    }
-    uint64_t getCpuRawTimestamp() override {
-        EXPECT_FALSE(true);
-        return 0;
-    }
-};
-
-int MyOSTime::instanceNum = 0;
-
 using EventProfilingTest = ProfilingTests;
 
 HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {
diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl
index ce85b55da4..8f210a2786 100644
--- a/shared/source/debug_settings/debug_variables_base.inl
+++ b/shared/source/debug_settings/debug_variables_base.inl
@@ -346,6 +346,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis
 DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution")
 DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
 DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
+DECLARE_DEBUG_VARIABLE(bool, EnableReusingGpuTimestamps, false, "When enabled, GPU timestamp will be reused for next device time requests")
 DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cacheline flush instead of memory copy for map/unmap mem object")
 DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")
 DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100")
diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp
index aadfdf542f..1a980c7956 100644
--- a/shared/source/device/device.cpp
+++ b/shared/source/device/device.cpp
@@ -652,7 +652,7 @@ EngineControl &Device::getEngine(uint32_t index) {
 
 bool Device::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const {
     TimeStampData timeStamp;
-    auto retVal = getOSTime()->getGpuCpuTime(&timeStamp);
+    auto retVal = getOSTime()->getGpuCpuTime(&timeStamp, true);
     if (retVal) {
         *hostTimestamp = timeStamp.cpuTimeinNS;
         if (debugManager.flags.EnableDeviceBasedTimestamps.get()) {
diff --git a/shared/source/execution_environment/root_device_environment.cpp b/shared/source/execution_environment/root_device_environment.cpp
index a43689f729..b29408e4c1 100644
--- a/shared/source/execution_environment/root_device_environment.cpp
+++ b/shared/source/execution_environment/root_device_environment.cpp
@@ -124,6 +124,7 @@ void RootDeviceEnvironment::initGmm() {
 void RootDeviceEnvironment::initOsTime() {
     if (!osTime) {
         osTime = OSTime::create(osInterface.get());
+        osTime->setDeviceTimerResolution(*hwInfo);
     }
 }
 
diff --git a/shared/source/os_interface/os_time.cpp b/shared/source/os_interface/os_time.cpp
index fa06f5a6db..427d365f1a 100644
--- a/shared/source/os_interface/os_time.cpp
+++ b/shared/source/os_interface/os_time.cpp
@@ -7,6 +7,8 @@
 
 #include "shared/source/os_interface/os_time.h"
 
+#include "shared/source/debug_settings/debug_settings_manager.h"
+#include "shared/source/helpers/debug_helpers.h"
 #include "shared/source/helpers/hw_info.h"
 
 #include <mutex>
@@ -17,6 +19,13 @@ double OSTime::getDeviceTimerResolution(HardwareInfo const &hwInfo) {
     return hwInfo.capabilityTable.defaultProfilingTimerResolution;
 };
 
+DeviceTime::DeviceTime() { // samples the EnableReusingGpuTimestamps debug flag once, at construction
+    reusingTimestampsEnabled = debugManager.flags.EnableReusingGpuTimestamps.get();
+    if (reusingTimestampsEnabled) {
+        timestampRefreshTimeoutNS = NSEC_PER_MSEC * 100; // 100ms starting refresh interval; left at its default otherwise
+    }
+}
+
 bool DeviceTime::getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) {
     pGpuCpuTime->cpuTimeinNS = 0;
     pGpuCpuTime->gpuTimeStamp = 0;
@@ -31,8 +40,63 @@ uint64_t DeviceTime::getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) cons
     return static_cast<uint64_t>(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo));
 }
 
-bool DeviceTime::getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime) {
-    if (!getGpuCpuTimeImpl(pGpuCpuTime, osTime)) {
+void DeviceTime::setDeviceTimerResolution(HardwareInfo const &hwInfo) { // caches the timer resolution in deviceTimerResolution
+    deviceTimerResolution = getDynamicDeviceTimerResolution(hwInfo);
+    if (debugManager.flags.OverrideProfilingTimerResolution.get() != -1) { // -1 means "no override"
+        deviceTimerResolution = static_cast<double>(debugManager.flags.OverrideProfilingTimerResolution.get()); // debug flag wins over the queried value
+    }
+}
+
+/**
+ * @brief Fills timeStamp with a GPU/CPU timestamp pair. If timestamp reuse is enabled and this
+ * method is called within the refresh interval (and forceKmdCall is false), GPU timestamp
+ * will be calculated based on CPU timestamp and previous GPU ticks to reduce amount of
+ * internal KMD calls. Interval is selected adaptively on every refresh, based on
+ * misalignment between calculated ticks and actual ticks.
+ * @return returns false if internal call to KMD failed. True otherwise.
+ */
+bool DeviceTime::getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime, bool forceKmdCall) {
+    uint64_t cpuTimeinNS = 0; // zero-initialized so a failing getCpuTime() cannot leave an indeterminate value behind
+    osTime->getCpuTime(&cpuTimeinNS);
+
+    auto cpuTimeDiffInNS = cpuTimeinNS - fetchedTimestamps.cpuTimeinNS; // CPU time elapsed since the last stored pair
+    if (forceKmdCall || cpuTimeDiffInNS >= timestampRefreshTimeoutNS) {
+        refreshTimestamps = true;
+    }
+
+    if (!reusingTimestampsEnabled || refreshTimestamps) {
+        if (!getGpuCpuTimeImpl(timeStamp, osTime)) {
+            return false;
+        }
+        if (!reusingTimestampsEnabled) {
+            return true; // reuse disabled: no bookkeeping, result comes straight from getGpuCpuTimeImpl
+        }
+        if (initialGpuTimeStamp) { // adapt the interval only once initialGpuTimeStamp holds a value (it is assigned outside this function)
+            UNRECOVERABLE_IF(deviceTimerResolution == 0);
+            auto calculatedTimestamp = fetchedTimestamps.gpuTimeStamp + static_cast<uint64_t>(cpuTimeDiffInNS / deviceTimerResolution);
+            auto diff = (timeStamp->gpuTimeStamp > calculatedTimestamp) ? (timeStamp->gpuTimeStamp - calculatedTimestamp) : (calculatedTimestamp - timeStamp->gpuTimeStamp); // absolute error in ticks, without relying on which abs() overload is visible
+            auto elapsedTicks = timeStamp->gpuTimeStamp - fetchedTimestamps.gpuTimeStamp;
+            int64_t adaptValue = static_cast<int64_t>(diff * deviceTimerResolution);
+            adaptValue = std::min(adaptValue, static_cast<int64_t>(timestampRefreshMinTimeoutNS)); // a single adjustment never exceeds the minimum timeout
+            if (elapsedTicks == 0 || static_cast<double>(diff) / static_cast<double>(elapsedTicks) > 0.05) { // error above 5% (or no elapsed ticks): shrink the interval
+                adaptValue = adaptValue * (-1);
+            }
+            timestampRefreshTimeoutNS += adaptValue; // cannot wrap: the timeout is at least the minimum and |adaptValue| is at most the minimum
+            timestampRefreshTimeoutNS = std::max(timestampRefreshMinTimeoutNS, std::min(timestampRefreshMaxTimeoutNS, timestampRefreshTimeoutNS));
+        }
+        fetchedTimestamps = *timeStamp; // remember the fresh pair as the base for later extrapolation
+        refreshTimestamps = false;
+    } else {
+        timeStamp->cpuTimeinNS = cpuTimeinNS;
+        UNRECOVERABLE_IF(deviceTimerResolution == 0);
+        timeStamp->gpuTimeStamp = fetchedTimestamps.gpuTimeStamp + static_cast<uint64_t>(cpuTimeDiffInNS / deviceTimerResolution); // extrapolate GPU ticks from elapsed CPU time
+    }
+
+    return true;
+}
+
+bool DeviceTime::getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime, bool forceKmdCall) {
+    if (!getGpuCpuTimestamps(pGpuCpuTime, osTime, forceKmdCall)) {
         return false;
     }
 
diff --git a/shared/source/os_interface/os_time.h b/shared/source/os_interface/os_time.h
index 2e2e4b2211..6932232d22 100644
--- a/shared/source/os_interface/os_time.h
+++ b/shared/source/os_interface/os_time.h
@@ -10,7 +10,7 @@
 #include <optional>
 
 #define NSEC_PER_SEC (1000000000ULL)
-
+#define NSEC_PER_MSEC (NSEC_PER_SEC / 1000)
 namespace NEO {
 
 class OSInterface;
@@ -25,15 +25,32 @@ class OSTime;
 
 class DeviceTime {
   public:
+    DeviceTime();
     virtual ~DeviceTime() = default;
-    bool getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime);
+    bool getGpuCpuTime(TimeStampData *pGpuCpuTime, OSTime *osTime, bool forceKmdCall); // forceKmdCall is forwarded to getGpuCpuTimestamps()
     virtual bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime);
     virtual double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const;
     virtual uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const;
+    bool getGpuCpuTimestamps(TimeStampData *timeStamp, OSTime *osTime, bool forceKmdCall); // forceKmdCall == true requests a refresh even inside the reuse interval
+    void setDeviceTimerResolution(HardwareInfo const &hwInfo); // stores the resolution in deviceTimerResolution
+    void setRefreshTimestampsFlag() { // marks the stored pair as stale so the next query refreshes it
+        refreshTimestamps = true;
+    }
+    uint64_t getTimestampRefreshTimeout() const { // current refresh interval in nanoseconds
+        return timestampRefreshTimeoutNS;
+    };
 
     std::optional<uint64_t> initialGpuTimeStamp{};
     bool waitingForGpuTimeStampOverflow = false;
     uint64_t gpuTimeStampOverflowCounter = 0;
+
+    double deviceTimerResolution = 0; // nanoseconds per GPU tick; 0 until setDeviceTimerResolution() is called
+    const uint64_t timestampRefreshMinTimeoutNS = NSEC_PER_MSEC; // 1ms
+    const uint64_t timestampRefreshMaxTimeoutNS = NSEC_PER_SEC;  // 1s
+    uint64_t timestampRefreshTimeoutNS = 0; // refresh interval; stays 0 unless timestamp reuse is enabled
+    bool refreshTimestamps = true; // true means the next query must fetch a fresh pair
+    bool reusingTimestampsEnabled = false; // assigned from the EnableReusingGpuTimestamps debug flag in the constructor
+    TimeStampData fetchedTimestamps{}; // last pair stored by getGpuCpuTimestamps() when reuse is enabled
 };
 
 class OSTime {
@@ -47,8 +64,13 @@ class OSTime {
     virtual uint64_t getCpuRawTimestamp();
 
     static double getDeviceTimerResolution(HardwareInfo const &hwInfo);
+
+    bool getGpuCpuTime(TimeStampData *gpuCpuTime, bool forceKmdCall) {
+        return deviceTime->getGpuCpuTime(gpuCpuTime, this, forceKmdCall);
+    }
+
     bool getGpuCpuTime(TimeStampData *gpuCpuTime) {
-        return deviceTime->getGpuCpuTime(gpuCpuTime, this);
+        return deviceTime->getGpuCpuTime(gpuCpuTime, this, false);
     }
 
     double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const {
@@ -61,6 +83,18 @@ class OSTime {
 
     uint64_t getMaxGpuTimeStamp() const { return maxGpuTimeStamp; }
 
+    void setDeviceTimerResolution(HardwareInfo const &hwInfo) const {
+        deviceTime->setDeviceTimerResolution(hwInfo);
+    }
+
+    void setRefreshTimestampsFlag() const {
+        deviceTime->setRefreshTimestampsFlag();
+    }
+
+    uint64_t getTimestampRefreshTimeout() const {
+        return deviceTime->getTimestampRefreshTimeout();
+    }
+
   protected:
     OSTime() = default;
     OSInterface *osInterface = nullptr;
diff --git a/shared/test/common/mocks/linux/mock_os_time_linux.h b/shared/test/common/mocks/linux/mock_os_time_linux.h
index f778f03228..a73f51e3b0 100644
--- a/shared/test/common/mocks/linux/mock_os_time_linux.h
+++ b/shared/test/common/mocks/linux/mock_os_time_linux.h
@@ -18,15 +18,28 @@ class MockDeviceTimeDrm : public DeviceTimeDrm {
     using DeviceTimeDrm::pDrm;
 
     bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
+        getGpuCpuTimeImplCalled++;
         if (callBaseGetGpuCpuTimeImpl) {
             return DeviceTimeDrm::getGpuCpuTimeImpl(pGpuCpuTime, osTime);
         }
         *pGpuCpuTime = gpuCpuTimeValue;
         return getGpuCpuTimeImplResult;
     }
+
+    double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { // returns a canned value unless the test opts into the real implementation
+        if (callGetDynamicDeviceTimerResolution) {
+            return DeviceTimeDrm::getDynamicDeviceTimerResolution(hwInfo); // delegate to the base class when the flag is set
+        }
+        return dynamicDeviceTimerResolutionValue; // value configurable by tests through the public member
+    }
+
     bool callBaseGetGpuCpuTimeImpl = true;
     bool getGpuCpuTimeImplResult = true;
     TimeStampData gpuCpuTimeValue{};
+    uint32_t getGpuCpuTimeImplCalled = 0;
+
+    bool callGetDynamicDeviceTimerResolution = false;
+    double dynamicDeviceTimerResolutionValue = 1.0;
 };
 
 class MockOSTimeLinux : public OSTimeLinux {
diff --git a/shared/test/common/mocks/mock_device.cpp b/shared/test/common/mocks/mock_device.cpp
index ebf6d0f77c..be85053e9a 100644
--- a/shared/test/common/mocks/mock_device.cpp
+++ b/shared/test/common/mocks/mock_device.cpp
@@ -50,11 +50,11 @@ const char *MockDevice::getProductAbbrev() const {
 MockDevice::MockDevice(ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex)
     : RootDevice(executionEnvironment, rootDeviceIndex) {
     UltDeviceFactory::initializeMemoryManager(*executionEnvironment);
-
+    auto &hwInfo = getHardwareInfo();
     if (!getOSTime()) {
         getRootDeviceEnvironmentRef().osTime = MockOSTime::create();
+        getRootDeviceEnvironmentRef().osTime->setDeviceTimerResolution(hwInfo);
     }
-    auto &hwInfo = getHardwareInfo();
     executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setHwInfoAndInitHelpers(&hwInfo);
     executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->initGmm();
     if (!executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface) {
diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config
index b3f273e205..04f122e510 100644
--- a/shared/test/common/test_files/igdrcl.config
+++ b/shared/test/common/test_files/igdrcl.config
@@ -600,4 +600,5 @@ ForceSynchronizedDispatchMode = -1
 DirectSubmissionControllerAdjustOnThrottleAndAcLineStatus = -1
 ReadOnlyAllocationsTypeMask = 0
 EnableLogLevel = 6
+EnableReusingGpuTimestamps = 0
 # Please don't edit below this line
diff --git a/shared/test/unit_test/os_interface/linux/os_time_test.cpp b/shared/test/unit_test/os_interface/linux/os_time_test.cpp
index b493299d86..e563b5483f 100644
--- a/shared/test/unit_test/os_interface/linux/os_time_test.cpp
+++ b/shared/test/unit_test/os_interface/linux/os_time_test.cpp
@@ -9,6 +9,7 @@
 #include "shared/source/os_interface/linux/ioctl_helper.h"
 #include "shared/source/os_interface/linux/os_time_linux.h"
 #include "shared/source/os_interface/os_interface.h"
+#include "shared/test/common/helpers/debug_manager_state_restore.h"
 #include "shared/test/common/mocks/linux/mock_os_time_linux.h"
 #include "shared/test/common/mocks/mock_execution_environment.h"
 #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h"
@@ -18,7 +19,7 @@
 
 #include <dlfcn.h>
 
-static int actualTime = 0;
+static uint64_t actualTime = 0;
 
 int getTimeFuncFalse(clockid_t clkId, struct timespec *tp) throw() {
     return -1;
@@ -48,6 +49,8 @@ struct DrmTimeTest : public ::testing::Test {
         osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
         osTime->setResolutionFunc(resolutionFuncTrue);
         osTime->setGetTimeFunc(getTimeFuncTrue);
+        auto hwInfo = rootDeviceEnvironment.getMutableHardwareInfo();
+        osTime->setDeviceTimerResolution(*hwInfo);
         deviceTime = osTime->getDeviceTime();
     }
 
@@ -202,7 +205,7 @@ TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlFailsThenDefaultRes
 
     drm->getParamRetValue = 0;
     drm->ioctlRes = -1;
-
+    deviceTime->callGetDynamicDeviceTimerResolution = true;
     auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo);
     EXPECT_DOUBLE_EQ(result, defaultResolution);
 }
@@ -239,7 +242,7 @@ TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlSuccedsThenCorrectR
     // 19200000 is frequency yelding 52.083ns resolution
     drm->getParamRetValue = 19200000;
     drm->ioctlRes = 0;
-
+    deviceTime->callGetDynamicDeviceTimerResolution = true;
     auto result = osTime->getDynamicDeviceTimerResolution(*defaultHwInfo);
     EXPECT_DOUBLE_EQ(result, 52.08333333333333);
 }
@@ -282,3 +285,118 @@ TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsReturne
         EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp());
     }
 }
+
+TEST_F(DrmTimeTest, whenGettingGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) { // With reuse enabled, a second query must not reach getGpuCpuTimeImpl again.
+    DebugManagerStateRestore restore;
+    debugManager.flags.EnableReusingGpuTimestamps.set(true);
+
+    // Recreate mock to apply debug flag
+    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
+    auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
+    osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
+    osTime->setResolutionFunc(resolutionFuncTrue);
+    osTime->setGetTimeFunc(getTimeFuncTrue);
+    osTime->setDeviceTimerResolution(*hwInfo);
+    auto deviceTime = osTime->getDeviceTime(); // local handle to the recreated mock; shadows the fixture's deviceTime member
+
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); // no device query issued yet
+    TimeStampData gpuCpuTime;
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); // first query goes through getGpuCpuTimeImpl
+
+    auto gpuTimestampBefore = gpuCpuTime.gpuTimeStamp;
+    auto cpuTimeBefore = actualTime;
+
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); // counter unchanged: the second query was served without getGpuCpuTimeImpl
+
+    auto gpuTimestampAfter = gpuCpuTime.gpuTimeStamp;
+    auto cpuTimeAfter = actualTime;
+
+    auto cpuTimeDiff = cpuTimeAfter - cpuTimeBefore;
+    auto deviceTimerResolution = deviceTime->getDynamicDeviceTimerResolution(*hwInfo);
+    auto gpuTimestampDiff = static_cast<uint64_t>(cpuTimeDiff / deviceTimerResolution); // elapsed CPU time divided by the timer resolution, truncated
+    EXPECT_EQ(gpuTimestampAfter, gpuTimestampBefore + gpuTimestampDiff); // reused GPU timestamp must advance by exactly that amount
+}
+
+TEST_F(DrmTimeTest, whenGettingGpuTimeStampValueAfterIntervalThenCallToKmdAndAdaptTimeout) { // Once the refresh timeout has elapsed the device is queried again and the timeout is re-tuned.
+    DebugManagerStateRestore restore;
+    debugManager.flags.EnableReusingGpuTimestamps.set(true);
+
+    // Recreate mock to apply debug flag
+    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
+    auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
+    osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
+    osTime->setResolutionFunc(resolutionFuncTrue);
+    osTime->setGetTimeFunc(getTimeFuncTrue);
+    osTime->setDeviceTimerResolution(*hwInfo);
+    auto deviceTime = osTime->getDeviceTime(); // local handle to the recreated mock; shadows the fixture's deviceTime member
+    deviceTime->callBaseGetGpuCpuTimeImpl = false; // presumably makes the mock report gpuCpuTimeValue instead of calling the base implementation - confirm in the mock
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
+
+    const auto initialExpectedTimeoutNS = NSEC_PER_MSEC * 100; // refresh timeout expected right after creation with reuse enabled
+    EXPECT_EQ(initialExpectedTimeoutNS, osTime->getTimestampRefreshTimeout());
+
+    auto setTimestamps = [&](uint64_t cpuTimeNS, uint64_t cpuTimeFromKmdNS, uint64_t gpuTimestamp) { // stages the global CPU time and the values held in the mock's gpuCpuTimeValue
+        actualTime = cpuTimeNS;
+        deviceTime->gpuCpuTimeValue.cpuTimeinNS = cpuTimeFromKmdNS;
+        deviceTime->gpuCpuTimeValue.gpuTimeStamp = gpuTimestamp;
+    };
+    setTimestamps(0, 0ull, 0ull);
+
+    TimeStampData gpuCpuTime;
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); // first query goes through getGpuCpuTimeImpl
+
+    // Error is smaller than 5%, timeout can be increased
+    auto newTimeAfterInterval = actualTime + osTime->getTimestampRefreshTimeout(); // CPU time advanced by exactly the current timeout
+    setTimestamps(newTimeAfterInterval, newTimeAfterInterval + 10, newTimeAfterInterval + 10); // staged KMD values sit 10 above the staged CPU time
+
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); // timeout elapsed, so getGpuCpuTimeImpl is called again
+
+    auto diff = (gpuCpuTime.gpuTimeStamp - actualTime); // gap between the returned GPU timestamp and the staged CPU time
+    EXPECT_EQ(initialExpectedTimeoutNS + diff, osTime->getTimestampRefreshTimeout()); // timeout is expected to grow by exactly that gap
+    EXPECT_GT(initialExpectedTimeoutNS + diff, initialExpectedTimeoutNS);
+
+    // Error is larger than 5%, timeout should be decreased
+    newTimeAfterInterval = actualTime + osTime->getTimestampRefreshTimeout() + 10; // CPU time advanced past the enlarged timeout
+    setTimestamps(newTimeAfterInterval, newTimeAfterInterval * 2, newTimeAfterInterval * 2); // staged KMD values are double the staged CPU time
+
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 3u);
+
+    EXPECT_LT(osTime->getTimestampRefreshTimeout(), initialExpectedTimeoutNS); // timeout is expected to end up below its initial value
+}
+
+TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueAfterFlagSetThenCallToKmd) { // After setRefreshTimestampsFlag() the next query is expected to call getGpuCpuTimeImpl again.
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); // NOTE(review): runs on the fixture's osTime without enabling EnableReusingGpuTimestamps, unlike the reuse tests in this file - if reuse is off by default every query may reach KMD anyway; confirm this test can actually fail.
+    TimeStampData gpuCpuTime;
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); // first query goes through getGpuCpuTimeImpl
+
+    osTime->setRefreshTimestampsFlag();
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); // query after the flag was set goes through getGpuCpuTimeImpl again
+}
+
+TEST_F(DrmTimeTest, whenGettingMaxGpuTimeStampValueWhenForceFlagSetThenCallToKmd) { // Passing true as the second argument is expected to make the query call getGpuCpuTimeImpl again.
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); // NOTE(review): runs on the fixture's osTime without enabling EnableReusingGpuTimestamps, unlike the reuse tests in this file - if reuse is off by default every query may reach KMD anyway; confirm this test can actually fail.
+    TimeStampData gpuCpuTime;
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); // first query goes through getGpuCpuTimeImpl
+
+    osTime->getGpuCpuTime(&gpuCpuTime, true);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); // forced query goes through getGpuCpuTimeImpl again
+}
+
+TEST_F(DrmTimeTest, givenReusingTimestampsDisabledWhenGetTimestampRefreshTimeoutThenReturnCorrectValue) { // With reuse disabled the reported refresh timeout is expected to be zero.
+    DebugManagerStateRestore restore;
+    debugManager.flags.EnableReusingGpuTimestamps.set(0);
+    // Recreate mock to apply debug flag
+    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
+    osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface);
+    osTime->setResolutionFunc(resolutionFuncTrue);
+    osTime->setGetTimeFunc(getTimeFuncTrue);
+    EXPECT_EQ(0ul, osTime->getTimestampRefreshTimeout()); // zero timeout expected on the freshly created osTime
+}
diff --git a/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp b/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp
index 8f1464df66..3a9315fe6d 100644
--- a/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp
+++ b/shared/test/unit_test/os_interface/windows/os_time_win_tests.cpp
@@ -7,6 +7,7 @@
 
 #include "shared/source/execution_environment/root_device_environment.h"
 #include "shared/source/os_interface/os_interface.h"
+#include "shared/test/common/helpers/debug_manager_state_restore.h"
 #include "shared/test/common/mocks/mock_execution_environment.h"
 #include "shared/test/common/mocks/mock_ostime.h"
 #include "shared/test/common/mocks/windows/mock_os_time_win.h"
@@ -29,10 +30,19 @@ BOOL WINAPI queryPerformanceCounterMock(
 class MockDeviceTimeWin : public MockDeviceTime {
   public:
     bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
+        getGpuCpuTimeImplCalled++; // lets tests count how many queries reached this override
         *pGpuCpuTime = gpuCpuTimeValue;
-        return true;
+        return getGpuCpuTimeImplResult; // configurable so tests can simulate a failed query
     }
+
+    double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override { // returns the stored test value; hwInfo is not used
+        return deviceTimerResolution;
+    }
+
+    bool getGpuCpuTimeImplResult = true; // value returned by getGpuCpuTimeImpl
     TimeStampData gpuCpuTimeValue{};
+    uint32_t getGpuCpuTimeImplCalled = 0; // number of getGpuCpuTimeImpl invocations so far
+    double deviceTimerResolution = 1; // value returned by getDynamicDeviceTimerResolution
 };
 
 struct OSTimeWinTest : public ::testing::Test {
@@ -196,3 +206,156 @@ TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueThenHwInfoBasedValueIsRetur
         EXPECT_EQ(0ull, osTime->getMaxGpuTimeStamp());
     }
 }
+
+TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWithinIntervalThenReuseFromPreviousCall) { // With reuse enabled, a second query must not reach getGpuCpuTimeImpl again.
+    DebugManagerStateRestore restore;
+    debugManager.flags.EnableReusingGpuTimestamps.set(true);
+    osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock); // CPU counter now comes from the mock (presumably reporting valueToSet - confirm)
+    LARGE_INTEGER frequency = {};
+    frequency.QuadPart = NSEC_PER_SEC; // presumably makes one counter tick correspond to one nanosecond - confirm
+    osTime->setFrequency(frequency);
+
+    auto deviceTime = new MockDeviceTimeWin(); // created after the flag is set
+    osTime->deviceTime.reset(deviceTime); // hand the mock to osTime; the raw pointer is kept for assertions
+    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
+    auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
+    osTime->setDeviceTimerResolution(*hwInfo);
+
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); // no device query issued yet
+    TimeStampData gpuCpuTime;
+    deviceTime->gpuCpuTimeValue = {1u, 1u}; // value the mock copies out on the next getGpuCpuTimeImpl call
+    valueToSet.QuadPart = 1;
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); // first query goes through getGpuCpuTimeImpl
+
+    auto gpuTimestampBefore = gpuCpuTime.gpuTimeStamp;
+    auto cpuTimeBefore = gpuCpuTime.cpuTimeinNS;
+    valueToSet.QuadPart = 5; // move the mocked counter forward before the second query
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); // counter unchanged: the second query was served without getGpuCpuTimeImpl
+
+    auto gpuTimestampAfter = gpuCpuTime.gpuTimeStamp;
+    auto cpuTimeAfter = gpuCpuTime.cpuTimeinNS;
+
+    auto cpuTimeDiff = cpuTimeAfter - cpuTimeBefore;
+
+    auto deviceTimerResolution = deviceTime->getDynamicDeviceTimerResolution(*hwInfo);
+    auto gpuTimestampDiff = static_cast<uint64_t>(cpuTimeDiff / deviceTimerResolution); // elapsed CPU time divided by the timer resolution, truncated
+    EXPECT_EQ(gpuTimestampAfter, gpuTimestampBefore + gpuTimestampDiff); // reused GPU timestamp must advance by exactly that amount
+}
+
+TEST_F(OSTimeWinTest, whenGettingGpuTimeStampValueAfterIntervalThenCallToKmdAndAdaptTimeout) { // Once the refresh timeout has elapsed the device is queried again and the timeout is re-tuned.
+    DebugManagerStateRestore restore;
+    debugManager.flags.EnableReusingGpuTimestamps.set(true);
+    osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock); // CPU counter now comes from the mock (presumably reporting valueToSet - confirm)
+    LARGE_INTEGER frequency = {};
+    frequency.QuadPart = NSEC_PER_SEC; // presumably makes one counter tick correspond to one nanosecond - confirm
+    osTime->setFrequency(frequency);
+
+    // Recreate mock to apply debug flag
+    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
+    auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
+    auto deviceTime = new MockDeviceTimeWin();
+    osTime->deviceTime.reset(deviceTime); // hand the mock to osTime; the raw pointer is kept for assertions
+    osTime->setDeviceTimerResolution(*hwInfo);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u);
+
+    const auto initialExpectedTimeoutNS = NSEC_PER_MSEC * 100; // refresh timeout expected right after creation with reuse enabled
+    EXPECT_EQ(initialExpectedTimeoutNS, osTime->getTimestampRefreshTimeout());
+
+    auto setTimestamps = [&](uint64_t cpuTimeNS, uint64_t cpuTimeFromKmdNS, uint64_t gpuTimestamp) { // stages the mocked counter value and the values the device-time mock copies out
+        valueToSet.QuadPart = cpuTimeNS;
+        deviceTime->gpuCpuTimeValue.cpuTimeinNS = cpuTimeFromKmdNS;
+        deviceTime->gpuCpuTimeValue.gpuTimeStamp = gpuTimestamp;
+    };
+    setTimestamps(0, 0ull, 0ull);
+
+    TimeStampData gpuCpuTime;
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); // first query goes through getGpuCpuTimeImpl
+
+    // Error is smaller than 5%, timeout can be increased
+    auto newTimeAfterInterval = valueToSet.QuadPart + osTime->getTimestampRefreshTimeout(); // counter advanced by exactly the current timeout
+    setTimestamps(newTimeAfterInterval, newTimeAfterInterval + 10, newTimeAfterInterval + 10); // staged KMD values sit 10 above the staged counter value
+
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); // timeout elapsed, so getGpuCpuTimeImpl is called again
+
+    auto diff = (gpuCpuTime.gpuTimeStamp - valueToSet.QuadPart); // gap between the returned GPU timestamp and the staged counter value
+    EXPECT_EQ(initialExpectedTimeoutNS + diff, osTime->getTimestampRefreshTimeout()); // timeout is expected to grow by exactly that gap
+    EXPECT_GT(initialExpectedTimeoutNS + diff, initialExpectedTimeoutNS);
+
+    // Error is larger than 5%, timeout should be decreased
+    newTimeAfterInterval = valueToSet.QuadPart + osTime->getTimestampRefreshTimeout() + 10; // counter advanced past the enlarged timeout
+    setTimestamps(newTimeAfterInterval, newTimeAfterInterval * 2, newTimeAfterInterval * 2); // staged KMD values are double the staged counter value
+
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 3u);
+
+    EXPECT_LT(osTime->getTimestampRefreshTimeout(), initialExpectedTimeoutNS); // timeout is expected to end up below its initial value
+}
+
+TEST_F(OSTimeWinTest, whenGetGpuCpuTimeFailedThenReturnFalse) { // A failing getGpuCpuTimeImpl must surface as a false return from OSTime::getGpuCpuTime.
+    TimeStampData gpuCpuTime;
+    auto deviceTime = new MockDeviceTimeWin();
+    osTime->deviceTime.reset(deviceTime); // hand the mock to osTime; the raw pointer is kept to configure it
+    deviceTime->getGpuCpuTimeImplResult = false; // make the mocked device query report failure
+    EXPECT_FALSE(osTime->getGpuCpuTime(&gpuCpuTime));
+}
+
+TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueAfterFlagSetThenCallToKmd) { // After setRefreshTimestampsFlag() the next query is expected to call getGpuCpuTimeImpl again.
+    DebugManagerStateRestore restore;
+    debugManager.flags.EnableReusingGpuTimestamps.set(true);
+    TimeStampData gpuCpuTime; // NOTE(review): no overrideQueryPerformanceCounterFunction here, unlike sibling tests - CPU time presumably comes from the fixture's default counter; confirm that is intended.
+    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
+    auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
+    auto deviceTime = new MockDeviceTimeWin(); // created after the flag is set
+    osTime->deviceTime.reset(deviceTime); // hand the mock to osTime; the raw pointer is kept for assertions
+    osTime->setDeviceTimerResolution(*hwInfo);
+
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); // no device query issued yet
+    deviceTime->gpuCpuTimeValue = {1u, 1u}; // value the mock copies out on getGpuCpuTimeImpl calls
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); // first query goes through getGpuCpuTimeImpl
+
+    osTime->setRefreshTimestampsFlag();
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); // query after the flag was set goes through getGpuCpuTimeImpl again
+}
+
+TEST_F(OSTimeWinTest, whenGettingMaxGpuTimeStampValueWhenForceFlagSetThenCallToKmd) { // Passing true as the second argument is expected to make the query call getGpuCpuTimeImpl even with reuse enabled.
+    DebugManagerStateRestore restore;
+    debugManager.flags.EnableReusingGpuTimestamps.set(true);
+    osTime->overrideQueryPerformanceCounterFunction(queryPerformanceCounterMock); // CPU counter now comes from the mock (presumably reporting valueToSet - confirm)
+    LARGE_INTEGER frequency = {};
+    frequency.QuadPart = NSEC_PER_SEC; // presumably makes one counter tick correspond to one nanosecond - confirm
+    osTime->setFrequency(frequency);
+
+    auto deviceTime = new MockDeviceTimeWin(); // created after the flag is set
+    osTime->deviceTime.reset(deviceTime); // hand the mock to osTime; the raw pointer is kept for assertions
+    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
+    auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
+    osTime->setDeviceTimerResolution(*hwInfo);
+
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 0u); // no device query issued yet
+    TimeStampData gpuCpuTime;
+    deviceTime->gpuCpuTimeValue = {1u, 1u}; // value the mock copies out on getGpuCpuTimeImpl calls
+    valueToSet.QuadPart = 1;
+    osTime->getGpuCpuTime(&gpuCpuTime);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 1u); // first query goes through getGpuCpuTimeImpl
+
+    valueToSet.QuadPart = 5; // counter moved by only 4 between the two queries
+    osTime->getGpuCpuTime(&gpuCpuTime, true);
+    EXPECT_EQ(deviceTime->getGpuCpuTimeImplCalled, 2u); // forced query goes through getGpuCpuTimeImpl again
+}
+
+TEST_F(OSTimeWinTest, givenReusingTimestampsDisabledWhenGetTimestampRefreshTimeoutThenReturnCorrectValue) { // With reuse disabled the reported refresh timeout is expected to be zero.
+    DebugManagerStateRestore restore;
+    debugManager.flags.EnableReusingGpuTimestamps.set(0);
+    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
+    auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
+    auto deviceTime = new MockDeviceTimeWin(); // created after the flag is cleared
+    osTime->deviceTime.reset(deviceTime); // hand the mock to osTime
+    osTime->setDeviceTimerResolution(*hwInfo);
+    EXPECT_EQ(0ul, osTime->getTimestampRefreshTimeout()); // zero timeout expected once the new device-time mock is installed
+}