From 6d2d16d68e80c7080404e3b6179027e9b87498e9 Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Tue, 24 Oct 2023 08:11:23 +0000 Subject: [PATCH] fix: avoid overflow of gpu time stamp in ns Related-To: NEO-8394 Signed-off-by: Mateusz Jablonski --- level_zero/core/source/event/event_impl.inl | 14 +++++++------- opencl/source/event/event.cpp | 18 +++++++++--------- shared/source/helpers/gfx_core_helper.h | 4 ++-- shared/source/helpers/gfx_core_helper_base.inl | 7 +++++++ .../helpers/gfx_core_helper_bdw_and_later.inl | 5 ----- .../helpers/gfx_core_helper_xehp_and_later.inl | 5 ----- .../helpers/gfx_core_helper_tests.cpp | 13 +++++++------ 7 files changed, 32 insertions(+), 34 deletions(-) diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 71ff06ee4b..bc663c9775 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -610,8 +610,8 @@ void EventImp::getSynchronizedKernelTimestamps(ze_synchronized_timesta auto &gfxCoreHelper = device->getNEODevice()->getGfxCoreHelper(); auto &hwInfo = device->getNEODevice()->getHardwareInfo(); - const auto frequency = device->getNEODevice()->getDeviceInfo().profilingTimerResolution; - auto deviceTsInNs = gfxCoreHelper.getGpuTimeStampInNS(referenceTs.gpuTimeStamp, frequency); + const auto resolution = device->getNEODevice()->getDeviceInfo().profilingTimerResolution; + auto deviceTsInNs = gfxCoreHelper.getGpuTimeStampInNS(referenceTs.gpuTimeStamp, resolution); const auto maxKernelTsValue = maxNBitValue(hwInfo.capabilityTable.kernelTimestampValidBits); auto getDuration = [&](uint64_t startTs, uint64_t endTs) { @@ -640,15 +640,15 @@ void EventImp::getSynchronizedKernelTimestamps(ze_synchronized_timesta auto calculateSynchronizedTs = [&](ze_synchronized_timestamp_data_ext_t *synchronizedTs, const ze_kernel_timestamp_data_t *deviceTs) { // Add the offset to the kernel timestamp to find the start timestamp on the CPU timescale int64_t offset = tsOffsetInNs; - uint64_t startTimeStampInNs = static_cast(deviceTs->kernelStart * frequency) + offset; + uint64_t startTimeStampInNs = static_cast(deviceTs->kernelStart * resolution) + offset; if (startTimeStampInNs < referenceHostTsInNs) { - offset += static_cast(maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits()) * frequency); - startTimeStampInNs = static_cast(deviceTs->kernelStart * frequency) + offset; + offset += static_cast(maxNBitValue(gfxCoreHelper.getGlobalTimeStampBits()) * resolution); + startTimeStampInNs = static_cast(deviceTs->kernelStart * resolution) + offset; } // Get the kernel timestamp duration uint64_t deviceDuration = getDuration(deviceTs->kernelStart, deviceTs->kernelEnd); - uint64_t deviceDurationNs = static_cast(deviceDuration * frequency); + uint64_t deviceDurationNs = static_cast(deviceDuration * resolution); // Add the duration to the startTimeStamp to get the endTimeStamp uint64_t endTimeStampInNs = startTimeStampInNs + deviceDurationNs; @@ -662,7 +662,7 @@ void EventImp::getSynchronizedKernelTimestamps(ze_synchronized_timesta pSynchronizedTimestampsBuffer[index].context.kernelStart = pSynchronizedTimestampsBuffer[index].global.kernelStart; uint64_t deviceDuration = getDuration(pKernelTimestampsBuffer[index].context.kernelStart, pKernelTimestampsBuffer[index].context.kernelEnd); - uint64_t deviceDurationNs = static_cast(deviceDuration * frequency); + uint64_t deviceDurationNs = static_cast(deviceDuration * resolution); pSynchronizedTimestampsBuffer[index].context.kernelEnd = pSynchronizedTimestampsBuffer[index].context.kernelStart + deviceDurationNs; } diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index da7106a243..25cdc2992f 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -369,20 +369,20 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con auto &device = this->cmdQueue->getDevice(); auto &gfxCoreHelper = device.getGfxCoreHelper(); - auto frequency = device.getDeviceInfo().profilingTimerResolution; - auto gpuSubmitTimeStamp = gfxCoreHelper.getGpuTimeStampInNS(submitTimeStamp.gpuTimeStamp, frequency); + auto resolution = device.getDeviceInfo().profilingTimerResolution; + auto gpuSubmitTimeStamp = gfxCoreHelper.getGpuTimeStampInNS(submitTimeStamp.gpuTimeStamp, resolution); if (DebugManager.flags.EnableDeviceBasedTimestamps.get()) { - startTimeStamp = static_cast(globalStartTS * frequency); + startTimeStamp = static_cast(globalStartTS * resolution); while (startTimeStamp < gpuSubmitTimeStamp) { - startTimeStamp += static_cast((1ULL << gfxCoreHelper.getGlobalTimeStampBits()) * frequency); + startTimeStamp += static_cast((1ULL << gfxCoreHelper.getGlobalTimeStampBits()) * resolution); } } else { int64_t c0 = submitTimeStamp.cpuTimeinNS - gpuSubmitTimeStamp; - startTimeStamp = static_cast(globalStartTS * frequency) + c0; + startTimeStamp = static_cast(globalStartTS * resolution) + c0; if (startTimeStamp < submitTimeStamp.cpuTimeinNS) { - c0 += static_cast((1ULL << (gfxCoreHelper.getGlobalTimeStampBits())) * frequency); - startTimeStamp = static_cast(globalStartTS * frequency) + c0; + c0 += static_cast((1ULL << (gfxCoreHelper.getGlobalTimeStampBits())) * resolution); + startTimeStamp = static_cast(globalStartTS * resolution) + c0; } } @@ -401,8 +401,8 @@ void Event::calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t con } else { gpuCompleteDuration = getDelta(contextStartTS, *contextCompleteTS); } - cpuDuration = static_cast(gpuDuration * frequency); - cpuCompleteDuration = static_cast(gpuCompleteDuration * frequency); + cpuDuration = static_cast(gpuDuration * resolution); + cpuCompleteDuration = static_cast(gpuCompleteDuration * resolution); endTimeStamp = startTimeStamp + cpuDuration; completeTimeStamp = startTimeStamp + cpuCompleteDuration; diff --git a/shared/source/helpers/gfx_core_helper.h b/shared/source/helpers/gfx_core_helper.h index ad03a311f1..7843116110 100644 --- a/shared/source/helpers/gfx_core_helper.h +++ b/shared/source/helpers/gfx_core_helper.h @@ -110,7 +110,7 @@ class GfxCoreHelper { virtual uint32_t getMinimalGrfSize() const = 0; virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const = 0; virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const = 0; - virtual uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const = 0; + virtual uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double resolution) const = 0; virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0; virtual void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) const = 0; virtual bool isBankOverrideRequired(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const = 0; @@ -303,7 +303,7 @@ class GfxCoreHelperHw : public GfxCoreHelper { uint32_t getMinimalGrfSize() const override; - uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const override; + uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double resolution) const override; uint32_t getGlobalTimeStampBits() const override; diff --git a/shared/source/helpers/gfx_core_helper_base.inl b/shared/source/helpers/gfx_core_helper_base.inl index de89499653..9154fbf182 100644 --- a/shared/source/helpers/gfx_core_helper_base.inl +++ b/shared/source/helpers/gfx_core_helper_base.inl @@ -715,4 +715,11 @@ char const *GfxCoreHelperHw::getDefaultDeviceHierarchy() const { return deviceHierarchyComposite; } +template +uint64_t GfxCoreHelperHw::getGpuTimeStampInNS(uint64_t timeStamp, double resolution) const { + UNRECOVERABLE_IF(resolution > 127.0); + constexpr auto timestampMask = maxNBitValue(57); + return static_cast(static_cast(timeStamp & timestampMask) * resolution); +} + } // namespace NEO diff --git a/shared/source/helpers/gfx_core_helper_bdw_and_later.inl b/shared/source/helpers/gfx_core_helper_bdw_and_later.inl index 25c36edff3..d82137e111 100644 --- a/shared/source/helpers/gfx_core_helper_bdw_and_later.inl +++ b/shared/source/helpers/gfx_core_helper_bdw_and_later.inl @@ -89,11 +89,6 @@ inline uint32_t GfxCoreHelperHw::calculateMaxWorkGroupSize(const Kern return defaultMaxGroupSize; } -template -uint64_t GfxCoreHelperHw::getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const { - return static_cast(timeStamp * frequency); -} - constexpr uint32_t planarYuvMaxHeight = 16352; template diff --git a/shared/source/helpers/gfx_core_helper_xehp_and_later.inl b/shared/source/helpers/gfx_core_helper_xehp_and_later.inl index 5b5c04d42f..2be2172856 100644 --- a/shared/source/helpers/gfx_core_helper_xehp_and_later.inl +++ b/shared/source/helpers/gfx_core_helper_xehp_and_later.inl @@ -124,11 +124,6 @@ inline uint32_t GfxCoreHelperHw::calculateMaxWorkGroupSize(const Kern return defaultMaxGroupSize; } -template -uint64_t GfxCoreHelperHw::getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const { - return static_cast(timeStamp * frequency); -} - constexpr uint32_t planarYuvMaxHeight = 16128; template diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index 51eed27eee..ae4bf6df12 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -149,13 +149,14 @@ HWTEST2_F(GfxCoreHelperTest, givenGfxCoreHelperWhenGettingThreadsPerEUConfigsThe EXPECT_EQ(0U, configs.size()); } -HWCMDTEST_F(IGFX_GEN8_CORE, GfxCoreHelperTest, givenGfxCoreHelperWhenGetGpuTimeStampInNSIsCalledThenCorrectValueIsReturned) { - +TEST_F(GfxCoreHelperTest, givenGfxCoreHelperWhenGetGpuTimeStampInNSIsCalledThenCorrectValueIsReturned) { auto &gfxCoreHelper = getHelper(); - auto timeStamp = 0x00ff'ffff'ffff; - auto frequency = 123456.0; - auto result = static_cast(timeStamp * frequency); - EXPECT_EQ(result, gfxCoreHelper.getGpuTimeStampInNS(timeStamp, frequency)); + auto timeStamp0 = 0x00ff'ffff'ffff; + auto timeStamp1 = 0xfe00'00ff'ffff'ffff; + auto resolution = 123.0; + auto result = static_cast(timeStamp0 * resolution); + EXPECT_EQ(result, gfxCoreHelper.getGpuTimeStampInNS(timeStamp0, resolution)); + EXPECT_EQ(result, gfxCoreHelper.getGpuTimeStampInNS(timeStamp1, resolution)); } TEST(DwordBuilderTest, WhenSettingNonMaskedBitsThenOnlySelectedBitAreSet) {