fix: correct latency observed between device and metric timestamps

Related-To: NEO-15820

Signed-off-by: shubham kumar <shubham.kumar@intel.com>
This commit is contained in:
shubham kumar
2025-09-08 07:03:32 +00:00
committed by Compute-Runtime-Automation
parent 76b93ca9aa
commit d3bef2223a
3 changed files with 22 additions and 16 deletions

View File

@@ -8,6 +8,7 @@
#include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/string.h"
#include "shared/source/os_interface/os_library.h"
@@ -1012,20 +1013,21 @@ ze_result_t OaMetricGroupImp::getMetricTimestampsExp(const ze_bool_t synchronize
*globalTimestamp = deviceTimestamp;
}
uint32_t cpuId;
MetricsDiscovery::ECompletionCode mdapiRetVal;
MetricsDiscovery::IMetricsDevice_1_13 *metricDevice;
metricDevice = getMetricSource()->getMetricEnumeration().getMdapiDevice();
// MDAPI returns GPU timestamps in nanoseconds
mdapiRetVal = metricDevice->GetGpuCpuTimestamps(metricTimestamp, &hostTimestamp, &cpuId);
if (mdapiRetVal != MetricsDiscovery::CC_OK) {
*globalTimestamp = 0;
*metricTimestamp = 0;
result = ZE_RESULT_ERROR_NOT_AVAILABLE;
} else {
result = ZE_RESULT_SUCCESS;
// Populate the cached timer data on the metric source. isFrequencyDataAvailable gates
// this section, so the two queries below run only until one attempt has succeeded.
if (!metricSource->isFrequencyDataAvailable) {
// Cache the NEO device's profiling timer resolution; it is used further down as a
// period in nanoseconds (nsecPerSec is divided by it).
metricSource->csTimestampPeriodNs = metricSource->getMetricDeviceContext().getDevice().getNEODevice()->getProfilingTimerResolution();
// getTimerResolution() writes its value through the reference argument, so this fills
// oaTimestampFrequency directly.
result = metricSource->getTimerResolution(metricSource->oaTimestampFrequency);
if (result != ZE_RESULT_SUCCESS) {
METRICS_LOG_ERR("Could not fetch oaTimestampFrequency from getTimerResolution(). Return status recieved %x ", result);
// Failure path: zero both outputs and report NOT_AVAILABLE whatever the underlying
// status was. The flag is left false, so a later call repeats the queries.
*globalTimestamp = 0;
*metricTimestamp = 0;
return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
metricSource->isFrequencyDataAvailable = true;
}
// NOTE(review): csTimestampPeriodNs is a double that defaults to 0. If the resolution
// query ever yields 0, or a period larger than nsecPerSec, csTimestampFrequency ends up
// undefined or 0 and becomes the divisor on the next statement - confirm the period is
// always in a sane range or add a guard.
const uint64_t csTimestampFrequency = static_cast<uint64_t>(CommonConstants::nsecPerSec / metricSource->csTimestampPeriodNs);
// Derive the metric timestamp by scaling the device timestamp with the ratio of the two
// clock frequencies.
// NOTE(review): both operands of the inner division are uint64_t, so the ratio is
// truncated to an integer before the multiply (and is 0 when oaTimestampFrequency is the
// smaller value) - confirm the two clocks always have an integral ratio.
*metricTimestamp = deviceTimestamp * (metricSource->oaTimestampFrequency / csTimestampFrequency);
result = ZE_RESULT_SUCCESS;
}
return result;

View File

@@ -72,6 +72,10 @@ class OaMetricSourceImp : public MetricSource {
}
bool canDisable() override;
void initMetricScopes(MetricDeviceContext &metricDeviceContext) override;
// Writes a timer resolution value into `resolution` and returns a ze_result_t status.
// The metric-timestamp path calls it to fill oaTimestampFrequency and treats any
// status other than ZE_RESULT_SUCCESS as a failure.
ze_result_t getTimerResolution(uint64_t &resolution);
// Cached result of the NEO device's getProfilingTimerResolution(); consumed as a period
// in nanoseconds. Stays 0 until the first query has run.
double csTimestampPeriodNs = 0;
// Cached value produced by getTimerResolution(); stays 0 until the first query has run.
uint64_t oaTimestampFrequency = 0;
// Set to true once both cached values above were fetched successfully, so later
// timestamp queries can skip refetching them.
bool isFrequencyDataAvailable = false;
protected:
ze_result_t initializationState = ZE_RESULT_ERROR_UNINITIALIZED;
@@ -81,7 +85,6 @@ class OaMetricSourceImp : public MetricSource {
MetricStreamer *pMetricStreamer = nullptr;
bool useCompute = false;
std::unique_ptr<MultiDomainDeferredActivationTracker> activationTracker{};
ze_result_t getTimerResolution(uint64_t &resolution);
void getTimestampValidBits(uint64_t &validBits);
};

View File

@@ -860,7 +860,7 @@ TEST_F(MetricEnumerationTest, GivenEnumerationIsSuccessfulWhenReadingMetricsFreq
EXPECT_NE(metricTimestamp, 0UL);
}
TEST_F(MetricEnumerationTest, GivenEnumerationIsSuccessfulWhenFailingToReadMetricsOrDeviceTimestampsThenValuesAreZero) {
TEST_F(MetricEnumerationTest, GivenEnumerationIsSuccessfulWhenFailingToReadDeviceTimestampsOrMetricFrequencyThenValuesAreZero) {
// Metrics Discovery device.
metricsDeviceParams.ConcurrentGroupsCount = 1;
@@ -922,7 +922,7 @@ TEST_F(MetricEnumerationTest, GivenEnumerationIsSuccessfulWhenFailingToReadMetri
ze_bool_t synchronizedWithHost = true;
uint64_t globalTimestamp = 1;
uint64_t metricTimestamp = 1;
metricsDevice.forceGetGpuCpuTimestampsFail = true;
metricsDevice.forceGetSymbolByNameFail = true;
EXPECT_EQ(L0::zetMetricGroupGetGlobalTimestampsExp(metricGroupHandle, synchronizedWithHost, &globalTimestamp, &metricTimestamp), ZE_RESULT_ERROR_NOT_AVAILABLE);
EXPECT_EQ(globalTimestamp, 0UL);
@@ -931,6 +931,7 @@ TEST_F(MetricEnumerationTest, GivenEnumerationIsSuccessfulWhenFailingToReadMetri
globalTimestamp = 1;
metricTimestamp = 1;
metricsDevice.forceGetGpuCpuTimestampsFail = true;
neoDevice->setOSTime(new FalseGpuCpuTime());
EXPECT_EQ(L0::zetMetricGroupGetGlobalTimestampsExp(metricGroupHandle, synchronizedWithHost, &globalTimestamp, &metricTimestamp), ZE_RESULT_ERROR_DEVICE_LOST);
EXPECT_EQ(globalTimestamp, 0UL);