feature: support metrics runtime disabling

When metrics are disabled on a device, metric group
activation on that device is rejected.

Related-To: NEO-14310

Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
Joshua Santosh Ranjan
2025-05-02 11:15:34 +00:00
committed by Compute-Runtime-Automation
parent bcf6c94f1c
commit 3db2183810
13 changed files with 198 additions and 6 deletions

View File

@@ -15,7 +15,7 @@ ze_result_t ZE_APICALL zetIntelDeviceEnableMetricsExp(zet_device_handle_t hDevic
}
// Experimental API entry point for disabling metrics on the given device.
// NOTE(review): the first return below exits unconditionally, so the
// L0::metricsDisable(hDevice) call that follows is never reached. This looks
// like the pre-change line shown next to its replacement in the diff view -
// confirm which return is the intended one.
ze_result_t ZE_APICALL zetIntelDeviceDisableMetricsExp(zet_device_handle_t hDevice) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
return L0::metricsDisable(hDevice);
}
ze_result_t ZE_APICALL zetIntelCommandListAppendMarkerExp(zet_command_list_handle_t hCommandList,

View File

@@ -58,7 +58,6 @@ MetricDeviceContext::MetricDeviceContext(Device &inputDevice) : device(inputDevi
bool MetricDeviceContext::enable() {
bool status = false;
std::lock_guard<std::mutex> lock(enableMetricsMutex);
for (auto const &entry : metricSources) {
auto const &metricSource = entry.second;
@@ -68,10 +67,27 @@ bool MetricDeviceContext::enable() {
}
status |= metricSource->isAvailable();
}
setMetricsCollectionAllowed(status);
isEnableChecked = true;
return status;
}
// Reports whether metrics collection can be turned off for this device context.
// Returns true when collection is not currently allowed, or when every
// registered metric source reports that it can be disabled; returns false as
// soon as one source refuses.
bool MetricDeviceContext::canDisable() {
    if (!isMetricsCollectionAllowed) {
        // Collection is already off, so no source needs to be consulted.
        return true;
    }
    for (auto const &sourceEntry : metricSources) {
        if (!sourceEntry.second->canDisable()) {
            return false;
        }
    }
    return true;
}
// Marks metrics collection as not allowed for this device context.
// This does not check canDisable(); the caller is expected to do that first.
void MetricDeviceContext::disable() {
    constexpr bool collectionAllowed = false;
    setMetricsCollectionAllowed(collectionAllowed);
}
ze_result_t MetricDeviceContext::metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) {
ze_result_t result = ZE_RESULT_SUCCESS;
@@ -111,6 +127,11 @@ ze_result_t MetricDeviceContext::metricGroupGet(uint32_t *pCount, zet_metric_gro
ze_result_t MetricDeviceContext::activateMetricGroupsPreferDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups) {
if (!isMetricsCollectionAllowed) {
METRICS_LOG_ERR("%s", "Cannot activate when metrics is disabled");
return ZE_RESULT_ERROR_UNINITIALIZED;
}
// Create a map of metric source types and Metric groups
std::map<uint32_t, std::vector<zet_metric_group_handle_t>> metricGroupsPerMetricSourceMap{};
for (auto index = 0u; index < count; index++) {
@@ -187,6 +208,7 @@ Device &MetricDeviceContext::getDevice() const {
void MetricDeviceContext::enableMetricApiForDevice(zet_device_handle_t hDevice, bool &isFailed) {
auto deviceImp = static_cast<DeviceImp *>(L0::Device::fromHandle(hDevice));
std::lock_guard<std::mutex> lock(deviceImp->getMetricDeviceContext().enableMetricsMutex);
// Initialize device.
isFailed |= !deviceImp->metricContext->enable();
@@ -196,6 +218,30 @@ void MetricDeviceContext::enableMetricApiForDevice(zet_device_handle_t hDevice,
}
}
// Disables metrics collection for the device behind hDevice and all of its
// sub-devices.
//
// The device's enableMetricsMutex is held for the whole call. Every sub-device
// context and then the device's own context are first asked whether they can
// be disabled; if any refuses, nothing is changed and
// ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE is returned. Otherwise all sub-device
// contexts and then the device's own context are disabled and
// ZE_RESULT_SUCCESS is returned.
ze_result_t MetricDeviceContext::disableMetricApiForDevice(zet_device_handle_t hDevice) {
    auto *rootDeviceImp = static_cast<DeviceImp *>(L0::Device::fromHandle(hDevice));
    auto &rootContext = rootDeviceImp->getMetricDeviceContext();
    std::lock_guard<std::mutex> guard(rootContext.enableMetricsMutex);

    const uint32_t subDeviceCount = rootDeviceImp->numSubDevices;

    // Validation pass: do not modify any context until all of them agree.
    for (uint32_t subIdx = 0; subIdx < subDeviceCount; ++subIdx) {
        auto &subContext = rootDeviceImp->subDevices[subIdx]->getMetricDeviceContext();
        if (!subContext.canDisable()) {
            METRICS_LOG_ERR("%s", "Cannot disable sub device, since metrics resources are still in use.");
            return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE;
        }
    }
    if (!rootContext.canDisable()) {
        METRICS_LOG_ERR("%s", "Cannot disable root device, since metrics resources are still in use.");
        return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE;
    }

    // Commit pass: sub-devices first, then the device itself.
    for (uint32_t subIdx = 0; subIdx < subDeviceCount; ++subIdx) {
        rootDeviceImp->subDevices[subIdx]->getMetricDeviceContext().disable();
    }
    rootContext.disable();

    return ZE_RESULT_SUCCESS;
}
ze_result_t MetricDeviceContext::enableMetricApi() {
bool failed = false;
@@ -919,4 +965,8 @@ ze_result_t metricsEnable(zet_device_handle_t hDevice) {
return isFailed ? ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE : ZE_RESULT_SUCCESS;
}
// Namespace-level entry point for disabling metrics on a device; forwards to
// MetricDeviceContext::disableMetricApiForDevice and returns its result.
ze_result_t metricsDisable(zet_device_handle_t hDevice) {
    const ze_result_t disableResult = MetricDeviceContext::disableMetricApiForDevice(hDevice);
    return disableResult;
}
} // namespace L0

View File

@@ -108,6 +108,7 @@ class MetricSource {
uint32_t *pCount,
zet_metric_handle_t *phExcludedMetrics,
zet_intel_metric_calculate_operation_exp_handle_t *phCalculateOperation) = 0;
virtual bool canDisable() = 0;
protected:
uint32_t type = MetricSource::metricSourceTypeUndefined;
@@ -122,6 +123,7 @@ class MultiDomainDeferredActivationTracker {
virtual bool activateMetricGroupsDeferred(uint32_t count, zet_metric_group_handle_t *phMetricGroups);
bool isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) const;
bool isMetricGroupActivatedInHw() const;
bool isAnyMetricGroupActivated() const { return domains.size() > 0; }
protected:
void deActivateDomain(uint32_t domain);
@@ -155,6 +157,7 @@ class MetricDeviceContext {
static std::unique_ptr<MetricDeviceContext> create(Device &device);
static ze_result_t enableMetricApi();
static void enableMetricApiForDevice(zet_device_handle_t hDevice, bool &isFailed);
static ze_result_t disableMetricApiForDevice(zet_device_handle_t hDevice);
ze_result_t getConcurrentMetricGroups(uint32_t metricGroupCount, zet_metric_group_handle_t *phMetricGroups,
uint32_t *pConcurrentGroupCount, uint32_t *pCountPerConcurrentGroup);
@@ -170,6 +173,7 @@ class MetricDeviceContext {
zet_metric_handle_t *phExcludedMetrics,
zet_intel_metric_calculate_operation_exp_handle_t *phCalculateOperation);
bool areMetricGroupsFromSameDeviceHierarchy(uint32_t count, zet_metric_group_handle_t *phMetricGroups);
void setMetricsCollectionAllowed(bool status) { isMetricsCollectionAllowed = status; }
protected:
bool areMetricGroupsFromSameSource(uint32_t count, zet_metric_group_handle_t *phMetricGroups, uint32_t *sourceType);
@@ -180,9 +184,12 @@ class MetricDeviceContext {
private:
bool enable();
bool canDisable();
void disable();
struct Device &device;
bool multiDeviceCapable = false;
uint32_t subDeviceIndex = 0;
bool isMetricsCollectionAllowed = false;
bool isEnableChecked = false;
std::mutex enableMetricsMutex;
};
@@ -527,5 +534,6 @@ ze_result_t metricDecodeCalculateMultipleValues(zet_intel_metric_decoder_exp_han
uint32_t *pTotalMetricReportCount, zet_intel_metric_result_exp_t *pMetricResults);
ze_result_t metricsEnable(zet_device_handle_t hDevice);
ze_result_t metricsDisable(zet_device_handle_t hDevice);
} // namespace L0

View File

@@ -260,6 +260,10 @@ ze_result_t IpSamplingMetricSourceImp::calcOperationCreate(MetricDeviceContext &
return status;
}
// The IP sampling source can be disabled only while its activation tracker
// reports no activated metric group.
bool IpSamplingMetricSourceImp::canDisable() {
    const bool hasActivatedGroups = activationTracker->isAnyMetricGroupActivated();
    return !hasActivatedGroups;
}
IpSamplingMetricGroupImp::IpSamplingMetricGroupImp(IpSamplingMetricSourceImp &metricSource,
std::vector<IpSamplingMetricImp> &metrics) : IpSamplingMetricGroupBase(metricSource) {
this->metrics.reserve(metrics.size());

View File

@@ -64,6 +64,7 @@ class IpSamplingMetricSourceImp : public MetricSource {
zet_intel_metric_calculate_operation_exp_handle_t *phCalculateOperation) override;
uint32_t metricSourceCount = 0;
bool canDisable() override;
protected:
void cacheMetricGroup();

View File

@@ -112,6 +112,10 @@ Device &OaMetricSourceImp::getDevice() {
return metricDeviceContext.getDevice();
}
// The OA source can be disabled only while its activation tracker reports no
// activated metric group.
bool OaMetricSourceImp::canDisable() {
    const bool hasActivatedGroups = activationTracker->isAnyMetricGroupActivated();
    return !hasActivatedGroups;
}
// Returns a reference to the object held in metricsLibrary.
MetricsLibrary &OaMetricSourceImp::getMetricsLibrary() {
    MetricsLibrary &library = *metricsLibrary;
    return library;
}

View File

@@ -71,6 +71,7 @@ class OaMetricSourceImp : public MetricSource {
zet_intel_metric_calculate_operation_exp_handle_t *phCalculateOperation) override {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
bool canDisable() override;
protected:
ze_result_t initializationState = ZE_RESULT_ERROR_UNINITIALIZED;

View File

@@ -36,6 +36,7 @@ void MetricContextFixture::setUp() {
std::unique_ptr<MetricIpSamplingOsInterface>(mockIpSamplingOsInterface);
auto &ipMetricSource = device->getMetricDeviceContext().getMetricSource<IpSamplingMetricSourceImp>();
ipMetricSource.setMetricOsInterface(metricIpSamplingOsInterface);
device->getMetricDeviceContext().setMetricsCollectionAllowed(true);
// Mock metrics library.
mockMetricsLibrary = std::unique_ptr<Mock<MetricsLibrary>>(new (std::nothrow) Mock<MetricsLibrary>(metricSource));
@@ -100,6 +101,7 @@ void MetricMultiDeviceFixture::setUp() {
// Initialize metric api.
auto &metricSource = devices[0]->getMetricDeviceContext().getMetricSource<OaMetricSourceImp>();
metricSource.setInitializationState(ZE_RESULT_SUCCESS);
devices[0]->getMetricDeviceContext().setMetricsCollectionAllowed(true);
// Mock metrics library.
mockMetricsLibrary = std::unique_ptr<Mock<MetricsLibrary>>(new (std::nothrow) Mock<MetricsLibrary>(metricSource));
@@ -127,6 +129,7 @@ void MetricMultiDeviceFixture::setUp() {
mockMetricsLibrarySubDevices[i]->handle = new MockOsLibrary();
metricsSubDeviceContext.setInitializationState(ZE_RESULT_SUCCESS);
deviceImp.subDevices[i]->getMetricDeviceContext().setMetricsCollectionAllowed(true);
}
// Metrics Discovery device common settings.
metricsDeviceParams.Version.MajorNumber = MetricEnumeration::requiredMetricsDiscoveryMajorVersion;

View File

@@ -56,6 +56,7 @@ class MockMetricSource : public L0::MetricSource {
zet_intel_metric_calculate_operation_exp_handle_t *phCalculateOperation) override {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
bool canDisable() override { return false; }
~MockMetricSource() override = default;
};

View File

@@ -283,9 +283,5 @@ TEST_F(MetricRuntimeFixture, WhenRunTimeEnableIsDoneAndNoSourcesAreAvailableThen
deviceImp->metricContext.reset();
}
// Expects zetIntelDeviceDisableMetricsExp to return
// ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for the fixture's device.
// NOTE(review): the other disable tests in this change expect the same call to
// return ZE_RESULT_SUCCESS or ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE, and the
// test name does not describe what the body checks. This appears to be the
// pre-change test shown as removed by the diff - confirm it is meant to go.
TEST_F(MetricRuntimeFixture, WhenRunTimeDisableIsDoneMultipleTimesThenEnableIsDoneOnlyOnce) {
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, zetIntelDeviceDisableMetricsExp(device->toHandle()));
}
} // namespace ult
} // namespace L0

View File

@@ -283,6 +283,35 @@ HWTEST2_F(MetricIpSamplingEnumerationTest, GivenEnumerationIsSuccessfulThenDummy
}
}
// Verifies the runtime-disable flow for the IP sampling metric group on every
// test device:
//   1. disabling while a metric group is activated is refused with
//      ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE,
//   2. disabling after deactivating all groups succeeds,
//   3. activation on a disabled device fails with ZE_RESULT_ERROR_UNINITIALIZED,
//   4. metrics are re-enabled at the end of each iteration.
HWTEST2_F(MetricIpSamplingEnumerationTest, GivenEnumerationIsSuccessfulWhenMetricsDisableIsCalledActivationReturnsFailure, EustallSupportedPlatforms) {

    EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi());
    for (auto device : testDevices) {

        uint32_t metricGroupCount = 0;
        zetMetricGroupGet(device->toHandle(), &metricGroupCount, nullptr);
        // Stop here if nothing was enumerated: metricGroups[0] is indexed below
        // and would be out of bounds on an empty vector.
        ASSERT_GT(metricGroupCount, 0u);

        std::vector<zet_metric_group_handle_t> metricGroups(metricGroupCount);
        ASSERT_EQ(zetMetricGroupGet(device->toHandle(), &metricGroupCount, metricGroups.data()), ZE_RESULT_SUCCESS);
        ASSERT_NE(metricGroups[0], nullptr);

        zet_metric_group_properties_t metricGroupProperties = {ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES, nullptr};
        EXPECT_EQ(zetMetricGroupGetProperties(metricGroups[0], &metricGroupProperties), ZE_RESULT_SUCCESS);
        EXPECT_EQ(strcmp(metricGroupProperties.name, "EuStallSampling"), 0);

        EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroups[0]), ZE_RESULT_SUCCESS);
        static_cast<DeviceImp *>(device)->activateMetricGroups();

        // Disable Metrics
        EXPECT_EQ(zetIntelDeviceDisableMetricsExp(device->toHandle()), ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE);
        // De-Activate all metric groups.
        EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 0, nullptr), ZE_RESULT_SUCCESS);
        // Disable Metrics with all groups deactivated should return success
        EXPECT_EQ(zetIntelDeviceDisableMetricsExp(device->toHandle()), ZE_RESULT_SUCCESS);
        // Activate metric group on a disabled device should be failure.
        EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), device->toHandle(), 1, &metricGroups[0]), ZE_RESULT_ERROR_UNINITIALIZED);
        // Restore the enabled state before moving on to the next device.
        EXPECT_EQ(zetIntelDeviceEnableMetricsExp(device->toHandle()), ZE_RESULT_SUCCESS);
    }
}
HWTEST2_F(MetricIpSamplingEnumerationTest, GivenEnumerationIsSuccessfulThenUnsupportedApisForMetricGroupReturnsFailure, EustallSupportedPlatforms) {
EXPECT_EQ(ZE_RESULT_SUCCESS, testDevices[0]->getMetricDeviceContext().enableMetricApi());

View File

@@ -1598,6 +1598,100 @@ TEST_F(MultiDeviceMetricEnumerationTest, givenMultipleDevicesAndTwoMetricGroupsW
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 2, metricGroupHandles.data()), ZE_RESULT_SUCCESS);
}
// Verifies the runtime-disable flow on devices[0] of the multi-device fixture:
// disabling is refused while a metric group is activated, succeeds (and keeps
// succeeding on repeated calls) once all groups are deactivated, and a
// subsequent activation fails with ZE_RESULT_ERROR_UNINITIALIZED. The
// collection-allowed flag is set back to true on the device and its
// sub-devices at the end of the test.
TEST_F(MultiDeviceMetricEnumerationTest, givenMultipleDevicesAndMetricsIsDisabledThenZetContextActivateMetricGroupsReturnsFailure) {
// Metrics Discovery device.
metricsDeviceParams.ConcurrentGroupsCount = 1;
// Metrics Discovery concurrent group.
Mock<IConcurrentGroup_1_13> metricsConcurrentGroup;
TConcurrentGroupParams_1_13 metricsConcurrentGroupParams = {};
metricsConcurrentGroupParams.MetricSetsCount = 1;
metricsConcurrentGroupParams.SymbolName = "OA";
metricsConcurrentGroupParams.Description = "OA description";
metricsConcurrentGroupParams.IoMeasurementInformationCount = 1;
Mock<MetricsDiscovery::IEquation_1_0> ioReadEquation;
MetricsDiscovery::TEquationElement_1_0 ioEquationElement = {};
ioEquationElement.Type = MetricsDiscovery::EQUATION_ELEM_IMM_UINT64;
ioEquationElement.ImmediateUInt64 = 0;
ioReadEquation.getEquationElement.push_back(&ioEquationElement);
Mock<MetricsDiscovery::IInformation_1_0> ioMeasurement;
MetricsDiscovery::TInformationParams_1_0 oaInformation = {};
oaInformation.SymbolName = "BufferOverflow";
oaInformation.IoReadEquation = &ioReadEquation;
// Metrics Discovery:: metric set.
Mock<MetricsDiscovery::IMetricSet_1_13> metricsSet;
MetricsDiscovery::TMetricSetParams_1_11 metricsSetParams = {};
metricsSetParams.ApiMask = MetricsDiscovery::API_TYPE_OCL;
// NOTE(review): this value is overwritten with 1 a few lines below, so the
// assignment to 0 has no effect.
metricsSetParams.MetricsCount = 0;
metricsSetParams.SymbolName = "Metric set name";
metricsSetParams.ShortName = "Metric set description";
metricsSetParams.MetricsCount = 1;
// Metrics Discovery:: metric.
Mock<IMetric_1_13> metric;
TMetricParams_1_13 metricParams = {};
metricParams.SymbolName = "Metric symbol name";
metricParams.ShortName = "Metric short name";
metricParams.LongName = "Metric long name";
metricParams.ResultType = MetricsDiscovery::TMetricResultType::RESULT_UINT64;
metricParams.MetricType = MetricsDiscovery::TMetricType::METRIC_TYPE_RATIO;
// One api: metric group handle.
zet_metric_group_handle_t metricGroupHandle = {};
openMetricsAdapter();
setupDefaultMocksForMetricDevice(metricsDevice);
// Wire the mocks together: device -> concurrent group -> metric set -> metric.
metricsDevice.getConcurrentGroupResults.push_back(&metricsConcurrentGroup);
metricsConcurrentGroup.GetParamsResult = &metricsConcurrentGroupParams;
metricsConcurrentGroup.getMetricSetResult = &metricsSet;
metricsConcurrentGroup.GetIoMeasurementInformationResult = &ioMeasurement;
ioMeasurement.GetParamsResult = &oaInformation;
metricsSet.GetParamsResult = &metricsSetParams;
metricsSet.GetMetricResult = &metric;
metric.GetParamsResult = &metricParams;
// Metric group count.
uint32_t metricGroupCount = 0;
EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, nullptr), ZE_RESULT_SUCCESS);
EXPECT_EQ(metricGroupCount, 1u);
// Metric group handle.
EXPECT_EQ(zetMetricGroupGet(devices[0]->toHandle(), &metricGroupCount, &metricGroupHandle), ZE_RESULT_SUCCESS);
EXPECT_EQ(metricGroupCount, 1u);
EXPECT_NE(metricGroupHandle, nullptr);
// Activate metric group.
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 1, &metricGroupHandle), ZE_RESULT_SUCCESS);
// Disable Metrics with an activated metric group returns error
EXPECT_EQ(zetIntelDeviceDisableMetricsExp(devices[0]->toHandle()), ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE);
// De-Activate all metric groups.
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 0, nullptr), ZE_RESULT_SUCCESS);
// Disable Metrics with all groups deactivated should return success
EXPECT_EQ(zetIntelDeviceDisableMetricsExp(devices[0]->toHandle()), ZE_RESULT_SUCCESS);
// Multiple Disables continue to return success
EXPECT_EQ(zetIntelDeviceDisableMetricsExp(devices[0]->toHandle()), ZE_RESULT_SUCCESS);
// Activate metric group on a disabled device should be failure
EXPECT_EQ(zetContextActivateMetricGroups(context->toHandle(), devices[0]->toHandle(), 1, &metricGroupHandle), ZE_RESULT_ERROR_UNINITIALIZED);
// Reset the disabled status
devices[0]->getMetricDeviceContext().setMetricsCollectionAllowed(true);
auto &deviceImp = *static_cast<DeviceImp *>(devices[0]);
const uint32_t subDeviceCount = static_cast<uint32_t>(deviceImp.subDevices.size());
for (uint32_t i = 0; i < subDeviceCount; i++) {
deviceImp.subDevices[i]->getMetricDeviceContext().setMetricsCollectionAllowed(true);
}
}
TEST_F(MetricEnumerationTest, givenValidTimeBasedMetricGroupWhenzetContextActivateMetricGroupsIsCalledThenReturnsSuccess) {
// Metrics Discovery device.

View File

@@ -41,6 +41,7 @@ void OaMetricProgrammableFixture::SetUp() {
mockAdapterGroup.mockParams.Version.MajorNumber = 1;
mockAdapterGroup.mockParams.Version.MinorNumber = 13;
deviceContext = std::make_unique<MetricDeviceContext>(*device);
deviceContext->setMetricsCollectionAllowed(true);
oaMetricSource = static_cast<OaMetricSourceImp *>(&deviceContext->getMetricSource<OaMetricSourceImp>());
metricEnumeration = static_cast<MetricEnumeration *>(&oaMetricSource->getMetricEnumeration());
metricEnumeration->setAdapterGroup(&mockAdapterGroup);