compute-runtime/level_zero/tools/source/metrics/metric_oa_source.cpp

438 lines
19 KiB
C++

/*
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/tools/source/metrics/metric_oa_source.h"
#include "shared/source/os_interface/os_library.h"
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/tools/source/metrics/metric.h"
#include "level_zero/tools/source/metrics/metric_multidevice_programmable.h"
#include "level_zero/tools/source/metrics/metric_multidevice_programmable.inl"
#include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h"
#include "level_zero/tools/source/metrics/metric_oa_programmable_imp.h"
#include "level_zero/tools/source/metrics/metric_oa_query_imp.h"
#include "level_zero/zet_intel_gpu_metric.h"
namespace L0 {
// Factory for the OA metric source bound to the given device context.
// Allocation uses nothrow new, so the returned pointer is empty if it fails.
std::unique_ptr<OaMetricSourceImp> OaMetricSourceImp::create(const MetricDeviceContext &metricDeviceContext) {
    auto *rawSource = new (std::nothrow) OaMetricSourceImp(metricDeviceContext);
    return std::unique_ptr<OaMetricSourceImp>(rawSource);
}
// Stores the device context and builds the helper objects owned by this source:
// the metric enumeration, the metrics library wrapper and the activation tracker.
// The enumeration and library are allocated with nothrow new and are not
// null-checked here.
OaMetricSourceImp::OaMetricSourceImp(const MetricDeviceContext &metricDeviceContext)
    : metricDeviceContext(metricDeviceContext),
      metricEnumeration(std::unique_ptr<MetricEnumeration>(new (std::nothrow) MetricEnumeration(*this))),
      metricsLibrary(std::unique_ptr<MetricsLibrary>(new (std::nothrow) MetricsLibrary(*this))) {
    // Tag this source as the OA source type.
    type = MetricSource::metricSourceTypeOa;

    // The tracker is constructed from the sub-device index reported by the context.
    const auto subDeviceIndex = metricDeviceContext.getSubDeviceIndex();
    activationTracker = std::make_unique<MultiDomainDeferredActivationTracker>(subDeviceIndex);
}
// Destructor is defaulted out of line, in this translation unit.
OaMetricSourceImp::~OaMetricSourceImp() = default;
// Enables the source by loading its dependencies. The boolean result is
// discarded here; loadDependencies() records the outcome in the
// initialization state.
void OaMetricSourceImp::enable() {
    static_cast<void>(loadDependencies());
}
// Reads the global symbol named by globalSymbolOaGpuTimestampFrequency into
// `resolution`.
// Returns ZE_RESULT_SUCCESS when the symbol was read; otherwise sets
// `resolution` to 0 and returns ZE_RESULT_ERROR_NOT_AVAILABLE.
ze_result_t OaMetricSourceImp::getTimerResolution(uint64_t &resolution) {
    if (metricEnumeration->readGlobalSymbol(globalSymbolOaGpuTimestampFrequency.data(), resolution)) {
        return ZE_RESULT_SUCCESS;
    }

    resolution = 0;
    return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
// Derives the number of valid timestamp bits from two global symbols: the
// value read via globalSymbolOaMaxTimestamp (treated as nanoseconds) and the
// timer frequency obtained from getTimerResolution().
//
// validBits [out]: number of set bits in the computed maximum timestamp; 0 on
//                  every failure path.
// Returns ZE_RESULT_ERROR_NOT_AVAILABLE if the max-timestamp symbol cannot be
// read, the error from getTimerResolution() if that fails, otherwise
// ZE_RESULT_SUCCESS.
ze_result_t OaMetricSourceImp::getTimestampValidBits(uint64_t &validBits) {
    // Fail closed: all error returns below leave the output at 0, matching
    // what getTimerResolution() does for its own output.
    validBits = 0;

    uint64_t maxNanoSeconds = 0;
    if (!metricEnumeration->readGlobalSymbol(globalSymbolOaMaxTimestamp.data(), maxNanoSeconds)) {
        return ZE_RESULT_ERROR_NOT_AVAILABLE;
    }

    uint64_t timerFrequency = 0;
    const ze_result_t retVal = getTimerResolution(timerFrequency);
    if (retVal != ZE_RESULT_SUCCESS) {
        return retVal;
    }

    // Convert the maximum time span into timer ticks.
    // NOTE(review): the product is evaluated before the division and could wrap
    // for large symbol values - confirm the expected value ranges.
    const uint64_t maxTimeStamp = maxNanoSeconds * timerFrequency / CommonConstants::nsecPerSec;

    // NOTE(review): this counts set bits, which equals the bit width only when
    // maxTimeStamp has the form 2^k - 1 - confirm that is guaranteed.
    validBits = std::bitset<64>(maxTimeStamp).count();
    return retVal;
}
// The source is reported as available exactly when it is initialized.
bool OaMetricSourceImp::isAvailable() {
    const bool available = isInitialized();
    return available;
}
// Appends an OverrideFlushCaches command (with the override enabled) to the
// given command list through the metrics library.
// Returns ZE_RESULT_SUCCESS when the library produced the commands, otherwise
// ZE_RESULT_ERROR_UNKNOWN.
ze_result_t OaMetricSourceImp::appendMetricMemoryBarrier(CommandList &commandList) {
    auto *deviceImp = static_cast<DeviceImp *>(commandList.getDevice());

    // With implicit scaling, append through the first sub-device's context.
    if (deviceImp->metricContext->isImplicitScalingCapable()) {
        deviceImp = static_cast<DeviceImp *>(deviceImp->subDevices[0]);
    }

    auto &deviceMetricContext = deviceImp->getMetricDeviceContext();
    auto &oaSource = deviceMetricContext.getMetricSource<OaMetricSourceImp>();

    // Describe the gpu commands to obtain.
    CommandBufferData_1_0 flushCommand = {};
    flushCommand.CommandsType = MetricsLibraryApi::ObjectType::OverrideFlushCaches;
    flushCommand.Override.Enable = true;
    if (oaSource.isComputeUsed()) {
        flushCommand.Type = MetricsLibraryApi::GpuCommandBufferType::Compute;
    } else {
        flushCommand.Type = MetricsLibraryApi::GpuCommandBufferType::Render;
    }

    if (!oaSource.getMetricsLibrary().getGpuCommands(commandList, flushCommand)) {
        return ZE_RESULT_ERROR_UNKNOWN;
    }
    return ZE_RESULT_SUCCESS;
}
bool OaMetricSourceImp::loadDependencies() {
bool result = true;
if (metricEnumeration->loadMetricsDiscovery() != ZE_RESULT_SUCCESS) {
result = false;
DEBUG_BREAK_IF(!result);
}
if (result && !metricsLibrary->load()) {
result = false;
DEBUG_BREAK_IF(!result);
}
// Set metric context initialization state.
setInitializationState(result
? ZE_RESULT_SUCCESS
: ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE);
return result;
}
// True when the stored initialization state is ZE_RESULT_SUCCESS.
bool OaMetricSourceImp::isInitialized() {
    const bool initialized = (ZE_RESULT_SUCCESS == initializationState);
    return initialized;
}
void OaMetricSourceImp::setInitializationState(const ze_result_t state) {
initializationState = state;
}
// Returns the device exposed by the owning metric device context.
Device &OaMetricSourceImp::getDevice() {
    Device &device = metricDeviceContext.getDevice();
    return device;
}
// Returns the metrics library object held by this source (dereferenced
// without a null check).
MetricsLibrary &OaMetricSourceImp::getMetricsLibrary() {
    MetricsLibrary &library = *metricsLibrary;
    return library;
}
// Returns the metric enumeration object held by this source (dereferenced
// without a null check).
MetricEnumeration &OaMetricSourceImp::getMetricEnumeration() {
    MetricEnumeration &enumeration = *metricEnumeration;
    return enumeration;
}
// Returns the streamer pointer last stored via setMetricStreamer().
MetricStreamer *OaMetricSourceImp::getMetricStreamer() {
    return this->pMetricStreamer;
}
// Stores the given streamer pointer; only the pointer value is kept.
void OaMetricSourceImp::setMetricStreamer(MetricStreamer *pMetricStreamer) {
    // The parameter shadows the member, hence the explicit `this->`.
    this->pMetricStreamer = pMetricStreamer;
}
// Records the flag that later selects between the Compute and Render gpu
// command buffer types.
void OaMetricSourceImp::setUseCompute(const bool useCompute) {
    // The parameter shadows the member, hence the explicit `this->`.
    this->useCompute = useCompute;
}
bool OaMetricSourceImp::isComputeUsed() const {
return useCompute;
}
// Forwards the metric group query to the metric enumeration.
// pCount is dereferenced unconditionally and must not be null.
ze_result_t OaMetricSourceImp::metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) {
    auto &enumeration = getMetricEnumeration();
    return enumeration.metricGroupGet(*pCount, phMetricGroups);
}
// Returns the sub-device index reported by the owning metric device context.
uint32_t OaMetricSourceImp::getSubDeviceIndex() {
    const uint32_t subDeviceIndex = metricDeviceContext.getSubDeviceIndex();
    return subDeviceIndex;
}
// Asks the activation tracker whether the given metric group is activated.
bool OaMetricSourceImp::isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) const {
    const bool activated = activationTracker->isMetricGroupActivated(hMetricGroup);
    return activated;
}
// Asks the activation tracker whether a metric group is activated in hardware.
bool OaMetricSourceImp::isMetricGroupActivatedInHw() const {
    const bool activatedInHw = activationTracker->isMetricGroupActivatedInHw();
    return activatedInHw;
}
// Reports the implicit-scaling capability of the owning metric device context.
bool OaMetricSourceImp::isImplicitScalingCapable() const {
    const bool capable = metricDeviceContext.isImplicitScalingCapable();
    return capable;
}
// Hands the metric groups to the activation tracker for deferred activation.
// The tracker's return value (if any) is not inspected; this always returns
// ZE_RESULT_SUCCESS.
ze_result_t OaMetricSourceImp::activateMetricGroupsPreferDeferred(uint32_t count,
                                                                  zet_metric_group_handle_t *phMetricGroups) {
    auto &tracker = *activationTracker;
    tracker.activateMetricGroupsDeferred(count, phMetricGroups);
    return ZE_RESULT_SUCCESS;
}
// Asks the activation tracker to activate the previously deferred metric
// groups and returns its result.
ze_result_t OaMetricSourceImp::activateMetricGroupsAlreadyDeferred() {
    const ze_result_t status = activationTracker->activateMetricGroupsAlreadyDeferred();
    return status;
}
// Reports how the given metric groups split into concurrent groups; here each
// metric group forms its own concurrent group of size one.
//
// If *pConcurrentGroupCount is 0 on entry, only the count (the number of metric
// groups) is written back. Otherwise the count is clamped to the number of
// metric groups and that many entries of pCountPerConcurrentGroup are set to 1.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t OaMetricSourceImp::getConcurrentMetricGroups(std::vector<zet_metric_group_handle_t> &hMetricGroups,
                                                         uint32_t *pConcurrentGroupCount,
                                                         uint32_t *pCountPerConcurrentGroup) {
    const auto availableGroups = static_cast<uint32_t>(hMetricGroups.size());

    // Count query: no per-group data is written.
    if (*pConcurrentGroupCount == 0) {
        *pConcurrentGroupCount = availableGroups;
        return ZE_RESULT_SUCCESS;
    }

    *pConcurrentGroupCount = std::min(*pConcurrentGroupCount, availableGroups);

    // Every metric group sits in its own container.
    uint32_t slot = 0;
    while (slot < *pConcurrentGroupCount) {
        pCountPerConcurrentGroup[slot] = 1;
        ++slot;
    }
    return ZE_RESULT_SUCCESS;
}
// Walks the pNext chain of extension structures and fills in the ones this
// source recognizes: metric source id, global timestamp resolution, metric
// group type and calculate properties.
//
// hMetricGroup    - not used by this implementation.
// pBaseProperties - base properties; samplingType is read for the calculate
//                   properties extension.
// pNext           - head of the extension chain (may be null).
//
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT if no recognized structure was
// processed (including a null chain). Each recognized structure overwrites the
// result; unrecognized ones are skipped without changing it. A failure while
// querying timestamp data zeroes both timestamp fields and returns at once.
ze_result_t OaMetricSourceImp::handleMetricGroupExtendedProperties(zet_metric_group_handle_t hMetricGroup,
                                                                   zet_metric_group_properties_t *pBaseProperties,
                                                                   void *pNext) {
    ze_result_t result = ZE_RESULT_ERROR_INVALID_ARGUMENT;

    for (void *current = pNext; current != nullptr;) {
        auto *header = reinterpret_cast<zet_base_properties_t *>(current);
        const auto structureType = header->stype;

        if (structureType == ZET_INTEL_STRUCTURE_TYPE_METRIC_SOURCE_ID_EXP) {
            getMetricGroupSourceIdProperty(header);
            result = ZE_RESULT_SUCCESS;
        } else if (structureType == ZET_STRUCTURE_TYPE_METRIC_GLOBAL_TIMESTAMPS_RESOLUTION_EXP) {
            auto *timestampProperties = reinterpret_cast<zet_metric_global_timestamps_resolution_exp_t *>(header);

            // Query the resolution first; valid bits are queried only if that worked.
            ze_result_t timestampStatus = getTimerResolution(timestampProperties->timerResolution);
            if (timestampStatus == ZE_RESULT_SUCCESS) {
                timestampStatus = getTimestampValidBits(timestampProperties->timestampValidBits);
            }
            if (timestampStatus != ZE_RESULT_SUCCESS) {
                timestampProperties->timerResolution = 0;
                timestampProperties->timestampValidBits = 0;
                return timestampStatus;
            }
            result = timestampStatus;
        } else if (structureType == ZET_STRUCTURE_TYPE_METRIC_GROUP_TYPE_EXP) {
            auto *groupTypeProperties = reinterpret_cast<zet_metric_group_type_exp_t *>(header);
            groupTypeProperties->type = ZET_METRIC_GROUP_TYPE_EXP_FLAG_OTHER;
            result = ZE_RESULT_SUCCESS;
        } else if (structureType == ZET_INTEL_STRUCTURE_TYPE_METRIC_GROUP_CALCULATE_EXP_PROPERTIES) {
            auto *calculateProperties = reinterpret_cast<zet_intel_metric_group_calculate_properties_exp_t *>(header);
            // The time filter is reported as supported only for time-based sampling.
            calculateProperties->isTimeFilterSupported =
                (pBaseProperties->samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED);
            result = ZE_RESULT_SUCCESS;
        }

        // Advance only after the current structure has been handled.
        current = header->pNext;
    }

    return result;
}
// Creates an (initially empty) user-defined OA metric group.
//
// name / description - NUL-terminated strings; at most ZET_MAX_METRIC_GROUP_NAME - 1
//                      and ZET_MAX_METRIC_GROUP_DESCRIPTION - 1 characters are
//                      copied into the group properties.
// samplingType       - sampling type stored in the group properties.
// pMetricGroupHandle - [out] handle of the new group, or nullptr when the
//                      concurrent group or the new metric set is unavailable.
void OaMetricSourceImp::metricGroupCreate(const char name[ZET_MAX_METRIC_GROUP_NAME],
                                          const char description[ZET_MAX_METRIC_GROUP_DESCRIPTION],
                                          zet_metric_group_sampling_type_flag_t samplingType,
                                          zet_metric_group_handle_t *pMetricGroupHandle) {
    // Bounded string copy. The array-style parameters decay to plain pointers,
    // so callers may pass strings shorter than the maximum; copying a fixed
    // maximum byte count would read past the end of such strings. A null
    // source leaves the (zero-initialized) destination untouched.
    auto copyBounded = [](char *destination, size_t destinationSize, const char *source) {
        if (source != nullptr) {
            snprintf(destination, destinationSize, "%.*s", static_cast<int>(destinationSize - 1), source);
        }
    };

    zet_metric_group_properties_t properties{};
    copyBounded(properties.description, ZET_MAX_METRIC_GROUP_DESCRIPTION, description);
    copyBounded(properties.name, ZET_MAX_METRIC_GROUP_NAME, name);
    properties.samplingType = samplingType;
    properties.domain = UINT32_MAX;

    // This function returns void, so failure is reported through a null handle.
    *pMetricGroupHandle = nullptr;

    auto concurrentGrp = getMetricEnumeration().getConcurrentGroup();
    if (concurrentGrp == nullptr) {
        return;
    }

    MetricsDiscovery::IMetricSet_1_13 *metricSet = concurrentGrp->AddMetricSet(name, description);
    if (metricSet == nullptr) {
        return;
    }

    auto metricGroup = OaMetricGroupUserDefined::create(properties, *metricSet, *concurrentGrp, *this);
    *pMetricGroupHandle = metricGroup->toHandle();
}

// Creates a user-defined metric group and adds a single metric to it.
//
// On success *phMetricGroup receives the new group handle. If the group cannot
// be created ZE_RESULT_ERROR_UNKNOWN is returned; if adding the metric fails
// the group is destroyed and the addMetric() status is returned. In both
// failure cases *phMetricGroup is left untouched.
ze_result_t OaMetricSourceImp::metricGroupCreateFromMetric(const char *pName, const char *pDescription,
                                                           zet_metric_group_sampling_type_flags_t samplingType, zet_metric_handle_t hMetric,
                                                           zet_metric_group_handle_t *phMetricGroup) {
    zet_metric_group_handle_t hMetricGroup{};
    metricGroupCreate(pName, pDescription, static_cast<zet_metric_group_sampling_type_flag_t>(samplingType), &hMetricGroup);
    if (hMetricGroup == nullptr) {
        // Creation failed; there is no group to add the metric to.
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    auto oaMetricGroupImp = static_cast<OaMetricGroupUserDefined *>(MetricGroup::fromHandle(hMetricGroup));
    size_t errorStringSize = 0;
    const auto status = oaMetricGroupImp->addMetric(hMetric, &errorStringSize, nullptr);
    if (status != ZE_RESULT_SUCCESS) {
        oaMetricGroupImp->destroy();
        return status;
    }

    *phMetricGroup = hMetricGroup;
    return status;
}
// Builds user-defined metric groups from the supplied metrics (each handle is
// treated as an OaMetricFromProgrammable).
//
// Behavior depends on *maxMetricGroupCount at entry:
//  - 0: count query. Writes an upper bound (two groups per metric) and returns
//    ZE_RESULT_SUCCESS without creating anything.
//  - non-zero: clears metricGroupList, creates at most that many groups,
//    closes them, appends their handles to metricGroupList and writes back the
//    number of groups created.
//
// Metrics are first bucketed by sampling type. Within a bucket each metric is
// added to the first already-created group that accepts it; a new group named
// "<prefix><index>" is created when none does. Once the group limit is reached,
// a metric that fits no existing group ends processing of its bucket.
//
// If creating or closing a group fails, every group created by this call is
// destroyed, *maxMetricGroupCount is set to 0 and the failing status is
// returned. Implicit-scaling-capable sources delegate the whole request to
// MultiDeviceCreatedMetricGroupManager.
ze_result_t OaMetricSourceImp::createMetricGroupsFromMetrics(std::vector<zet_metric_handle_t> &metricList,
                                                             const char metricGroupNamePrefix[ZET_INTEL_MAX_METRIC_GROUP_NAME_PREFIX_EXP],
                                                             const char description[ZET_MAX_METRIC_GROUP_DESCRIPTION],
                                                             uint32_t *maxMetricGroupCount,
                                                             std::vector<zet_metric_group_handle_t> &metricGroupList) {
    if (isImplicitScalingCapable()) {
        return MultiDeviceCreatedMetricGroupManager::createMultipleMetricGroupsFromMetrics<OaMultiDeviceMetricGroupUserDefined>(
            metricDeviceContext, *this, metricList,
            metricGroupNamePrefix, description,
            maxMetricGroupCount, metricGroupList);
    }

    const auto isCountCalculationPath = *maxMetricGroupCount == 0;
    if (isCountCalculationPath) {
        // Metric group can be for streamer and query from a single programmable
        // So multiplying by 2 to estimate the maximum metric group count
        *maxMetricGroupCount = static_cast<uint32_t>(metricList.size()) * 2u;
        return ZE_RESULT_SUCCESS;
    }

    // Destroys every group in the list and empties it.
    auto cleanupCreatedGroups = [](std::vector<zet_metric_group_handle_t> &createdMetricGroupList) {
        for (auto &metricGroup : createdMetricGroupList) {
            zetMetricGroupDestroyExp(metricGroup);
        }
        createdMetricGroupList.clear();
    };

    // Arrange the metrics based on their sampling types
    std::map<zet_metric_group_sampling_type_flags_t, std::vector<zet_metric_handle_t>> samplingTypeToMetricMap{};
    for (auto &metric : metricList) {
        auto metricImp = static_cast<OaMetricImp *>(Metric::fromHandle(metric));
        auto metricFromProgrammable = static_cast<OaMetricFromProgrammable *>(metricImp);
        auto samplingType = metricFromProgrammable->getSupportedSamplingType();
        // Different metric groups based on sampling type; a metric that supports
        // both types is placed in both buckets.
        if (samplingType == METRICS_SAMPLING_TYPE_TIME_EVENT_BASED) {
            samplingTypeToMetricMap[ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED].push_back(metric);
            samplingTypeToMetricMap[ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED].push_back(metric);
        } else {
            samplingTypeToMetricMap[samplingType].push_back(metric);
        }
    }

    metricGroupList.clear();
    uint32_t numMetricGroupsCreated = 0;

    // Creates a group named "<prefix><running index>" holding the given metric.
    // The running index advances only on success.
    auto createMetricGroupAndAddMetric = [&](zet_metric_handle_t metricHandle,
                                             zet_metric_group_sampling_type_flags_t samplingType,
                                             zet_metric_group_handle_t &metricGroup) {
        char metricGroupName[ZET_MAX_METRIC_GROUP_NAME] = {};
        // %u matches the unsigned counter (the previous %d mismatched its type).
        snprintf(metricGroupName, ZET_MAX_METRIC_GROUP_NAME - 1, "%s%u", metricGroupNamePrefix, numMetricGroupsCreated);
        const auto status = metricGroupCreateFromMetric(metricGroupName, description, samplingType, metricHandle, &metricGroup);
        if (status == ZE_RESULT_SUCCESS) {
            numMetricGroupsCreated++;
        }
        return status;
    };

    bool isMaxMetricGroupCountReached = numMetricGroupsCreated >= *maxMetricGroupCount;

    // Process the metrics in each sampling type separately
    for (auto &entry : samplingTypeToMetricMap) {
        if (isMaxMetricGroupCountReached) {
            break;
        }

        std::vector<zet_metric_group_handle_t> perSamplingTypeMetricGroupList{};
        zet_metric_group_handle_t currentMetricGroup{};
        auto samplingType = entry.first;

        // Create and add the metrics to group
        for (auto &metricToAdd : entry.second) {
            // Prefer an already-created group of this sampling type.
            bool isAddedToExistingMetricGroup = false;
            for (auto &perSamplingTypeMetricGroup : perSamplingTypeMetricGroupList) {
                auto oaMetricGroup = static_cast<OaMetricGroupUserDefined *>(MetricGroup::fromHandle(perSamplingTypeMetricGroup));
                size_t errorStringSize = 0;
                auto status = oaMetricGroup->addMetric(metricToAdd, &errorStringSize, nullptr);
                if (status == ZE_RESULT_SUCCESS) {
                    isAddedToExistingMetricGroup = true;
                    break;
                }
            }
            if (isAddedToExistingMetricGroup) {
                continue;
            }

            // No existing group accepted the metric; a new one is needed.
            if (isMaxMetricGroupCountReached) {
                break;
            }

            currentMetricGroup = nullptr;
            auto status = createMetricGroupAndAddMetric(metricToAdd, samplingType, currentMetricGroup);
            if (status != ZE_RESULT_SUCCESS) {
                cleanupCreatedGroups(metricGroupList);
                cleanupCreatedGroups(perSamplingTypeMetricGroupList);
                *maxMetricGroupCount = 0;
                return status;
            }

            perSamplingTypeMetricGroupList.push_back(currentMetricGroup);
            isMaxMetricGroupCountReached = numMetricGroupsCreated >= *maxMetricGroupCount;
        }

        metricGroupList.insert(metricGroupList.end(), perSamplingTypeMetricGroupList.begin(), perSamplingTypeMetricGroupList.end());
    }

    // close all the metric groups
    for (auto &metricGroup : metricGroupList) {
        auto oaMetricGroup = static_cast<OaMetricGroupUserDefined *>(MetricGroup::fromHandle(metricGroup));
        auto status = oaMetricGroup->close();
        if (status != ZE_RESULT_SUCCESS) {
            cleanupCreatedGroups(metricGroupList);
            *maxMetricGroupCount = 0;
            return status;
        }
    }

    *maxMetricGroupCount = static_cast<uint32_t>(metricGroupList.size());
    return ZE_RESULT_SUCCESS;
}
// Forwards the metric programmable query to the metric enumeration.
ze_result_t OaMetricSourceImp::metricProgrammableGet(uint32_t *pCount, zet_metric_programmable_exp_handle_t *phMetricProgrammables) {
    auto &enumeration = getMetricEnumeration();
    return enumeration.metricProgrammableGet(pCount, phMetricProgrammables);
}
// Appends a MarkerStreamUser command carrying `value` to the command list
// through the metrics library.
// hMetricGroup is not used by this implementation.
// Returns ZE_RESULT_SUCCESS when the library produced the commands, otherwise
// ZE_RESULT_ERROR_UNKNOWN.
ze_result_t OaMetricSourceImp::appendMarker(zet_command_list_handle_t hCommandList, zet_metric_group_handle_t hMetricGroup, uint32_t value) {
    auto *targetCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hCommandList));
    auto *deviceImp = static_cast<DeviceImp *>(targetCommandList->getDevice());

    // With implicit scaling, append through the first sub-device's context.
    if (deviceImp->metricContext->isImplicitScalingCapable()) {
        deviceImp = static_cast<DeviceImp *>(deviceImp->subDevices[0]);
    }

    OaMetricSourceImp &oaSource = deviceImp->metricContext->getMetricSource<OaMetricSourceImp>();

    // Describe the gpu commands to obtain.
    CommandBufferData_1_0 markerCommand = {};
    markerCommand.CommandsType = MetricsLibraryApi::ObjectType::MarkerStreamUser;
    markerCommand.MarkerStreamUser.Value = value;
    if (oaSource.isComputeUsed()) {
        markerCommand.Type = MetricsLibraryApi::GpuCommandBufferType::Compute;
    } else {
        markerCommand.Type = MetricsLibraryApi::GpuCommandBufferType::Render;
    }

    if (!oaSource.getMetricsLibrary().getGpuCommands(*targetCommandList, markerCommand)) {
        return ZE_RESULT_ERROR_UNKNOWN;
    }
    return ZE_RESULT_SUCCESS;
}
// Specialization returning the OA metric source: looks up the entry stored
// under MetricSource::metricSourceTypeOa and downcasts it. The lookup uses
// at(), so the entry is expected to be present.
template <>
OaMetricSourceImp &MetricDeviceContext::getMetricSource<OaMetricSourceImp>() const {
    auto &oaSourceEntry = metricSources.at(MetricSource::metricSourceTypeOa);
    return static_cast<OaMetricSourceImp &>(*oaSourceEntry);
}
} // namespace L0