compute-runtime/level_zero/tools/source/metrics/metric_ip_sampling_source.cpp

648 lines
30 KiB
C++

/*
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/tools/source/metrics/metric_ip_sampling_source.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/string.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/tools/source/metrics/metric.h"
#include "level_zero/tools/source/metrics/metric_ip_sampling_streamer.h"
#include "level_zero/tools/source/metrics/os_interface_metric.h"
#include "level_zero/zet_intel_gpu_metric.h"
#include "level_zero/zet_intel_gpu_metric_export.h"
#include <level_zero/zet_api.h>
#include <cstring>
#include <unordered_set>
namespace L0 {
// Domain id reported in the properties of the IP sampling metric group.
constexpr uint32_t ipSamplinDomainId{100u};
// Factory for the IP sampling metric source bound to the given metric device context.
// The object is allocated with the non-throwing form of new, so the returned
// pointer is null when the allocation fails.
std::unique_ptr<IpSamplingMetricSourceImp> IpSamplingMetricSourceImp::create(const MetricDeviceContext &metricDeviceContext) {
    auto *rawSource = new (std::nothrow) IpSamplingMetricSourceImp(metricDeviceContext);
    return std::unique_ptr<IpSamplingMetricSourceImp>(rawSource);
}
// Stores the metric device context, tags the source as an IP sampling source,
// creates the OS interface for the context's device and the deferred activation
// tracker for the context's sub-device index.
IpSamplingMetricSourceImp::IpSamplingMetricSourceImp(const MetricDeviceContext &metricDeviceContext)
    : metricDeviceContext(metricDeviceContext) {
    type = MetricSource::metricSourceTypeIpSampling;

    auto &device = metricDeviceContext.getDevice();
    metricIPSamplingpOsInterface = MetricIpSamplingOsInterface::create(device);

    const auto subDeviceIndex = metricDeviceContext.getSubDeviceIndex();
    activationTracker = std::make_unique<MultiDomainDeferredActivationTracker>(subDeviceIndex);
}
// Writes the device's outProfilingTimerClock value into `resolution`.
// Always returns ZE_RESULT_SUCCESS.
ze_result_t IpSamplingMetricSourceImp::getTimerResolution(uint64_t &resolution) {
    auto *neoDevice = metricDeviceContext.getDevice().getNEODevice();
    const auto &deviceInfo = neoDevice->getDeviceInfo();
    resolution = deviceInfo.outProfilingTimerClock;
    return ZE_RESULT_SUCCESS;
}
// Writes the timestampValidBits entry of the device's hardware capability table
// into `validBits`. Always returns ZE_RESULT_SUCCESS.
ze_result_t IpSamplingMetricSourceImp::getTimestampValidBits(uint64_t &validBits) {
    auto *neoDevice = metricDeviceContext.getDevice().getNEODevice();
    const auto &hwInfo = neoDevice->getHardwareInfo();
    validBits = hwInfo.capabilityTable.timestampValidBits;
    return ZE_RESULT_SUCCESS;
}
// Marks the source as enabled only when the OS interface reports that its
// dependency is available.
void IpSamplingMetricSourceImp::enable() {
    const auto dependencyAvailable = metricIPSamplingpOsInterface->isDependencyAvailable();
    isEnabled = dependencyAvailable;
}
// Reports the enabled state last recorded by enable().
bool IpSamplingMetricSourceImp::isAvailable() {
    const bool available = isEnabled;
    return available;
}
void IpSamplingMetricSourceImp::cacheMetricGroup() {
const auto deviceImp = static_cast<DeviceImp *>(&metricDeviceContext.getDevice());
if (metricDeviceContext.isImplicitScalingCapable()) {
std::vector<IpSamplingMetricGroupImp *> subDeviceMetricGroup = {};
subDeviceMetricGroup.reserve(deviceImp->subDevices.size());
// Prepare cached metric group for sub-devices
for (auto &subDevice : deviceImp->subDevices) {
IpSamplingMetricSourceImp &source = subDevice->getMetricDeviceContext().getMetricSource<IpSamplingMetricSourceImp>();
// 1 metric group available for IP Sampling
uint32_t count = 1;
zet_metric_group_handle_t hMetricGroup = {};
const auto result = source.metricGroupGet(&count, &hMetricGroup);
// Getting MetricGroup from sub-device cannot fail, since RootDevice is successful
UNRECOVERABLE_IF(result != ZE_RESULT_SUCCESS);
subDeviceMetricGroup.push_back(static_cast<IpSamplingMetricGroupImp *>(MetricGroup::fromHandle(hMetricGroup)));
}
IpSamplingMetricSourceImp &source = deviceImp->getMetricDeviceContext().getMetricSource<IpSamplingMetricSourceImp>();
cachedMetricGroup = MultiDeviceIpSamplingMetricGroupImp::create(source, subDeviceMetricGroup);
return;
}
std::vector<IpSamplingMetricImp> metrics = {};
auto &l0GfxCoreHelper = deviceImp->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
metrics.reserve(l0GfxCoreHelper.getIpSamplingMetricCount());
metricSourceCount = l0GfxCoreHelper.getIpSamplingMetricCount();
zet_metric_properties_t metricProperties = {};
metricProperties.stype = ZET_STRUCTURE_TYPE_METRIC_PROPERTIES;
metricProperties.pNext = nullptr;
strcpy_s(metricProperties.component, ZET_MAX_METRIC_COMPONENT, "XVE");
metricProperties.tierNumber = 4;
metricProperties.resultType = ZET_VALUE_TYPE_UINT64;
// Preparing properties for IP separately because of unique values
strcpy_s(metricProperties.name, ZET_MAX_METRIC_NAME, "IP");
strcpy_s(metricProperties.description, ZET_MAX_METRIC_DESCRIPTION, "IP address");
metricProperties.metricType = ZET_METRIC_TYPE_IP;
strcpy_s(metricProperties.resultUnits, ZET_MAX_METRIC_RESULT_UNITS, "Address");
metrics.push_back(IpSamplingMetricImp(*this, metricProperties));
std::vector<std::pair<const char *, const char *>> stallSamplingReportList = l0GfxCoreHelper.getStallSamplingReportMetrics();
// Preparing properties for others because of common values
metricProperties.metricType = ZET_METRIC_TYPE_EVENT;
strcpy_s(metricProperties.resultUnits, ZET_MAX_METRIC_RESULT_UNITS, "Events");
for (auto &property : stallSamplingReportList) {
strcpy_s(metricProperties.name, ZET_MAX_METRIC_NAME, property.first);
strcpy_s(metricProperties.description, ZET_MAX_METRIC_DESCRIPTION, property.second);
metrics.push_back(IpSamplingMetricImp(*this, metricProperties));
}
cachedMetricGroup = IpSamplingMetricGroupImp::create(*this, metrics);
DEBUG_BREAK_IF(cachedMetricGroup == nullptr);
}
// Returns the single metric group exposed by this source.
//
// - Source not enabled: *pCount is set to 0 and ZE_RESULT_ERROR_UNSUPPORTED_FEATURE is returned.
// - *pCount == 0 on entry: count query, *pCount is set to 1.
// - Otherwise: the cached metric group (built on first use) is written to
//   phMetricGroups[0] and *pCount is set to 1.
ze_result_t IpSamplingMetricSourceImp::metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) {
    constexpr uint32_t availableGroupCount = 1u;

    if (!isEnabled) {
        *pCount = 0;
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    const bool isCountQuery = (*pCount == 0);
    if (isCountQuery) {
        *pCount = availableGroupCount;
        return ZE_RESULT_SUCCESS;
    }

    // Build the metric group lazily on the first real request
    if (!cachedMetricGroup) {
        cacheMetricGroup();
    }

    DEBUG_BREAK_IF(phMetricGroups == nullptr);
    *phMetricGroups = cachedMetricGroup->toHandle();
    *pCount = availableGroupCount;
    return ZE_RESULT_SUCCESS;
}
// Memory barriers are not available for IP sampling: logs an error and reports
// the feature as unsupported. The command list argument is not used.
ze_result_t IpSamplingMetricSourceImp::appendMetricMemoryBarrier(CommandList &commandList) {
    constexpr ze_result_t unsupported = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    METRICS_LOG_ERR("%s", "Memory barrier not supported for IP Sampling");
    return unsupported;
}
// Forwards the activation request to the deferred activation tracker.
// Returns ZE_RESULT_SUCCESS when the tracker accepts it; otherwise logs an
// error and returns ZE_RESULT_ERROR_UNKNOWN.
ze_result_t IpSamplingMetricSourceImp::activateMetricGroupsPreferDeferred(uint32_t count,
                                                                          zet_metric_group_handle_t *phMetricGroups) {
    auto accepted = activationTracker->activateMetricGroupsDeferred(count, phMetricGroups);
    if (accepted) {
        return ZE_RESULT_SUCCESS;
    }

    METRICS_LOG_ERR("%s", "Metric group activation failed");
    return ZE_RESULT_ERROR_UNKNOWN;
}
// Delegates to the activation tracker and returns its result unchanged.
ze_result_t IpSamplingMetricSourceImp::activateMetricGroupsAlreadyDeferred() {
    const auto activationResult = activationTracker->activateMetricGroupsAlreadyDeferred();
    return activationResult;
}
// Asks the activation tracker whether the given metric group is activated.
bool IpSamplingMetricSourceImp::isMetricGroupActivated(const zet_metric_group_handle_t hMetricGroup) const {
    const auto activated = activationTracker->isMetricGroupActivated(hMetricGroup);
    return activated;
}
// Replaces the OS interface used by this source. Ownership is moved out of the
// caller's pointer.
void IpSamplingMetricSourceImp::setMetricOsInterface(std::unique_ptr<MetricIpSamplingOsInterface> &metricIPSamplingpOsInterface) {
    auto &incomingInterface = metricIPSamplingpOsInterface;
    this->metricIPSamplingpOsInterface = std::move(incomingInterface);
}
// Packs raw IP sampling data into the export format: a
// zet_intel_metric_df_gpu_export_data_format_t header immediately followed by a
// copy of the raw data.
//
// - *pExportDataSize == 0: size query, the required size is written back.
// - *pExportDataSize smaller than required: ZE_RESULT_ERROR_INVALID_SIZE.
// - Otherwise the header and the raw data are written to pExportData.
ze_result_t IpSamplingMetricGroupBase::getExportData(const uint8_t *pRawData, size_t rawDataSize, size_t *pExportDataSize,
                                                     uint8_t *pExportData) {
    using ExportDataFormat = zet_intel_metric_df_gpu_export_data_format_t;
    const auto expectedExportDataSize = sizeof(ExportDataFormat) + rawDataSize;

    if (*pExportDataSize == 0u) {
        *pExportDataSize = expectedExportDataSize;
        return ZE_RESULT_SUCCESS;
    }

    if (*pExportDataSize < expectedExportDataSize) {
        METRICS_LOG_ERR("Incorrect Size Passed. Returning 0x%x", ZE_RESULT_ERROR_INVALID_SIZE);
        return ZE_RESULT_ERROR_INVALID_SIZE;
    }

    auto *exportData = reinterpret_cast<ExportDataFormat *>(pExportData);
    auto &header = exportData->header;
    header.type = ZET_INTEL_METRIC_DF_SOURCE_TYPE_IPSAMPLING;
    header.version.major = ZET_INTEL_GPU_METRIC_EXPORT_VERSION_MAJOR;
    header.version.minor = ZET_INTEL_GPU_METRIC_EXPORT_VERSION_MINOR;
    header.rawDataOffset = sizeof(ExportDataFormat);
    header.rawDataSize = rawDataSize;

    // The raw data goes right behind the header
    uint8_t *pRawDataDestination = pExportData + header.rawDataOffset;
    memcpy_s(reinterpret_cast<void *>(pRawDataDestination), rawDataSize, pRawData, rawDataSize);
    return ZE_RESULT_SUCCESS;
}
// Places every metric group into its own concurrent group.
//
// - *pConcurrentGroupCount == 0: count query, the number of metric groups is written back.
// - Otherwise the count is clamped to the number of metric groups and each of the
//   first *pConcurrentGroupCount entries of pCountPerConcurrentGroup is set to 1.
ze_result_t IpSamplingMetricSourceImp::getConcurrentMetricGroups(std::vector<zet_metric_group_handle_t> &hMetricGroups,
                                                                 uint32_t *pConcurrentGroupCount,
                                                                 uint32_t *pCountPerConcurrentGroup) {
    const auto metricGroupCount = static_cast<uint32_t>(hMetricGroups.size());

    if (*pConcurrentGroupCount == 0) {
        *pConcurrentGroupCount = metricGroupCount;
        return ZE_RESULT_SUCCESS;
    }

    *pConcurrentGroupCount = std::min(*pConcurrentGroupCount, metricGroupCount);

    // Each metric group is in unique container
    std::fill_n(pCountPerConcurrentGroup, *pConcurrentGroupCount, 1u);
    return ZE_RESULT_SUCCESS;
}
// Walks the pNext chain of extension structures and fills in every structure
// type this source recognizes:
//   - metric source id
//   - global timestamps resolution (timer resolution and valid bits)
//   - metric group type (always ZET_METRIC_GROUP_TYPE_EXP_FLAG_OTHER)
//   - calculate properties (time filter reported as not supported)
// Unrecognized structures are skipped. Returns ZE_RESULT_SUCCESS when at least
// one structure was recognized, ZE_RESULT_ERROR_INVALID_ARGUMENT otherwise.
ze_result_t IpSamplingMetricSourceImp::handleMetricGroupExtendedProperties(zet_metric_group_handle_t hMetricGroup,
                                                                           zet_metric_group_properties_t *pBaseProperties,
                                                                           void *pNext) {
    ze_result_t retVal = ZE_RESULT_ERROR_INVALID_ARGUMENT;

    auto *extendedProperties = reinterpret_cast<zet_base_properties_t *>(pNext);
    while (extendedProperties != nullptr) {
        bool recognized = true;

        if (static_cast<uint32_t>(extendedProperties->stype) == ZET_INTEL_STRUCTURE_TYPE_METRIC_SOURCE_ID_EXP) {
            getMetricGroupSourceIdProperty(extendedProperties);
        } else if (extendedProperties->stype == ZET_STRUCTURE_TYPE_METRIC_GLOBAL_TIMESTAMPS_RESOLUTION_EXP) {
            auto *timestampProperties = reinterpret_cast<zet_metric_global_timestamps_resolution_exp_t *>(extendedProperties);
            getTimerResolution(timestampProperties->timerResolution);
            getTimestampValidBits(timestampProperties->timestampValidBits);
        } else if (extendedProperties->stype == ZET_STRUCTURE_TYPE_METRIC_GROUP_TYPE_EXP) {
            auto *groupTypeProperties = reinterpret_cast<zet_metric_group_type_exp_t *>(extendedProperties);
            groupTypeProperties->type = ZET_METRIC_GROUP_TYPE_EXP_FLAG_OTHER;
        } else if (static_cast<uint32_t>(extendedProperties->stype) == ZET_INTEL_STRUCTURE_TYPE_METRIC_GROUP_CALCULATE_EXP_PROPERTIES) {
            auto *calculateProperties = reinterpret_cast<zet_intel_metric_group_calculate_properties_exp_t *>(extendedProperties);
            calculateProperties->isTimeFilterSupported = false;
        } else {
            recognized = false;
        }

        if (recognized) {
            retVal = ZE_RESULT_SUCCESS;
        }

        extendedProperties = reinterpret_cast<zet_base_properties_t *>(extendedProperties->pNext);
    }

    return retVal;
}
// Creates a calculate operation for this source and returns the creation result.
// *pCount is always set to 0 (no excluded metrics are reported) and
// phExcludedMetrics is not written. The operation is created as multi-device
// when the passed metric device context is implicit-scaling capable.
ze_result_t IpSamplingMetricSourceImp::calcOperationCreate(MetricDeviceContext &metricDeviceContext,
                                                           zet_intel_metric_calculate_exp_desc_t *pCalculateDesc,
                                                           uint32_t *pCount,
                                                           zet_metric_handle_t *phExcludedMetrics,
                                                           zet_intel_metric_calculate_operation_exp_handle_t *phCalculateOperation) {
    // All metrics in Ip sampling allow calculation
    *pCount = 0;

    const bool isMultiDevice = metricDeviceContext.isImplicitScalingCapable();
    return IpSamplingMetricCalcOpImp::create(*this, pCalculateDesc, isMultiDevice, phCalculateOperation);
}
// Builds the "EuStallSampling" metric group: stores a heap-allocated copy of every
// metric passed in and fills the group properties (time-based sampling, the IP
// sampling domain id, and the metric count recorded on the metric source).
IpSamplingMetricGroupImp::IpSamplingMetricGroupImp(IpSamplingMetricSourceImp &metricSource,
                                                   std::vector<IpSamplingMetricImp> &metrics)
    : IpSamplingMetricGroupBase(metricSource) {
    auto &ownedMetrics = this->metrics;
    ownedMetrics.reserve(metrics.size());
    for (const auto &sourceMetric : metrics) {
        ownedMetrics.push_back(std::make_unique<IpSamplingMetricImp>(sourceMetric));
    }

    properties.stype = ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES;
    properties.pNext = nullptr;
    properties.samplingType = ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED;
    properties.domain = ipSamplinDomainId;
    properties.metricCount = this->getMetricSource().metricSourceCount;
    strcpy_s(properties.name, ZET_MAX_METRIC_GROUP_NAME, "EuStallSampling");
    strcpy_s(properties.description, ZET_MAX_METRIC_GROUP_DESCRIPTION, "EU stall sampling");
}
// Copies the group properties into *pProperties while keeping the caller's pNext
// chain. When a chain is present, the metric source fills the extension
// structures and its result is returned.
ze_result_t IpSamplingMetricGroupImp::getProperties(zet_metric_group_properties_t *pProperties) {
    // The struct copy below would overwrite the caller's chain, so save it first
    void *const callerChain = pProperties->pNext;
    *pProperties = properties;
    pProperties->pNext = callerChain;

    if (callerChain == nullptr) {
        return ZE_RESULT_SUCCESS;
    }
    return metricSource.handleMetricGroupExtendedProperties(toHandle(), pProperties, callerChain);
}
// Returns handles of the metrics in this group.
//
// - *pCount == 0: count query, the number of metrics is written back.
// - Otherwise *pCount is clamped to the number of metrics and that many handles
//   are written to phMetrics.
ze_result_t IpSamplingMetricGroupImp::metricGet(uint32_t *pCount, zet_metric_handle_t *phMetrics) {
    const auto availableMetricCount = static_cast<uint32_t>(metrics.size());

    if (*pCount == 0) {
        *pCount = availableMetricCount;
        return ZE_RESULT_SUCCESS;
    }

    // User is expected to allocate space.
    DEBUG_BREAK_IF(phMetrics == nullptr);

    *pCount = std::min(*pCount, availableMetricCount);
    for (uint32_t metricIndex = 0; metricIndex < *pCount; ++metricIndex) {
        phMetrics[metricIndex] = metrics[metricIndex]->toHandle();
    }
    return ZE_RESULT_SUCCESS;
}
// Tells whether the buffer starts with an IpSamplingMetricDataHeader: it must be
// at least as large as the header and carry the header's magic value.
bool IpSamplingMetricGroupBase::isMultiDeviceCaptureData(const size_t rawDataSize, const uint8_t *pRawData) {
    if (rawDataSize < sizeof(IpSamplingMetricDataHeader)) {
        return false;
    }

    const auto *header = reinterpret_cast<const IpSamplingMetricDataHeader *>(pRawData);
    return header->magic == IpSamplingMetricDataHeader::magicValue;
}
// Calculates metric values from raw data that carries no multi-device headers.
//
// - Data starting with a multi-device header is rejected with ZE_RESULT_ERROR_INVALID_ARGUMENT.
// - *pMetricValueCount == 0 on entry: only the number of values is computed.
// - Otherwise up to *pMetricValueCount values are written to pMetricValues.
ze_result_t IpSamplingMetricGroupImp::calculateMetricValues(const zet_metric_group_calculation_type_t type, size_t rawDataSize,
                                                            const uint8_t *pRawData, uint32_t *pMetricValueCount,
                                                            zet_typed_value_t *pMetricValues) {
    const bool isCountQuery = (*pMetricValueCount == 0);

    if (isMultiDeviceCaptureData(rawDataSize, pRawData)) {
        METRICS_LOG_ERR("%s", "The call is not supported for multiple devices");
        METRICS_LOG_ERR("%s", "Please use zetMetricGroupCalculateMultipleMetricValuesExp instead");
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    if (!isCountQuery) {
        return getCalculatedMetricValues(type, rawDataSize, pRawData, *pMetricValueCount, pMetricValues);
    }
    return getCalculatedMetricCount(pRawData, rawDataSize, *pMetricValueCount);
}
// Calculates metric values for one set (set index 0).
//
// A call is a count query when either *pTotalMetricValueCount or *pSetCount is 0
// on entry; both are then reset to 0 before the calculation.
// Data without a multi-device header is handled by calculateMetricValues(); data
// with headers is filtered for set index 0.
// On ZE_RESULT_SUCCESS or ZE_RESULT_WARNING_DROPPED_DATA, *pSetCount becomes 1 and,
// outside a count query, pMetricCounts[0] receives the total value count. On any
// other result, outside a count query, pMetricCounts[0] is set to 0.
ze_result_t IpSamplingMetricGroupImp::calculateMetricValuesExp(const zet_metric_group_calculation_type_t type, size_t rawDataSize,
                                                               const uint8_t *pRawData, uint32_t *pSetCount,
                                                               uint32_t *pTotalMetricValueCount, uint32_t *pMetricCounts,
                                                               zet_typed_value_t *pMetricValues) {
    const bool calculateCountOnly = (*pTotalMetricValueCount == 0) || (*pSetCount == 0);
    if (calculateCountOnly) {
        *pTotalMetricValueCount = 0;
        *pSetCount = 0;
    }

    ze_result_t result = ZE_RESULT_SUCCESS;
    if (isMultiDeviceCaptureData(rawDataSize, pRawData)) {
        result = calculateCountOnly
                     ? getCalculatedMetricCount(pRawData, rawDataSize, *pTotalMetricValueCount, 0)
                     : getCalculatedMetricValues(type, rawDataSize, pRawData, *pTotalMetricValueCount, pMetricValues, 0);
    } else {
        result = this->calculateMetricValues(type, rawDataSize, pRawData, pTotalMetricValueCount, pMetricValues);
    }

    const bool calculated = (result == ZE_RESULT_SUCCESS) || (result == ZE_RESULT_WARNING_DROPPED_DATA);
    if (calculated) {
        *pSetCount = 1;
    }
    if (!calculateCountOnly) {
        pMetricCounts[0] = calculated ? *pTotalMetricValueCount : 0;
    }
    return result;
}
// Reads the device's global timestamps and reports them to the caller.
//
// On success *metricTimestamp receives the device timestamp and *globalTimestamp
// receives the host timestamp when synchronizedWithHost is set, the device
// timestamp otherwise. On failure both outputs are zeroed and the error from
// getGlobalTimestamps() is returned.
ze_result_t getDeviceTimestamps(DeviceImp *deviceImp, const ze_bool_t synchronizedWithHost,
                                uint64_t *globalTimestamp, uint64_t *metricTimestamp) {
    uint64_t hostTimestamp;
    uint64_t deviceTimestamp;

    const ze_result_t queryResult = deviceImp->getGlobalTimestamps(&hostTimestamp, &deviceTimestamp);
    if (queryResult != ZE_RESULT_SUCCESS) {
        *globalTimestamp = 0;
        *metricTimestamp = 0;
        return queryResult;
    }

    *globalTimestamp = synchronizedWithHost ? hostTimestamp : deviceTimestamp;
    *metricTimestamp = deviceTimestamp;
    return ZE_RESULT_SUCCESS;
}
// Reports the timestamps of the device behind this group's metric source
// (see getDeviceTimestamps for the output semantics).
ze_result_t IpSamplingMetricGroupImp::getMetricTimestampsExp(const ze_bool_t synchronizedWithHost,
                                                             uint64_t *globalTimestamp,
                                                             uint64_t *metricTimestamp) {
    auto &device = getMetricSource().getMetricDeviceContext().getDevice();
    auto *deviceImp = static_cast<DeviceImp *>(&device);
    return getDeviceTimestamps(deviceImp, synchronizedWithHost, globalTimestamp, metricTimestamp);
}
// Computes how many metric values a buffer of raw reports produces:
// (number of distinct IPs) * (metrics per group).
//
// rawDataSize must be a multiple of the raw report size, otherwise
// metricValueCount is set to 0 and ZE_RESULT_ERROR_INVALID_SIZE is returned.
ze_result_t IpSamplingMetricGroupImp::getCalculatedMetricCount(const uint8_t *pRawData, const size_t rawDataSize,
                                                               uint32_t &metricValueCount) {
    constexpr uint32_t reportSize = IpSamplingMetricGroupBase::rawReportSize;

    if ((rawDataSize % reportSize) != 0) {
        METRICS_LOG_ERR("%s", "Invalid input raw data size");
        metricValueCount = 0;
        return ZE_RESULT_ERROR_INVALID_SIZE;
    }

    const uint32_t reportCount = static_cast<uint32_t>(rawDataSize) / reportSize;
    const uint8_t *const pRawDataEnd = pRawData + (reportCount * reportSize);

    std::unordered_set<uint64_t> uniqueIps{};
    for (const uint8_t *pReport = pRawData; pReport < pRawDataEnd; pReport += reportSize) {
        // The IP is taken from the first 8 bytes of the report, keeping the low 29 bits
        uint64_t ip = 0ULL;
        memcpy_s(reinterpret_cast<uint8_t *>(&ip), sizeof(ip), pReport, sizeof(ip));
        uniqueIps.insert(ip & 0x1fffffff);
    }

    metricValueCount = static_cast<uint32_t>(uniqueIps.size()) * properties.metricCount;
    return ZE_RESULT_SUCCESS;
}
// Computes the number of metric values produced by the chunks of a multi-device
// capture buffer that belong to the given set.
//
// The buffer is a sequence of [IpSamplingMetricDataHeader][payload] chunks, where
// the payload length is header->rawDataSize. For every chunk whose setIndex
// matches, the value count of its payload is ADDED to metricValueCount, so the
// caller is expected to pass it in initialized.
//
// Returns ZE_RESULT_ERROR_INVALID_SIZE when a chunk does not start with a valid
// header or when its declared payload does not fit in the remaining buffer.
// When counting a payload fails, metricValueCount is set to 0 and that error is
// returned.
ze_result_t IpSamplingMetricGroupImp::getCalculatedMetricCount(const uint8_t *pMultiMetricData, const size_t rawDataSize, uint32_t &metricValueCount, const uint32_t setIndex) {
    // size_t keeps the offset from wrapping when compared against rawDataSize
    size_t processedSize = 0u;

    // Iterate through headers and assign required sizes
    while (processedSize < rawDataSize) {
        const auto processMetricData = pMultiMetricData + processedSize;
        const size_t remainingSize = rawDataSize - processedSize;
        if (!isMultiDeviceCaptureData(remainingSize, processMetricData)) {
            return ZE_RESULT_ERROR_INVALID_SIZE;
        }

        const auto header = reinterpret_cast<const IpSamplingMetricDataHeader *>(processMetricData);
        const size_t payloadSize = header->rawDataSize;

        // isMultiDeviceCaptureData() guarantees remainingSize >= sizeof(header), so the
        // subtraction cannot underflow. Reject payloads that would run past the buffer.
        if (payloadSize > remainingSize - sizeof(IpSamplingMetricDataHeader)) {
            METRICS_LOG_ERR("%s", "Invalid input raw data size");
            return ZE_RESULT_ERROR_INVALID_SIZE;
        }

        processedSize += sizeof(IpSamplingMetricDataHeader) + payloadSize;
        if (header->setIndex != setIndex) {
            continue;
        }

        uint32_t currTotalMetricValueCount = 0u;
        const auto result = this->getCalculatedMetricCount(processMetricData + sizeof(IpSamplingMetricDataHeader), payloadSize, currTotalMetricValueCount);
        if (result != ZE_RESULT_SUCCESS) {
            metricValueCount = 0;
            return result;
        }
        metricValueCount += currTotalMetricValueCount;
    }
    return ZE_RESULT_SUCCESS;
}
// Calculates metric values for the chunks of a multi-device capture buffer that
// belong to the given set.
//
// The buffer is a sequence of [IpSamplingMetricDataHeader][payload] chunks, where
// the payload length is header->rawDataSize. Matching chunks are calculated one
// after another into pCalculatedData until the buffer is exhausted or the
// requested number of values (metricValueCount on entry) has been produced.
// On return metricValueCount holds the number of values actually written.
//
// Returns:
//   - ZE_RESULT_ERROR_INVALID_SIZE when a chunk does not start with a valid header
//     or its declared payload does not fit in the remaining buffer;
//   - the error of a failed chunk calculation (metricValueCount is set to 0);
//   - ZE_RESULT_WARNING_DROPPED_DATA when any chunk reported dropped data;
//   - ZE_RESULT_SUCCESS otherwise.
ze_result_t IpSamplingMetricGroupImp::getCalculatedMetricValues(const zet_metric_group_calculation_type_t type, const size_t rawDataSize, const uint8_t *pMultiMetricData,
                                                                uint32_t &metricValueCount,
                                                                zet_typed_value_t *pCalculatedData, const uint32_t setIndex) {
    // size_t keeps the offset from wrapping when compared against rawDataSize
    size_t processedSize = 0u;
    auto isDataDropped = false;
    auto requestTotalMetricValueCount = metricValueCount;

    while (processedSize < rawDataSize && requestTotalMetricValueCount > 0) {
        const auto processMetricData = pMultiMetricData + processedSize;
        const size_t remainingSize = rawDataSize - processedSize;
        if (!isMultiDeviceCaptureData(remainingSize, processMetricData)) {
            return ZE_RESULT_ERROR_INVALID_SIZE;
        }

        const auto header = reinterpret_cast<const IpSamplingMetricDataHeader *>(processMetricData);
        const size_t payloadSize = header->rawDataSize;

        // isMultiDeviceCaptureData() guarantees remainingSize >= sizeof(header), so the
        // subtraction cannot underflow. Reject payloads that would run past the buffer.
        if (payloadSize > remainingSize - sizeof(IpSamplingMetricDataHeader)) {
            METRICS_LOG_ERR("%s", "Invalid input raw data size");
            return ZE_RESULT_ERROR_INVALID_SIZE;
        }

        processedSize += payloadSize + sizeof(IpSamplingMetricDataHeader);
        if (header->setIndex != setIndex) {
            continue;
        }

        const auto processMetricRawData = processMetricData + sizeof(IpSamplingMetricDataHeader);
        // In: remaining capacity of the output buffer. Out: values written for this chunk.
        auto currTotalMetricValueCount = requestTotalMetricValueCount;
        const auto result = this->calculateMetricValues(type, payloadSize, processMetricRawData, &currTotalMetricValueCount, pCalculatedData);
        if (result != ZE_RESULT_SUCCESS) {
            if (result == ZE_RESULT_WARNING_DROPPED_DATA) {
                isDataDropped = true;
            } else {
                metricValueCount = 0;
                return result;
            }
        }

        pCalculatedData += currTotalMetricValueCount;
        requestTotalMetricValueCount -= currTotalMetricValueCount;
    }

    // Whatever is left of the request was not produced
    metricValueCount -= requestTotalMetricValueCount;
    return isDataDropped ? ZE_RESULT_WARNING_DROPPED_DATA : ZE_RESULT_SUCCESS;
}
// Calculates metric values from a buffer of raw reports.
//
// Every report is folded into a map keyed by a 64-bit value via the gfx core
// helper; each map entry is then converted to typed values which are copied to
// pCalculatedData. metricValueCount is clamped to
// (map entries * metrics per group) and at most that many values are written.
//
// Returns:
//   - ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for any calculation type other than
//     ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES;
//   - ZE_RESULT_ERROR_INVALID_SIZE (metricValueCount set to 0) when rawDataSize is
//     not a multiple of the raw report size;
//   - ZE_RESULT_WARNING_DROPPED_DATA when the helper flagged any report while
//     updating the map;
//   - ZE_RESULT_SUCCESS otherwise.
ze_result_t IpSamplingMetricGroupImp::getCalculatedMetricValues(const zet_metric_group_calculation_type_t type, const size_t rawDataSize, const uint8_t *pRawData,
                                                                uint32_t &metricValueCount,
                                                                zet_typed_value_t *pCalculatedData) {
    // MAX_METRIC_VALUES is not supported yet.
    if (type != ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES) {
        METRICS_LOG_ERR("%s", "IP sampling only supports ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES");
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    DEBUG_BREAK_IF(pCalculatedData == nullptr);

    const uint32_t reportSize = IpSamplingMetricGroupBase::rawReportSize;
    if ((rawDataSize % reportSize) != 0) {
        METRICS_LOG_ERR("%s", "Invalid input raw data size");
        metricValueCount = 0;
        return ZE_RESULT_ERROR_INVALID_SIZE;
    }
    const uint32_t reportCount = static_cast<uint32_t>(rawDataSize) / reportSize;

    auto &device = this->getMetricSource().getMetricDeviceContext().getDevice();
    DeviceImp *deviceImp = static_cast<DeviceImp *>(&device);
    auto &l0GfxCoreHelper = deviceImp->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();

    // Fold all reports into the map
    std::map<uint64_t, void *> stallReportDataMap;
    bool dataOverflow = false;
    const uint8_t *const pRawDataEnd = pRawData + (reportCount * reportSize);
    for (const uint8_t *pReport = pRawData; pReport < pRawDataEnd; pReport += reportSize) {
        dataOverflow |= l0GfxCoreHelper.stallIpDataMapUpdate(stallReportDataMap, pReport);
    }

    const uint32_t availableValueCount = static_cast<uint32_t>(stallReportDataMap.size()) * properties.metricCount;
    metricValueCount = std::min<uint32_t>(metricValueCount, availableValueCount);

    // Convert every entry and copy its values until the output limit is reached
    std::vector<zet_typed_value_t> ipDataValues;
    uint32_t writtenValueCount = 0;
    for (auto &entry : stallReportDataMap) {
        l0GfxCoreHelper.stallSumIpDataToTypedValues(entry.first, entry.second, ipDataValues);
        for (const auto &value : ipDataValues) {
            if (writtenValueCount >= metricValueCount) {
                break;
            }
            pCalculatedData[writtenValueCount] = value;
            ++writtenValueCount;
        }
        ipDataValues.clear();
    }

    // The map values are released through the helper
    l0GfxCoreHelper.stallIpDataMapDelete(stallReportDataMap);
    stallReportDataMap.clear();

    if (dataOverflow) {
        return ZE_RESULT_WARNING_DROPPED_DATA;
    }
    return ZE_RESULT_SUCCESS;
}
// Returns this group's own handle; the sub-device index is ignored.
zet_metric_group_handle_t IpSamplingMetricGroupImp::getMetricGroupForSubDevice(const uint32_t subDeviceIndex) {
    const zet_metric_group_handle_t ownHandle = toHandle();
    return ownHandle;
}
// Factory for a metric group holding the given metrics.
// Allocation uses the non-throwing form of new, so the returned pointer is null
// when the allocation fails.
std::unique_ptr<IpSamplingMetricGroupImp> IpSamplingMetricGroupImp::create(IpSamplingMetricSourceImp &metricSource,
                                                                           std::vector<IpSamplingMetricImp> &ipSamplingMetrics) {
    auto *rawGroup = new (std::nothrow) IpSamplingMetricGroupImp(metricSource, ipSamplingMetrics);
    return std::unique_ptr<IpSamplingMetricGroupImp>(rawGroup);
}
// Reports the properties of the sub-device metric group at index 0.
ze_result_t MultiDeviceIpSamplingMetricGroupImp::getProperties(zet_metric_group_properties_t *pProperties) {
    auto *firstSubDeviceGroup = subDeviceMetricGroup[0];
    return firstSubDeviceGroup->getProperties(pProperties);
}
// Returns the metrics of the sub-device metric group at index 0.
ze_result_t MultiDeviceIpSamplingMetricGroupImp::metricGet(uint32_t *pCount, zet_metric_handle_t *phMetrics) {
    auto *firstSubDeviceGroup = subDeviceMetricGroup[0];
    return firstSubDeviceGroup->metricGet(pCount, phMetrics);
}
// Forwards the calculation unchanged to the sub-device metric group at index 0.
ze_result_t MultiDeviceIpSamplingMetricGroupImp::calculateMetricValues(const zet_metric_group_calculation_type_t type, size_t rawDataSize,
                                                                       const uint8_t *pRawData, uint32_t *pMetricValueCount,
                                                                       zet_typed_value_t *pMetricValues) {
    auto *firstSubDeviceGroup = subDeviceMetricGroup[0];
    return firstSubDeviceGroup->calculateMetricValues(type, rawDataSize, pRawData, pMetricValueCount, pMetricValues);
}
// Calculates metric values for multi-device capture data, one set per sub-device
// metric group (set index == index into subDeviceMetricGroup).
//
// Count query (*pSetCount == 0 or *pTotalMetricValueCount == 0 on entry):
//   *pTotalMetricValueCount receives the sum of the value counts of all sets and
//   *pSetCount the number of sub-device metric groups. Any failure is returned
//   immediately.
//
// Calculation:
//   Up to min(*pSetCount, number of sub-device groups) sets are calculated back to
//   back into pMetricValues; pMetricCounts[setIndex] receives the number of values
//   of each set and *pTotalMetricValueCount the total number written. All
//   *pSetCount entries of pMetricCounts are zeroed up front and again when a set
//   fails with an error other than ZE_RESULT_WARNING_DROPPED_DATA (that error is
//   returned).
//
// Returns ZE_RESULT_WARNING_DROPPED_DATA when any set reported dropped data,
// ZE_RESULT_SUCCESS otherwise.
ze_result_t MultiDeviceIpSamplingMetricGroupImp::calculateMetricValuesExp(const zet_metric_group_calculation_type_t type, size_t rawDataSize,
                                                                          const uint8_t *pRawData, uint32_t *pSetCount,
                                                                          uint32_t *pTotalMetricValueCount, uint32_t *pMetricCounts,
                                                                          zet_typed_value_t *pMetricValues) {
    const bool calculateCountOnly = *pSetCount == 0 || *pTotalMetricValueCount == 0;
    bool isDroppedData = false;
    ze_result_t result = ZE_RESULT_SUCCESS;

    if (calculateCountOnly) {
        *pSetCount = 0;
        *pTotalMetricValueCount = 0;
        for (uint32_t setIndex = 0; setIndex < subDeviceMetricGroup.size(); setIndex++) {
            uint32_t currTotalMetricValueCount = 0;
            result = subDeviceMetricGroup[setIndex]->getCalculatedMetricCount(pRawData, rawDataSize, currTotalMetricValueCount, setIndex);
            if (result != ZE_RESULT_SUCCESS) {
                return result;
            }
            *pTotalMetricValueCount += currTotalMetricValueCount;
        }
        *pSetCount = static_cast<uint32_t>(subDeviceMetricGroup.size());
    } else {
        // memset counts bytes, so the element count must be scaled by the element size
        const size_t metricCountsSizeInBytes = static_cast<size_t>(*pSetCount) * sizeof(*pMetricCounts);
        memset(pMetricCounts, 0, metricCountsSizeInBytes);

        const auto maxSets = std::min<uint32_t>(static_cast<uint32_t>(subDeviceMetricGroup.size()), *pSetCount);
        // Remaining capacity of the output buffer, in values
        auto tempTotalMetricValueCount = *pTotalMetricValueCount;
        for (uint32_t setIndex = 0; setIndex < maxSets; setIndex++) {
            // In: remaining capacity. Out: values written for this set.
            uint32_t currTotalMetricValueCount = tempTotalMetricValueCount;
            result = subDeviceMetricGroup[setIndex]->getCalculatedMetricValues(type, rawDataSize, pRawData, currTotalMetricValueCount, pMetricValues, setIndex);
            if (result != ZE_RESULT_SUCCESS) {
                if (result == ZE_RESULT_WARNING_DROPPED_DATA) {
                    isDroppedData = true;
                } else {
                    memset(pMetricCounts, 0, metricCountsSizeInBytes);
                    return result;
                }
            }
            pMetricCounts[setIndex] = currTotalMetricValueCount;
            pMetricValues += currTotalMetricValueCount;
            tempTotalMetricValueCount -= currTotalMetricValueCount;
        }
        // Whatever is left of the request was not produced
        *pTotalMetricValueCount -= tempTotalMetricValueCount;
    }
    return isDroppedData ? ZE_RESULT_WARNING_DROPPED_DATA : ZE_RESULT_SUCCESS;
}
// Returns the handle of the metric group stored for the given sub-device index.
// The index is not range-checked here.
zet_metric_group_handle_t MultiDeviceIpSamplingMetricGroupImp::getMetricGroupForSubDevice(const uint32_t subDeviceIndex) {
    auto *selectedGroup = subDeviceMetricGroup[subDeviceIndex];
    return selectedGroup->toHandle();
}
// Calls close() on every streamer in the list, in order.
void MultiDeviceIpSamplingMetricGroupImp::closeSubDeviceStreamers(std::vector<IpSamplingMetricStreamerImp *> &subDeviceStreamers) {
    for (auto it = subDeviceStreamers.begin(); it != subDeviceStreamers.end(); ++it) {
        (*it)->close();
    }
}
// Reports the timestamps of the device behind the sub-device metric group at
// index 0 (see getDeviceTimestamps for the output semantics).
ze_result_t MultiDeviceIpSamplingMetricGroupImp::getMetricTimestampsExp(const ze_bool_t synchronizedWithHost,
                                                                        uint64_t *globalTimestamp,
                                                                        uint64_t *metricTimestamp) {
    auto &device = subDeviceMetricGroup[0]->getMetricSource().getMetricDeviceContext().getDevice();
    auto *deviceImp = static_cast<DeviceImp *>(&device);
    return getDeviceTimestamps(deviceImp, synchronizedWithHost, globalTimestamp, metricTimestamp);
}
// Factory for a multi-device metric group wrapping the given sub-device groups.
// An empty list of sub-device groups is treated as unrecoverable. Allocation uses
// the non-throwing form of new, so the returned pointer is null when it fails.
std::unique_ptr<MultiDeviceIpSamplingMetricGroupImp> MultiDeviceIpSamplingMetricGroupImp::create(
    MetricSource &metricSource,
    std::vector<IpSamplingMetricGroupImp *> &subDeviceMetricGroup) {
    UNRECOVERABLE_IF(subDeviceMetricGroup.empty());

    auto *rawGroup = new (std::nothrow) MultiDeviceIpSamplingMetricGroupImp(metricSource, subDeviceMetricGroup);
    return std::unique_ptr<MultiDeviceIpSamplingMetricGroupImp>(rawGroup);
}
// Binds the metric to its source and stores a copy of the given properties.
IpSamplingMetricImp::IpSamplingMetricImp(MetricSource &metricSource, zet_metric_properties_t &properties)
    : MetricImp(metricSource),
      properties(properties) {}
// Copies the stored metric properties into *pProperties, overwriting the whole
// structure. Always returns ZE_RESULT_SUCCESS.
ze_result_t IpSamplingMetricImp::getProperties(zet_metric_properties_t *pProperties) {
    auto &destination = *pProperties;
    destination = properties;
    return ZE_RESULT_SUCCESS;
}
// Specialization returning the metric source registered under the IP sampling
// source type, downcast to IpSamplingMetricSourceImp. The lookup uses at().
template <>
IpSamplingMetricSourceImp &MetricDeviceContext::getMetricSource<IpSamplingMetricSourceImp>() const {
    auto &registeredSource = *metricSources.at(MetricSource::metricSourceTypeIpSampling);
    return static_cast<IpSamplingMetricSourceImp &>(registeredSource);
}
} // namespace L0