956 lines
36 KiB
C++
956 lines
36 KiB
C++
/*
 * Copyright (C) 2020-2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
|
|
|
|
#include "level_zero/tools/source/metrics/metric_oa_query_imp.h"
|
|
|
|
#include "shared/source/command_stream/command_stream_receiver.h"
|
|
#include "shared/source/debug_settings/debug_settings_manager.h"
|
|
#include "shared/source/device/device.h"
|
|
#include "shared/source/execution_environment/execution_environment.h"
|
|
#include "shared/source/helpers/engine_node_helper.h"
|
|
#include "shared/source/helpers/gfx_core_helper.h"
|
|
#include "shared/source/memory_manager/allocation_properties.h"
|
|
#include "shared/source/memory_manager/memory_manager.h"
|
|
#include "shared/source/os_interface/os_context.h"
|
|
#include "shared/source/os_interface/os_library.h"
|
|
|
|
#include "level_zero/core/source/cmdlist/cmdlist.h"
|
|
#include "level_zero/core/source/cmdlist/cmdlist_imp.h"
|
|
#include "level_zero/core/source/device/device.h"
|
|
#include "level_zero/core/source/device/device_imp.h"
|
|
#include "level_zero/core/source/driver/driver_handle.h"
|
|
#include "level_zero/tools/source/metrics/metric_oa_enumeration_imp.h"
|
|
#include "level_zero/tools/source/metrics/metric_oa_source.h"
|
|
|
|
using namespace MetricsLibraryApi;
|
|
|
|
namespace L0 {
|
|
|
|
MetricsLibrary::MetricsLibrary(OaMetricSourceImp &metricSourceInput)
|
|
: metricSource(metricSourceInput) {}
|
|
|
|
// Releases cached configurations and the library context on destruction.
MetricsLibrary::~MetricsLibrary() {
    this->release();
}
|
|
|
|
// Returns the current initialization state without triggering initialization.
ze_result_t MetricsLibrary::getInitializationState() {
    return this->initializationState;
}
|
|
|
|
bool MetricsLibrary::isInitialized() {
|
|
// Try to initialize metrics library only once.
|
|
if (initializationState == ZE_RESULT_ERROR_UNINITIALIZED) {
|
|
initialize();
|
|
}
|
|
|
|
return initializationState == ZE_RESULT_SUCCESS;
|
|
}
|
|
|
|
// Returns the gpu size of a single hw counters query report.
// Returns 0 when the library is not initialized, the parameter query fails,
// or the library reports a zero size.
uint32_t MetricsLibrary::getQueryReportGpuSize() {

    TypedValue_1_0 reportSize = {};

    // The library is only asked for the parameter when it is initialized.
    const bool sizeObtained =
        isInitialized() &&
        (api.GetParameter(ParameterType::QueryHwCountersReportGpuSize, &reportSize.Type, &reportSize) == StatusCode::Success);

    // A failed query and an empty report size are both treated as errors.
    if (!sizeObtained || reportSize.ValueUInt32 == 0) {
        DEBUG_BREAK_IF(true);
        return 0;
    }

    return reportSize.ValueUInt32;
}
|
|
|
|
bool MetricsLibrary::createMetricQuery(const uint32_t slotsCount, QueryHandle_1_0 &query,
|
|
NEO::GraphicsAllocation *&pAllocation) {
|
|
|
|
std::lock_guard<std::mutex> lock(mutex);
|
|
|
|
// Validate metrics library state.
|
|
if (!isInitialized()) {
|
|
DEBUG_BREAK_IF(true);
|
|
return false;
|
|
}
|
|
|
|
QueryCreateData_1_0 queryData = {};
|
|
queryData.HandleContext = context;
|
|
queryData.Type = ObjectType::QueryHwCounters;
|
|
queryData.Slots = slotsCount;
|
|
|
|
// Create query pool within metrics library.
|
|
if (api.QueryCreate(&queryData, &query) != StatusCode::Success) {
|
|
DEBUG_BREAK_IF(true);
|
|
return false;
|
|
}
|
|
|
|
// Register created query.
|
|
queries.push_back(query);
|
|
|
|
return true;
|
|
}
|
|
|
|
// Returns the number of queries currently registered, read under the library mutex.
uint32_t MetricsLibrary::getMetricQueryCount() {
    std::lock_guard<std::mutex> guard(mutex);
    const auto registeredQueries = queries.size();
    return static_cast<uint32_t>(registeredQueries);
}
|
|
|
|
// Deletes the query in the metrics library and removes it from the list of open queries.
// The query is unregistered even if the library deletion fails; the return value
// reflects only the library deletion.
bool MetricsLibrary::destroyMetricQuery(QueryHandle_1_0 &query) {
    std::lock_guard<std::mutex> guard(mutex);
    DEBUG_BREAK_IF(!query.IsValid());

    const bool deleted = isInitialized() && (api.QueryDelete(query) == StatusCode::Success);

    // Unregister the first entry that refers to the same underlying handle.
    for (auto entry = queries.begin(); entry != queries.end(); ++entry) {
        if (entry->data == query.data) {
            queries.erase(entry);
            break;
        }
    }

    return deleted;
}
|
|
|
|
// Obtains the api (cpu visible) size of a hw counters query report.
// `rawDataSize` is always written; it is 0 when the parameter could not be obtained.
bool MetricsLibrary::getMetricQueryReportSize(size_t &rawDataSize) {
    ValueType type = ValueType::Last;
    TypedValue_1_0 reportSize = {};

    bool obtained = false;
    if (isInitialized()) {
        obtained = (api.GetParameter(ParameterType::QueryHwCountersReportApiSize, &type, &reportSize) == StatusCode::Success);
    }

    DEBUG_BREAK_IF(!obtained);
    rawDataSize = static_cast<size_t>(reportSize.ValueUInt32);
    return obtained;
}
|
|
|
|
// Reads the report of a single query slot into `pData`.
// `rawDataSize` is the size of the destination buffer passed to the library.
bool MetricsLibrary::getMetricQueryReport(QueryHandle_1_0 &query, const uint32_t slot,
                                          const size_t rawDataSize, uint8_t *pData) {

    // Describe a one-slot read of the hw counters query.
    GetReportData_1_0 reportRequest = {};
    reportRequest.Type = ObjectType::QueryHwCounters;
    reportRequest.Query.Handle = query;
    reportRequest.Query.Slot = slot;
    reportRequest.Query.SlotsCount = 1;
    reportRequest.Query.Data = pData;
    reportRequest.Query.DataSize = static_cast<uint32_t>(rawDataSize);

    bool dataObtained = isInitialized();
    if (dataObtained) {
        dataObtained = (api.GetData(&reportRequest) == StatusCode::Success);
    }

    DEBUG_BREAK_IF(!dataObtained);
    return dataObtained;
}
|
|
|
|
void MetricsLibrary::initialize() {
|
|
auto &metricsEnumeration = metricSource.getMetricEnumeration();
|
|
|
|
// Function should be called only once.
|
|
DEBUG_BREAK_IF(initializationState != ZE_RESULT_ERROR_UNINITIALIZED);
|
|
|
|
// Metrics Enumeration needs to be initialized before Metrics Library
|
|
const bool validMetricsEnumeration = metricsEnumeration.isInitialized();
|
|
const bool validMetricsLibrary = validMetricsEnumeration && handle && createContext();
|
|
|
|
// Load metrics library and exported functions.
|
|
initializationState = validMetricsLibrary ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
|
|
DEBUG_BREAK_IF(initializationState != ZE_RESULT_SUCCESS);
|
|
}
|
|
|
|
void MetricsLibrary::release() {
|
|
|
|
// Delete metric group configurations.
|
|
deleteAllConfigurations();
|
|
|
|
// Destroy context.
|
|
if (context.IsValid() && contextDeleteFunction) {
|
|
contextDeleteFunction(context);
|
|
}
|
|
|
|
// Reset metric query state to not initialized.
|
|
api = {};
|
|
callbacks = {};
|
|
context = {};
|
|
initializationState = ZE_RESULT_ERROR_UNINITIALIZED;
|
|
}
|
|
|
|
bool MetricsLibrary::load() {
|
|
// Load library.
|
|
handle = NEO::OsLibrary::loadFunc({getFilename()});
|
|
|
|
// Load exported functions.
|
|
if (handle) {
|
|
contextCreateFunction = reinterpret_cast<ContextCreateFunction_1_0>(
|
|
handle->getProcAddress(METRICS_LIBRARY_CONTEXT_CREATE_1_0));
|
|
contextDeleteFunction = reinterpret_cast<ContextDeleteFunction_1_0>(
|
|
handle->getProcAddress(METRICS_LIBRARY_CONTEXT_DELETE_1_0));
|
|
}
|
|
|
|
if (contextCreateFunction == nullptr || contextDeleteFunction == nullptr) {
|
|
METRICS_LOG_ERR("cannot load %s exported functions", MetricsLibrary::getFilename());
|
|
return false;
|
|
}
|
|
|
|
// Return success if exported functions have been loaded.
|
|
return true;
|
|
}
|
|
|
|
void MetricsLibrary::enableWorkloadPartition() {
|
|
isWorkloadPartitionEnabled = true;
|
|
}
|
|
|
|
// Fills the four sub-device related client options passed to the metrics library.
// When the execution environment reports a sub-device hierarchy for the root device,
// the device is treated as a sub-device and the hierarchy's index/count are used;
// otherwise the values come from the device itself.
void MetricsLibrary::getSubDeviceClientOptions(
    ClientOptionsData_1_0 &subDevice,
    ClientOptionsData_1_0 &subDeviceIndex,
    ClientOptionsData_1_0 &subDeviceCount,
    ClientOptionsData_1_0 &workloadPartition) {

    auto &deviceImp = *static_cast<DeviceImp *>(&metricSource.getDevice());

    std::tuple<uint32_t, uint32_t, uint32_t> hierarchy;
    uint32_t hierarchySubDeviceIndex = 0u;
    uint32_t hierarchySubDevicesCount = 0u;

    const bool usesHierarchy = deviceImp.getNEODevice()->getExecutionEnvironment()->getSubDeviceHierarchy(deviceImp.getNEODevice()->getRootDeviceIndex(), &hierarchy);
    if (usesHierarchy) {
        hierarchySubDeviceIndex = std::get<1>(hierarchy);
        hierarchySubDevicesCount = std::get<2>(hierarchy);
    }

    // A device with a hierarchy entry is always handled as a sub-device.
    const bool treatAsSubDevice = usesHierarchy || deviceImp.isSubdevice;

    subDevice.Type = ClientOptionsType::SubDevice;
    subDevice.SubDevice.Enabled = treatAsSubDevice;

    subDeviceIndex.Type = ClientOptionsType::SubDeviceIndex;
    subDeviceCount.Type = ClientOptionsType::SubDeviceCount;

    if (usesHierarchy) {
        subDeviceIndex.SubDeviceIndex.Index = hierarchySubDeviceIndex;
        subDeviceCount.SubDeviceCount.Count = hierarchySubDevicesCount;
    } else {
        // Root devices and plain sub-devices share the same index/count source.
        subDeviceIndex.SubDeviceIndex.Index = static_cast<uint8_t>(deviceImp.getPhysicalSubDeviceId());
        subDeviceCount.SubDeviceCount.Count = std::max(deviceImp.getNEODevice()->getRootDevice()->getNumSubDevices(), 1u);
    }

    // Workload partitioning is never enabled for a root device.
    workloadPartition.Type = ClientOptionsType::WorkloadPartition;
    workloadPartition.WorkloadPartition.Enabled = treatAsSubDevice && isWorkloadPartitionEnabled;
}
|
|
|
|
bool MetricsLibrary::createContext() {
|
|
auto &device = metricSource.getDevice();
|
|
const auto &gfxCoreHelper = device.getGfxCoreHelper();
|
|
const auto &asyncComputeEngines = gfxCoreHelper.getGpgpuEngineInstances(device.getNEODevice()->getRootDeviceEnvironment());
|
|
ContextCreateData_1_0 createData = {};
|
|
ClientOptionsData_1_0 clientOptions[6] = {};
|
|
ClientData_1_0 clientData = {};
|
|
ClientType_1_0 clientType = {};
|
|
ClientDataLinuxAdapter_1_0 adapter = {};
|
|
|
|
// Check if compute command streamer is used.
|
|
auto asyncComputeEngine = std::find_if(asyncComputeEngines.begin(), asyncComputeEngines.end(), [&](const auto &engine) {
|
|
return engine.first == aub_stream::ENGINE_CCS;
|
|
});
|
|
|
|
const auto &deviceImp = *static_cast<DeviceImp *>(&device);
|
|
const auto &commandStreamReceiver = *deviceImp.getNEODevice()->getDefaultEngine().commandStreamReceiver;
|
|
const auto engineType = commandStreamReceiver.getOsContext().getEngineType();
|
|
const bool isComputeUsed = NEO::EngineHelpers::isCcs(engineType);
|
|
|
|
metricSource.setUseCompute(isComputeUsed);
|
|
|
|
// Create metrics library context.
|
|
DEBUG_BREAK_IF(!contextCreateFunction);
|
|
clientType.Api = ClientApi::OneApi;
|
|
clientType.Gen = getGenType(device.getGfxCoreHelper());
|
|
|
|
clientOptions[0].Type = ClientOptionsType::Compute;
|
|
clientOptions[0].Compute.Asynchronous = asyncComputeEngine != asyncComputeEngines.end();
|
|
|
|
clientOptions[1].Type = ClientOptionsType::Tbs;
|
|
clientOptions[1].Tbs.Enabled = metricSource.getMetricStreamer() != nullptr;
|
|
|
|
// Sub device client options #2
|
|
getSubDeviceClientOptions(clientOptions[2], clientOptions[3], clientOptions[4], clientOptions[5]);
|
|
|
|
clientData.Linux.Adapter = &adapter;
|
|
clientData.ClientOptions = clientOptions;
|
|
clientData.ClientOptionsCount = sizeof(clientOptions) / sizeof(ClientOptionsData_1_0);
|
|
|
|
createData.Api = &api;
|
|
createData.ClientCallbacks = &callbacks;
|
|
createData.ClientData = &clientData;
|
|
|
|
const bool result =
|
|
getContextData(device, createData) &&
|
|
contextCreateFunction(clientType, &createData, &context) == StatusCode::Success;
|
|
|
|
DEBUG_BREAK_IF(!result);
|
|
return result;
|
|
}
|
|
|
|
// Converts the gen id reported by the gfx core helper into the metrics library enum.
ClientGen MetricsLibrary::getGenType(const NEO::GfxCoreHelper &gfxCoreHelper) const {
    const auto metricsLibraryGenId = gfxCoreHelper.getMetricsLibraryGenId();
    return static_cast<MetricsLibraryApi::ClientGen>(metricsLibraryGenId);
}
|
|
|
|
// Asks the metrics library how much gpu memory the described commands require.
// Stores the library context in `commandBuffer` as a side effect.
// Returns 0 when the library is not initialized or the size query fails.
uint32_t MetricsLibrary::getGpuCommandsSize(CommandBufferData_1_0 &commandBuffer) {
    CommandBufferSize_1_0 requiredSize = {};
    bool sizeObtained = false;

    if (isInitialized()) {
        commandBuffer.HandleContext = context;
        sizeObtained = (api.CommandBufferGetSize(&commandBuffer, &requiredSize) == StatusCode::Success);
    }

    DEBUG_BREAK_IF(!sizeObtained);

    if (!sizeObtained) {
        return 0;
    }
    return requiredSize.GpuMemorySize;
}
|
|
|
|
// Lets the metrics library write gpu commands into an already prepared command buffer.
bool MetricsLibrary::getGpuCommands(CommandBufferData_1_0 &commandBuffer) {

    bool commandsObtained = isInitialized();
    if (commandsObtained) {
        commandsObtained = (api.CommandBufferGet(&commandBuffer) == StatusCode::Success);
    }

    DEBUG_BREAK_IF(!commandsObtained);
    return commandsObtained;
}
|
|
|
|
// Reserves space in the command list's command stream and lets the metrics library
// write the described gpu commands there.
// Returns false when the required size is zero or the library fails to produce the commands.
bool MetricsLibrary::getGpuCommands(CommandList &commandList,
                                    CommandBufferData_1_0 &commandBuffer) {

    // The size is stored in the descriptor because the library reads it from there.
    commandBuffer.Size = getGpuCommandsSize(commandBuffer);
    if (commandBuffer.Size == 0) {
        DEBUG_BREAK_IF(true);
        return false;
    }

    // Attach space taken from the command stream to the descriptor.
    auto commandStream = commandList.getCmdContainer().getCommandStream();
    commandBuffer.Data = commandStream->getSpace(commandBuffer.Size);

    bool commandsObtained = isInitialized();
    if (commandsObtained) {
        commandsObtained = (api.CommandBufferGet(&commandBuffer) == StatusCode::Success);
    }

    DEBUG_BREAK_IF(!commandsObtained);
    return commandsObtained;
}
|
|
|
|
// Creates a metrics library OA configuration for the given metric group.
// The metric set is activated only for the duration of the configuration creation
// and deactivated again afterwards.
// Returns a default-constructed handle when the metric group handle is null, the
// library is not initialized, the sampling type is neither event based nor time
// based, or the metric set cannot be activated.
ConfigurationHandle_1_0
MetricsLibrary::createConfiguration(const zet_metric_group_handle_t metricGroupHandle,
                                    const zet_metric_group_properties_t &properties) {
    // Metric group internal data.
    auto metricGroup = static_cast<OaMetricGroupImp *>(MetricGroup::fromHandle(metricGroupHandle));
    auto metricGroupDummy = ConfigurationHandle_1_0{};
    DEBUG_BREAK_IF(!metricGroup);

    // Metrics library configuration creation data.
    ConfigurationHandle_1_0 handle = {};
    ConfigurationCreateData_1_0 handleData = {};

    // Check supported sampling types.
    const bool validSampling =
        properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED ||
        properties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_TIME_BASED;

    // Activate metric group through metrics discovery to send metric group
    // configuration to kernel driver.
    // The null check keeps builds where DEBUG_BREAK_IF does not stop execution from
    // dereferencing a null metric group.
    const bool validActivate = isInitialized() && validSampling &&
                               (metricGroup != nullptr) && metricGroup->activateMetricSet();

    if (validActivate) {
        handleData.HandleContext = context;
        handleData.Type = ObjectType::ConfigurationHwCountersOa;

        // Use metrics library to create configuration for the activated metric group.
        api.ConfigurationCreate(&handleData, &handle);

        // Use metrics discovery to deactivate metric group.
        metricGroup->deactivateMetricSet();
    }

    return validActivate ? handle : metricGroupDummy;
}
|
|
|
|
// Returns the configuration cached for the metric group, creating it on a cache miss.
ConfigurationHandle_1_0 MetricsLibrary::getConfiguration(zet_metric_group_handle_t handle) {

    ConfigurationHandle_1_0 configuration = {};

    const auto cachedEntry = configurations.find(handle);
    if (cachedEntry != configurations.end()) {
        configuration = cachedEntry->second;
    } else {
        configuration = addConfiguration(handle);
    }

    DEBUG_BREAK_IF(!configuration.IsValid());
    return configuration;
}
|
|
|
|
// Creates a metrics library configuration for the metric group and caches it when valid.
// Returns a default-constructed handle when the configuration could not be created.
ConfigurationHandle_1_0 MetricsLibrary::addConfiguration(zet_metric_group_handle_t handle) {
    DEBUG_BREAK_IF(!handle);

    ConfigurationHandle_1_0 cachedHandle = {};

    // Query the metric group properties needed to create the configuration.
    zet_metric_group_properties_t groupProperties = {ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES, nullptr};
    auto metricGroup = MetricGroup::fromHandle(handle);
    OaMetricGroupImp::getProperties(handle, &groupProperties);

    auto createdConfiguration = createConfiguration(metricGroup, groupProperties);

    // Only valid configurations are cached.
    const bool configurationValid = createdConfiguration.IsValid();
    if (configurationValid) {
        cachedHandle = createdConfiguration;
        cacheConfiguration(handle, cachedHandle);
    }

    DEBUG_BREAK_IF(!cachedHandle.IsValid());
    return cachedHandle;
}
|
|
|
|
void MetricsLibrary::deleteAllConfigurations() {
|
|
|
|
if (api.ConfigurationDelete) {
|
|
for (auto &configuration : configurations) {
|
|
if (configuration.second.IsValid()) {
|
|
api.ConfigurationDelete(configuration.second);
|
|
}
|
|
}
|
|
}
|
|
|
|
configurations.clear();
|
|
}
|
|
|
|
// Creates a metric query pool for this metric group by forwarding to the pool implementation.
ze_result_t OaMetricGroupImp::metricQueryPoolCreate(
    zet_context_handle_t hContext,
    zet_device_handle_t hDevice,
    const zet_metric_query_pool_desc_t *desc,
    zet_metric_query_pool_handle_t *phMetricQueryPool) {

    auto hThisMetricGroup = toHandle();
    return OaMetricQueryPoolImp::metricQueryPoolCreate(hContext, hDevice, hThisMetricGroup, desc, phMetricQueryPool);
}
|
|
|
|
// Creates a metric query pool for the device.
// When the metric source is implicit scaling capable, one pool per sub-device is created
// and attached to the returned pool; otherwise a single pool is created.
// Returns ZE_RESULT_ERROR_NOT_AVAILABLE while a metric streamer is open and
// ZE_RESULT_ERROR_INVALID_ARGUMENT when pool creation or gpu memory allocation fails.
ze_result_t OaMetricQueryPoolImp::metricQueryPoolCreate(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup,
                                                        const zet_metric_query_pool_desc_t *pDesc, zet_metric_query_pool_handle_t *phMetricQueryPool) {
    auto device = Device::fromHandle(hDevice);
    auto &metricSource = device->getMetricDeviceContext().getMetricSource<OaMetricSourceImp>();

    // Metric query cannot be used with streamer simultaneously
    // (due to oa buffer usage constraints).
    if (metricSource.getMetricStreamer() != nullptr) {
        return ZE_RESULT_ERROR_NOT_AVAILABLE;
    }

    const auto &deviceImp = *static_cast<DeviceImp *>(device);
    auto rootPool = new OaMetricQueryPoolImp(metricSource, hMetricGroup, *pDesc);

    // Shared failure path: destroying the root pool also destroys the sub-device
    // pools already attached to it.
    const auto abortCreation = [&]() {
        rootPool->destroy();
        rootPool = nullptr;
        *phMetricQueryPool = nullptr;
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    };

    if (!metricSource.isImplicitScalingCapable()) {

        // Single pool.
        if (!rootPool->create()) {
            return abortCreation();
        }

    } else {

        auto emptyMetricGroups = std::vector<MetricGroupImp *>();

        auto metricGroups = hMetricGroup
                                ? static_cast<OaMetricGroupImp *>(MetricGroup::fromHandle(hMetricGroup))->getMetricGroups()
                                : emptyMetricGroups;

        // Per sub-device metric groups are used whenever the metric group provides them.
        const bool hasSubDeviceMetricGroups = metricGroups.size() > 0;

        auto &subDevicePools = rootPool->getMetricQueryPools();

        for (size_t subDeviceId = 0; subDeviceId < deviceImp.numSubDevices; ++subDeviceId) {

            auto &subDevice = deviceImp.subDevices[subDeviceId];
            auto &subDeviceMetricSource = subDevice->getMetricDeviceContext().getMetricSource<OaMetricSourceImp>();

            zet_metric_group_handle_t hSubDeviceMetricGroup = hasSubDeviceMetricGroups
                                                                  ? metricGroups[subDeviceMetricSource.getSubDeviceIndex()]
                                                                  : hMetricGroup;

            auto subDevicePool = new OaMetricQueryPoolImp(subDeviceMetricSource, hSubDeviceMetricGroup, *pDesc);

            // The failing pool is not attached yet, so it is destroyed separately.
            if (!subDevicePool->create()) {
                subDevicePool->destroy();
                subDevicePool = nullptr;
                return abortCreation();
            }

            subDevicePools.push_back(subDevicePool);
        }
    }

    // Allocate gpu memory.
    if (!rootPool->allocateGpuMemory()) {
        return abortCreation();
    }

    *phMetricQueryPool = rootPool;

    return ZE_RESULT_SUCCESS;
}
|
|
|
|
// Entry point for metric query pool creation.
// Execution pools are created directly by the OA pool implementation; every other
// pool type is dispatched through the (required) metric group.
ze_result_t metricQueryPoolCreate(zet_context_handle_t hContext, zet_device_handle_t hDevice, zet_metric_group_handle_t hMetricGroup,
                                  const zet_metric_query_pool_desc_t *pDesc, zet_metric_query_pool_handle_t *phMetricQueryPool) {

    const bool isExecutionPool = (pDesc->type == ZET_METRIC_QUERY_POOL_TYPE_EXECUTION);

    if (!isExecutionPool) {
        UNRECOVERABLE_IF(hMetricGroup == nullptr);
        return MetricGroup::fromHandle(hMetricGroup)->metricQueryPoolCreate(hContext, hDevice, pDesc, phMetricQueryPool);
    }

    return OaMetricQueryPoolImp::metricQueryPoolCreate(hContext, hDevice, hMetricGroup, pDesc, phMetricQueryPool);
}
|
|
|
|
// Stores the metric source, its metrics library, the pool description and the
// metric group handle; no library objects are created here.
OaMetricQueryPoolImp::OaMetricQueryPoolImp(OaMetricSourceImp &metricSourceInput,
                                           zet_metric_group_handle_t hEventMetricGroupInput,
                                           const zet_metric_query_pool_desc_t &poolDescription)
    : metricSource(metricSourceInput),
      metricsLibrary(metricSourceInput.getMetricsLibrary()),
      description(poolDescription),
      hMetricGroup(hEventMetricGroupInput) {
}
|
|
|
|
bool OaMetricQueryPoolImp::create() {
|
|
switch (description.type) {
|
|
case ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE:
|
|
return createMetricQueryPool();
|
|
case ZET_METRIC_QUERY_POOL_TYPE_EXECUTION:
|
|
return createSkipExecutionQueryPool();
|
|
default:
|
|
DEBUG_BREAK_IF(true);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Destroys the pool: attached sub-device pools first, then (for performance pools)
// the library query and the gpu allocation. When no queries remain open and no metric
// group is activated in hw, the metrics library is released. The object deletes itself.
ze_result_t OaMetricQueryPoolImp::destroy() {
    const bool isPerformancePool = (description.type == ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE);
    const bool isExecutionPool = (description.type == ZET_METRIC_QUERY_POOL_TYPE_EXECUTION);

    if (isPerformancePool || isExecutionPool) {
        // Sub-device pools delete themselves in their own destroy().
        for (auto &subDevicePool : metricQueryPools) {
            MetricQueryPool::fromHandle(subDevicePool)->destroy();
        }
    } else {
        DEBUG_BREAK_IF(true);
    }

    if (isPerformancePool) {
        if (query.IsValid()) {
            metricsLibrary.destroyMetricQuery(query);
        }

        if (pAllocation) {
            metricSource.getDevice().getDriverHandle()->getMemoryManager()->freeGraphicsMemory(pAllocation);
        }
    }

    // Check open queries.
    const bool noOpenQueries = (metricSource.getMetricsLibrary().getMetricQueryCount() == 0);
    if (noOpenQueries && !metricSource.isMetricGroupActivatedInHw()) {
        metricSource.getMetricsLibrary().release();
    }

    delete this;

    return ZE_RESULT_SUCCESS;
}
|
|
|
|
bool OaMetricQueryPoolImp::allocateGpuMemory() {
|
|
|
|
if (description.type == ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE) {
|
|
// Get allocation size.
|
|
const auto &deviceImp = *static_cast<DeviceImp *>(&metricSource.getDevice());
|
|
|
|
allocationSize = (metricSource.isImplicitScalingCapable())
|
|
? deviceImp.subDevices[0]->getMetricDeviceContext().getMetricSource<OaMetricSourceImp>().getMetricsLibrary().getQueryReportGpuSize() * description.count * deviceImp.numSubDevices
|
|
: metricsLibrary.getQueryReportGpuSize() * description.count;
|
|
|
|
if (allocationSize == 0) {
|
|
return false;
|
|
}
|
|
|
|
// Allocate gpu memory.
|
|
NEO::AllocationProperties properties(
|
|
metricSource.getDevice().getRootDeviceIndex(), allocationSize, NEO::AllocationType::bufferHostMemory, metricSource.getDevice().getNEODevice()->getDeviceBitfield());
|
|
properties.alignment = 64u;
|
|
pAllocation = metricSource.getDevice().getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
|
|
|
UNRECOVERABLE_IF(pAllocation == nullptr);
|
|
|
|
// Clear allocation.
|
|
memset(pAllocation->getUnderlyingBuffer(), 0, allocationSize);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool OaMetricQueryPoolImp::createMetricQueryPool() {
|
|
// Validate metric group query - only event based is supported.
|
|
zet_metric_group_properties_t metricGroupProperties = {ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES, nullptr};
|
|
OaMetricGroupImp::getProperties(hMetricGroup, &metricGroupProperties);
|
|
const bool validMetricGroup = metricGroupProperties.samplingType == ZET_METRIC_GROUP_SAMPLING_TYPE_FLAG_EVENT_BASED;
|
|
|
|
if (!validMetricGroup) {
|
|
return false;
|
|
}
|
|
|
|
// Pool initialization.
|
|
pool.reserve(description.count);
|
|
for (uint32_t i = 0; i < description.count; ++i) {
|
|
pool.push_back({metricSource, *this, i});
|
|
}
|
|
|
|
// Metrics library query object initialization.
|
|
return metricsLibrary.createMetricQuery(description.count, query, pAllocation);
|
|
}
|
|
|
|
bool OaMetricQueryPoolImp::createSkipExecutionQueryPool() {
|
|
|
|
pool.reserve(description.count);
|
|
for (uint32_t i = 0; i < description.count; ++i) {
|
|
pool.push_back({metricSource, *this, i});
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Converts an api handle back to the pool object it refers to.
MetricQueryPool *MetricQueryPool::fromHandle(zet_metric_query_pool_handle_t handle) {
    auto *metricQueryPool = static_cast<MetricQueryPool *>(handle);
    return metricQueryPool;
}
|
|
|
|
// Exposes this pool object as an api handle.
zet_metric_query_pool_handle_t MetricQueryPool::toHandle() {
    return this;
}
|
|
|
|
// Returns the query object for the given pool slot.
// Without sub-device pools the handle points at the pool-owned query for that slot.
// With sub-device pools a new aggregate query is allocated that references the
// matching slot of every sub-device pool.
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when the index is outside the pool.
ze_result_t OaMetricQueryPoolImp::metricQueryCreate(uint32_t index,
                                                    zet_metric_query_handle_t *phMetricQuery) {

    if (index >= description.count) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    // Single pool: hand out the query stored in this pool.
    if (metricQueryPools.empty()) {
        *phMetricQuery = &(pool[index]);
        return ZE_RESULT_SUCCESS;
    }

    // Sub-device pools: build an aggregate query over the per sub-device queries.
    auto aggregateQuery = new OaMetricQueryImp(metricSource, *this, index);

    for (auto hSubDevicePool : metricQueryPools) {
        auto &subDeviceQueries = aggregateQuery->getMetricQueries();
        auto subDevicePool = static_cast<OaMetricQueryPoolImp *>(MetricQueryPool::fromHandle(hSubDevicePool));
        subDeviceQueries.push_back(&subDevicePool->pool[index]);
    }

    *phMetricQuery = aggregateQuery;

    return ZE_RESULT_SUCCESS;
}
|
|
|
|
// Gives mutable access to the list of attached sub-device pools.
std::vector<zet_metric_query_pool_handle_t> &OaMetricQueryPoolImp::getMetricQueryPools() {
    return this->metricQueryPools;
}
|
|
|
|
// Stores the metric source, its metrics library, the owning pool and the slot
// this query occupies in that pool.
OaMetricQueryImp::OaMetricQueryImp(OaMetricSourceImp &metricSourceInput, OaMetricQueryPoolImp &poolInput,
                                   const uint32_t slotInput)
    : metricSource(metricSourceInput),
      metricsLibrary(metricSourceInput.getMetricsLibrary()),
      pool(poolInput),
      slot(slotInput) {
}
|
|
|
|
// Appends the "begin" part of the query to the command list.
// No events are waited on or signaled at query begin.
ze_result_t OaMetricQueryImp::appendBegin(CommandList &commandList) {
    const auto poolType = pool.description.type;

    if (poolType == ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE) {
        return writeMetricQuery(commandList, nullptr, 0, nullptr, true);
    }

    if (poolType == ZET_METRIC_QUERY_POOL_TYPE_EXECUTION) {
        return writeSkipExecutionQuery(commandList, nullptr, 0, nullptr, true);
    }

    DEBUG_BREAK_IF(true);
    return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
|
|
|
|
// Appends the "end" part of the query to the command list, forwarding the
// wait events and the optional signal event to the pool-type specific writer.
ze_result_t OaMetricQueryImp::appendEnd(CommandList &commandList, ze_event_handle_t hSignalEvent,
                                        uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    const auto poolType = pool.description.type;

    if (poolType == ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE) {
        return writeMetricQuery(commandList, hSignalEvent, numWaitEvents, phWaitEvents, false);
    }

    if (poolType == ZET_METRIC_QUERY_POOL_TYPE_EXECUTION) {
        return writeSkipExecutionQuery(commandList, hSignalEvent, numWaitEvents, phWaitEvents, false);
    }

    DEBUG_BREAK_IF(true);
    return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
|
|
|
|
// Returns the raw data of this query, or the buffer size required for it.
//
// *pRawDataSize == 0 requests the required size, which is written back to *pRawDataSize.
// Otherwise *pRawDataSize is the size of the buffer behind pRawData, and on success it is
// updated to the number of bytes written.
//
// For an aggregate query (one with sub-device queries) the output is laid out as
//   MetricGroupCalculateHeader | uint32_t offsets[n] | uint32_t sizes[n] | raw data
// and the buffer must hold at least the header and both arrays; a null or smaller
// buffer is rejected with ZE_RESULT_ERROR_INVALID_ARGUMENT before anything is written.
// The first failing sub-device query aborts the read and its result is returned.
ze_result_t OaMetricQueryImp::getData(size_t *pRawDataSize, uint8_t *pRawData) {

    const bool calculateSizeOnly = *pRawDataSize == 0;
    const size_t metricQueriesSize = metricQueries.size();
    bool result = true;

    if (metricQueriesSize > 0) {

        const size_t headerSize = sizeof(MetricGroupCalculateHeader);
        const size_t rawDataOffsetsRequiredSize = sizeof(uint32_t) * metricQueriesSize;
        const size_t rawDataSizesRequiredSize = sizeof(uint32_t) * metricQueriesSize;
        const size_t metadataSize = headerSize + rawDataOffsetsRequiredSize + rawDataSizesRequiredSize;

        if (calculateSizeOnly) {

            // The report size of the first sub-device query is used for all sub-devices.
            auto pMetricQueryImp = static_cast<OaMetricQueryImp *>(MetricQuery::fromHandle(metricQueries[0]));
            result = pMetricQueryImp->metricsLibrary.getMetricQueryReportSize(*pRawDataSize);

            const size_t rawDataRequiredSize = *pRawDataSize * metricQueriesSize;

            *pRawDataSize = metadataSize + rawDataRequiredSize;

        } else {

            // Without this check a too-small buffer would be overrun by the header
            // and the size subtraction below would wrap around.
            if (pRawData == nullptr || *pRawDataSize < metadataSize) {
                return ZE_RESULT_ERROR_INVALID_ARGUMENT;
            }

            MetricGroupCalculateHeader *pRawDataHeader = reinterpret_cast<MetricGroupCalculateHeader *>(pRawData);
            pRawDataHeader->magic = MetricGroupCalculateHeader::magicValue;
            pRawDataHeader->dataCount = static_cast<uint32_t>(metricQueriesSize);

            // Relative offsets in the header allow to move/copy the buffer.
            pRawDataHeader->rawDataOffsets = sizeof(MetricGroupCalculateHeader);
            pRawDataHeader->rawDataSizes = static_cast<uint32_t>(pRawDataHeader->rawDataOffsets + (sizeof(uint32_t) * metricQueriesSize));
            pRawDataHeader->rawDataOffset = static_cast<uint32_t>(pRawDataHeader->rawDataSizes + (sizeof(uint32_t) * metricQueriesSize));

            // The space left after the metadata is split evenly between sub-devices.
            const size_t sizePerSubDevice = (*pRawDataSize - pRawDataHeader->rawDataOffset) / metricQueriesSize;
            DEBUG_BREAK_IF(sizePerSubDevice == 0);
            *pRawDataSize = pRawDataHeader->rawDataOffset;

            uint32_t *pRawDataOffsetsUnpacked = reinterpret_cast<uint32_t *>(pRawData + pRawDataHeader->rawDataOffsets);
            uint32_t *pRawDataSizesUnpacked = reinterpret_cast<uint32_t *>(pRawData + pRawDataHeader->rawDataSizes);
            uint8_t *pRawDataUnpacked = reinterpret_cast<uint8_t *>(pRawData + pRawDataHeader->rawDataOffset);

            for (size_t i = 0; i < metricQueriesSize; ++i) {

                size_t getDataSize = sizePerSubDevice;
                // Each sub-device's data is packed right behind the previous one.
                const uint32_t rawDataOffset = (i != 0) ? (pRawDataSizesUnpacked[i - 1] + pRawDataOffsetsUnpacked[i - 1]) : 0;
                auto pMetricQuery = MetricQuery::fromHandle(metricQueries[i]);
                ze_result_t tmpResult = pMetricQuery->getData(&getDataSize, pRawDataUnpacked + rawDataOffset);
                // Return at first error.
                if (tmpResult != ZE_RESULT_SUCCESS) {
                    return tmpResult;
                }
                pRawDataSizesUnpacked[i] = static_cast<uint32_t>(getDataSize);
                pRawDataOffsetsUnpacked[i] = (i != 0) ? pRawDataOffsetsUnpacked[i - 1] + pRawDataSizesUnpacked[i - 1] : 0;
                *pRawDataSize += getDataSize;
            }
        }

    } else {
        // Plain query: size and data come straight from the metrics library.
        result = calculateSizeOnly
                     ? metricsLibrary.getMetricQueryReportSize(*pRawDataSize)
                     : metricsLibrary.getMetricQueryReport(pool.query, slot, *pRawDataSize, pRawData);
    }

    return result
               ? ZE_RESULT_SUCCESS
               : ZE_RESULT_ERROR_UNKNOWN;
}
|
|
|
|
// Performs no work; always reports success.
ze_result_t OaMetricQueryImp::reset() {
    const ze_result_t status = ZE_RESULT_SUCCESS;
    return status;
}
|
|
|
|
// Destroys the query. Only queries that reference sub-device queries delete
// themselves; a query without sub-device queries is left untouched here.
ze_result_t OaMetricQueryImp::destroy() {

    const bool hasSubDeviceQueries = !metricQueries.empty();
    if (hasSubDeviceQueries) {
        delete this;
    }

    return ZE_RESULT_SUCCESS;
}
|
|
|
|
// Gives mutable access to the list of sub-device queries referenced by this query.
std::vector<zet_metric_query_handle_t> &OaMetricQueryImp::getMetricQueries() {
    return this->metricQueries;
}
|
|
|
|
// Appends the gpu commands of a performance query (begin or end) to the command list.
//
// The pool allocation is made resident and the wait events are appended first.
// For an aggregate query (one with sub-device queries) the commands are generated once
// per sub-device into the same command stream space, wrapped in a multi partition
// prologue/epilogue. For a plain query the commands are generated once.
// The signal event, when provided, is appended only for the end of the query.
//
// Returns ZE_RESULT_ERROR_UNKNOWN when the commands cannot be generated or the
// signal event cannot be appended.
ze_result_t OaMetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_handle_t hSignalEvent,
                                               uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
                                               const bool begin) {

    bool result = true;
    const bool writeCompletionEvent = hSignalEvent && !begin;
    const size_t metricQueriesSize = metricQueries.size();

    // Make gpu allocation visible.
    commandList.getCmdContainer().addToResidencyContainer(pool.pAllocation);

    // Wait for events before executing query.
    commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, false, false, false, false);

    if (metricQueriesSize) {

        const size_t allocationSizeForSubDevice = pool.allocationSize / metricQueriesSize;
        static_cast<CommandListImp &>(commandList).appendMultiPartitionPrologue(static_cast<uint32_t>(allocationSizeForSubDevice));
        void *buffer = nullptr;
        bool gpuCommandStatus = true;

        // Revert iteration to be ensured that the last set of gpu commands overwrite the previous written sets of gpu commands,
        // so only one of the sub-device contexts will be used to append to command list.
        for (int32_t i = static_cast<int32_t>(metricQueriesSize - 1); i >= 0; --i) {

            // Adjust cpu and gpu addresses for each sub-device's query object.
            uint64_t gpuAddress = pool.pAllocation->getGpuAddress() + (i * allocationSizeForSubDevice);
            uint8_t *cpuAddress = static_cast<uint8_t *>(pool.pAllocation->getUnderlyingBuffer()) + (i * allocationSizeForSubDevice);

            auto &metricQueryImp = *static_cast<OaMetricQueryImp *>(MetricQuery::fromHandle(metricQueries[i]));
            auto &metricLibrarySubDevice = metricQueryImp.metricsLibrary;
            auto &metricSourceSubDevice = metricQueryImp.metricSource;

            // Obtain gpu commands.
            CommandBufferData_1_0 commandBuffer = {};
            commandBuffer.CommandsType = ObjectType::QueryHwCounters;
            commandBuffer.QueryHwCounters.Handle = metricQueryImp.pool.query;
            commandBuffer.QueryHwCounters.Begin = begin;
            commandBuffer.QueryHwCounters.Slot = slot;
            commandBuffer.Allocation.GpuAddress = gpuAddress;
            commandBuffer.Allocation.CpuAddress = cpuAddress;
            commandBuffer.Type = metricSourceSubDevice.isComputeUsed()
                                     ? GpuCommandBufferType::Compute
                                     : GpuCommandBufferType::Render;

            // Obtain required command buffer size.
            commandBuffer.Size = metricLibrarySubDevice.getGpuCommandsSize(commandBuffer);

            // Validate gpu commands size. Leave through the common exit below so the
            // multi partition epilogue still follows the prologue appended above.
            if (!commandBuffer.Size) {
                gpuCommandStatus = false;
                break;
            }

            // Allocate command buffer only once.
            if (buffer == nullptr) {
                auto stream = commandList.getCmdContainer().getCommandStream();
                buffer = stream->getSpace(commandBuffer.Size);
            }

            // Fill attached command buffer with gpu commands.
            commandBuffer.Data = buffer;

            // Obtain gpu commands from metrics library for each sub-device to update cpu and gpu addresses for
            // each query object in metrics library, so that get data works properly.
            gpuCommandStatus = metricLibrarySubDevice.getGpuCommands(commandBuffer);
            if (!gpuCommandStatus) {
                break;
            }
        }
        static_cast<CommandListImp &>(commandList).appendMultiPartitionEpilogue();
        if (!gpuCommandStatus) {
            return ZE_RESULT_ERROR_UNKNOWN;
        }

    } else {
        // Obtain gpu commands.
        CommandBufferData_1_0 commandBuffer = {};
        commandBuffer.CommandsType = ObjectType::QueryHwCounters;
        commandBuffer.QueryHwCounters.Handle = pool.query;
        commandBuffer.QueryHwCounters.Begin = begin;
        commandBuffer.QueryHwCounters.Slot = slot;
        commandBuffer.Allocation.GpuAddress = pool.pAllocation->getGpuAddress();
        commandBuffer.Allocation.CpuAddress = pool.pAllocation->getUnderlyingBuffer();
        commandBuffer.Type = metricSource.isComputeUsed()
                                 ? GpuCommandBufferType::Compute
                                 : GpuCommandBufferType::Render;

        // Get query commands.
        result = metricsLibrary.getGpuCommands(commandList, commandBuffer);
    }

    // Write completion event.
    if (result && writeCompletionEvent) {
        result = commandList.appendSignalEvent(hSignalEvent, false) == ZE_RESULT_SUCCESS;
    }

    return result ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
}
|
|
|
|
// Appends the gpu commands of an execution query: a null hardware override that is
// enabled at query begin and disabled at query end.
// The signal event, when provided, is appended only for the end of the query.
// Returns ZE_RESULT_ERROR_UNKNOWN when the commands or the signal event cannot be appended.
ze_result_t OaMetricQueryImp::writeSkipExecutionQuery(CommandList &commandList, ze_event_handle_t hSignalEvent,
                                                      uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
                                                      const bool begin) {

    const bool signalOnCompletion = hSignalEvent && !begin;

    // Describe the override commands.
    CommandBufferData_1_0 overrideCommands = {};
    overrideCommands.CommandsType = ObjectType::OverrideNullHardware;
    overrideCommands.Override.Enable = begin;
    if (metricSource.isComputeUsed()) {
        overrideCommands.Type = GpuCommandBufferType::Compute;
    } else {
        overrideCommands.Type = GpuCommandBufferType::Render;
    }

    // Wait for events before executing query.
    zeCommandListAppendWaitOnEvents(commandList.toHandle(), numWaitEvents, phWaitEvents);

    // Get query commands.
    bool succeeded = metricsLibrary.getGpuCommands(commandList, overrideCommands);

    // Write completion event.
    if (succeeded && signalOnCompletion) {
        succeeded = (zeCommandListAppendSignalEvent(commandList.toHandle(), hSignalEvent) == ZE_RESULT_SUCCESS);
    }

    if (!succeeded) {
        return ZE_RESULT_ERROR_UNKNOWN;
    }
    return ZE_RESULT_SUCCESS;
}
|
|
|
|
// Converts an api handle back to the query object it refers to.
MetricQuery *MetricQuery::fromHandle(zet_metric_query_handle_t handle) {
    auto *metricQuery = static_cast<MetricQuery *>(handle);
    return metricQuery;
}
|
|
|
|
// Exposes this query object as an api handle.
zet_metric_query_handle_t MetricQuery::toHandle() {
    return this;
}
|
|
|
|
// Metrics library callback: stops direct submission on the device carried in the
// client handle and waits for completion. Fails when the handle carries no device.
StatusCode ML_STDCALL MetricsLibrary::flushCommandBufferCallback(ClientHandle_1_0 handle) {
    auto *device = static_cast<Device *>(handle.data);

    if (device == nullptr) {
        return StatusCode::Failed;
    }

    device->getNEODevice()->stopDirectSubmissionAndWaitForCompletion();
    return StatusCode::Success;
}
|
|
|
|
} // namespace L0
|