feature: add EU stall sampling support on Windows

Related-To: NEO-12174


Signed-off-by: shubham kumar <shubham.kumar@intel.com>
This commit is contained in:
shubham kumar
2024-08-06 04:21:42 +00:00
committed by Compute-Runtime-Automation
parent 2a9bcdeb83
commit 0002eb3fcc
8 changed files with 358 additions and 36 deletions

View File

@@ -25,11 +25,4 @@ target_sources(${L0_STATIC_LIB_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/os_interface_metric.h
${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_DIR_SUFFIX}/metric_device_context_create.cpp
)
if(WIN32)
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_DIR_SUFFIX}/windows/os_metric_ip_sampling_imp_windows.cpp
)
endif()
add_subdirectories()

View File

@@ -11,5 +11,6 @@ if(WIN32)
${CMAKE_CURRENT_SOURCE_DIR}/os_metric_oa_streamer_imp_windows.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_metric_oa_query_imp_windows.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_metric_oa_enumeration_imp_windows.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_metric_ip_sampling_imp_windows.cpp
)
endif()

View File

@@ -1,46 +1,149 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/hw_info.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/os_interface/windows/wddm/wddm.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/tools/source/metrics/metric.h"
#include "level_zero/tools/source/metrics/os_interface_metric.h"
namespace L0 {
// Windows implementation of MetricIpSamplingOsInterface; the out-of-line method
// definitions in this file forward to the device's NEO::Wddm driver model.
// NOTE(review): this span shows both inline stub bodies (default constructor, overrides
// returning ZE_RESULT_ERROR_UNSUPPORTED_FEATURE / 0 / false) and out-of-line declarations
// of the same members; presumably the removed and added sides of a diff rendered without
// +/- markers — confirm which set is live before editing.
class MetricIpSamplingWindowsImp : public MetricIpSamplingOsInterface {
public:
MetricIpSamplingWindowsImp() {}
ze_result_t startMeasurement(uint32_t &notifyEveryNReports, uint32_t &samplingPeriodNs) override {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
ze_result_t stopMeasurement() override {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
ze_result_t readData(uint8_t *pRawData, size_t *pRawDataSize) override {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
uint32_t getRequiredBufferSize(const uint32_t maxReportCount) override {
return 0u;
}
uint32_t getUnitReportSize() override {
return 0u;
}
bool isNReportsAvailable() override {
return false;
}
bool isDependencyAvailable() override {
return false;
}
ze_result_t getMetricsTimerResolution(uint64_t &timerResolution) override {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Binds the implementation to the device whose driver model is queried by every call.
MetricIpSamplingWindowsImp(Device &device);
~MetricIpSamplingWindowsImp() override = default;
// Opens the EU stall stream; both arguments are in/out and may be adjusted by the call.
ze_result_t startMeasurement(uint32_t &notifyEveryNReports, uint32_t &samplingPeriodNs) override;
// Disables the EU stall stream.
ze_result_t stopMeasurement() override;
// Reads stream data through the driver model into pRawData / pRawDataSize.
ze_result_t readData(uint8_t *pRawData, size_t *pRawDataSize) override;
// Returns the byte size needed for up to maxReportCount reports (capped by the implementation).
uint32_t getRequiredBufferSize(const uint32_t maxReportCount) override;
// Returns the size of a single report.
uint32_t getUnitReportSize() override;
// True when at least notifyEveryNReports reports are reported as available.
bool isNReportsAvailable() override;
// True when the product helper reports IP sampling support for this hardware.
bool isDependencyAvailable() override;
// Writes the value returned by Wddm::getTimestampFrequency() into timerResolution.
ze_result_t getMetricsTimerResolution(uint64_t &timerResolution) override;
private:
// Device supplying the OS interface, hardware info and product helper.
Device &device;
// Threshold stored by startMeasurement() and compared in isNReportsAvailable().
uint32_t notifyEveryNReports = 0u;
// Maps a requested period to a supported sampling unit and writes back the effective period.
// NOTE(review): the second parameter is named samplingRate here but samplingUnit in the definition.
ze_result_t getNearestSupportedSamplingUnit(uint32_t &samplingPeriodNs, uint32_t &samplingRate);
};
// Factory returning the Windows IP sampling backend for the given device.
std::unique_ptr<MetricIpSamplingOsInterface> MetricIpSamplingOsInterface::create(Device &device) {
// NOTE(review): two return statements are visible; the first (no-argument constructor) looks
// like the line removed by this change and the second like its replacement — confirm.
return std::make_unique<MetricIpSamplingWindowsImp>();
return std::make_unique<MetricIpSamplingWindowsImp>(device);
}
// Stores a reference to the device; no driver call is made at construction time.
MetricIpSamplingWindowsImp::MetricIpSamplingWindowsImp(Device &device)
    : device(device) {
}
// Opens the EU stall stream on the device's WDDM driver model.
//
// notifyEveryNReports [in,out]: requested notification threshold; raised to at least 1 and
//                               remembered for isNReportsAvailable() on success.
// samplingPeriodNs    [in,out]: requested sampling period; replaced by the period that
//                               corresponds to the selected sampling unit.
// Returns ZE_RESULT_SUCCESS, or ZE_RESULT_ERROR_UNKNOWN when the sampling unit cannot be
// determined or the stream cannot be opened.
ze_result_t MetricIpSamplingWindowsImp::startMeasurement(uint32_t &notifyEveryNReports, uint32_t &samplingPeriodNs) {
    const auto wddm = device.getOsInterface().getDriverModel()->as<NEO::Wddm>();

    uint32_t samplingUnit = 0u;
    if (getNearestSupportedSamplingUnit(samplingPeriodNs, samplingUnit) != ZE_RESULT_SUCCESS) {
        METRICS_LOG_ERR("wddm getNearestSupportedSamplingUnit() call failed.");
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    // A threshold of zero is treated as "notify on every report".
    notifyEveryNReports = std::max(notifyEveryNReports, 1u);

    // NOTE(review): the report count is passed as the second argument; the test mocks name
    // that parameter minBufferSize — confirm whether a byte size is expected here.
    if (!wddm->perfOpenEuStallStream(samplingUnit, notifyEveryNReports)) {
        METRICS_LOG_ERR("wddm perfOpenEuStallStream() call failed.");
        return ZE_RESULT_ERROR_UNKNOWN;
    }

    // The parameter shadows the member, hence the explicit this->.
    this->notifyEveryNReports = notifyEveryNReports;
    return ZE_RESULT_SUCCESS;
}
// Forwards the read request to Wddm::perfReadEuStallStream() and maps its boolean
// outcome to ZE_RESULT_SUCCESS / ZE_RESULT_ERROR_UNKNOWN.
ze_result_t MetricIpSamplingWindowsImp::readData(uint8_t *pRawData, size_t *pRawDataSize) {
    auto *const driverModel = device.getOsInterface().getDriverModel()->as<NEO::Wddm>();
    if (driverModel->perfReadEuStallStream(pRawData, pRawDataSize)) {
        return ZE_RESULT_SUCCESS;
    }
    return ZE_RESULT_ERROR_UNKNOWN;
}
// Disables the EU stall stream via Wddm::perfDisableEuStallStream() and maps its boolean
// outcome to ZE_RESULT_SUCCESS / ZE_RESULT_ERROR_UNKNOWN.
ze_result_t MetricIpSamplingWindowsImp::stopMeasurement() {
    auto *const driverModel = device.getOsInterface().getDriverModel()->as<NEO::Wddm>();
    if (driverModel->perfDisableEuStallStream()) {
        return ZE_RESULT_SUCCESS;
    }
    return ZE_RESULT_ERROR_UNKNOWN;
}
// Returns the buffer size, in bytes, needed to hold up to maxReportCount reports.
// The count is capped at the number of reports that fit in maxDssBufferSize bytes for each
// of the MaxSubSlicesSupported sub-slices reported by the hardware info.
uint32_t MetricIpSamplingWindowsImp::getRequiredBufferSize(const uint32_t maxReportCount) {
    // Bind by reference: only one field is read, so copying the hardware info is wasted work.
    const auto &hwInfo = device.getNEODevice()->getHardwareInfo();
    const auto maxSupportedReportCount = (maxDssBufferSize * hwInfo.gtSystemInfo.MaxSubSlicesSupported) / unitReportSize;
    return std::min(maxSupportedReportCount, maxReportCount) * getUnitReportSize();
}
// Returns the size of one report, taken from the shared unitReportSize constant.
uint32_t MetricIpSamplingWindowsImp::getUnitReportSize() {
    const uint32_t reportSizeInBytes = unitReportSize;
    return reportSizeInBytes;
}
// Reports whether at least notifyEveryNReports reports can currently be read.
// The stream is queried with a null data pointer so only the available byte count is
// requested; a failed query is logged and reported as "not available".
bool MetricIpSamplingWindowsImp::isNReportsAvailable() {
    size_t bytesAvailable = 0u;
    const auto wddm = device.getOsInterface().getDriverModel()->as<NEO::Wddm>();

    if (!wddm->perfReadEuStallStream(nullptr, &bytesAvailable)) {
        METRICS_LOG_ERR("wddm perfReadEuStallStream() call failed.");
        return false;
    }

    // Convert bytes to whole reports before comparing against the threshold.
    return (bytesAvailable / unitReportSize) >= notifyEveryNReports;
}
bool MetricIpSamplingWindowsImp::isDependencyAvailable() {
const auto &hardwareInfo = device.getNEODevice()->getHardwareInfo();
const auto &productHelper = device.getNEODevice()->getProductHelper();
return productHelper.isIpSamplingSupported(hardwareInfo) ? true : false;
}
// Writes the GPU timestamp frequency obtained from Wddm::getTimestampFrequency() into
// timerResolution. A frequency of zero is treated as failure: timerResolution is set to 0,
// the error is logged and ZE_RESULT_ERROR_UNKNOWN is returned.
ze_result_t MetricIpSamplingWindowsImp::getMetricsTimerResolution(uint64_t &timerResolution) {
    auto *const driverModel = device.getOsInterface().getDriverModel()->as<NEO::Wddm>();
    const uint32_t timestampFrequency = driverModel->getTimestampFrequency();

    if (timestampFrequency != 0) {
        timerResolution = static_cast<uint64_t>(timestampFrequency);
        return ZE_RESULT_SUCCESS;
    }

    timerResolution = 0;
    METRICS_LOG_ERR("getTimestampFrequency() failed errno = %d | gpuTimeStampfrequency = %d ",
                    errno,
                    timestampFrequency);
    return ZE_RESULT_ERROR_UNKNOWN;
}
// Converts a requested sampling period into a sampling unit and writes back the period
// that unit corresponds to.
//
// samplingPeriodNs [in,out]: requested period; overwritten with unit * 251 * clock period.
// samplingUnit     [out]   : requested clocks / 251, clamped to the range [1, 7].
// Returns the error from getMetricsTimerResolution() when the frequency is unavailable.
// Aborts through UNRECOVERABLE_IF when the integer clock period evaluates to zero.
ze_result_t MetricIpSamplingWindowsImp::getNearestSupportedSamplingUnit(uint32_t &samplingPeriodNs, uint32_t &samplingUnit) {
    static constexpr uint32_t clocksPerUnit = 251u;
    static constexpr uint32_t lowestUnit = 1u;
    static constexpr uint32_t highestUnit = 7u;

    uint64_t timestampFrequency = 0;
    const ze_result_t status = getMetricsTimerResolution(timestampFrequency);
    if (status != ZE_RESULT_SUCCESS) {
        return status;
    }

    // Integer division: the period is truncated to whole nanoseconds.
    const uint64_t clockPeriodNs = nsecPerSec / timestampFrequency;
    UNRECOVERABLE_IF(clockPeriodNs == 0);

    const uint64_t requestedClocks = samplingPeriodNs / clockPeriodNs;
    const auto requestedUnits = static_cast<uint32_t>(requestedClocks / clocksPerUnit);

    samplingUnit = std::clamp(requestedUnits, lowestUnit, highestUnit);
    samplingPeriodNs = samplingUnit * clocksPerUnit * static_cast<uint32_t>(clockPeriodNs);
    return ZE_RESULT_SUCCESS;
}
} // namespace L0

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2021-2024 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -7,5 +7,6 @@
if(WIN32)
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/test_metric_oa_enumeration_windows.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_metric_ip_sampling.cpp
)
endif()

View File

@@ -0,0 +1,169 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_wddm.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/tools/source/metrics/metric_ip_sampling_source.h"
#include <level_zero/zet_api.h>
namespace L0 {
namespace ult {
class MetricIpSamplingWindowsFixtureXe2 : public DeviceFixture,
public ::testing::Test {
public:
void SetUp() override {
DeviceFixture::setUp();
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->osInterface = std::make_unique<NEO::OSInterface>();
auto &osInterface = device->getOsInterface();
wddm = new NEO::WddmMock(const_cast<NEO::RootDeviceEnvironment &>(neoDevice->getRootDeviceEnvironment()));
wddm->init();
osInterface.setDriverModel(std::unique_ptr<DriverModel>(wddm));
metricIpSamplingOsInterface = MetricIpSamplingOsInterface::create(static_cast<L0::Device &>(*device));
}
void TearDown() override {
DeviceFixture::tearDown();
}
NEO::WddmMock *wddm;
std::unique_ptr<MetricIpSamplingOsInterface> metricIpSamplingOsInterface = nullptr;
};
// startMeasurement() succeeds and leaves an already supported sampling period untouched.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenCorrectArgumentsWhenStartMeasurementIsCalledThenReturnSuccess, IsXe2HpgCore) {
    // 1 MHz timestamp frequency -> 1000 ns clock period, so 251 * unit * period fits in 32 bits
    // (a 1 Hz frequency would make the expected value depend on uint32_t wrap-around).
    VariableBackup<decltype(NEO::pGetTimestampFrequency)> mockGetTimestampFrequency(&NEO::pGetTimestampFrequency, []() -> uint32_t {
        return 1000000u;
    });
    VariableBackup<decltype(NEO::pPerfOpenEuStallStream)> mockPerfOpenEuStallStream(&NEO::pPerfOpenEuStallStream, [](uint32_t sampleRate, uint32_t minBufferSize) -> bool {
        return true;
    });

    constexpr uint32_t samplingGranularity = 251u;
    constexpr uint32_t gpuClockPeriodNs = 1000u;
    constexpr uint32_t samplingUnit = 1u;
    constexpr uint32_t expectedSamplingPeriodNs = samplingGranularity * samplingUnit * gpuClockPeriodNs;

    uint32_t notifyEveryNReports = 0;
    uint32_t samplingPeriodNs = expectedSamplingPeriodNs;
    EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_SUCCESS);
    EXPECT_EQ(samplingPeriodNs, expectedSamplingPeriodNs);
    // A zero threshold is raised to one report by startMeasurement().
    EXPECT_EQ(notifyEveryNReports, 1u);
}
// A zero timestamp frequency makes the sampling-unit lookup fail, so startMeasurement()
// must report ZE_RESULT_ERROR_UNKNOWN even though opening the stream would succeed.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenGetTimestampFrequencyReturnsFrequencyEqualZeroWhenStartMeasurementIsCalledThenReturnFailure, IsXe2HpgCore) {
    VariableBackup<decltype(NEO::pGetTimestampFrequency)> zeroFrequencyBackup(&NEO::pGetTimestampFrequency, []() -> uint32_t {
        return 0u;
    });
    VariableBackup<decltype(NEO::pPerfOpenEuStallStream)> openStreamBackup(&NEO::pPerfOpenEuStallStream, [](uint32_t, uint32_t) -> bool {
        return true;
    });

    uint32_t reportThreshold = 0;
    uint32_t requestedPeriodNs = 10000;
    const auto status = metricIpSamplingOsInterface->startMeasurement(reportThreshold, requestedPeriodNs);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, status);
}
// With a valid frequency but a failing stream open, startMeasurement() must report
// ZE_RESULT_ERROR_UNKNOWN.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenPerfOpenEuStallStreamFailsWhenStartMeasurementIsCalledThenReturnFailure, IsXe2HpgCore) {
    VariableBackup<decltype(NEO::pGetTimestampFrequency)> frequencyBackup(&NEO::pGetTimestampFrequency, []() -> uint32_t {
        return 1u;
    });
    VariableBackup<decltype(NEO::pPerfOpenEuStallStream)> failingOpenBackup(&NEO::pPerfOpenEuStallStream, [](uint32_t, uint32_t) -> bool {
        return false;
    });

    uint32_t reportThreshold = 0;
    uint32_t requestedPeriodNs = 10000;
    const auto status = metricIpSamplingOsInterface->startMeasurement(reportThreshold, requestedPeriodNs);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, status);
}
// stopMeasurement() maps a successful stream disable to ZE_RESULT_SUCCESS.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenPerfDisableEuStallStreamSucceedsWhenStopMeasurementIsCalledThenReturnSuccess, IsXe2HpgCore) {
    VariableBackup<decltype(NEO::pPerfDisableEuStallStream)> disableStreamBackup(&NEO::pPerfDisableEuStallStream, []() -> bool {
        return true;
    });

    const auto status = metricIpSamplingOsInterface->stopMeasurement();
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}
// stopMeasurement() maps a failed stream disable to ZE_RESULT_ERROR_UNKNOWN.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenPerfDisableEuStallStreamFailsWhenStopMeasurementIsCalledThenReturnFailure, IsXe2HpgCore) {
    VariableBackup<decltype(NEO::pPerfDisableEuStallStream)> failingDisableBackup(&NEO::pPerfDisableEuStallStream, []() -> bool {
        return false;
    });

    const auto status = metricIpSamplingOsInterface->stopMeasurement();
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, status);
}
// readData() maps a successful stream read to ZE_RESULT_SUCCESS.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenReadSucceedsWhenReadDataIsCalledThenReturnSuccess, IsXe2HpgCore) {
    VariableBackup<decltype(NEO::pPerfReadEuStallStream)> readStreamBackup(&NEO::pPerfReadEuStallStream, [](uint8_t *, size_t *) -> bool {
        return true;
    });

    uint8_t rawByte = 0u;
    size_t rawSize = 0;
    const auto status = metricIpSamplingOsInterface->readData(&rawByte, &rawSize);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}
// readData() maps a failed stream read to ZE_RESULT_ERROR_UNKNOWN.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenPerfReadEuStallStreamFailsWhenReadDataIsCalledThenReturnFailure, IsXe2HpgCore) {
    VariableBackup<decltype(NEO::pPerfReadEuStallStream)> failingReadBackup(&NEO::pPerfReadEuStallStream, [](uint8_t *, size_t *) -> bool {
        return false;
    });

    uint8_t rawByte = 0u;
    size_t rawSize = 0;
    const auto status = metricIpSamplingOsInterface->readData(&rawByte, &rawSize);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, status);
}
// Ten reports of 64 bytes each are expected to require 640 bytes.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, WhenGetRequiredBufferSizeIsCalledThenCorrectSizeIsReturned, IsXe2HpgCore) {
    constexpr uint32_t bytesPerReport = 64;
    constexpr uint32_t requestedReports = 10;

    const auto requiredSize = metricIpSamplingOsInterface->getRequiredBufferSize(requestedReports);
    EXPECT_EQ(bytesPerReport * requestedReports, requiredSize);
}
// With a threshold of two reports and only 64 bytes (one report) available,
// isNReportsAvailable() must return false.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, WhenisNReportsAvailableIsCalledAndEnoughReportsAreNotAvailableThenReturnFailure, IsXe2HpgCore) {
    // 1 MHz timestamp frequency -> 1000 ns clock period, so 251 * unit * period fits in 32 bits
    // (a 1 Hz frequency would make the expected value depend on uint32_t wrap-around).
    VariableBackup<decltype(NEO::pGetTimestampFrequency)> mockGetTimestampFrequency(&NEO::pGetTimestampFrequency, []() -> uint32_t {
        return 1000000u;
    });
    VariableBackup<decltype(NEO::pPerfOpenEuStallStream)> mockPerfOpenEuStallStream(&NEO::pPerfOpenEuStallStream, [](uint32_t sampleRate, uint32_t minBufferSize) -> bool {
        return true;
    });
    VariableBackup<decltype(NEO::pPerfReadEuStallStream)> mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize) -> bool {
        *pRawDataSize = 64u;
        return true;
    });

    constexpr uint32_t samplingGranularity = 251u;
    constexpr uint32_t gpuClockPeriodNs = 1000u;
    constexpr uint32_t samplingUnit = 1u;
    constexpr uint32_t expectedSamplingPeriodNs = samplingGranularity * samplingUnit * gpuClockPeriodNs;

    uint32_t notifyEveryNReports = 2;
    uint32_t samplingPeriodNs = expectedSamplingPeriodNs;
    EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_SUCCESS);
    EXPECT_EQ(samplingPeriodNs, expectedSamplingPeriodNs);
    EXPECT_FALSE(metricIpSamplingOsInterface->isNReportsAvailable());
}
// With a threshold of two reports and 192 bytes (three reports) available,
// isNReportsAvailable() must return true.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, WhenisNReportsAvailableIsCalledAndEnoughReportsAreAvailableThenReturnSuccess, IsXe2HpgCore) {
    // 1 MHz timestamp frequency -> 1000 ns clock period, so 251 * unit * period fits in 32 bits
    // (a 1 Hz frequency would make the expected value depend on uint32_t wrap-around).
    VariableBackup<decltype(NEO::pGetTimestampFrequency)> mockGetTimestampFrequency(&NEO::pGetTimestampFrequency, []() -> uint32_t {
        return 1000000u;
    });
    VariableBackup<decltype(NEO::pPerfOpenEuStallStream)> mockPerfOpenEuStallStream(&NEO::pPerfOpenEuStallStream, [](uint32_t sampleRate, uint32_t minBufferSize) -> bool {
        return true;
    });
    VariableBackup<decltype(NEO::pPerfReadEuStallStream)> mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize) -> bool {
        *pRawDataSize = 192u;
        return true;
    });

    constexpr uint32_t samplingGranularity = 251u;
    constexpr uint32_t gpuClockPeriodNs = 1000u;
    constexpr uint32_t samplingUnit = 1u;
    constexpr uint32_t expectedSamplingPeriodNs = samplingGranularity * samplingUnit * gpuClockPeriodNs;

    uint32_t notifyEveryNReports = 2;
    uint32_t samplingPeriodNs = expectedSamplingPeriodNs;
    EXPECT_EQ(metricIpSamplingOsInterface->startMeasurement(notifyEveryNReports, samplingPeriodNs), ZE_RESULT_SUCCESS);
    EXPECT_EQ(samplingPeriodNs, expectedSamplingPeriodNs);
    EXPECT_TRUE(metricIpSamplingOsInterface->isNReportsAvailable());
}
// When the escape call is mocked to return -1, isNReportsAvailable() is expected to
// report false.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, WhenisNReportsAvailableIsCalledAndPerfReadEuStallStreamFailsThenReturnFailure, IsXe2HpgCore) {
    VariableBackup<decltype(NEO::pCallEscape)> failingEscapeBackup(&NEO::pCallEscape, [](D3DKMT_ESCAPE &) -> NTSTATUS {
        return -1;
    });

    const bool reportsReady = metricIpSamplingOsInterface->isNReportsAvailable();
    EXPECT_FALSE(reportsReady);
}
// After the hardware info is set to the product family under test,
// isDependencyAvailable() is expected to return true.
HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, GivenSupportedProductFamilyIsUsedWhenIsDependencyAvailableIsCalledThenReturnSuccess, IsXe2HpgCore) {
    auto *mutableHwInfo = neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
    mutableHwInfo->platform.eProductFamily = productFamily;

    const bool dependencyAvailable = metricIpSamplingOsInterface->isDependencyAvailable();
    EXPECT_TRUE(dependencyAvailable);
}
} // namespace ult
} // namespace L0