From 703dd48038590a4ef885a5a42587f5231c1cad74 Mon Sep 17 00:00:00 2001 From: shubham kumar Date: Mon, 27 Oct 2025 13:37:22 +0000 Subject: [PATCH] feature: Add support for euss buffer overflow on windows Related-To: NEO-15684 Signed-off-by: shubham kumar --- .../os_metric_ip_sampling_imp_windows.cpp | 23 ++++++++- .../windows/test_metric_ip_sampling.cpp | 51 +++++++++++++++++-- .../os_interface/windows/wddm/perf_wddm.cpp | 2 +- .../source/os_interface/windows/wddm/wddm.h | 2 +- shared/test/common/mocks/mock_wddm.cpp | 8 +-- shared/test/common/mocks/mock_wddm.h | 4 +- .../os_interface/windows/wddm_perf_tests.cpp | 7 +-- 7 files changed, 80 insertions(+), 17 deletions(-) diff --git a/level_zero/tools/source/metrics/windows/os_metric_ip_sampling_imp_windows.cpp b/level_zero/tools/source/metrics/windows/os_metric_ip_sampling_imp_windows.cpp index 9dfa4f945a..3ef241c6df 100644 --- a/level_zero/tools/source/metrics/windows/os_metric_ip_sampling_imp_windows.cpp +++ b/level_zero/tools/source/metrics/windows/os_metric_ip_sampling_imp_windows.cpp @@ -17,6 +17,7 @@ namespace L0 { +#define GTDI_RET_BUFFER_OVERFLOW 13 class MetricIpSamplingWindowsImp : public MetricIpSamplingOsInterface { public: MetricIpSamplingWindowsImp(Device &device); @@ -31,6 +32,7 @@ class MetricIpSamplingWindowsImp : public MetricIpSamplingOsInterface { ze_result_t getMetricsTimerResolution(uint64_t &timerResolution) override; private: + bool overflowReported = false; Device &device; uint32_t notifyEveryNReports = 0u; ze_result_t getNearestSupportedSamplingUnit(uint32_t &samplingPeriodNs, uint32_t &samplingRate); @@ -65,9 +67,25 @@ ze_result_t MetricIpSamplingWindowsImp::startMeasurement(uint32_t &notifyEveryNR } ze_result_t MetricIpSamplingWindowsImp::readData(uint8_t *pRawData, size_t *pRawDataSize) { + // First read call to the KMD after overflow will just give the overflow status back, without any data being read from the HW buffer. This will not reset the HW overflow bit. 
+ // Second read call to the KMD will reset the HW overflow bit, read the data from the HW buffer and return success to the UMD. This reading will make space for new reports. + bool result; + uint32_t retCode = 0; const auto wddm = device.getOsInterface()->getDriverModel()->as(); - bool result = wddm->perfReadEuStallStream(pRawData, pRawDataSize); + if (!overflowReported) { + size_t rawDataSizeTemp = 0u; + result = wddm->perfReadEuStallStream(nullptr, &rawDataSizeTemp, &retCode); + if (!result) { + return ZE_RESULT_ERROR_UNKNOWN; + } + if (retCode == GTDI_RET_BUFFER_OVERFLOW) { + overflowReported = true; + return ZE_RESULT_WARNING_DROPPED_DATA; + } + } + overflowReported = false; + result = wddm->perfReadEuStallStream(pRawData, pRawDataSize, &retCode); return result ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN; } @@ -91,8 +109,9 @@ uint32_t MetricIpSamplingWindowsImp::getUnitReportSize() { bool MetricIpSamplingWindowsImp::isNReportsAvailable() { size_t bytesAvailable = 0u; + uint32_t retCode = 0; const auto wddm = device.getOsInterface()->getDriverModel()->as(); - bool result = wddm->perfReadEuStallStream(nullptr, &bytesAvailable); + bool result = wddm->perfReadEuStallStream(nullptr, &bytesAvailable, &retCode); if (!result) { METRICS_LOG_ERR("wddm perfReadEuStallStream() call failed."); return false; diff --git a/level_zero/tools/test/unit_tests/sources/metrics/windows/test_metric_ip_sampling.cpp b/level_zero/tools/test/unit_tests/sources/metrics/windows/test_metric_ip_sampling.cpp index 0132b746bc..2dc384f7da 100644 --- a/level_zero/tools/test/unit_tests/sources/metrics/windows/test_metric_ip_sampling.cpp +++ b/level_zero/tools/test/unit_tests/sources/metrics/windows/test_metric_ip_sampling.cpp @@ -107,7 +107,7 @@ HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenPerfDisableEuStallStreamFailsW } HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenReadSucceedsWhenReadDataIsCalledThenReturnSuccess, IsXe2HpgCore) { - VariableBackup 
mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize) -> bool { + VariableBackup mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) -> bool { return true; }); uint8_t pRawData = 0u; @@ -116,7 +116,7 @@ HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenReadSucceedsWhenReadDataIsCall } HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenPerfReadEuStallStreamFailsWhenReadDataIsCalledThenReturnFailure, IsXe2HpgCore) { - VariableBackup mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize) -> bool { + VariableBackup mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) -> bool { return false; }); uint8_t pRawData = 0u; @@ -124,6 +124,49 @@ HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenPerfReadEuStallStreamFailsWhen EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_ERROR_UNKNOWN); } +HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenPerfReadEuStallStreamOverflowsWhenReadDataIsCalledThenReturnCorrectOverflowStatus, IsXe2HpgCore) { + VariableBackup mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) -> bool { + *pOutRetCode = 13; // GTDI_RET_BUFFER_OVERFLOW + return true; + }); + uint8_t pRawData = 0u; + size_t pRawDataSize = 0; + EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_WARNING_DROPPED_DATA); +} + +HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenPerfReadEuStallStreamOverflowsWhenReadDataIsCalledTwiceThenCorrectReturnStatusAreReturned, IsXe2HpgCore) { + static bool overflowRetuned = false; + VariableBackup mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) -> bool { + if (!overflowRetuned) { + overflowRetuned = true; + 
*pOutRetCode = 13; // GTDI_RET_BUFFER_OVERFLOW + } + return true; + }); + uint8_t pRawData = 0u; + size_t pRawDataSize = 0; + EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_WARNING_DROPPED_DATA); + EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_SUCCESS); + overflowRetuned = false; +} + +HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, givenPerfReadEuStallStreamOverflowsWhenReadDataIsCalledTwiceAndReadDataFailsOnScondCallThenCorrectReturnStatusAreReturned, IsXe2HpgCore) { + static bool overflowRetuned = false; + VariableBackup mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) -> bool { + if (!overflowRetuned) { + overflowRetuned = true; + *pOutRetCode = 13; // GTDI_RET_BUFFER_OVERFLOW + return true; + } + return false; + }); + uint8_t pRawData = 0u; + size_t pRawDataSize = 0; + EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_WARNING_DROPPED_DATA); + EXPECT_EQ(metricIpSamplingOsInterface->readData(&pRawData, &pRawDataSize), ZE_RESULT_ERROR_UNKNOWN); + overflowRetuned = false; +} + HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, WhenGetRequiredBufferSizeIsCalledThenCorrectSizeIsReturned, IsXe2HpgCore) { constexpr uint32_t unitReportSize = 64; EXPECT_EQ(metricIpSamplingOsInterface->getRequiredBufferSize(10), unitReportSize * 10); @@ -136,7 +179,7 @@ HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, WhenisNReportsAvailableIsCalledAndE VariableBackup mockPerfOpenEuStallStream(&NEO::pPerfOpenEuStallStream, [](uint32_t sampleRate, uint32_t minBufferSize) -> bool { return true; }); - VariableBackup mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize) -> bool { + VariableBackup mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) -> bool { *pRawDataSize = 64u; return true; }); @@ -156,7 
+199,7 @@ HWTEST2_F(MetricIpSamplingWindowsFixtureXe2, WhenisNReportsAvailableIsCalledAndE VariableBackup mockPerfOpenEuStallStream(&NEO::pPerfOpenEuStallStream, [](uint32_t sampleRate, uint32_t minBufferSize) -> bool { return true; }); - VariableBackup mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize) -> bool { + VariableBackup mockPerfReadEuStallStream(&NEO::pPerfReadEuStallStream, [](uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) -> bool { *pRawDataSize = 192u; return true; }); diff --git a/shared/source/os_interface/windows/wddm/perf_wddm.cpp b/shared/source/os_interface/windows/wddm/perf_wddm.cpp index 0f006d3aa7..7d7b1b18be 100644 --- a/shared/source/os_interface/windows/wddm/perf_wddm.cpp +++ b/shared/source/os_interface/windows/wddm/perf_wddm.cpp @@ -13,7 +13,7 @@ bool Wddm::perfOpenEuStallStream(uint32_t sampleRate, uint32_t minBufferSize) { return false; } -bool Wddm::perfReadEuStallStream(uint8_t *pRawData, size_t *pRawDataSize) { +bool Wddm::perfReadEuStallStream(uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) { return false; } diff --git a/shared/source/os_interface/windows/wddm/wddm.h b/shared/source/os_interface/windows/wddm/wddm.h index a062e23115..c2a428a126 100644 --- a/shared/source/os_interface/windows/wddm/wddm.h +++ b/shared/source/os_interface/windows/wddm/wddm.h @@ -201,7 +201,7 @@ class Wddm : public DriverModel { MOCKABLE_VIRTUAL bool perfOpenEuStallStream(uint32_t sampleRate, uint32_t minBufferSize); MOCKABLE_VIRTUAL bool perfDisableEuStallStream(); - MOCKABLE_VIRTUAL bool perfReadEuStallStream(uint8_t *pRawData, size_t *pRawDataSize); + MOCKABLE_VIRTUAL bool perfReadEuStallStream(uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode); PhysicalDevicePciBusInfo getPciBusInfo() const override; diff --git a/shared/test/common/mocks/mock_wddm.cpp b/shared/test/common/mocks/mock_wddm.cpp index 403ea6d3aa..414a5acfb9 100644 --- 
a/shared/test/common/mocks/mock_wddm.cpp +++ b/shared/test/common/mocks/mock_wddm.cpp @@ -29,7 +29,7 @@ NTSTATUS(*pCallEscape) uint32_t (*pGetTimestampFrequency)() = nullptr; bool (*pPerfOpenEuStallStream)(uint32_t sampleRate, uint32_t minBufferSize) = nullptr; bool (*pPerfDisableEuStallStream)() = nullptr; -bool (*pPerfReadEuStallStream)(uint8_t *pRawData, size_t *pRawDataSize) = nullptr; +bool (*pPerfReadEuStallStream)(uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) = nullptr; } // namespace NEO struct MockHwDeviceId : public HwDeviceIdWddm { @@ -385,11 +385,11 @@ bool WddmMock::perfDisableEuStallStream() { return Wddm::perfDisableEuStallStream(); } -bool WddmMock::perfReadEuStallStream(uint8_t *pRawData, size_t *pRawDataSize) { +bool WddmMock::perfReadEuStallStream(uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) { if (pPerfReadEuStallStream != nullptr) { - return pPerfReadEuStallStream(pRawData, pRawDataSize); + return pPerfReadEuStallStream(pRawData, pRawDataSize, pOutRetCode); } - return Wddm::perfReadEuStallStream(pRawData, pRawDataSize); + return Wddm::perfReadEuStallStream(pRawData, pRawDataSize, pOutRetCode); } uint32_t WddmMock::getTimestampFrequency() const { diff --git a/shared/test/common/mocks/mock_wddm.h b/shared/test/common/mocks/mock_wddm.h index 8e9e3b7d6d..cca717523e 100644 --- a/shared/test/common/mocks/mock_wddm.h +++ b/shared/test/common/mocks/mock_wddm.h @@ -26,7 +26,7 @@ extern NTSTATUS (*pCallEscape)(D3DKMT_ESCAPE &escapeCommand); extern uint32_t (*pGetTimestampFrequency)(); extern bool (*pPerfOpenEuStallStream)(uint32_t sampleRate, uint32_t minBufferSize); extern bool (*pPerfDisableEuStallStream)(); -extern bool (*pPerfReadEuStallStream)(uint8_t *pRawData, size_t *pRawDataSize); +extern bool (*pPerfReadEuStallStream)(uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode); class GraphicsAllocation; @@ -155,7 +155,7 @@ class WddmMock : public Wddm { uint32_t getTimestampFrequency() const override; 
bool perfOpenEuStallStream(uint32_t sampleRate, uint32_t minBufferSize) override; bool perfDisableEuStallStream() override; - bool perfReadEuStallStream(uint8_t *pRawData, size_t *pRawDataSize) override; + bool perfReadEuStallStream(uint8_t *pRawData, size_t *pRawDataSize, uint32_t *pOutRetCode) override; WddmMockHelpers::MakeResidentCall makeResidentResult; WddmMockHelpers::CallResult evictResult; diff --git a/shared/test/unit_test/os_interface/windows/wddm_perf_tests.cpp b/shared/test/unit_test/os_interface/windows/wddm_perf_tests.cpp index c6e76fe0d2..b1d6f9a516 100644 --- a/shared/test/unit_test/os_interface/windows/wddm_perf_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/wddm_perf_tests.cpp @@ -20,9 +20,10 @@ TEST_F(WddmPerfTests, givenCorrectArgumentsWhenPerfOpenEuStallStreamIsCalledThen } TEST_F(WddmPerfTests, givenCorrectArgumentsWhenPerfReadEuStallStreamIsCalledThenReturnFailure) { - uint8_t pRawData = 0u; - size_t pRawDataSize = 0; - EXPECT_FALSE(wddm->perfReadEuStallStream(&pRawData, &pRawDataSize)); + uint8_t rawData = 0u; + size_t rawDataSize = 0; + uint32_t outRetCode = 0; + EXPECT_FALSE(wddm->perfReadEuStallStream(&rawData, &rawDataSize, &outRetCode)); } TEST_F(WddmPerfTests, givenCorrectArgumentsWhenPerfDisableEuStallStreamIsCalledThenReturnFailure) {