[clang][analyzer] Print empty per-EP metrics as empty CSV cells, fix missing PathRunningTime metric (#162839)

To avoid information loss, introduce a difference between unset stats
and 0 for statistics that are supposed to be set once per entry point.
Now, if the statistic is not set for an entry point, the corresponding
CSV cell will be empty, and not 0.

Thanks to this differentiation, I noticed that `PathRunningTime` was
actually never set, and fixed that.
Additionally, this patch enables the timers whenever
`DumpEntryPointStatsToCSV` is set, because in most cases these stats are
dumped to get a detailed view of analyzer performance.

Finally, I added a dedicated debug checker that demonstrates the use of
a statistic and tested the set and unset scenarios explicitly.

--

CPP-7097

---------

Co-authored-by: Donát Nagy <donat.nagy@ericsson.com>
This commit is contained in:
Arseniy Zaostrovnykh
2025-10-13 17:18:27 +02:00
committed by GitHub
parent 1db148cc94
commit bb3b0203c0
7 changed files with 259 additions and 45 deletions

View File

@@ -22,7 +22,7 @@ However, note that with ``LLVM_ENABLE_STATS`` disabled, only storage of the valu
If you want to define a statistic that is recorded only per entry point, EntryPointStats.h has four classes at your disposal:
- ``UnsignedEPStat`` - an unsigned value assigned at most once per entry point. For example: "the number of source characters in an entry-point body".
- ``UnsignedEPStat`` - an unsigned value assigned at most once per entry point. For example: "the number of source characters in an entry-point body". If no value is assigned during analysis of an entry point, the corresponding CSV cell will be empty.
- ``CounterEPStat`` - an additive statistic. It starts with 0 and you can add to it as many times as needed. For example: "the number of bugs discovered".
- ``UnsignedMaxEPStat`` - a maximizing statistic. It starts with 0 and when you join it with a value, it picks the maximum of the previous value and the new one. For example, "the longest execution path of a bug".

View File

@@ -9,6 +9,7 @@
#ifndef CLANG_INCLUDE_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_ENTRYPOINTSTATS_H
#define CLANG_INCLUDE_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_ENTRYPOINTSTATS_H
#include "clang/AST/ASTContext.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -25,7 +26,7 @@ class EntryPointStat {
public:
llvm::StringLiteral name() const { return Name; }
static void lockRegistry(llvm::StringRef CPPFileName);
static void lockRegistry(llvm::StringRef CPPFileName, ASTContext &Ctx);
static void takeSnapshot(const Decl *EntryPoint);
static void dumpStatsAsCSV(llvm::raw_ostream &OS);
@@ -85,7 +86,7 @@ class UnsignedEPStat : public EntryPointStat {
public:
explicit UnsignedEPStat(llvm::StringLiteral Name);
unsigned value() const { return Value.value_or(0); }
std::optional<unsigned> value() const { return Value; }
void reset() { Value.reset(); }
void set(unsigned V) {
assert(!Value.has_value());

View File

@@ -24,15 +24,21 @@ using namespace ento;
namespace {
struct Registry {
std::vector<UnsignedEPStat *> ExplicitlySetStats;
std::vector<UnsignedMaxEPStat *> MaxStats;
std::vector<CounterEPStat *> CounterStats;
std::vector<UnsignedMaxEPStat *> UnsignedMaxStats;
std::vector<UnsignedEPStat *> UnsignedStats;
bool IsLocked = false;
struct Snapshot {
const Decl *EntryPoint;
std::vector<unsigned> UnsignedStatValues;
// Explicitly set statistics may not have a value set, so they are separate
// from other unsigned statistics
std::vector<std::optional<unsigned>> ExplicitlySetStatValues;
// These are counting and maximizing statistics that initialize to 0, which
// is meaningful even if they are never updated, so their value is always
// present.
std::vector<unsigned> MaxOrCountStatValues;
void dumpAsCSV(llvm::raw_ostream &OS) const;
};
@@ -46,10 +52,16 @@ static llvm::ManagedStatic<Registry> StatsRegistry;
namespace {
template <typename Callback> void enumerateStatVectors(const Callback &Fn) {
// This order is important, it matches the order of the Snapshot fields:
// - ExplicitlySetStatValues
Fn(StatsRegistry->ExplicitlySetStats);
// - MaxOrCountStatValues
Fn(StatsRegistry->MaxStats);
Fn(StatsRegistry->CounterStats);
Fn(StatsRegistry->UnsignedMaxStats);
Fn(StatsRegistry->UnsignedStats);
}
void clearSnapshots(void *) { StatsRegistry->Snapshots.clear(); }
} // namespace
static void checkStatName(const EntryPointStat *M) {
@@ -69,7 +81,8 @@ static void checkStatName(const EntryPointStat *M) {
}
}
void EntryPointStat::lockRegistry(llvm::StringRef CPPFileName) {
void EntryPointStat::lockRegistry(llvm::StringRef CPPFileName,
ASTContext &Ctx) {
auto CmpByNames = [](const EntryPointStat *L, const EntryPointStat *R) {
return L->name() < R->name();
};
@@ -80,6 +93,10 @@ void EntryPointStat::lockRegistry(llvm::StringRef CPPFileName) {
StatsRegistry->IsLocked = true;
llvm::raw_string_ostream OS(StatsRegistry->EscapedCPPFileName);
llvm::printEscapedString(CPPFileName, OS);
// Make sure snapshots (that reference function Decl's) do not persist after
// the AST is destroyed. This is especially relevant in the context of unit
// tests that construct and destruct multiple ASTs in the same process.
Ctx.AddDeallocation(clearSnapshots, nullptr);
}
[[maybe_unused]] static bool isRegistered(llvm::StringLiteral Name) {
@@ -101,33 +118,39 @@ UnsignedMaxEPStat::UnsignedMaxEPStat(llvm::StringLiteral Name)
: EntryPointStat(Name) {
assert(!StatsRegistry->IsLocked);
assert(!isRegistered(Name));
StatsRegistry->UnsignedMaxStats.push_back(this);
StatsRegistry->MaxStats.push_back(this);
}
UnsignedEPStat::UnsignedEPStat(llvm::StringLiteral Name)
: EntryPointStat(Name) {
assert(!StatsRegistry->IsLocked);
assert(!isRegistered(Name));
StatsRegistry->UnsignedStats.push_back(this);
StatsRegistry->ExplicitlySetStats.push_back(this);
}
static std::vector<unsigned> consumeUnsignedStats() {
static std::vector<std::optional<unsigned>> consumeExplicitlySetStats() {
std::vector<std::optional<unsigned>> Result;
Result.reserve(StatsRegistry->ExplicitlySetStats.size());
for (auto *M : StatsRegistry->ExplicitlySetStats) {
Result.push_back(M->value());
M->reset();
}
return Result;
}
static std::vector<unsigned> consumeMaxAndCounterStats() {
std::vector<unsigned> Result;
Result.reserve(StatsRegistry->CounterStats.size() +
StatsRegistry->UnsignedMaxStats.size() +
StatsRegistry->UnsignedStats.size());
StatsRegistry->MaxStats.size());
// Order is important, it must match the order in enumerateStatVectors
for (auto *M : StatsRegistry->MaxStats) {
Result.push_back(M->value());
M->reset();
}
for (auto *M : StatsRegistry->CounterStats) {
Result.push_back(M->value());
M->reset();
}
for (auto *M : StatsRegistry->UnsignedMaxStats) {
Result.push_back(M->value());
M->reset();
}
for (auto *M : StatsRegistry->UnsignedStats) {
Result.push_back(M->value());
M->reset();
}
return Result;
}
@@ -150,20 +173,33 @@ static std::string getUSR(const Decl *D) {
}
void Registry::Snapshot::dumpAsCSV(llvm::raw_ostream &OS) const {
auto PrintAsUnsignOpt = [&OS](std::optional<unsigned> U) {
OS << (U.has_value() ? std::to_string(*U) : "");
};
auto CommaIfNeeded = [&OS](const auto &Vec1, const auto &Vec2) {
if (!Vec1.empty() && !Vec2.empty())
OS << ",";
};
auto PrintAsUnsigned = [&OS](unsigned U) { OS << U; };
OS << '"';
llvm::printEscapedString(getUSR(EntryPoint), OS);
OS << "\",\"";
OS << StatsRegistry->EscapedCPPFileName << "\",\"";
llvm::printEscapedString(
clang::AnalysisDeclContext::getFunctionName(EntryPoint), OS);
OS << "\"";
OS << (UnsignedStatValues.empty() ? "" : ",");
llvm::interleave(UnsignedStatValues, OS, [&OS](unsigned U) { OS << U; }, ",");
OS << "\",";
llvm::interleave(ExplicitlySetStatValues, OS, PrintAsUnsignOpt, ",");
CommaIfNeeded(ExplicitlySetStatValues, MaxOrCountStatValues);
llvm::interleave(MaxOrCountStatValues, OS, PrintAsUnsigned, ",");
}
void EntryPointStat::takeSnapshot(const Decl *EntryPoint) {
auto UnsignedValues = consumeUnsignedStats();
StatsRegistry->Snapshots.push_back({EntryPoint, std::move(UnsignedValues)});
auto ExplicitlySetValues = consumeExplicitlySetStats();
auto MaxOrCounterValues = consumeMaxAndCounterStats();
StatsRegistry->Snapshots.push_back({EntryPoint,
std::move(ExplicitlySetValues),
std::move(MaxOrCounterValues)});
}
void EntryPointStat::dumpStatsAsCSV(llvm::StringRef FileName) {

View File

@@ -39,6 +39,7 @@
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <memory>
#include <utility>
@@ -125,6 +126,7 @@ public:
std::unique_ptr<llvm::Timer> SyntaxCheckTimer;
std::unique_ptr<llvm::Timer> ExprEngineTimer;
std::unique_ptr<llvm::Timer> BugReporterTimer;
bool ShouldClearTimersToPreventDisplayingThem;
/// The information about analyzed functions shared throughout the
/// translation unit.
@@ -138,11 +140,12 @@ public:
Injector(std::move(injector)), CTU(CI),
MacroExpansions(CI.getLangOpts()) {
EntryPointStat::lockRegistry(getMainFileName(CI.getInvocation()));
EntryPointStat::lockRegistry(getMainFileName(CI.getInvocation()),
CI.getASTContext());
DigestAnalyzerOptions();
if (Opts.AnalyzerDisplayProgress || Opts.PrintStats ||
Opts.ShouldSerializeStats) {
Opts.ShouldSerializeStats || !Opts.DumpEntryPointStatsToCSV.empty()) {
AnalyzerTimers = std::make_unique<llvm::TimerGroup>(
"analyzer", "Analyzer timers");
SyntaxCheckTimer = std::make_unique<llvm::Timer>(
@@ -154,6 +157,12 @@ public:
*AnalyzerTimers);
}
// Avoid displaying the timers created above in case we only want to record
// per-entry-point stats.
ShouldClearTimersToPreventDisplayingThem = !Opts.AnalyzerDisplayProgress &&
!Opts.PrintStats &&
!Opts.ShouldSerializeStats;
if (Opts.PrintStats || Opts.ShouldSerializeStats) {
llvm::EnableStatistics(/* DoPrintOnExit= */ false);
}
@@ -276,6 +285,9 @@ public:
checkerMgr->runCheckersOnASTDecl(D, *Mgr, *RecVisitorBR);
if (SyntaxCheckTimer)
SyntaxCheckTimer->stopTimer();
if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) {
AnalyzerTimers->clear();
}
}
return true;
}
@@ -569,6 +581,9 @@ void AnalysisConsumer::runAnalysisOnTranslationUnit(ASTContext &C) {
checkerMgr->runCheckersOnASTDecl(TU, *Mgr, BR);
if (SyntaxCheckTimer)
SyntaxCheckTimer->stopTimer();
if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) {
AnalyzerTimers->clear();
}
// Run the AST-only checks using the order in which functions are defined.
// If inlining is not turned on, use the simplest function order for path
@@ -745,6 +760,9 @@ void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode,
llvm::TimeRecord CheckerEndTime = SyntaxCheckTimer->getTotalTime();
CheckerEndTime -= CheckerStartTime;
DisplayTime(CheckerEndTime);
if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) {
AnalyzerTimers->clear();
}
}
}
@@ -788,7 +806,12 @@ void AnalysisConsumer::RunPathSensitiveChecks(Decl *D,
ExprEngineTimer->stopTimer();
llvm::TimeRecord ExprEngineEndTime = ExprEngineTimer->getTotalTime();
ExprEngineEndTime -= ExprEngineStartTime;
PathRunningTime.set(static_cast<unsigned>(
std::lround(ExprEngineEndTime.getWallTime() * 1000)));
DisplayTime(ExprEngineEndTime);
if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) {
AnalyzerTimers->clear();
}
}
if (!Mgr->options.DumpExplodedGraphTo.empty())
@@ -799,6 +822,9 @@ void AnalysisConsumer::RunPathSensitiveChecks(Decl *D,
Eng.ViewGraph(Mgr->options.TrimGraph);
flushReports(BugReporterTimer.get(), Eng.getBugReporter());
if (AnalyzerTimers && ShouldClearTimersToPreventDisplayingThem) {
AnalyzerTimers->clear();
}
}
//===----------------------------------------------------------------------===//

View File

@@ -8,6 +8,13 @@
// CHECK-NEXT: "c:@F@fib#i#": {
// CHECK-NEXT: "File": "{{.*}}entry-point-stats.cpp",
// CHECK-NEXT: "DebugName": "fib(unsigned int)",
// CHECK-NEXT: "PathRunningTime": "{{[0-9]+}}",
// CHECK-NEXT: "MaxBugClassSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxCFGSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxQueueSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxReachableSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxTimeSpentSolvingZ3Queries": "{{[0-9]+}}",
// CHECK-NEXT: "MaxValidBugClassSize": "{{[0-9]+}}",
// CHECK-NEXT: "NumBlocks": "{{[0-9]+}}",
// CHECK-NEXT: "NumBlocksUnreachable": "{{[0-9]+}}",
// CHECK-NEXT: "NumCTUSteps": "{{[0-9]+}}",
@@ -33,18 +40,18 @@
// CHECK-NEXT: "NumTimesZ3SpendsTooMuchTimeOnASingleEQClass": "{{[0-9]+}}",
// CHECK-NEXT: "NumTimesZ3TimedOut": "{{[0-9]+}}",
// CHECK-NEXT: "NumZ3QueriesDone": "{{[0-9]+}}",
// CHECK-NEXT: "TimeSpentSolvingZ3Queries": "{{[0-9]+}}",
// CHECK-NEXT: "MaxBugClassSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxCFGSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxQueueSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxReachableSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxTimeSpentSolvingZ3Queries": "{{[0-9]+}}",
// CHECK-NEXT: "MaxValidBugClassSize": "{{[0-9]+}}",
// CHECK-NEXT: "PathRunningTime": "{{[0-9]+}}"
// CHECK-NEXT: "TimeSpentSolvingZ3Queries": "{{[0-9]+}}"
// CHECK-NEXT: },
// CHECK-NEXT: "c:@F@main#I#**C#": {
// CHECK-NEXT: "File": "{{.*}}entry-point-stats.cpp",
// CHECK-NEXT: "DebugName": "main(int, char **)",
// CHECK-NEXT: "PathRunningTime": "{{[0-9]+}}",
// CHECK-NEXT: "MaxBugClassSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxCFGSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxQueueSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxReachableSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxTimeSpentSolvingZ3Queries": "{{[0-9]+}}",
// CHECK-NEXT: "MaxValidBugClassSize": "{{[0-9]+}}",
// CHECK-NEXT: "NumBlocks": "{{[0-9]+}}",
// CHECK-NEXT: "NumBlocksUnreachable": "{{[0-9]+}}",
// CHECK-NEXT: "NumCTUSteps": "{{[0-9]+}}",
@@ -70,14 +77,7 @@
// CHECK-NEXT: "NumTimesZ3SpendsTooMuchTimeOnASingleEQClass": "{{[0-9]+}}",
// CHECK-NEXT: "NumTimesZ3TimedOut": "{{[0-9]+}}",
// CHECK-NEXT: "NumZ3QueriesDone": "{{[0-9]+}}",
// CHECK-NEXT: "TimeSpentSolvingZ3Queries": "{{[0-9]+}}",
// CHECK-NEXT: "MaxBugClassSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxCFGSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxQueueSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxReachableSize": "{{[0-9]+}}",
// CHECK-NEXT: "MaxTimeSpentSolvingZ3Queries": "{{[0-9]+}}",
// CHECK-NEXT: "MaxValidBugClassSize": "{{[0-9]+}}",
// CHECK-NEXT: "PathRunningTime": "{{[0-9]+}}"
// CHECK-NEXT: "TimeSpentSolvingZ3Queries": "{{[0-9]+}}"
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NOT: non_entry_point

View File

@@ -20,6 +20,7 @@ add_clang_unittest(StaticAnalysisTests
SValSimplifyerTest.cpp
SValTest.cpp
TestReturnValueUnderConstruction.cpp
UnsignedStatDemo.cpp
Z3CrosscheckOracleTest.cpp
CLANG_LIBS
clangBasic

View File

@@ -0,0 +1,150 @@
//=== UnsignedStatDemo.cpp --------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This checker demonstrates the use of UnsignedEPStat for per-entry-point
// statistics. It conditionally sets a statistic based on the entry point name.
//
//===----------------------------------------------------------------------===//
#include "CheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/EntryPointStats.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/MemoryBuffer.h"
#include "gtest/gtest.h"
#include <optional>
using namespace clang;
using namespace ento;
static UnsignedEPStat DemoStat("DemoStat");
namespace {
// Debug checker that demonstrates UnsignedEPStat: it sets DemoStat to 1 when
// the analyzed entry point is named "func_one", to 2 when it is named
// "func_two", and leaves it unset for every other entry point.
class UnsignedStatTesterChecker : public Checker<check::BeginFunction> {
public:
  void checkBeginFunction(CheckerContext &C) const {
    // DemoStat is a per-entry-point statistic and UnsignedEPStat::set() asserts
    // that no value was assigned yet. Only look at the top frame so that an
    // inlined call to func_one/func_two neither attributes a value to the
    // wrong entry point nor sets the statistic a second time.
    if (!C.inTopFrame())
      return;

    const Decl *D = C.getLocationContext()->getDecl();
    const FunctionDecl *F = D ? D->getAsFunction() : nullptr;
    // Functions without a plain identifier name (constructors, operators, ...)
    // can never match the names below; skip them before calling getName(),
    // which expects a simple identifier.
    if (!F || !F->getIdentifier())
      return;

    // Conditionally set the statistic based on the function name (leaving it
    // undefined for all other functions, e.g. "func_none").
    StringRef Name = F->getName();
    if (Name == "func_one")
      DemoStat.set(1);
    else if (Name == "func_two")
      DemoStat.set(2);
  }
};
// Test hook: enable only "test.DemoStatChecker" in the analyzer options and
// register UnsignedStatTesterChecker under that name.
void addUnsignedStatTesterChecker(AnalysisASTConsumer &AnalysisConsumer,
                                  AnalyzerOptions &AnOpts) {
  AnOpts.CheckersAndPackages = {{"test.DemoStatChecker", true}};
  auto RegisterDemoChecker = [](CheckerRegistry &Registry) {
    Registry.addChecker<UnsignedStatTesterChecker>(
        "test.DemoStatChecker", "DescriptionOfDemoStatChecker");
  };
  AnalysisConsumer.AddCheckerRegistrationFn(RegisterDemoChecker);
}
// Look up ColumnName among the CSV header cells.
// Returns the zero-based position of the first exact match, or std::nullopt
// when no header cell equals ColumnName.
static std::optional<unsigned>
findColumnIndex(llvm::ArrayRef<llvm::StringRef> Header,
                llvm::StringRef ColumnName) {
  for (unsigned Idx = 0, End = Header.size(); Idx != End; ++Idx)
    if (Header[Idx] == ColumnName)
      return Idx;
  return std::nullopt;
}
// Split one CSV line into cells, breaking only on commas that are outside
// double quotes, so a quoted cell such as "main(int, char **)" stays in one
// piece. The surrounding quotes are kept in the returned cells.
// NOTE(review): assumes a raw '"' never occurs inside a cell (the writer passes
// values through llvm::printEscapedString); doubled-quote escaping is not
// handled.
static llvm::SmallVector<llvm::StringRef, 32>
splitCSVLine(llvm::StringRef Line) {
  llvm::SmallVector<llvm::StringRef, 32> Cells;
  bool InQuotes = false;
  size_t CellStart = 0;
  for (size_t I = 0, E = Line.size(); I != E; ++I) {
    if (Line[I] == '"') {
      InQuotes = !InQuotes;
    } else if (Line[I] == ',' && !InQuotes) {
      Cells.push_back(Line.slice(CellStart, I));
      CellStart = I + 1;
    }
  }
  // The last cell has no trailing comma; keep it even when it is empty so
  // that an unset statistic in the final column is still represented.
  Cells.push_back(Line.substr(CellStart));
  return Cells;
}

// Parse CSV content and extract a mapping from one column to another.
// KeyColumn is used as the map key (e.g., "DebugName").
// ValueColumn is used as the map value (e.g., "DemoStat").
// Returns a map from key column values to value column values, with
// surrounding whitespace and double quotes stripped from both. The result is
// empty when there is no data row or when either column is missing from the
// header. Rows that are too short or whose key is empty are skipped.
static llvm::StringMap<std::string>
parseCSVColumnMapping(llvm::StringRef CSVContent, llvm::StringRef KeyColumn,
                      llvm::StringRef ValueColumn) {
  llvm::StringMap<std::string> Result;

  // Parse CSV: first line is header, subsequent lines are data
  llvm::SmallVector<llvm::StringRef, 8> Lines;
  CSVContent.split(Lines, '\n', -1, false);
  if (Lines.size() < 2) // Need at least header + one data row
    return Result;

  // Parse header to find column indices
  llvm::SmallVector<llvm::StringRef, 32> Header = splitCSVLine(Lines[0]);
  std::optional<unsigned> KeyIdx = findColumnIndex(Header, KeyColumn);
  std::optional<unsigned> ValueIdx = findColumnIndex(Header, ValueColumn);
  if (!KeyIdx || !ValueIdx)
    return Result;

  // Parse data rows and extract mappings
  for (llvm::StringRef Line : llvm::drop_begin(Lines)) {
    llvm::SmallVector<llvm::StringRef, 32> Row = splitCSVLine(Line);
    if (Row.size() <= std::max(*KeyIdx, *ValueIdx))
      continue;
    llvm::StringRef KeyVal = Row[*KeyIdx].trim().trim('"');
    llvm::StringRef ValueVal = Row[*ValueIdx].trim().trim('"');
    if (!KeyVal.empty())
      Result[KeyVal] = ValueVal.str();
  }
  return Result;
}
// Runs the demo checker over three empty functions with the per-entry-point
// CSV dump enabled, then checks that the DemoStat cell is "1" for func_one,
// "2" for func_two and empty for func_none (where the stat is never set).
TEST(UnsignedStat, ExplicitlySetUnsignedStatistic) {
  // The analyzer writes its per-entry-point CSV into this temporary file.
  llvm::SmallString<128> CsvPath;
  const std::error_code CreateErr =
      llvm::sys::fs::createTemporaryFile("ep_stats", "csv", CsvPath);
  ASSERT_FALSE(CreateErr);

  const std::string DumpOption =
      std::string("dump-entry-point-stats-to-csv=") + CsvPath.str().str();
  std::vector<std::string> Args = {"-Xclang", "-analyzer-config", "-Xclang",
                                   DumpOption};

  // Remove the temporary file however the test ends.
  auto RemoveCsvOnExit =
      llvm::make_scope_exit([&]() { llvm::sys::fs::remove(CsvPath); });

  const bool AnalysisSucceeded =
      runCheckerOnCodeWithArgs<addUnsignedStatTesterChecker>(
          R"cpp(
void func_one() {}
void func_two() {}
void func_none() {}
)cpp",
          Args);
  EXPECT_TRUE(AnalysisSucceeded);

  auto CsvFile = llvm::MemoryBuffer::getFile(CsvPath);
  ASSERT_TRUE(CsvFile);

  // Map each entry point's DebugName to the text of its DemoStat cell.
  llvm::StringMap<std::string> StatByFunction = parseCSVColumnMapping(
      CsvFile.get()->getBuffer(), "DebugName", "DemoStat");

  // Every entry point must have a row; only the first two carry a value.
  ASSERT_TRUE(StatByFunction.count("func_one()"));
  EXPECT_EQ(StatByFunction["func_one()"], "1");

  ASSERT_TRUE(StatByFunction.count("func_two()"));
  EXPECT_EQ(StatByFunction["func_two()"], "2");

  ASSERT_TRUE(StatByFunction.count("func_none()"));
  EXPECT_EQ(StatByFunction["func_none()"], ""); // Not set, should be empty
}
} // namespace