From 30402c7dea57d35ead80387c061cda6977ba98ef Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Thu, 25 Sep 2025 13:47:51 -0400 Subject: [PATCH] [analyzer] Emit IssueHash in SARIF (#159445) This change adds two new properties to each `result` object in the SARIF log: `partialFingerprints`: Contains the "issue hash" that the analyzer already generates for each result, which can help identify a result across runs even if surrounding code changes. `hostedViewUri`: If running with `-analyzer-format=sarif-html`, this property will now be emitted with the `file:` URL of the generated HTML report for that result. Along the way, I discovered an existing bug where the HTML diagnostic consumer does not record the path to the generated report if another compilation already created that report. This caused both the SARIF and Plist consumers to be missing the link to the file in all but one of the compilations in case of a warning in a header file. I added a new test to ensure that the generated SARIF for each compilation contains the property. Finally, I made a few changes to the `normalize_sarif` processing in the tests. I switched to `sed` to allow substitutions. The normalization now removes directory components from `file:` URLs, replaces the `length` property of the source file with a constant `-1`, and puts placeholders in the values of the `version` properties rather than just deleting them. The URL transformation in particular lets us verify that the right filename is generated for each HTML report. Fixes #158159 rdar://160410408 --- clang/include/clang/Analysis/PathDiagnostic.h | 4 + clang/include/clang/Basic/Sarif.h | 15 ++ clang/lib/Analysis/PathDiagnostic.cpp | 14 ++ clang/lib/Basic/Sarif.cpp | 16 +- .../StaticAnalyzer/Core/HTMLDiagnostics.cpp | 28 ++-- .../lib/StaticAnalyzer/Core/HTMLDiagnostics.h | 14 ++ .../StaticAnalyzer/Core/PlistDiagnostics.cpp | 10 +- .../StaticAnalyzer/Core/SarifDiagnostics.cpp | 46 +++++- .../sarif-diagnostics-taint-test.c.sarif | 11 +- .../sarif-multi-diagnostic-test.c.sarif | 44 +++++- .../sarif-multi-file-diagnostics.c.sarif | 144 ++++++++++++++++++ .../sarif-multi-file-diagnostics.c | 12 ++ clang/test/Analysis/lit.local.cfg | 12 +- 13 files changed, 335 insertions(+), 35 deletions(-) create mode 100644 clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.h create mode 100644 clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-file-diagnostics.c.sarif create mode 100644 clang/test/Analysis/diagnostics/sarif-multi-file-diagnostics.c diff --git a/clang/include/clang/Analysis/PathDiagnostic.h b/clang/include/clang/Analysis/PathDiagnostic.h index 5907df022e44..197920d4cd10 100644 --- a/clang/include/clang/Analysis/PathDiagnostic.h +++ b/clang/include/clang/Analysis/PathDiagnostic.h @@ -885,6 +885,10 @@ public: return UniqueingDecl; } + /// Get a hash that identifies the issue. + SmallString<32> getIssueHash(const SourceManager &SrcMgr, + const LangOptions &LangOpts) const; + void flattenLocations() { Loc.flatten(); for (const auto &I : pathImpl) diff --git a/clang/include/clang/Basic/Sarif.h b/clang/include/clang/Basic/Sarif.h index e6c46224b316..a88d1ee2965a 100644 --- a/clang/include/clang/Basic/Sarif.h +++ b/clang/include/clang/Basic/Sarif.h @@ -322,6 +322,8 @@ class SarifResult { uint32_t RuleIdx; std::string RuleId; std::string DiagnosticMessage; + std::string HostedViewerURI; + llvm::SmallDenseMap PartialFingerprints; llvm::SmallVector Locations; llvm::SmallVector ThreadFlows; std::optional LevelOverride; @@ -347,6 +349,11 @@ public: return *this; } + SarifResult setHostedViewerURI(llvm::StringRef URI) { + HostedViewerURI = URI.str(); + return *this; + } + SarifResult setLocations(llvm::ArrayRef DiagLocs) { #ifndef NDEBUG for (const auto &Loc : DiagLocs) { @@ -366,6 +373,12 @@ public: LevelOverride = TheLevel; return *this; } + + SarifResult addPartialFingerprint(llvm::StringRef key, + llvm::StringRef value) { + PartialFingerprints[key] = value; + return *this; + } }; /// This class handles creating a valid SARIF document given various input @@ -475,6 +488,8 @@ public: /// reported diagnostics, resulting in an expensive call. llvm::json::Object createDocument(); + static std::string fileNameToURI(llvm::StringRef Filename); + private: /// Source Manager to use for the current SARIF document. const SourceManager &SourceMgr; diff --git a/clang/lib/Analysis/PathDiagnostic.cpp b/clang/lib/Analysis/PathDiagnostic.cpp index ef24efd3c4bd..e42731b93bfb 100644 --- a/clang/lib/Analysis/PathDiagnostic.cpp +++ b/clang/lib/Analysis/PathDiagnostic.cpp @@ -24,6 +24,7 @@ #include "clang/AST/Type.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" +#include "clang/Analysis/IssueHash.h" #include "clang/Analysis/ProgramPoint.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" @@ -1075,6 +1076,19 @@ unsigned PathDiagnostic::full_size() { return size; } +SmallString<32> +PathDiagnostic::getIssueHash(const SourceManager &SrcMgr, + const LangOptions &LangOpts) const { + PathDiagnosticLocation UPDLoc = getUniqueingLoc(); + FullSourceLoc FullLoc( + SrcMgr.getExpansionLoc(UPDLoc.isValid() ? UPDLoc.asLocation() + : getLocation().asLocation()), + SrcMgr); + + return clang::getIssueHash(FullLoc, getCheckerName(), getBugType(), + getDeclWithIssue(), LangOpts); +} + //===----------------------------------------------------------------------===// // FoldingSet profiling methods. //===----------------------------------------------------------------------===// diff --git a/clang/lib/Basic/Sarif.cpp b/clang/lib/Basic/Sarif.cpp index 69862b73febd..b3fb9a21249e 100644 --- a/clang/lib/Basic/Sarif.cpp +++ b/clang/lib/Basic/Sarif.cpp @@ -67,7 +67,7 @@ static std::string percentEncodeURICharacter(char C) { /// \param Filename The filename to be represented as URI. /// /// \return RFC3986 URI representing the input file name. -static std::string fileNameToURI(StringRef Filename) { +std::string SarifDocumentWriter::fileNameToURI(StringRef Filename) { SmallString<32> Ret = StringRef("file://"); // Get the root name to see if it has a URI authority. @@ -391,6 +391,11 @@ void SarifDocumentWriter::appendResult(const SarifResult &Result) { json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, {"ruleIndex", static_cast(RuleIdx)}, {"ruleId", Rule.Id}}; + + if (!Result.HostedViewerURI.empty()) { + Ret["hostedViewerUri"] = Result.HostedViewerURI; + } + if (!Result.Locations.empty()) { json::Array Locs; for (auto &Range : Result.Locations) { @@ -398,6 +403,15 @@ void SarifDocumentWriter::appendResult(const SarifResult &Result) { } Ret["locations"] = std::move(Locs); } + + if (!Result.PartialFingerprints.empty()) { + json::Object fingerprints = {}; + for (auto &pair : Result.PartialFingerprints) { + fingerprints[pair.first] = pair.second; + } + Ret["partialFingerprints"] = std::move(fingerprints); + } + if (!Result.ThreadFlows.empty()) Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index 4c9c619f2487..217b853305ed 100644 --- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "HTMLDiagnostics.h" #include "PlistDiagnostics.h" #include "SarifDiagnostics.h" #include "clang/AST/Decl.h" @@ -82,7 +83,7 @@ public: void FlushDiagnosticsImpl(std::vector &Diags, FilesMade *filesMade) override; - StringRef getName() const override { return "HTMLDiagnostics"; } + StringRef getName() const override { return HTML_DIAGNOSTICS_NAME; } bool supportsCrossFileDiagnostics() const override { return SupportsCrossFileDiagnostics; @@ -254,18 +255,6 @@ void HTMLDiagnostics::FlushDiagnosticsImpl( ReportDiag(*Diag, filesMade); } -static llvm::SmallString<32> getIssueHash(const PathDiagnostic &D, - const Preprocessor &PP) { - SourceManager &SMgr = PP.getSourceManager(); - PathDiagnosticLocation UPDLoc = D.getUniqueingLoc(); - FullSourceLoc L(SMgr.getExpansionLoc(UPDLoc.isValid() - ? UPDLoc.asLocation() - : D.getLocation().asLocation()), - SMgr); - return getIssueHash(L, D.getCheckerName(), D.getBugType(), - D.getDeclWithIssue(), PP.getLangOpts()); -} - void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, FilesMade *filesMade) { // Create the HTML directory if it is missing. @@ -310,7 +299,8 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, } } - SmallString<32> IssueHash = getIssueHash(D, PP); + SmallString<32> IssueHash = + D.getIssueHash(PP.getSourceManager(), PP.getLangOpts()); auto [It, IsNew] = EmittedHashes.insert(IssueHash); if (!IsNew) { // We've already emitted a duplicate issue. It'll get overwritten anyway. @@ -369,6 +359,12 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, if (EC != llvm::errc::file_exists) { llvm::errs() << "warning: could not create file in '" << Directory << "': " << EC.message() << '\n'; + } else if (filesMade) { + // Record that we created the file so that it gets referenced in the + // plist and SARIF reports for every translation unit that found the + // issue. + filesMade->addDiagnostic(D, getName(), + llvm::sys::path::filename(ResultPath)); } return; } @@ -679,8 +675,8 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic &D, Rewriter &R, os << "\n\n"; - os << "\n\n"; + os << "\n\n"; os << "\n