[mlir][tblgen] Don't echo absolute paths into rewrite pattern source (#168984)

Currently, the declarative pattern rewrite generator will always print
the [source]:[line](s) from which a pattern came. This is a useful
debugging hint, but it causes problems when absolute paths are used as
arguments to mlir-tblgen (which LLVM's build rules automatically do).
Specifically, it causes the generated source to be tied to the build
location, harming reproducibility and our collective ability to get
ccache hits from, say, separate worktrees.

This commit resolves the issue by replacing absolute paths in these
"Generated from:" comments with their filenames. (The alternative would
have been to implement an entire file-prefix-map the way the C compilers
do, but since this is an isolated incident, I chose to resolve it
locally.)
This commit is contained in:
Krzysztof Drewniak
2025-11-25 11:30:43 -08:00
committed by GitHub
parent 0917a38c69
commit af0fcf85c8
3 changed files with 22 additions and 7 deletions

View File

@@ -643,8 +643,10 @@ public:
using IdentifierLine = std::pair<StringRef, unsigned>;
// Returns the file location of the pattern (buffer identifier + line number
// pair).
std::vector<IdentifierLine> getLocation() const;
// pair). If `forSourceOutput` is true, replace absolute paths in the buffer
// identifier with just their filename so that we don't leak build paths into
// the generated code.
std::vector<IdentifierLine> getLocation(bool forSourceOutput = false) const;
// Recursively collects all bound symbols inside the DAG tree rooted
// at `tree` and updates the given `infoMap`.

View File

@@ -18,6 +18,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
@@ -771,15 +772,27 @@ int Pattern::getBenefit() const {
return initBenefit + dyn_cast<IntInit>(delta->getArg(0))->getValue();
}
std::vector<Pattern::IdentifierLine> Pattern::getLocation() const {
std::vector<Pattern::IdentifierLine>
Pattern::getLocation(bool forSourceOutput) const {
std::vector<std::pair<StringRef, unsigned>> result;
result.reserve(def.getLoc().size());
for (auto loc : def.getLoc()) {
unsigned buf = llvm::SrcMgr.FindBufferContainingLoc(loc);
assert(buf && "invalid source location");
result.emplace_back(
llvm::SrcMgr.getBufferInfo(buf).Buffer->getBufferIdentifier(),
llvm::SrcMgr.getLineAndColumn(loc, buf).first);
StringRef bufferName =
llvm::SrcMgr.getBufferInfo(buf).Buffer->getBufferIdentifier();
// If we're emitting a generated file, we'd like to have some indication of
// where our patterns came from. However, LLVM's build rules use absolute
// paths as arguments to TableGen, and naively echoing such paths makes the
// contents of the generated source file depend on the build location,
// making MLIR builds substantially less reproducible. As a compromise, we
// trim absolute paths back to only the filename component.
if (forSourceOutput && llvm::sys::path::is_absolute(bufferName))
bufferName = llvm::sys::path::filename(bufferName);
result.emplace_back(bufferName,
llvm::SrcMgr.getLineAndColumn(loc, buf).first);
}
return result;
}

View File

@@ -1129,7 +1129,7 @@ void PatternEmitter::emit(StringRef rewriteName) {
LLVM_DEBUG(llvm::dbgs() << "done collecting ops used in result patterns\n");
// Emit RewritePattern for Pattern.
auto locs = pattern.getLocation();
auto locs = pattern.getLocation(/*forSourceOutput=*/true);
os << formatv("/* Generated from:\n {0:$[ instantiating\n ]}\n*/\n",
llvm::reverse(locs));
os << formatv(R"(struct {0} : public ::mlir::RewritePattern {