Files
llvm/polly/lib/CodeGen/CodeGeneration.cpp
Michael Kruse 7a0f7dbf2d [Polly] Introduce PhaseManager and remove LPM support (#125442) (#167560)
Reapply of a22d1c2225. Using this PR for
pre-merge CI.

Instead of relying on any pass manager to schedule Polly's passes, add
Polly's own pipeline manager which is seen as a monolithic pass in
LLVM's pass manager. Polly's former passes are now phases of the new
PhaseManager component.

Relying on LLVM's pass manager (the legacy as well as the New Pass
Manager) to manage Polly's phases never was a good fit that the
PhaseManager resolves:

* Polly passes were modifying analysis results, in particular RegionInfo
and ScopInfo. This means that there was not just one unique and
"definite" analysis result, the actual result depended on which analyses
ran prior, and the pass manager was not allowed to throw away cached
analyses or prior SCoP optimizations would have been forgotten. The LLVM
pass manager's persistence of analysis results is not contractual but
designed for caching.

* Polly depends on a particular execution order of passes and regions
(e.g. regression tests, invalidation of consecutive SCoPs). LLVM's pass
manager does not guarantee any execution order.

* Polly does not completely preserve DominatorTree, RegionInfo,
LoopInfo, or ScalarEvolution, but only as-needed for Polly's own uses.
Because the ScopDetection object stores references to those analyses, it
still had to lie to the pass manager that they would be preserved, or
the pass manager would have released and recomputed the invalidated
analysis objects that ScopDetection/ScopInfo was still referencing. To
ensure that no non-Polly pass would see these not-completely-preserved
analyses, all analyses still had to be thrown away after the
ScopPassManager, respectively with a BarrierNoopPass in case of the LPM.
 
* The NPM's PassInstrumentation wraps the IR unit into an `llvm::Any`
object, but implementations such as PrintIRInstrumentation call
llvm_unreachable on encountering an unknown IR unit, such as SCoPs, with
no extension points to add support. Hence LLVM crashes when dumping IR
between SCoP passes (such as `-print-before-changed` with Polly being
active).

The new PhaseManager uses some command line options that previously
belonged to Polly's legacy passes, such as `-polly-print-detect` (so the
option will continue to work). Hence the LPM support is incompatible
with the new approach and support for it is removed.
2025-11-14 00:45:54 +01:00

336 lines
13 KiB
C++

//===- CodeGeneration.cpp - Code generate the Scops using ISL. ---------======//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The CodeGeneration pass takes a Scop created by ScopInfo and translates it
// back to LLVM-IR using the ISL code generator.
//
// The Scop describes the high level memory behavior of a control flow region.
// Transformation passes can update the schedule (execution order) of statements
// in the Scop. ISL is used to generate an abstract syntax tree that reflects
// the updated execution order. This clast is used to create new LLVM-IR that is
// computationally equivalent to the original control flow region, but executes
// its code in the new execution order defined by the changed schedule.
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IRBuilder.h"
#include "polly/CodeGen/IslAst.h"
#include "polly/CodeGen/IslNodeBuilder.h"
#include "polly/CodeGen/PerfMonitor.h"
#include "polly/CodeGen/Utils.h"
#include "polly/DependenceInfo.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "isl/ast.h"
#include <cassert>
using namespace llvm;
using namespace polly;
#include "polly/Support/PollyDebug.h"
#define DEBUG_TYPE "polly-codegen"
/// Command-line flag "-polly-codegen-verify" (marked cl::Hidden).
///
/// When set, verifyGeneratedFunction() runs the IR verifier on the functions
/// produced by code generation and aborts if verification fails.
static cl::opt<bool> Verify("polly-codegen-verify",
cl::desc("Verify the function generated by Polly"),
cl::Hidden, cl::cat(PollyCategory));
/// Backing storage for the "-polly-codegen-perf-monitoring" flag below.
///
/// generateCode() consults this value to decide whether to instrument the
/// generated code with a PerfMonitor.
bool polly::PerfMonitoring;
/// Command-line flag "-polly-codegen-perf-monitoring" (marked cl::Hidden).
///
/// The parsed value is not kept in the option object itself; cl::location
/// redirects it into polly::PerfMonitoring.
static cl::opt<bool, true>
XPerfMonitoring("polly-codegen-perf-monitoring",
cl::desc("Add run-time performance monitoring"), cl::Hidden,
cl::location(polly::PerfMonitoring),
cl::cat(PollyCategory));
// Incremented by generateCode() for every SCoP that has an isl AST root,
// before any IR is modified.
STATISTIC(ScopsProcessed, "Number of SCoP processed");
// Incremented by generateCode() only on the path where invariant load
// hoisting succeeded and the AST was turned into IR.
STATISTIC(CodegenedScops, "Number of successfully generated SCoPs");
// Accumulates Scop statistics' NumAffineLoops for successfully generated
// SCoPs.
STATISTIC(CodegenedAffineLoops,
"Number of original affine loops in SCoPs that have been generated");
// Accumulates Scop statistics' NumBoxedLoops for successfully generated
// SCoPs.
STATISTIC(CodegenedBoxedLoops,
"Number of original boxed loops in SCoPs that have been generated");
namespace polly {
/// Turn a basic block into one that ends in an UnreachableInst.
///
/// A new UnreachableInst is created in front of the current terminator of
/// @p Block, after which the old terminator is erased.
///
/// @param Block   The block to mark unreachable; it is expected to already
///                have a terminator.
/// @param Builder The builder used to create the UnreachableInst. Its insert
///                point is changed by this call.
void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) {
  auto *OldTerminator = Block.getTerminator();
  // Emit the replacement first so the old terminator can serve as the
  // insertion anchor, then drop the old terminator.
  Builder.SetInsertPoint(&Block, OldTerminator->getIterator());
  Builder.CreateUnreachable();
  OldTerminator->eraseFromParent();
}
} // namespace polly
/// Run the IR verifier on @p F if -polly-codegen-verify is set.
///
/// Nothing happens when verification is disabled or when the verifier finds
/// no problem. Otherwise the SCoP, the isl AST and the offending function are
/// dumped (inside POLLY_DEBUG only) and llvm_unreachable is hit.
///
/// @param S  The SCoP the function was generated for; only printed.
/// @param F  The function to verify.
/// @param AI The isl AST the code was generated from; only printed.
static void verifyGeneratedFunction(Scop &S, Function &F, IslAstInfo &AI) {
  if (!Verify)
    return;

  // verifyFunction() prints its diagnostics to errs() and returns true when
  // the function is broken.
  const bool IsBroken = verifyFunction(F, &errs());
  if (!IsBroken)
    return;

  POLLY_DEBUG({
    errs() << "== ISL Codegen created an invalid function ==\n\n== The "
              "SCoP ==\n";
    errs() << S;
    errs() << "\n== The isl AST ==\n";
    AI.print(errs());
    errs() << "\n== The invalid function ==\n";
    F.print(errs());
  });

  llvm_unreachable("Polly generated function could not be verified. Add "
                   "-polly-codegen-verify=false to disable this assertion.");
}
/// Assign every basic block of @p F that has no region yet to @p ParentRegion.
///
/// CodeGeneration creates many basic blocks without updating the RegionInfo.
/// To keep the RegionInfo verifier happy, all of them are put flat (without
/// any nested structure) into the scop's parent region.
///
/// @param F            The function whose blocks are inspected.
/// @param ParentRegion The region that adopts the unassigned blocks.
/// @param RI           The RegionInfo to query and update.
static void fixRegionInfo(Function &F, Region &ParentRegion, RegionInfo &RI) {
  for (BasicBlock &Block : F)
    if (!RI.getRegionFor(&Block))
      RI.setRegionFor(&Block, &ParentRegion);
}
/// Erase every llvm.lifetime.start / llvm.lifetime.end call found in the
/// blocks of @p R.
///
/// CodeGeneration does not copy lifetime markers into the optimized SCoP,
/// which would leave them only in the original path. This can transform
/// code such as
///
///     llvm.lifetime.start(%p)
///     llvm.lifetime.end(%p)
///
/// into
///
///     if (RTC) {
///       // generated code
///     } else {
///       // original code
///       llvm.lifetime.start(%p)
///     }
///     llvm.lifetime.end(%p)
///
/// The current StackColoring algorithm cannot handle the case where some, but
/// not all, paths from the end marker to the entry block cross the start
/// marker. The same holds for start markers that do not always cross the end
/// markers. Any such issue is avoided by removing all lifetime markers, even
/// from the original code.
///
/// A better solution could be to hoist all llvm.lifetime.start to the split
/// node and all llvm.lifetime.end to the merge node, which should be
/// conservatively correct.
static void removeLifetimeMarkers(Region *R) {
  for (auto *BB : R->blocks()) {
    for (auto Cursor = BB->begin(), Last = BB->end(); Cursor != Last;) {
      // Step past the instruction before looking at it: erasing it below
      // must not invalidate the cursor.
      auto *Call = dyn_cast<IntrinsicInst>(&*Cursor++);
      if (!Call)
        continue;

      const auto ID = Call->getIntrinsicID();
      if (ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end)
        Call->eraseFromParent();
    }
  }
}
/// Generate LLVM-IR for the Scop @p S from the isl AST stored in @p AI.
///
/// The generated code is placed behind a conditional branch next to the
/// original region. The branch condition starts out as 'true' and is later
/// patched to either the run-time check (RTC) or, if hoisting of invariant
/// loads failed, to 'false' so that only the original code executes.
///
/// @param S  The Scop to generate code for.
/// @param AI The isl AST (and run-time condition) to generate code from.
/// @param LI LoopInfo, handed to the helpers that restructure the CFG.
/// @param DT DominatorTree, updated while the CFG is restructured.
/// @param SE ScalarEvolution, handed to the IslNodeBuilder.
/// @param RI RegionInfo, updated for the newly created blocks.
///
/// @return false if @p AI belongs to a different isl_ctx than @p S or has no
///         AST root; in both cases the function returns before reaching any
///         of the IR-modifying steps. true in all other cases, including
///         the one where invariant load hoisting failed.
static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI,
DominatorTree &DT, ScalarEvolution &SE,
RegionInfo &RI) {
// Check whether IslAstInfo uses the same isl_ctx. Since -polly-codegen
// reports itself to preserve DependenceInfo and IslAstInfo, we might get
// those analysis that were computed by a different ScopInfo for a different
// Scop structure. When the ScopInfo/Scop object is freed, there is a high
// probability that the new ScopInfo/Scop object will be created at the same
// heap position with the same address. Comparing whether the Scop or ScopInfo
// address is the expected therefore is unreliable.
// Instead, we compare the address of the isl_ctx object. Both, DependenceInfo
// and IslAstInfo must hold a reference to the isl_ctx object to ensure it is
// not freed before the destruction of those analyses which might happen after
// the destruction of the Scop/ScopInfo they refer to. Hence, the isl_ctx
// will not be freed and its space not reused as long there is a
// DependenceInfo or IslAstInfo around.
IslAst &Ast = AI.getIslAst();
if (Ast.getSharedIslCtx() != S.getSharedIslCtx()) {
POLLY_DEBUG(dbgs() << "Got an IstAst for a different Scop/isl_ctx\n");
return false;
}
// Check if we created an isl_ast root node, otherwise exit.
isl::ast_node AstRoot = Ast.getAst();
if (AstRoot.is_null())
return false;
// Collect statistics. Do it before we modify the IR to avoid having it any
// influence on the result.
auto ScopStats = S.getStatistics();
ScopsProcessed++;
auto &DL = S.getFunction().getDataLayout();
Region *R = &S.getRegion();
assert(!R->isTopLevelRegion() && "Top level regions are not supported");
ScopAnnotator Annotator;
// Bring the region into simple form; the assert below relies on it.
simplifyRegion(R, &DT, &LI, &RI);
assert(R->isSimple());
BasicBlock *EnteringBB = S.getEnteringBlock();
assert(EnteringBB);
PollyIRBuilder Builder(EnteringBB->getContext(), ConstantFolder(),
IRInserter(Annotator));
Builder.SetInsertPoint(EnteringBB,
EnteringBB->getTerminator()->getIterator());
// Only build the run-time condition and parameters _after_ having
// introduced the conditional branch. This is important as the conditional
// branch will guard the original scop from new induction variables that
// the SCEVExpander may introduce while code generating the parameters and
// which may introduce scalar dependences that prevent us from correctly
// code generating this scop.
// The branch condition is a 'true' placeholder here; it is replaced further
// down by either the RTC or 'false'.
BBPair StartExitBlocks =
std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI));
BasicBlock *StartBlock = std::get<0>(StartExitBlocks);
BasicBlock *ExitBlock = std::get<1>(StartExitBlocks);
removeLifetimeMarkers(R);
// SplitBlock is the block whose terminator is the conditional branch that
// gets patched below.
// NOTE(review): the result of getSinglePredecessor() is used without a null
// check - presumably guaranteed by executeScopConditionally; confirm.
auto *SplitBlock = StartBlock->getSinglePredecessor();
IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock);
// All arrays must have their base pointers known before
// ScopAnnotator::buildAliasScopes.
NodeBuilder.allocateNewArrays(StartExitBlocks);
Annotator.buildAliasScopes(S);
// Optional instrumentation (-polly-codegen-perf-monitoring): the region start
// is recorded in front of SplitBlock's terminator, the region end in front
// of the terminator of ExitBlock's unique successor.
if (PerfMonitoring) {
PerfMonitor P(S, EnteringBB->getParent()->getParent());
P.initialize();
P.insertRegionStart(SplitBlock->getTerminator());
BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor();
P.insertRegionEnd(MergeBlock->getTerminator());
}
// First generate code for the hoisted invariant loads and transitively the
// parameters they reference. Afterwards, for the remaining parameters that
// might reference the hoisted loads. Finally, build the runtime check
// that might reference both hoisted loads as well as parameters.
// If the hoisting fails we have to bail and execute the original code.
Builder.SetInsertPoint(SplitBlock,
SplitBlock->getTerminator()->getIterator());
if (!NodeBuilder.preloadInvariantLoads()) {
// Patch the introduced branch condition to ensure that we always execute
// the original SCoP.
auto *FalseI1 = Builder.getFalse();
auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator();
SplitBBTerm->setOperand(0, FalseI1);
// Since the other branch is hence ignored we mark it as unreachable and
// adjust the dominator tree accordingly.
auto *ExitingBlock = StartBlock->getUniqueSuccessor();
assert(ExitingBlock);
auto *MergeBlock = ExitingBlock->getUniqueSuccessor();
assert(MergeBlock);
markBlockUnreachable(*StartBlock, Builder);
markBlockUnreachable(*ExitingBlock, Builder);
// The merge block is now entered only from the original code, so the
// original scop's exiting block becomes its immediate dominator.
auto *ExitingBB = S.getExitingBlock();
assert(ExitingBB);
DT.changeImmediateDominator(MergeBlock, ExitingBB);
DT.eraseNode(ExitingBlock);
} else {
NodeBuilder.addParameters(S.getContext().release());
Value *RTC = NodeBuilder.createRTC(AI.getRunCondition().release());
// Replace the 'true' placeholder condition with the actual run-time check.
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
auto *CI = dyn_cast<ConstantInt>(RTC);
// The code below annotates the "llvm.loop.vectorize.enable" to false
// for the code flow taken when RTCs fail. Because we don't want the
// Loop Vectorizer to come in later and vectorize the original fall back
// loop when Polly is enabled. This avoids loop versioning on fallback
// loop by Loop Vectorizer. Don't do this when Polly's RTC value is
// false (due to code generation failure), as we are left with only one
// version of Loop.
if (!(CI && CI->isZero())) {
for (Loop *L : LI.getLoopsInPreorder()) {
if (S.contains(L))
addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0);
}
}
// Explicitly set the insert point to the end of the block to avoid that a
// split at the builder's current
// insert position would move the malloc calls to the wrong BasicBlock.
// Ideally we would just split the block during allocation of the new
// arrays, but this would break the assumption that there are no blocks
// between polly.start and polly.exiting (at this point).
Builder.SetInsertPoint(StartBlock,
StartBlock->getTerminator()->getIterator());
NodeBuilder.create(AstRoot.release());
NodeBuilder.finalize();
// Register the blocks created above with the scop's parent region.
fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI);
CodegenedScops++;
CodegenedAffineLoops += ScopStats.NumAffineLoops;
CodegenedBoxedLoops += ScopStats.NumBoxedLoops;
}
// Verify (only if -polly-codegen-verify is set) the modified function as
// well as every parallel subfunction the node builder reports.
Function *F = EnteringBB->getParent();
verifyGeneratedFunction(S, *F, AI);
for (auto *SubF : NodeBuilder.getParallelSubfunctions())
verifyGeneratedFunction(S, *SubF, AI);
// Mark the function such that we run additional cleanup passes on this
// function (e.g. mem2reg to rediscover phi nodes).
F->addFnAttr("polly-optimized");
return true;
}
/// Run code generation for @p S as a Scop pass.
///
/// Fetches the IslAstAnalysis result for @p S and forwards to generateCode()
/// together with the standard analyses from @p AR.
///
/// @return PreservedAnalyses::all() if generateCode() did nothing; otherwise
///         the Scop is invalidated through @p U and PreservedAnalyses::none()
///         is returned.
PreservedAnalyses CodeGenerationPass::run(Scop &S, ScopAnalysisManager &SAM,
                                          ScopStandardAnalysisResults &AR,
                                          SPMUpdater &U) {
  auto &AI = SAM.getResult<IslAstAnalysis>(S, AR);

  const bool Generated = generateCode(S, AI, AR.LI, AR.DT, AR.SE, AR.RI);
  if (!Generated)
    return PreservedAnalyses::all();

  U.invalidateScop(S);
  return PreservedAnalyses::none();
}
/// Run code generation for @p S, taking LoopInfo, DominatorTree and
/// ScalarEvolution from the Scop itself.
///
/// @param S  The Scop to generate code for.
/// @param RI The RegionInfo to update for newly created blocks.
/// @param AI The isl AST to generate code from.
///
/// @return The result of generateCode().
bool polly::runCodeGeneration(Scop &S, RegionInfo &RI, IslAstInfo &AI) {
  auto &LI = *S.getLI();
  auto &DT = *S.getDT();
  auto &SE = *S.getSE();
  return generateCode(S, AI, LI, DT, SE, RI);
}