Files
llvm/polly/lib/CodeGen/CodeGeneration.cpp

317 lines
11 KiB
C++
Raw Normal View History

//===------ CodeGeneration.cpp - Code generate the Scops using ISL. ----======//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The CodeGeneration pass takes a Scop created by ScopInfo and translates it
// back to LLVM-IR using the ISL code generator.
//
// The Scop describes the high level memory behaviour of a control flow region.
// Transformation passes can update the schedule (execution order) of statements
// in the Scop. ISL is used to generate an abstract syntax tree that reflects
// the updated execution order. This AST is used to create new LLVM-IR that is
// computationally equivalent to the original control flow region, but executes
// its code in the new execution order defined by the changed schedule.
//
//===----------------------------------------------------------------------===//
2013-05-07 08:11:54 +00:00
#include "polly/CodeGen/IslAst.h"
#include "polly/CodeGen/IslNodeBuilder.h"
#include "polly/CodeGen/PerfMonitor.h"
#include "polly/CodeGen/Utils.h"
#include "polly/DependenceInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
using namespace polly;
using namespace llvm;
#define DEBUG_TYPE "polly-codegen"
// Hidden command line flag -polly-codegen-verify, enabled by default. When it
// is set, CodeGeneration::verifyGeneratedFunction() runs the IR verifier on
// the functions Polly generated and aborts if one of them is invalid.
static cl::opt<bool> Verify("polly-codegen-verify",
cl::desc("Verify the function generated by Polly"),
cl::Hidden, cl::init(true), cl::ZeroOrMore,
cl::cat(PollyCategory));
// Hidden command line flag -polly-codegen-perf-monitoring, disabled by
// default. When it is set, CodeGeneration::runOnScop() uses a PerfMonitor to
// insert region start/end instrumentation around the code of the SCoP.
static cl::opt<bool>
PerfMonitoring("polly-codegen-perf-monitoring",
cl::desc("Add run-time performance monitoring"), cl::Hidden,
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
namespace {
class CodeGeneration : public ScopPass {
public:
static char ID;
CodeGeneration() : ScopPass(ID) {}
/// The datalayout used
Add OpenMP code generation to isl backend This backend supports besides the classical code generation the upcoming SCEV based code generation (which the existing CLooG backend does not support robustly). OpenMP code generation in the isl backend benefits from our run-time alias checks such that the set of loops that can possibly be parallelized is a lot larger. The code was tested on LNT. We do not regress on builds without -polly-parallel. When using -polly-parallel most tests work flawlessly, but a few issues still remain and will be addressed in follow up commits. SCEV/non-SCEV codegen: - Compile time failure in ldecod and TimberWolfMC due a problem in our run-time alias check generation triggered by pointers that escape through the OpenMP subfunction (OpenMP specific). - Several execution time failures. Due to the larger set of loops that we now parallelize (compared to the classical code generation), we currently run into some timeouts in tests with a lot loops that have a low trip count and are slowed down by parallelizing them. SCEV only: - One existing failure in lencod due to llvm.org/PR21204 (not OpenMP specific) OpenMP code generation is the last feature that was only available in the CLooG backend. With the isl backend being the only one supporting features such as run-time alias checks and delinearization, we will soon switch to use the isl ast generator by the default and subsequently remove our dependency on CLooG. http://reviews.llvm.org/D5517 llvm-svn: 222088
2014-11-15 21:32:53 +00:00
const DataLayout *DL;
/// @name The analysis passes we need to generate code.
///
///{
LoopInfo *LI;
IslAstInfo *AI;
DominatorTree *DT;
ScalarEvolution *SE;
Revise the simplification of regions The previous code had several problems: For newly created BasicBlocks it did not (always) call RegionInfo::setRegionFor in order to update its analysis. At the moment RegionInfo does not verify its BBMap, but will in the future. This is fixed by determining the region new BBs belong to and set it accordingly. The new executeScopConditionally() requires accurate getRegionFor information. Which block is created by SplitEdge depends on the incoming and outgoing edges of the blocks it connects, which makes handling its output more difficult than it needs to be. Especially for finding which block has been created an to assign a region to it for the setRegionFor problem above. This patch uses an implementation for splitEdge that always creates a block between the predecessor and successor. simplifyRegion has also been simplified by using SplitBlockPredecessors instead of SplitEdge. Isolating the entries and exits have been refectored into individual functions. Previously simplifyRegion did more than just ensuring that there is only one entering and one exiting edge. It ensured that the entering block had no other outgoing edge which was necessary for executeScopConditionally(). Now the latter uses the alternative splitEdge implementation which can handle this situation so simplifyRegion really only needs to simplify the region. Also, executeScopConditionally assumed that there can be no PHI nodes in blocks with one incoming edge. This is wrong and LCSSA deliberately produces such edges. However, previous passes ensured that there can be no such PHIs in exit nodes, but which will no longer hold in the future. The new code that the property that it preserves the identity of region block (the property that the memory address of the BasicBlock containing the instructions remains the same; new blocks only contain PHI nodes and a terminator), especially the entry block. 
As a result, there is no need to update the reference to the BasicBlock of ScopStmt that contain its instructions because they have been moved to other basic blocks. Reviewers: grosser Part of Differential Revision: http://reviews.llvm.org/D11867 llvm-svn: 244606
2015-08-11 14:39:21 +00:00
RegionInfo *RI;
///}
void verifyGeneratedFunction(Scop &S, Function &F) {
if (!Verify || !verifyFunction(F, &errs()))
return;
DEBUG({
errs() << "== ISL Codegen created an invalid function ==\n\n== The "
"SCoP ==\n";
S.print(errs());
errs() << "\n== The isl AST ==\n";
AI->printScop(errs(), S);
errs() << "\n== The invalid function ==\n";
F.print(errs());
});
llvm_unreachable("Polly generated function could not be verified. Add "
"-polly-codegen-verify=false to disable this assertion.");
}
// CodeGeneration adds a lot of BBs without updating the RegionInfo
// We make all created BBs belong to the scop's parent region without any
// nested structure to keep the RegionInfo verifier happy.
void fixRegionInfo(Function *F, Region *ParentRegion) {
for (BasicBlock &BB : *F) {
if (RI->getRegionFor(&BB))
continue;
RI->setRegionFor(&BB, ParentRegion);
}
}
/// Mark a basic block unreachable.
///
/// Marks the basic block @p Block unreachable by equipping it with an
/// UnreachableInst.
void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) {
auto *OrigTerminator = Block.getTerminator();
Builder.SetInsertPoint(OrigTerminator);
Builder.CreateUnreachable();
OrigTerminator->eraseFromParent();
}
/// Remove all lifetime markers (llvm.lifetime.start, llvm.lifetime.end) from
/// @R.
///
/// CodeGeneration does not copy lifetime markers into the optimized SCoP,
/// which would leave the them only in the original path. This can transform
/// code such as
///
/// llvm.lifetime.start(%p)
/// llvm.lifetime.end(%p)
///
/// into
///
/// if (RTC) {
/// // generated code
/// } else {
/// // original code
/// llvm.lifetime.start(%p)
/// }
/// llvm.lifetime.end(%p)
///
/// The current StackColoring algorithm cannot handle if some, but not all,
/// paths from the end marker to the entry block cross the start marker. Same
/// for start markers that do not always cross the end markers. We avoid any
/// issues by removing all lifetime markers, even from the original code.
///
/// A better solution could be to hoist all llvm.lifetime.start to the split
/// node and all llvm.lifetime.end to the merge node, which should be
/// conservatively correct.
void removeLifetimeMarkers(Region *R) {
for (auto *BB : R->blocks()) {
auto InstIt = BB->begin();
auto InstEnd = BB->end();
while (InstIt != InstEnd) {
auto NextIt = InstIt;
++NextIt;
if (auto *IT = dyn_cast<IntrinsicInst>(&*InstIt)) {
switch (IT->getIntrinsicID()) {
case llvm::Intrinsic::lifetime_start:
case llvm::Intrinsic::lifetime_end:
BB->getInstList().erase(InstIt);
break;
default:
break;
}
}
InstIt = NextIt;
}
}
}
/// Generate LLVM-IR for the SCoP @p S.
bool runOnScop(Scop &S) override {
AI = &getAnalysis<IslAstInfo>();
// Check if we created an isl_ast root node, otherwise exit.
isl_ast_node *AstRoot = AI->getAst();
if (!AstRoot)
return false;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DL = &S.getFunction().getParent()->getDataLayout();
Revise the simplification of regions The previous code had several problems: For newly created BasicBlocks it did not (always) call RegionInfo::setRegionFor in order to update its analysis. At the moment RegionInfo does not verify its BBMap, but will in the future. This is fixed by determining the region new BBs belong to and set it accordingly. The new executeScopConditionally() requires accurate getRegionFor information. Which block is created by SplitEdge depends on the incoming and outgoing edges of the blocks it connects, which makes handling its output more difficult than it needs to be. Especially for finding which block has been created an to assign a region to it for the setRegionFor problem above. This patch uses an implementation for splitEdge that always creates a block between the predecessor and successor. simplifyRegion has also been simplified by using SplitBlockPredecessors instead of SplitEdge. Isolating the entries and exits have been refectored into individual functions. Previously simplifyRegion did more than just ensuring that there is only one entering and one exiting edge. It ensured that the entering block had no other outgoing edge which was necessary for executeScopConditionally(). Now the latter uses the alternative splitEdge implementation which can handle this situation so simplifyRegion really only needs to simplify the region. Also, executeScopConditionally assumed that there can be no PHI nodes in blocks with one incoming edge. This is wrong and LCSSA deliberately produces such edges. However, previous passes ensured that there can be no such PHIs in exit nodes, but which will no longer hold in the future. The new code that the property that it preserves the identity of region block (the property that the memory address of the BasicBlock containing the instructions remains the same; new blocks only contain PHI nodes and a terminator), especially the entry block. 
As a result, there is no need to update the reference to the BasicBlock of ScopStmt that contain its instructions because they have been moved to other basic blocks. Reviewers: grosser Part of Differential Revision: http://reviews.llvm.org/D11867 llvm-svn: 244606
2015-08-11 14:39:21 +00:00
RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
Region *R = &S.getRegion();
assert(!R->isTopLevelRegion() && "Top level regions are not supported");
ScopAnnotator Annotator;
Annotator.buildAliasScopes(S);
Revise the simplification of regions The previous code had several problems: For newly created BasicBlocks it did not (always) call RegionInfo::setRegionFor in order to update its analysis. At the moment RegionInfo does not verify its BBMap, but will in the future. This is fixed by determining the region new BBs belong to and set it accordingly. The new executeScopConditionally() requires accurate getRegionFor information. Which block is created by SplitEdge depends on the incoming and outgoing edges of the blocks it connects, which makes handling its output more difficult than it needs to be. Especially for finding which block has been created an to assign a region to it for the setRegionFor problem above. This patch uses an implementation for splitEdge that always creates a block between the predecessor and successor. simplifyRegion has also been simplified by using SplitBlockPredecessors instead of SplitEdge. Isolating the entries and exits have been refectored into individual functions. Previously simplifyRegion did more than just ensuring that there is only one entering and one exiting edge. It ensured that the entering block had no other outgoing edge which was necessary for executeScopConditionally(). Now the latter uses the alternative splitEdge implementation which can handle this situation so simplifyRegion really only needs to simplify the region. Also, executeScopConditionally assumed that there can be no PHI nodes in blocks with one incoming edge. This is wrong and LCSSA deliberately produces such edges. However, previous passes ensured that there can be no such PHIs in exit nodes, but which will no longer hold in the future. The new code that the property that it preserves the identity of region block (the property that the memory address of the BasicBlock containing the instructions remains the same; new blocks only contain PHI nodes and a terminator), especially the entry block. 
As a result, there is no need to update the reference to the BasicBlock of ScopStmt that contain its instructions because they have been moved to other basic blocks. Reviewers: grosser Part of Differential Revision: http://reviews.llvm.org/D11867 llvm-svn: 244606
2015-08-11 14:39:21 +00:00
simplifyRegion(R, DT, LI, RI);
assert(R->isSimple());
BasicBlock *EnteringBB = S.getEnteringBlock();
Revise the simplification of regions The previous code had several problems: For newly created BasicBlocks it did not (always) call RegionInfo::setRegionFor in order to update its analysis. At the moment RegionInfo does not verify its BBMap, but will in the future. This is fixed by determining the region new BBs belong to and set it accordingly. The new executeScopConditionally() requires accurate getRegionFor information. Which block is created by SplitEdge depends on the incoming and outgoing edges of the blocks it connects, which makes handling its output more difficult than it needs to be. Especially for finding which block has been created an to assign a region to it for the setRegionFor problem above. This patch uses an implementation for splitEdge that always creates a block between the predecessor and successor. simplifyRegion has also been simplified by using SplitBlockPredecessors instead of SplitEdge. Isolating the entries and exits have been refectored into individual functions. Previously simplifyRegion did more than just ensuring that there is only one entering and one exiting edge. It ensured that the entering block had no other outgoing edge which was necessary for executeScopConditionally(). Now the latter uses the alternative splitEdge implementation which can handle this situation so simplifyRegion really only needs to simplify the region. Also, executeScopConditionally assumed that there can be no PHI nodes in blocks with one incoming edge. This is wrong and LCSSA deliberately produces such edges. However, previous passes ensured that there can be no such PHIs in exit nodes, but which will no longer hold in the future. The new code that the property that it preserves the identity of region block (the property that the memory address of the BasicBlock containing the instructions remains the same; new blocks only contain PHI nodes and a terminator), especially the entry block. 
As a result, there is no need to update the reference to the BasicBlock of ScopStmt that contain its instructions because they have been moved to other basic blocks. Reviewers: grosser Part of Differential Revision: http://reviews.llvm.org/D11867 llvm-svn: 244606
2015-08-11 14:39:21 +00:00
assert(EnteringBB);
PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator);
// Only build the run-time condition and parameters _after_ having
// introduced the conditional branch. This is important as the conditional
// branch will guard the original scop from new induction variables that
// the SCEVExpander may introduce while code generating the parameters and
// which may introduce scalar dependences that prevent us from correctly
// code generating this scop.
BasicBlock *StartBlock =
executeScopConditionally(S, Builder.getTrue(), *DT, *RI, *LI);
removeLifetimeMarkers(R);
auto *SplitBlock = StartBlock->getSinglePredecessor();
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. 
Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-07 20:17:36 +00:00
IslNodeBuilder NodeBuilder(Builder, Annotator, *DL, *LI, *SE, *DT, S,
StartBlock);
if (PerfMonitoring) {
PerfMonitor P(EnteringBB->getParent()->getParent());
P.initialize();
P.insertRegionStart(SplitBlock->getTerminator());
BasicBlock *MergeBlock = SplitBlock->getTerminator()
->getSuccessor(0)
->getUniqueSuccessor()
->getUniqueSuccessor();
P.insertRegionEnd(MergeBlock->getTerminator());
}
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. 
Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-07 20:17:36 +00:00
// First generate code for the hoisted invariant loads and transitively the
// parameters they reference. Afterwards, for the remaining parameters that
// might reference the hoisted loads. Finally, build the runtime check
// that might reference both hoisted loads as well as parameters.
// If the hoisting fails we have to bail and execute the original code.
Builder.SetInsertPoint(SplitBlock->getTerminator());
if (!NodeBuilder.preloadInvariantLoads()) {
// Patch the introduced branch condition to ensure that we always execute
// the original SCoP.
auto *FalseI1 = Builder.getFalse();
auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator();
SplitBBTerm->setOperand(0, FalseI1);
// Since the other branch is hence ignored we mark it as unreachable and
// adjust the dominator tree accordingly.
auto *ExitingBlock = StartBlock->getUniqueSuccessor();
assert(ExitingBlock);
auto *MergeBlock = ExitingBlock->getUniqueSuccessor();
assert(MergeBlock);
markBlockUnreachable(*StartBlock, Builder);
markBlockUnreachable(*ExitingBlock, Builder);
auto *ExitingBB = S.getExitingBlock();
assert(ExitingBB);
DT->changeImmediateDominator(MergeBlock, ExitingBB);
DT->eraseNode(ExitingBlock);
isl_ast_node_free(AstRoot);
} else {
NodeBuilder.allocateNewArrays();
NodeBuilder.addParameters(S.getContext());
Value *RTC = NodeBuilder.createRTC(AI->getRunCondition());
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
Builder.SetInsertPoint(&StartBlock->front());
NodeBuilder.create(AstRoot);
NodeBuilder.finalize();
fixRegionInfo(EnteringBB->getParent(), R->getParent());
}
2016-06-06 12:13:24 +00:00
Function *F = EnteringBB->getParent();
verifyGeneratedFunction(S, *F);
for (auto *SubF : NodeBuilder.getParallelSubfunctions())
verifyGeneratedFunction(S, *SubF);
// Mark the function such that we run additional cleanup passes on this
// function (e.g. mem2reg to rediscover phi nodes).
F->addFnAttr("polly-optimized");
return true;
}
/// Register all analyses and transformation required.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<IslAstInfo>();
AU.addRequired<RegionInfoPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<ScopDetection>();
AU.addRequired<ScopInfoRegionPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DependenceInfo>();
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<IslAstInfo>();
AU.addPreserved<ScopDetection>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<SCEVAAWrapperPass>();
// FIXME: We do not yet add regions for the newly generated code to the
// region tree.
AU.addPreserved<RegionInfoPass>();
AU.addPreserved<ScopInfoRegionPass>();
}
};
} // namespace
// Definition of the pass identifier that the CodeGeneration constructor hands
// to the ScopPass base class.
// NOTE(review): presumably only the address of ID is relevant to the pass
// infrastructure, not the value it is initialized with -- confirm.
char CodeGeneration::ID = 1;
/// Create a new instance of the Polly code generation pass.
///
/// @returns A pointer to a CodeGeneration object allocated with `new`.
Pass *polly::createCodeGenerationPass() {
  Pass *CodeGenPass = new CodeGeneration();
  return CodeGenPass;
}
2013-02-22 08:07:06 +00:00
// Register the CodeGeneration pass under the command line name
// "polly-codegen" and list the passes it is declared to depend on.
INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen",
                      "Polly - Create LLVM-IR from SCoPs", false, false);
INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetection);
INITIALIZE_PASS_END(CodeGeneration, "polly-codegen",
                    "Polly - Create LLVM-IR from SCoPs", false, false)