From 64c0ff414109082f14060f318794398e7bc65bde Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Mon, 31 Aug 2015 05:52:24 +0000 Subject: [PATCH] Add support for scalar dependences to OpenMP code generation Scalar dependences between scop statements have caused trouble during parallel code generation as we did not pass on the new stack allocation created for such scalars to the parallel subfunctions. This change now detects all scalar reads/writes in parallel subfunctions, creates the allocas for these scalar objects, passes the resulting memory locations to the subfunctions and ensures that within the subfunction requests for these memory locations will return the rewritten values. Johannes suggested privatizing some of the scalars in the subfunction as a future optimization. llvm-svn: 246414 --- polly/include/polly/CodeGen/BlockGenerators.h | 61 ++++++++++++----- polly/lib/CodeGen/BlockGenerators.cpp | 65 +++++++++++-------- polly/lib/CodeGen/IslNodeBuilder.cpp | 24 +++++-- 3 files changed, 101 insertions(+), 49 deletions(-) diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h index 337993294739..b906e9aed746 100644 --- a/polly/include/polly/CodeGen/BlockGenerators.h +++ b/polly/include/polly/CodeGen/BlockGenerators.h @@ -120,27 +120,42 @@ public: /// If no alloca was mapped to @p ScalarBase a new one is created. /// /// @param ScalarBase The demoted scalar value. + /// @param GlobalMap A mapping from Allocas to other memory locations that + /// can be used to replace the original alloca locations + /// with new memory locations, e.g. when passing values to + /// subfunctions while offloading parallel sections. /// - /// @returns The alloca for @p ScalarBase - AllocaInst *getOrCreateScalarAlloca(Value *ScalarBase); + /// @returns The alloca for @p ScalarBase or a replacement value taken from + /// GlobalMap. 
+ Value *getOrCreateScalarAlloca(Value *ScalarBase, ValueMapT *GlobalMap); /// @brief Return the PHi-node alloca for @p ScalarBase /// /// If no alloca was mapped to @p ScalarBase a new one is created. /// /// @param ScalarBase The demoted scalar value. + /// @param GlobalMap A mapping from Allocas to other memory locations that + /// can be used to replace the original alloca locations + /// with new memory locations, e.g. when passing values to + /// subfunctions while offloading parallel sections. /// - /// @returns The alloca for @p ScalarBase - AllocaInst *getOrCreatePHIAlloca(Value *ScalarBase); + /// @returns The alloca for @p ScalarBase or a replacement value taken from + /// GlobalMap. + Value *getOrCreatePHIAlloca(Value *ScalarBase, ValueMapT *GlobalMap); /// @brief Return the alloca for @p Access /// /// If no alloca was mapped for @p Access a new one is created. /// - /// @param Access The memory access for which to generate the alloca + /// @param Access The memory access for which to generate the alloca + /// @param GlobalMap A mapping from Allocas to other memory locations that + /// can be used to replace the original alloca locations with + /// new memory locations, e.g. when passing values to + /// subfunctions while offloading parallel sections. /// - /// @returns The alloca for @p Access - AllocaInst *getOrCreateAlloca(MemoryAccess &Access); + /// @returns The alloca for @p Access or a replacement value taken from + /// GlobalMap. + Value *getOrCreateAlloca(MemoryAccess &Access, ValueMapT *GlobalMap); /// @brief Finalize the code generation for the SCoP @p S. /// @@ -348,10 +363,15 @@ protected: /// @param ScalarBase The demoted scalar value. /// @param Map The map we should look for a mapped alloca value. /// @param NameExt The suffix we add to the name of a new created alloca. 
+ /// @param GlobalMap A mapping from Allocas to other memory locations that + /// can be used to replace the original alloca locations + /// with new memory locations, e.g. when passing values to + /// subfunctions while offloading parallel sections. /// - /// @returns The alloca for @p ScalarBase in @p Map. - AllocaInst *getOrCreateAlloca(Value *ScalarBase, ScalarAllocaMapTy &Map, - const char *NameExt); + /// @returns The alloca for @p ScalarBase or a replacement value taken from + /// GlobalMap. + Value *getOrCreateAlloca(Value *ScalarBase, ScalarAllocaMapTy &Map, + ValueMapT *GlobalMap, const char *NameExt); /// @brief Generate reload of scalars demoted to memory and needed by @p Inst. /// @@ -359,7 +379,7 @@ protected: /// @param Inst The instruction that might need reloaded values. /// @param BBMap A mapping from old values to their new values in this block. virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap); + ValueMapT &BBMap, ValueMapT &GlobalMap); /// @brief Generate the scalar stores for the given statement. /// @@ -376,10 +396,15 @@ protected: /// @brief Handle users of @p Inst outside the SCoP. /// - /// @param R The current SCoP region. - /// @param Inst The current instruction we check. - /// @param InstCopy The copy of the instruction @p Inst in the optimized SCoP. - void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy); + /// @param R The current SCoP region. + /// @param GlobalMap A mapping from old values to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block). + /// @param Inst The current instruction we check. + /// @param InstCopy The copy of the instruction @p Inst in the optimized + /// SCoP. + void handleOutsideUsers(const Region &R, ValueMapT &GlobalMap, + Instruction *Inst, Value *InstCopy); /// @brief Initialize the memory of demoted scalars. 
/// @@ -449,6 +474,9 @@ protected: LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); + /// @param GlobalMap A mapping from old values to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block). /// @param NewAccesses A map from memory access ids to new ast expressions, /// which may contain new access expressions for certain /// memory accesses. @@ -752,7 +780,8 @@ private: /// @param Inst The instruction that might need reloaded values. /// @param BBMap A mapping from old values to their new values in this block. virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap) override; + ValueMapT &BBMap, + ValueMapT &GlobalMap) override; /// @brief Generate the scalar stores for the given statement. /// diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index 94a7dc70ea91..2f18e29481fe 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -234,7 +234,7 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst, isl_id_to_ast_expr *NewAccesses) { // First check for possible scalar dependences for this instruction. - generateScalarLoads(Stmt, Inst, BBMap); + generateScalarLoads(Stmt, Inst, BBMap, GlobalMap); // Terminator instructions control the control flow. They are explicitly // expressed in the clast and do not need to be copied. 
@@ -321,12 +321,13 @@ void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB, const Region &R = Stmt.getParent()->getRegion(); for (Instruction &Inst : *BB) - handleOutsideUsers(R, &Inst, BBMap[&Inst]); + handleOutsideUsers(R, GlobalMap, &Inst, BBMap[&Inst]); } -AllocaInst *BlockGenerator::getOrCreateAlloca(Value *ScalarBase, - ScalarAllocaMapTy &Map, - const char *NameExt) { +Value *BlockGenerator::getOrCreateAlloca(Value *ScalarBase, + ScalarAllocaMapTy &Map, + ValueMapT *GlobalMap, + const char *NameExt) { // Check if an alloca was cached for the base instruction. AllocaInst *&Addr = Map[ScalarBase]; @@ -334,29 +335,36 @@ AllocaInst *BlockGenerator::getOrCreateAlloca(Value *ScalarBase, if (!Addr) { auto *Ty = ScalarBase->getType(); Addr = new AllocaInst(Ty, ScalarBase->getName() + NameExt); + EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); Addr->insertBefore(EntryBB->getFirstInsertionPt()); } + if (GlobalMap && GlobalMap->count(Addr)) + return (*GlobalMap)[Addr]; + return Addr; } -AllocaInst *BlockGenerator::getOrCreateAlloca(MemoryAccess &Access) { +Value *BlockGenerator::getOrCreateAlloca(MemoryAccess &Access, + ValueMapT *GlobalMap) { if (Access.getScopArrayInfo()->isPHI()) - return getOrCreatePHIAlloca(Access.getBaseAddr()); + return getOrCreatePHIAlloca(Access.getBaseAddr(), GlobalMap); else - return getOrCreateScalarAlloca(Access.getBaseAddr()); + return getOrCreateScalarAlloca(Access.getBaseAddr(), GlobalMap); } -AllocaInst *BlockGenerator::getOrCreateScalarAlloca(Value *ScalarBase) { - return getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); +Value *BlockGenerator::getOrCreateScalarAlloca(Value *ScalarBase, + ValueMapT *GlobalMap) { + return getOrCreateAlloca(ScalarBase, ScalarMap, GlobalMap, ".s2a"); } -AllocaInst *BlockGenerator::getOrCreatePHIAlloca(Value *ScalarBase) { - return getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); +Value *BlockGenerator::getOrCreatePHIAlloca(Value *ScalarBase, + ValueMapT 
*GlobalMap) { + return getOrCreateAlloca(ScalarBase, PHIOpMap, GlobalMap, ".phiops"); } -void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst, - Value *InstCopy) { +void BlockGenerator::handleOutsideUsers(const Region &R, ValueMapT &GlobalMap, + Instruction *Inst, Value *InstCopy) { // If there are escape users we get the alloca for this instruction and put it // in the EscapeMap for later finalization. Lastly, if the instruction was // copied multiple times we already did this and can exit. @@ -382,7 +390,8 @@ void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst, return; // Get or create an escape alloca for this instruction. - AllocaInst *ScalarAddr = getOrCreateScalarAlloca(Inst); + auto *ScalarAddr = + cast(getOrCreateScalarAlloca(Inst, &GlobalMap)); // Remember that this instruction has escape uses and the escape alloca. EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers)); @@ -390,7 +399,8 @@ void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst, void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap) { + ValueMapT &BBMap, + ValueMapT &GlobalMap) { auto *MAL = Stmt.lookupAccessesFor(Inst); if (!MAL) @@ -400,7 +410,7 @@ void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, if (!MA.isScalar() || !MA.isRead()) continue; - auto *Address = getOrCreateAlloca(MA); + auto *Address = getOrCreateAlloca(MA, &GlobalMap); BBMap[MA.getBaseAddr()] = Builder.CreateLoad(Address, Address->getName() + ".reload"); } @@ -437,7 +447,7 @@ Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R, return /* Case (3a) */ ScalarValueCopy; // Case (3b) - Value *Address = getOrCreateScalarAlloca(ScalarValueInst); + Value *Address = getOrCreateScalarAlloca(ScalarValueInst, &GlobalMap); ScalarValue = Builder.CreateLoad(Address, Address->getName() + ".reload"); return ScalarValue; @@ -457,7 +467,7 @@ void 
BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, continue; Value *Val = MA->getAccessValue(); - auto *Address = getOrCreateAlloca(*MA); + auto *Address = getOrCreateAlloca(*MA, &GlobalMap); Val = getNewScalarValue(Val, R, BBMap, GlobalMap); Builder.CreateStore(Val, Address); @@ -500,7 +510,7 @@ void BlockGenerator::createScalarInitialization(Scop &S) { Value *ScalarValue = PHI->getIncomingValue(Idx); - Builder.CreateStore(ScalarValue, getOrCreatePHIAlloca(PHI)); + Builder.CreateStore(ScalarValue, getOrCreatePHIAlloca(PHI, nullptr)); continue; } @@ -509,8 +519,9 @@ void BlockGenerator::createScalarInitialization(Scop &S) { if (Inst && R.contains(Inst)) continue; + ValueMapT EmptyMap; Builder.CreateStore(Array->getBasePtr(), - getOrCreateScalarAlloca(Array->getBasePtr())); + getOrCreateScalarAlloca(Array->getBasePtr(), nullptr)); } } @@ -532,7 +543,7 @@ void BlockGenerator::createScalarFinalization(Region &R) { Instruction *EscapeInst = EscapeMapping.getFirst(); const auto &EscapeMappingValue = EscapeMapping.getSecond(); const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second; - AllocaInst *ScalarAddr = EscapeMappingValue.first; + Value *ScalarAddr = EscapeMappingValue.first; // Reload the demoted instruction in the optimized version of the SCoP. Instruction *EscapeInstReload = @@ -1068,7 +1079,8 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, void RegionGenerator::generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap) { + ValueMapT &BBMap, + ValueMapT &GlobalMap) { // Inside a non-affine region PHI nodes are copied not demoted. 
Once the // phi is copied it will reload all inputs from outside the region, hence @@ -1077,7 +1089,7 @@ void RegionGenerator::generateScalarLoads(ScopStmt &Stmt, if (isa(Inst)) return; - return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap); + return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap, GlobalMap); } void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, @@ -1102,7 +1114,7 @@ void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, Value *Val = MA->getAccessValue(); - auto Address = getOrCreateAlloca(*MA); + auto Address = getOrCreateAlloca(*MA, &GlobalMap); Val = getNewScalarValue(Val, R, BBMap, GlobalMap); Builder.CreateStore(Val, Address); @@ -1139,7 +1151,8 @@ void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI, if (PHICopy->getBasicBlockIndex(BBCopy) >= 0) return; - AllocaInst *PHIOpAddr = getOrCreatePHIAlloca(const_cast(PHI)); + Value *PHIOpAddr = + getOrCreatePHIAlloca(const_cast(PHI), &GlobalMap); OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload", BlockMap[IncomingBB]->getTerminator()); } diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 3590ab5542a8..4c766f7aaa26 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -178,6 +178,7 @@ struct FindValuesUser { Region &R; SetVector &Values; SetVector &SCEVs; + BlockGenerator &BlockGen; }; /// @brief Extract the values and SCEVs needed to generate code for a block. 
@@ -192,12 +193,6 @@ static int findValuesInBlock(struct FindValuesUser &User, const ScopStmt *Stmt, User.SE.getSCEVAtScope(OpInst, User.LI.getLoopFor(BB))); continue; } - if (Instruction *OpInst = dyn_cast(SrcVal)) - if (Stmt->getParent()->getRegion().contains(OpInst)) - continue; - - if (isa(SrcVal) || isa(SrcVal)) - User.Values.insert(SrcVal); } } return 0; @@ -222,6 +217,20 @@ static isl_stat findValuesInStmt(isl_set *Set, void *UserPtr) { findValuesInBlock(User, Stmt, BB); } + for (auto &Access : *Stmt) { + if (!Access->isScalar()) { + auto *BasePtr = Access->getScopArrayInfo()->getBasePtr(); + if (Instruction *OpInst = dyn_cast(BasePtr)) + if (Stmt->getParent()->getRegion().contains(OpInst)) + continue; + + User.Values.insert(BasePtr); + continue; + } + + User.Values.insert(User.BlockGen.getOrCreateAlloca(*Access, nullptr)); + } + isl_id_free(Id); isl_set_free(Set); return isl_stat_ok; @@ -232,7 +241,8 @@ void IslNodeBuilder::getReferencesInSubtree(__isl_keep isl_ast_node *For, SetVector &Loops) { SetVector SCEVs; - struct FindValuesUser FindValues = {LI, SE, S.getRegion(), Values, SCEVs}; + struct FindValuesUser FindValues = {LI, SE, S.getRegion(), + Values, SCEVs, getBlockGenerator()}; for (const auto &I : IDToValue) Values.insert(I.second);