mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 12:26:52 +08:00
[OMPIRBuilder] Clarify CanonicalLoopInfo. NFC.
Add in-source documentation on how CanonicalLoopInfo is intended to be used. In particular, clarify what parts of a CanonicalLoopInfo is considered part of the loop, that those parts must be side-effect free, and that InsertPoints to instructions outside those parts can be expected to be preserved after method calls implementing loop-associated directives. CanonicalLoopInfo are now invalidated after it does not describe canonical loop anymore and asserts when trying to use it afterwards. In addition, rename `createXYZWorkshareLoop` to `applyXYZWorkshareLoop` and remove the update location to avoid that the impression that they insert something from scratch at that location where in reality its InsertPoint is ignored. createStaticWorkshareLoop does not return a CanonicalLoopInfo anymore. First, it was not a canonical loop in the clarified sense (containing side-effects in form of calls to the OpenMP runtime). Second, it is ambiguous which of the two possible canonical loops it should actually return. It will not be needed before a feature expected to be introduced in OpenMP 6.0 Also see discussion in D105706. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D107540
This commit is contained in:
@@ -3621,7 +3621,8 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
|
||||
CGM.getOpenMPRuntime().getOMPBuilder();
|
||||
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
|
||||
AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
|
||||
OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier);
|
||||
OMPBuilder.applyWorkshareLoop(Builder.getCurrentDebugLocation(), CLI,
|
||||
AllocaIP, NeedsBarrier);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -257,18 +257,17 @@ public:
|
||||
///
|
||||
/// * Sign of the step and the comparison operator might disagree:
|
||||
///
|
||||
/// for (int i = 0; i < 42; --i)
|
||||
/// for (int i = 0; i < 42; i -= 1u)
|
||||
///
|
||||
//
|
||||
/// \param Loc The insert and source location description.
|
||||
/// \param BodyGenCB Callback that will generate the loop body code.
|
||||
/// \param Start Value of the loop counter for the first iterations.
|
||||
/// \param Stop Loop counter values past this will stop the the
|
||||
/// iterations.
|
||||
/// \param Stop Loop counter values past this will stop the loop.
|
||||
/// \param Step Loop counter increment after each iteration; negative
|
||||
/// means counting down. \param IsSigned Whether Start, Stop
|
||||
/// and Stop are signed integers.
|
||||
/// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
|
||||
/// means counting down.
|
||||
/// \param IsSigned Whether Start, Stop and Step are signed integers.
|
||||
/// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
|
||||
/// counter.
|
||||
/// \param ComputeIP Insertion point for instructions computing the trip
|
||||
/// count. Can be used to ensure the trip count is available
|
||||
@@ -335,7 +334,7 @@ public:
|
||||
/// has a trip count of 0). This is permitted by the OpenMP specification.
|
||||
///
|
||||
/// \param DL Debug location for instructions added for collapsing,
|
||||
/// such as instructions to compute derive the input loop's
|
||||
/// such as instructions to compute/derive the input loop's
|
||||
/// induction variables.
|
||||
/// \param Loops Loops in the loop nest to collapse. Loops are specified
|
||||
/// from outermost-to-innermost and every control flow of a
|
||||
@@ -358,8 +357,16 @@ public:
|
||||
/// the current thread, updates the relevant instructions in the canonical
|
||||
/// loop and calls to an OpenMP runtime finalization function after the loop.
|
||||
///
|
||||
/// \param Loc The source location description, the insertion location
|
||||
/// is not used.
|
||||
/// TODO: Workshare loops with static scheduling may contain up to two loops
|
||||
/// that fulfill the requirements of an OpenMP canonical loop. One for
|
||||
/// iterating over all iterations of a chunk and another one for iterating
|
||||
/// over all chunks that are executed on the same thread. Returning
|
||||
/// CanonicalLoopInfo objects representing them may eventually be useful for
|
||||
/// the apply clause planned in OpenMP 6.0, but currently whether these are
|
||||
/// canonical loops is irrelevant.
|
||||
///
|
||||
/// \param DL Debug location for instructions added for the
|
||||
/// workshare-loop construct itself.
|
||||
/// \param CLI A descriptor of the canonical loop to workshare.
|
||||
/// \param AllocaIP An insertion point for Alloca instructions usable in the
|
||||
/// preheader of the loop.
|
||||
@@ -368,12 +375,11 @@ public:
|
||||
/// \param Chunk The size of loop chunk considered as a unit when
|
||||
/// scheduling. If \p nullptr, defaults to 1.
|
||||
///
|
||||
/// \returns Updated CanonicalLoopInfo.
|
||||
CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc,
|
||||
CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP,
|
||||
bool NeedsBarrier,
|
||||
Value *Chunk = nullptr);
|
||||
/// \returns Point where to insert code after the workshare construct.
|
||||
InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP,
|
||||
bool NeedsBarrier,
|
||||
Value *Chunk = nullptr);
|
||||
|
||||
/// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
|
||||
///
|
||||
@@ -382,8 +388,9 @@ public:
|
||||
/// turn it into a workshare loop. In particular, it calls to an OpenMP
|
||||
/// runtime function in the preheader to obtain, and then in each iteration
|
||||
/// to update the loop counter.
|
||||
/// \param Loc The source location description, the insertion location
|
||||
/// is not used.
|
||||
///
|
||||
/// \param DL Debug location for instructions added for the
|
||||
/// workshare-loop construct itself.
|
||||
/// \param CLI A descriptor of the canonical loop to workshare.
|
||||
/// \param AllocaIP An insertion point for Alloca instructions usable in the
|
||||
/// preheader of the loop.
|
||||
@@ -393,13 +400,12 @@ public:
|
||||
/// \param Chunk The size of loop chunk considered as a unit when
|
||||
/// scheduling. If \p nullptr, defaults to 1.
|
||||
///
|
||||
/// \returns Point where to insert code after the loop.
|
||||
InsertPointTy createDynamicWorkshareLoop(const LocationDescription &Loc,
|
||||
CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP,
|
||||
omp::OMPScheduleType SchedType,
|
||||
bool NeedsBarrier,
|
||||
Value *Chunk = nullptr);
|
||||
/// \returns Point where to insert code after the workshare construct.
|
||||
InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP,
|
||||
omp::OMPScheduleType SchedType,
|
||||
bool NeedsBarrier,
|
||||
Value *Chunk = nullptr);
|
||||
|
||||
/// Modifies the canonical loop to be a workshare loop.
|
||||
///
|
||||
@@ -410,19 +416,17 @@ public:
|
||||
/// the current thread, updates the relevant instructions in the canonical
|
||||
/// loop and calls to an OpenMP runtime finalization function after the loop.
|
||||
///
|
||||
/// \param Loc The source location description, the insertion location
|
||||
/// is not used.
|
||||
/// \param DL Debug location for instructions added for the
|
||||
/// workshare-loop construct itself.
|
||||
/// \param CLI A descriptor of the canonical loop to workshare.
|
||||
/// \param AllocaIP An insertion point for Alloca instructions usable in the
|
||||
/// preheader of the loop.
|
||||
/// \param NeedsBarrier Indicates whether a barrier must be insterted after
|
||||
/// the loop.
|
||||
///
|
||||
/// \returns Updated CanonicalLoopInfo.
|
||||
CanonicalLoopInfo *createWorkshareLoop(const LocationDescription &Loc,
|
||||
CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP,
|
||||
bool NeedsBarrier);
|
||||
/// \returns Point where to insert code after the workshare construct.
|
||||
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP, bool NeedsBarrier);
|
||||
|
||||
/// Tile a loop nest.
|
||||
///
|
||||
@@ -637,6 +641,10 @@ public:
|
||||
Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
|
||||
unsigned Line, unsigned Column);
|
||||
|
||||
/// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
|
||||
/// fallback if \p DL does not specify the function name.
|
||||
Constant *getOrCreateSrcLocStr(DebugLoc DL, Function *F = nullptr);
|
||||
|
||||
/// Return the (LLVM-IR) string describing the source location \p Loc.
|
||||
Constant *getOrCreateSrcLocStr(const LocationDescription &Loc);
|
||||
|
||||
@@ -1243,7 +1251,25 @@ public:
|
||||
/// The control-flow structure is standardized for easy consumption by
|
||||
/// directives associated with loops. For instance, the worksharing-loop
|
||||
/// construct may change this control flow such that each loop iteration is
|
||||
/// executed on only one thread.
|
||||
/// executed on only one thread. The constraints of a canonical loop in brief
|
||||
/// are:
|
||||
///
|
||||
/// * The number of loop iterations must have been computed before entering the
|
||||
/// loop.
|
||||
///
|
||||
/// * Has an (unsigned) logical induction variable that starts at zero and
|
||||
/// increments by one.
|
||||
///
|
||||
/// * The loop's CFG itself has no side-effects. The OpenMP specification
|
||||
/// itself allows side-effects, but the order in which they happen, including
|
||||
/// how often or whether at all, is unspecified. We expect that the frontend
|
||||
/// will emit those side-effect instructions somewhere (e.g. before the loop)
|
||||
/// such that the CanonicalLoopInfo itself can be side-effect free.
|
||||
///
|
||||
/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
|
||||
/// execution of a loop body that satifies these constraints. It does NOT
|
||||
/// represent arbitrary SESE regions that happen to contain a loop. Do not use
|
||||
/// CanonicalLoopInfo for such purposes.
|
||||
///
|
||||
/// The control flow can be described as follows:
|
||||
///
|
||||
@@ -1263,73 +1289,149 @@ public:
|
||||
/// |
|
||||
/// After
|
||||
///
|
||||
/// Code in the header, condition block, latch and exit block must not have any
|
||||
/// side-effect. The body block is the single entry point into the loop body,
|
||||
/// which may contain arbitrary control flow as long as all control paths
|
||||
/// eventually branch to the latch block.
|
||||
/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
|
||||
/// including) and end at AfterIP (at the After's first instruction, excluding).
|
||||
/// That is, instructions in the Preheader and After blocks (except the
|
||||
/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
|
||||
/// side-effects. Typically, the Preheader is used to compute the loop's trip
|
||||
/// count. The instructions from BodyIP (at the Body block's first instruction,
|
||||
/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
|
||||
/// control and thus can have side-effects. The body block is the single entry
|
||||
/// point into the loop body, which may contain arbitrary control flow as long
|
||||
/// as all control paths eventually branch to the Latch block.
|
||||
///
|
||||
/// Defined outside OpenMPIRBuilder because one cannot forward-declare nested
|
||||
/// classes.
|
||||
/// TODO: Consider adding another standardized BasicBlock between Body CFG and
|
||||
/// Latch to guarantee that there is only a single edge to the latch. It would
|
||||
/// make loop transformations easier to not needing to consider multiple
|
||||
/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
|
||||
/// an equivalant to PreheaderIP, AfterIP and BodyIP for inserting code that
|
||||
/// executes after each body iteration.
|
||||
///
|
||||
/// There must be no loop-carried dependencies through llvm::Values. This is
|
||||
/// equivalant to that the Latch has no PHINode and the Header's only PHINode is
|
||||
/// for the induction variable.
|
||||
///
|
||||
/// All code in Header, Cond, Latch and Exit (plus the terminator of the
|
||||
/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
|
||||
/// by assertOK(). They are expected to not be modified unless explicitly
|
||||
/// modifying the CanonicalLoopInfo through a methods that applies a OpenMP
|
||||
/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
|
||||
/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
|
||||
/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
|
||||
/// anymore as its underlying control flow may not exist anymore.
|
||||
/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
|
||||
/// may also return a new CanonicalLoopInfo that can be passed to other
|
||||
/// loop-associated construct implementing methods. These loop-transforming
|
||||
/// methods may either create a new CanonicalLoopInfo usually using
|
||||
/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
|
||||
/// modify one of the input CanonicalLoopInfo and return it as representing the
|
||||
/// modified loop. What is done is an implementation detail of
|
||||
/// transformation-implementing method and callers should always assume that the
|
||||
/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
|
||||
/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
|
||||
/// created by createCanonicalLoop, such that transforming methods do not have
|
||||
/// to special case where the CanonicalLoopInfo originated from.
|
||||
///
|
||||
/// Generally, methods consuming CanonicalLoopInfo do not need an
|
||||
/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
|
||||
/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
|
||||
/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
|
||||
/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
|
||||
/// any InsertPoint in the Preheader, After or Block can still be used after
|
||||
/// calling such a method.
|
||||
///
|
||||
/// TODO: Provide mechanisms for exception handling and cancellation points.
|
||||
///
|
||||
/// Defined outside OpenMPIRBuilder because nested classes cannot be
|
||||
/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
|
||||
class CanonicalLoopInfo {
|
||||
friend class OpenMPIRBuilder;
|
||||
|
||||
private:
|
||||
/// Whether this object currently represents a loop.
|
||||
bool IsValid = false;
|
||||
|
||||
BasicBlock *Preheader;
|
||||
BasicBlock *Header;
|
||||
BasicBlock *Cond;
|
||||
BasicBlock *Body;
|
||||
BasicBlock *Latch;
|
||||
BasicBlock *Exit;
|
||||
BasicBlock *After;
|
||||
BasicBlock *Preheader = nullptr;
|
||||
BasicBlock *Header = nullptr;
|
||||
BasicBlock *Cond = nullptr;
|
||||
BasicBlock *Body = nullptr;
|
||||
BasicBlock *Latch = nullptr;
|
||||
BasicBlock *Exit = nullptr;
|
||||
BasicBlock *After = nullptr;
|
||||
|
||||
/// Add the control blocks of this loop to \p BBs.
|
||||
///
|
||||
/// This does not include any block from the body, including the one returned
|
||||
/// by getBody().
|
||||
///
|
||||
/// FIXME: This currently includes the Preheader and After blocks even though
|
||||
/// their content is (mostly) not under CanonicalLoopInfo's control.
|
||||
/// Re-evaluated whether this makes sense.
|
||||
void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
|
||||
|
||||
public:
|
||||
/// Returns whether this object currently represents the IR of a loop. If
|
||||
/// returning false, it may have been consumed by a loop transformation or not
|
||||
/// been intialized. Do not use in this case;
|
||||
bool isValid() const { return Header; }
|
||||
|
||||
/// The preheader ensures that there is only a single edge entering the loop.
|
||||
/// Code that must be execute before any loop iteration can be emitted here,
|
||||
/// such as computing the loop trip count and begin lifetime markers. Code in
|
||||
/// the preheader is not considered part of the canonical loop.
|
||||
BasicBlock *getPreheader() const { return Preheader; }
|
||||
BasicBlock *getPreheader() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return Preheader;
|
||||
}
|
||||
|
||||
/// The header is the entry for each iteration. In the canonical control flow,
|
||||
/// it only contains the PHINode for the induction variable.
|
||||
BasicBlock *getHeader() const { return Header; }
|
||||
BasicBlock *getHeader() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return Header;
|
||||
}
|
||||
|
||||
/// The condition block computes whether there is another loop iteration. If
|
||||
/// yes, branches to the body; otherwise to the exit block.
|
||||
BasicBlock *getCond() const { return Cond; }
|
||||
BasicBlock *getCond() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return Cond;
|
||||
}
|
||||
|
||||
/// The body block is the single entry for a loop iteration and not controlled
|
||||
/// by CanonicalLoopInfo. It can contain arbitrary control flow but must
|
||||
/// eventually branch to the \p Latch block.
|
||||
BasicBlock *getBody() const { return Body; }
|
||||
BasicBlock *getBody() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return Body;
|
||||
}
|
||||
|
||||
/// Reaching the latch indicates the end of the loop body code. In the
|
||||
/// canonical control flow, it only contains the increment of the induction
|
||||
/// variable.
|
||||
BasicBlock *getLatch() const { return Latch; }
|
||||
BasicBlock *getLatch() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return Latch;
|
||||
}
|
||||
|
||||
/// Reaching the exit indicates no more iterations are being executed.
|
||||
BasicBlock *getExit() const { return Exit; }
|
||||
BasicBlock *getExit() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return Exit;
|
||||
}
|
||||
|
||||
/// The after block is intended for clean-up code such as lifetime end
|
||||
/// markers. It is separate from the exit block to ensure, analogous to the
|
||||
/// preheader, it having just a single entry edge and being free from PHI
|
||||
/// nodes should there be multiple loop exits (such as from break
|
||||
/// statements/cancellations).
|
||||
BasicBlock *getAfter() const { return After; }
|
||||
BasicBlock *getAfter() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return After;
|
||||
}
|
||||
|
||||
/// Returns the llvm::Value containing the number of loop iterations. It must
|
||||
/// be valid in the preheader and always interpreted as an unsigned integer of
|
||||
/// any bit-width.
|
||||
Value *getTripCount() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
Instruction *CmpI = &Cond->front();
|
||||
assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
|
||||
return CmpI->getOperand(1);
|
||||
@@ -1338,33 +1440,47 @@ public:
|
||||
/// Returns the instruction representing the current logical induction
|
||||
/// variable. Always unsigned, always starting at 0 with an increment of one.
|
||||
Instruction *getIndVar() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
Instruction *IndVarPHI = &Header->front();
|
||||
assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
|
||||
return IndVarPHI;
|
||||
}
|
||||
|
||||
/// Return the type of the induction variable (and the trip count).
|
||||
Type *getIndVarType() const { return getIndVar()->getType(); }
|
||||
Type *getIndVarType() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return getIndVar()->getType();
|
||||
}
|
||||
|
||||
/// Return the insertion point for user code before the loop.
|
||||
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return {Preheader, std::prev(Preheader->end())};
|
||||
};
|
||||
|
||||
/// Return the insertion point for user code in the body.
|
||||
OpenMPIRBuilder::InsertPointTy getBodyIP() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return {Body, Body->begin()};
|
||||
};
|
||||
|
||||
/// Return the insertion point for user code after the loop.
|
||||
OpenMPIRBuilder::InsertPointTy getAfterIP() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return {After, After->begin()};
|
||||
};
|
||||
|
||||
Function *getFunction() const { return Header->getParent(); }
|
||||
Function *getFunction() const {
|
||||
assert(isValid() && "Requires a valid canonical loop");
|
||||
return Header->getParent();
|
||||
}
|
||||
|
||||
/// Consistency self-check.
|
||||
void assertOK() const;
|
||||
|
||||
/// Invalidate this loop. That is, the underlying IR does not fulfill the
|
||||
/// requirements of an OpenMP canonical loop anymore.
|
||||
void invalidate();
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
@@ -310,9 +310,8 @@ Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
|
||||
return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
|
||||
}
|
||||
|
||||
Constant *
|
||||
OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
|
||||
DILocation *DIL = Loc.DL.get();
|
||||
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) {
|
||||
DILocation *DIL = DL.get();
|
||||
if (!DIL)
|
||||
return getOrCreateDefaultSrcLocStr();
|
||||
StringRef FileName = M.getName();
|
||||
@@ -320,12 +319,17 @@ OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
|
||||
if (Optional<StringRef> Source = DIF->getSource())
|
||||
FileName = *Source;
|
||||
StringRef Function = DIL->getScope()->getSubprogram()->getName();
|
||||
Function =
|
||||
!Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
|
||||
if (Function.empty() && F)
|
||||
Function = F->getName();
|
||||
return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
|
||||
DIL->getColumn());
|
||||
}
|
||||
|
||||
Constant *
|
||||
OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
|
||||
return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent());
|
||||
}
|
||||
|
||||
Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
|
||||
return Builder.CreateCall(
|
||||
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
|
||||
@@ -965,8 +969,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
|
||||
Value *ST = ConstantInt::get(I32Ty, 1);
|
||||
llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
|
||||
Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
|
||||
LoopInfo = createStaticWorkshareLoop(Loc, LoopInfo, AllocaIP, true);
|
||||
BasicBlock *LoopAfterBB = LoopInfo->getAfter();
|
||||
InsertPointTy AfterIP =
|
||||
applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true);
|
||||
BasicBlock *LoopAfterBB = AfterIP.getBlock();
|
||||
Instruction *SplitPos = LoopAfterBB->getTerminator();
|
||||
if (!isa_and_nonnull<BranchInst>(SplitPos))
|
||||
SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
|
||||
@@ -1306,8 +1311,6 @@ CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
|
||||
CL->Exit = Exit;
|
||||
CL->After = After;
|
||||
|
||||
CL->IsValid = true;
|
||||
|
||||
#ifndef NDEBUG
|
||||
CL->assertOK();
|
||||
#endif
|
||||
@@ -1444,14 +1447,17 @@ void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
|
||||
CLI->assertOK();
|
||||
}
|
||||
|
||||
CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
|
||||
const LocationDescription &Loc, CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
|
||||
// Set up the source location value for OpenMP runtime.
|
||||
if (!updateToLocation(Loc))
|
||||
return nullptr;
|
||||
OpenMPIRBuilder::InsertPointTy
|
||||
OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP,
|
||||
bool NeedsBarrier, Value *Chunk) {
|
||||
assert(CLI->isValid() && "Requires a valid canonical loop");
|
||||
|
||||
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
|
||||
// Set up the source location value for OpenMP runtime.
|
||||
Builder.restoreIP(CLI->getPreheaderIP());
|
||||
Builder.SetCurrentDebugLocation(DL);
|
||||
|
||||
Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
|
||||
Value *SrcLoc = getOrCreateIdent(SrcLocStr);
|
||||
|
||||
// Declare useful OpenMP runtime functions.
|
||||
@@ -1481,6 +1487,7 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
|
||||
Builder.CreateStore(UpperBound, PUpperBound);
|
||||
Builder.CreateStore(One, PStride);
|
||||
|
||||
// FIXME: schedule(static) is NOT the same as schedule(static,1)
|
||||
if (!Chunk)
|
||||
Chunk = One;
|
||||
|
||||
@@ -1521,19 +1528,21 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
|
||||
|
||||
// Add the barrier if requested.
|
||||
if (NeedsBarrier)
|
||||
createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
|
||||
createBarrier(LocationDescription(Builder.saveIP(), DL),
|
||||
omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
|
||||
/* CheckCancelFlag */ false);
|
||||
|
||||
CLI->assertOK();
|
||||
return CLI;
|
||||
InsertPointTy AfterIP = CLI->getAfterIP();
|
||||
CLI->invalidate();
|
||||
|
||||
return AfterIP;
|
||||
}
|
||||
|
||||
CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop(
|
||||
const LocationDescription &Loc, CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP, bool NeedsBarrier) {
|
||||
OpenMPIRBuilder::InsertPointTy
|
||||
OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP, bool NeedsBarrier) {
|
||||
// Currently only supports static schedules.
|
||||
return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
|
||||
return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
|
||||
}
|
||||
|
||||
/// Returns an LLVM function to call for initializing loop bounds using OpenMP
|
||||
@@ -1568,14 +1577,15 @@ getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
|
||||
llvm_unreachable("unknown OpenMP loop iterator bitwidth");
|
||||
}
|
||||
|
||||
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
|
||||
const LocationDescription &Loc, CanonicalLoopInfo *CLI,
|
||||
InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier,
|
||||
Value *Chunk) {
|
||||
// Set up the source location value for OpenMP runtime.
|
||||
Builder.SetCurrentDebugLocation(Loc.DL);
|
||||
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
|
||||
DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
|
||||
OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
|
||||
assert(CLI->isValid() && "Requires a valid canonical loop");
|
||||
|
||||
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
|
||||
// Set up the source location value for OpenMP runtime.
|
||||
Builder.SetCurrentDebugLocation(DL);
|
||||
|
||||
Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
|
||||
Value *SrcLoc = getOrCreateIdent(SrcLocStr);
|
||||
|
||||
// Declare useful OpenMP runtime functions.
|
||||
@@ -1669,11 +1679,12 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
|
||||
// Add the barrier if requested.
|
||||
if (NeedsBarrier) {
|
||||
Builder.SetInsertPoint(&Exit->back());
|
||||
createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
|
||||
createBarrier(LocationDescription(Builder.saveIP(), DL),
|
||||
omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
|
||||
/* CheckCancelFlag */ false);
|
||||
}
|
||||
|
||||
CLI->invalidate();
|
||||
return AfterIP;
|
||||
}
|
||||
|
||||
@@ -1765,6 +1776,8 @@ OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
|
||||
// TODO: Find common/largest indvar type.
|
||||
Value *CollapsedTripCount = nullptr;
|
||||
for (CanonicalLoopInfo *L : Loops) {
|
||||
assert(L->isValid() &&
|
||||
"All loops to collapse must be valid canonical loops");
|
||||
Value *OrigTripCount = L->getTripCount();
|
||||
if (!CollapsedTripCount) {
|
||||
CollapsedTripCount = OrigTripCount;
|
||||
@@ -1853,6 +1866,9 @@ OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
|
||||
Loop->collectControlBlocks(OldControlBBs);
|
||||
removeUnusedBlocksFromParent(OldControlBBs);
|
||||
|
||||
for (CanonicalLoopInfo *L : Loops)
|
||||
L->invalidate();
|
||||
|
||||
#ifndef NDEBUG
|
||||
Result->assertOK();
|
||||
#endif
|
||||
@@ -1879,6 +1895,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
|
||||
// any original CanonicalLoopInfo.
|
||||
SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
|
||||
for (CanonicalLoopInfo *L : Loops) {
|
||||
assert(L->isValid() && "All input loops must be valid canonical loops");
|
||||
OrigTripCounts.push_back(L->getTripCount());
|
||||
OrigIndVars.push_back(L->getIndVar());
|
||||
}
|
||||
@@ -2037,6 +2054,9 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
|
||||
Loop->collectControlBlocks(OldControlBBs);
|
||||
removeUnusedBlocksFromParent(OldControlBBs);
|
||||
|
||||
for (CanonicalLoopInfo *L : Loops)
|
||||
L->invalidate();
|
||||
|
||||
#ifndef NDEBUG
|
||||
for (CanonicalLoopInfo *GenL : Result)
|
||||
GenL->assertOK();
|
||||
@@ -2922,7 +2942,8 @@ void CanonicalLoopInfo::collectControlBlocks(
|
||||
|
||||
void CanonicalLoopInfo::assertOK() const {
|
||||
#ifndef NDEBUG
|
||||
if (!IsValid)
|
||||
// No constraints if this object currently does not describe a loop.
|
||||
if (!isValid())
|
||||
return;
|
||||
|
||||
// Verify standard control-flow we use for OpenMP loops.
|
||||
@@ -3008,3 +3029,13 @@ void CanonicalLoopInfo::assertOK() const {
|
||||
"Exit condition must compare with the trip count");
|
||||
#endif
|
||||
}
|
||||
|
||||
void CanonicalLoopInfo::invalidate() {
|
||||
Preheader = nullptr;
|
||||
Header = nullptr;
|
||||
Cond = nullptr;
|
||||
Body = nullptr;
|
||||
Latch = nullptr;
|
||||
Exit = nullptr;
|
||||
After = nullptr;
|
||||
}
|
||||
|
||||
@@ -1586,6 +1586,7 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
|
||||
CanonicalLoopInfo *Loop =
|
||||
OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
|
||||
StepVal, IsSigned, InclusiveStop);
|
||||
InsertPointTy AfterIP = Loop->getAfterIP();
|
||||
|
||||
// Tile the loop.
|
||||
Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
|
||||
@@ -1594,7 +1595,7 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
|
||||
|
||||
// Set the insertion pointer to after loop, where the next loop will be
|
||||
// emitted.
|
||||
Builder.restoreIP(Loop->getAfterIP());
|
||||
Builder.restoreIP(AfterIP);
|
||||
|
||||
// Extract the trip count.
|
||||
CanonicalLoopInfo *FloorLoop = GenLoops[0];
|
||||
@@ -1663,12 +1664,20 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
|
||||
CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
|
||||
Loc, LoopBodyGen, StartVal, StopVal, StepVal,
|
||||
/*IsSigned=*/false, /*InclusiveStop=*/false);
|
||||
BasicBlock *Preheader = CLI->getPreheader();
|
||||
BasicBlock *Body = CLI->getBody();
|
||||
Value *IV = CLI->getIndVar();
|
||||
BasicBlock *ExitBlock = CLI->getExit();
|
||||
|
||||
Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
|
||||
InsertPointTy AllocaIP = Builder.saveIP();
|
||||
|
||||
CLI = OMPBuilder.createStaticWorkshareLoop(Loc, CLI, AllocaIP,
|
||||
/*NeedsBarrier=*/true);
|
||||
OMPBuilder.applyStaticWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true);
|
||||
|
||||
BasicBlock *Cond = Body->getSinglePredecessor();
|
||||
Instruction *Cmp = &*Cond->begin();
|
||||
Value *TripCount = Cmp->getOperand(1);
|
||||
|
||||
auto AllocaIter = BB->begin();
|
||||
ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
|
||||
AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
|
||||
@@ -1680,10 +1689,8 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
|
||||
EXPECT_NE(PUpperBound, nullptr);
|
||||
EXPECT_NE(PStride, nullptr);
|
||||
|
||||
auto PreheaderIter = CLI->getPreheader()->begin();
|
||||
ASSERT_GE(
|
||||
std::distance(CLI->getPreheader()->begin(), CLI->getPreheader()->end()),
|
||||
7);
|
||||
auto PreheaderIter = Preheader->begin();
|
||||
ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7);
|
||||
StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
|
||||
StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
|
||||
StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
|
||||
@@ -1705,15 +1712,15 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
|
||||
|
||||
// Check that the loop IV is updated to account for the lower bound returned
|
||||
// by the OpenMP runtime call.
|
||||
BinaryOperator *Add = dyn_cast<BinaryOperator>(&CLI->getBody()->front());
|
||||
EXPECT_EQ(Add->getOperand(0), CLI->getIndVar());
|
||||
BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front());
|
||||
EXPECT_EQ(Add->getOperand(0), IV);
|
||||
auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1));
|
||||
ASSERT_NE(LoadedLowerBound, nullptr);
|
||||
EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound);
|
||||
|
||||
// Check that the trip count is updated to account for the lower and upper
|
||||
// bounds return by the OpenMP runtime call.
|
||||
auto *AddOne = dyn_cast<Instruction>(CLI->getTripCount());
|
||||
auto *AddOne = dyn_cast<Instruction>(TripCount);
|
||||
ASSERT_NE(AddOne, nullptr);
|
||||
ASSERT_TRUE(AddOne->isBinaryOp());
|
||||
auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1));
|
||||
@@ -1729,12 +1736,10 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
|
||||
|
||||
// The original loop iterator should only be used in the condition, in the
|
||||
// increment and in the statement that adds the lower bound to it.
|
||||
Value *IV = CLI->getIndVar();
|
||||
EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
|
||||
|
||||
// The exit block should contain the "fini" call and the barrier call,
|
||||
// plus the call to obtain the thread ID.
|
||||
BasicBlock *ExitBlock = CLI->getExit();
|
||||
size_t NumCallsInExitBlock =
|
||||
count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
|
||||
EXPECT_EQ(NumCallsInExitBlock, 3u);
|
||||
@@ -1785,8 +1790,8 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
|
||||
Value *IV = CLI->getIndVar();
|
||||
|
||||
InsertPointTy EndIP =
|
||||
OMPBuilder.createDynamicWorkshareLoop(Loc, CLI, AllocaIP, SchedType,
|
||||
/*NeedsBarrier=*/true, ChunkVal);
|
||||
OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
|
||||
/*NeedsBarrier=*/true, ChunkVal);
|
||||
// The returned value should be the "after" point.
|
||||
ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
|
||||
ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
|
||||
|
||||
@@ -338,8 +338,8 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
|
||||
findAllocaInsertPoint(builder, moduleTranslation);
|
||||
if (schedule == omp::ClauseScheduleKind::Static) {
|
||||
ompBuilder->createStaticWorkshareLoop(ompLoc, loopInfo, allocaIP,
|
||||
!loop.nowait(), chunk);
|
||||
ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
|
||||
!loop.nowait(), chunk);
|
||||
} else {
|
||||
llvm::omp::OMPScheduleType schedType;
|
||||
switch (schedule) {
|
||||
@@ -360,8 +360,8 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
|
||||
break;
|
||||
}
|
||||
|
||||
ompBuilder->createDynamicWorkshareLoop(ompLoc, loopInfo, allocaIP,
|
||||
schedType, !loop.nowait(), chunk);
|
||||
ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
|
||||
schedType, !loop.nowait(), chunk);
|
||||
}
|
||||
|
||||
// Continue building IR after the loop. Note that the LoopInfo returned by
|
||||
|
||||
Reference in New Issue
Block a user