[flang] add a pass to move array temporaries to the stack

This pass implements the `-fstack-arrays` flag. See the RFC in
`flang/docs/fstack-arrays.md` for more information.

Differential revision: https://reviews.llvm.org/D140415
This commit is contained in:
Tom Eccles
2022-12-09 18:07:31 +00:00
parent 1637351fd1
commit cc14bf22bd
14 changed files with 1261 additions and 14 deletions

View File

@@ -2299,6 +2299,7 @@ flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
flang/lib/Optimizer/Transforms/MemRefDataFlowOpt.cpp
flang/lib/Optimizer/Transforms/PassDetail.h
flang/lib/Optimizer/Transforms/RewriteLoop.cpp
flang/lib/Optimizer/Transforms/StackArrays.cpp
flang/lib/Parser/basic-parsers.h
flang/lib/Parser/char-block.cpp
flang/lib/Parser/char-buffer.cpp

View File

@@ -127,8 +127,8 @@ void genFinalization(fir::FirOpBuilder &builder, mlir::Location loc,
void genInlinedAllocation(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box,
mlir::ValueRange lbounds, mlir::ValueRange extents,
mlir::ValueRange lenParams,
llvm::StringRef allocName);
mlir::ValueRange lenParams, llvm::StringRef allocName,
bool mustBeHeap = false);
void genInlinedDeallocate(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box);

View File

@@ -57,6 +57,15 @@ public:
mlir::Type getType() const;
};
/// Boolean attribute which can be applied to a fir.allocmem operation. When it
/// is set to true the allocation must stay on the heap: passes may not move it
/// to the stack.
class MustBeHeapAttr : public mlir::BoolAttr {
public:
using BoolAttr::BoolAttr;
/// Name under which the attribute is attached to an operation.
static constexpr llvm::StringRef getAttrName() { return "fir.must_be_heap"; }
};
// Attributes for building SELECT CASE multiway branches
/// A closed interval (including the bound values) is an interval with both an

View File

@@ -55,6 +55,7 @@ std::unique_ptr<mlir::Pass> createExternalNameConversionPass();
std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
std::unique_ptr<mlir::Pass> createStackArraysPass();
std::unique_ptr<mlir::Pass> createSimplifyIntrinsicsPass();
std::unique_ptr<mlir::Pass> createAddDebugFoundationPass();

View File

@@ -235,6 +235,16 @@ def MemoryAllocationOpt : Pass<"memory-allocation-opt", "mlir::func::FuncOp"> {
let constructor = "::fir::createMemoryAllocationPass()";
}
// Declares the "stack-arrays" pass. It is anchored on mlir::ModuleOp and is
// instantiated through ::fir::createStackArraysPass().
def StackArrays : Pass<"stack-arrays", "mlir::ModuleOp"> {
let summary = "Move local array allocations from heap memory into stack memory";
let description = [{
Convert heap allocations for arrays, even those of unknown size, into stack
allocations.
}];
let dependentDialects = [ "fir::FIROpsDialect" ];
let constructor = "::fir::createStackArraysPass()";
}
def SimplifyRegionLite : Pass<"simplify-region-lite", "mlir::ModuleOp"> {
let summary = "Region simplification";
let description = [{

View File

@@ -428,7 +428,8 @@ private:
}
}
fir::factory::genInlinedAllocation(builder, loc, box, lbounds, extents,
lenParams, mangleAlloc(alloc));
lenParams, mangleAlloc(alloc),
/*mustBeHeap=*/true);
}
void genSimpleAllocation(const Allocation &alloc,

View File

@@ -16,6 +16,7 @@
#include "flang/Optimizer/Builder/Runtime/Derived.h"
#include "flang/Optimizer/Builder/Runtime/Stop.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/Support/FatalError.h"
@@ -719,13 +720,11 @@ static mlir::Value allocateAndInitNewStorage(fir::FirOpBuilder &builder,
return newStorage;
}
void fir::factory::genInlinedAllocation(fir::FirOpBuilder &builder,
mlir::Location loc,
const fir::MutableBoxValue &box,
mlir::ValueRange lbounds,
mlir::ValueRange extents,
mlir::ValueRange lenParams,
llvm::StringRef allocName) {
void fir::factory::genInlinedAllocation(
fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box, mlir::ValueRange lbounds,
mlir::ValueRange extents, mlir::ValueRange lenParams,
llvm::StringRef allocName, bool mustBeHeap) {
auto lengths = getNewLengths(builder, loc, box, lenParams);
llvm::SmallVector<mlir::Value> safeExtents;
for (mlir::Value extent : extents)
@@ -742,6 +741,9 @@ void fir::factory::genInlinedAllocation(fir::FirOpBuilder &builder,
mlir::Value irBox = fir::factory::getMutableIRBox(builder, loc, box);
fir::runtime::genDerivedTypeInitialize(builder, loc, irBox);
}
heap->setAttr(fir::MustBeHeapAttr::getAttrName(),
fir::MustBeHeapAttr::get(builder.getContext(), mustBeHeap));
}
void fir::factory::genInlinedDeallocate(fir::FirOpBuilder &builder,

View File

@@ -8,6 +8,7 @@ add_flang_library(FIRTransforms
ArrayValueCopy.cpp
ExternalNameConversion.cpp
MemoryAllocation.cpp
StackArrays.cpp
MemRefDataFlowOpt.cpp
SimplifyRegionLite.cpp
AlgebraicSimplification.cpp

View File

@@ -0,0 +1,773 @@
//===- StackArrays.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/LowLevelIntrinsics.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Support/FIRContext.h"
#include "flang/Optimizer/Transforms/Passes.h"
#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h"
#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
#include "mlir/Analysis/DataFlow/DenseAnalysis.h"
#include "mlir/Analysis/DataFlowFramework.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/Value.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
namespace fir {
#define GEN_PASS_DEF_STACKARRAYS
#include "flang/Optimizer/Transforms/Passes.h.inc"
} // namespace fir
#define DEBUG_TYPE "stack-arrays"
namespace {
/// The allocation state tracked for an SSA value at a given program point.
/// Two states coming from different control-flow paths are merged by the
/// join() helper in this file: equal states are kept, differing states become
/// Unknown.
enum class AllocationState {
/// This means that the allocation state of a variable cannot be determined
/// at this program point, e.g. because one route through a conditional freed
/// the variable and the other route didn't.
/// This asserts a known-unknown: different from the unknown-unknown of having
/// no AllocationState stored for a particular SSA value
Unknown,
/// Means this SSA value was allocated on the heap in this function and has
/// now been freed
Freed,
/// Means this SSA value was allocated on the heap in this function and is a
/// candidate for moving to the stack
Allocated,
};
/// Stores where an alloca should be inserted. If the PointerUnion is an
/// Operation the alloca should be inserted /after/ the operation. If it is a
/// block, the alloca can be placed anywhere in that block. An InsertionPoint
/// built from nullptr holds neither and tests false.
class InsertionPoint {
  llvm::PointerUnion<mlir::Operation *, mlir::Block *> location;
  bool saveRestoreStack;

  /// Get contained pointer type or nullptr
  template <class T>
  T *tryGetPtr() const {
    if (location.is<T *>())
      return location.get<T *>();
    return nullptr;
  }

public:
  /// Build an insertion point from an Operation or Block pointer. This
  /// constructor is intentionally implicit: call sites rely on
  /// `return {ptr};` to create insertion points.
  template <class T>
  InsertionPoint(T *ptr, bool saveRestoreStack = false)
      : location(ptr), saveRestoreStack{saveRestoreStack} {}

  /// Build an empty (invalid) insertion point.
  InsertionPoint(std::nullptr_t null)
      : location(null), saveRestoreStack{false} {}

  /// Get contained operation, or nullptr
  mlir::Operation *tryGetOperation() const {
    return tryGetPtr<mlir::Operation>();
  }

  /// Get contained block, or nullptr
  mlir::Block *tryGetBlock() const { return tryGetPtr<mlir::Block>(); }

  /// Get whether the stack should be saved/restored. If yes, an llvm.stacksave
  /// intrinsic should be added before the alloca, and an llvm.stackrestore
  /// intrinsic should be added where the freemem is
  bool shouldSaveRestoreStack() const { return saveRestoreStack; }

  /// True when an operation or a block is stored. Explicit so that an
  /// InsertionPoint is only usable as a condition and never silently converts
  /// to an integer or takes part in arithmetic.
  explicit operator bool() const { return tryGetOperation() || tryGetBlock(); }

  /// Two insertion points are equal when both the location and the
  /// save/restore flag match.
  bool operator==(const InsertionPoint &rhs) const {
    return (location == rhs.location) &&
           (saveRestoreStack == rhs.saveRestoreStack);
  }

  bool operator!=(const InsertionPoint &rhs) const { return !(*this == rhs); }
};
/// Maps SSA values to their AllocationState at a particular program point.
/// A value with no entry in the map has not been seen by the analysis at all,
/// which is distinct from an entry holding AllocationState::Unknown.
class LatticePoint : public mlir::dataflow::AbstractDenseLattice {
// Maps all values we are interested in to states
llvm::SmallDenseMap<mlir::Value, AllocationState, 1> stateMap;
public:
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LatticePoint)
using AbstractDenseLattice::AbstractDenseLattice;
/// Lattice points compare equal when they map the same values to the same
/// states
bool operator==(const LatticePoint &rhs) const {
return stateMap == rhs.stateMap;
}
/// Join the lattice across control-flow edges
mlir::ChangeResult join(const AbstractDenseLattice &lattice) override;
void print(llvm::raw_ostream &os) const override;
/// Clear all modifications
mlir::ChangeResult reset();
/// Set the state of an SSA value
mlir::ChangeResult set(mlir::Value value, AllocationState state);
/// Add to \p out every value whose state at this lattice point is
/// AllocationState::Freed
void appendFreedValues(llvm::DenseSet<mlir::Value> &out) const;
/// Get the state stored for \p val, or an empty optional if there is none
std::optional<AllocationState> get(mlir::Value val) const;
};
/// Dense data flow analysis which tracks, in a LatticePoint per program point,
/// which fir.allocmem results are currently allocated and which have been
/// freed.
class AllocationAnalysis
: public mlir::dataflow::DenseDataFlowAnalysis<LatticePoint> {
public:
using DenseDataFlowAnalysis::DenseDataFlowAnalysis;
/// Transfer function: computes the lattice \p after \p op from the lattice
/// \p before it
void visitOperation(mlir::Operation *op, const LatticePoint &before,
LatticePoint *after) override;
/// At an entry point nothing is known about any allocation yet, so the
/// lattice is reset to the empty state
void setToEntryState(LatticePoint *lattice) override;
protected:
/// Visit control flow operations and decide whether to call visitOperation
/// to apply the transfer function
void processOperation(mlir::Operation *op) override;
};
/// Drives analysis to find candidate fir.allocmem operations which could be
/// moved to the stack. Intended to be used with mlir::Pass::getAnalysis.
/// Results are computed lazily, one function at a time, and cached.
class StackArraysAnalysisWrapper {
public:
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(StackArraysAnalysisWrapper)
// Maps fir.allocmem -> place to insert alloca
using AllocMemMap = llvm::DenseMap<mlir::Operation *, InsertionPoint>;
// The operation the analysis is constructed for is not used: functions are
// analysed on demand by getCandidateOps
StackArraysAnalysisWrapper(mlir::Operation *op) {}
/// Whether the data flow solver failed for any function analysed so far
bool hasErrors() const;
/// Get the candidate allocations for \p func, analysing it first if that has
/// not been done yet
const AllocMemMap &getCandidateOps(mlir::Operation *func);
private:
// Cached results, keyed by function operation
llvm::DenseMap<mlir::Operation *, AllocMemMap> funcMaps;
// Set once the solver fails; never cleared afterwards
bool gotError = false;
/// Run the data flow analysis on \p func and record its candidates in
/// funcMaps
void analyseFunction(mlir::Operation *func);
};
/// Converts a fir.allocmem to a fir.alloca
class AllocMemConversion : public mlir::OpRewritePattern<fir::AllocMemOp> {
public:
using OpRewritePattern::OpRewritePattern;
/// \p candidateOps is stored by reference, so the map must outlive the
/// pattern
AllocMemConversion(
mlir::MLIRContext *ctx,
const llvm::DenseMap<mlir::Operation *, InsertionPoint> &candidateOps);
/// Replace \p allocmem with a fir.alloca and erase its fir.freemem users.
/// Fails if no alloca could be inserted for this operation
mlir::LogicalResult
matchAndRewrite(fir::AllocMemOp allocmem,
mlir::PatternRewriter &rewriter) const override;
/// Determine where to insert the alloca operation. The returned value should
/// be checked to see if it is inside a loop
static InsertionPoint findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc);
private:
/// allocmem operations that DFA has determined are safe to move to the stack
/// mapping to where to insert the replacement alloca operations
const llvm::DenseMap<mlir::Operation *, InsertionPoint> &candidateOps;
/// If we failed to find an insertion point not inside a loop, see if it would
/// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop
static InsertionPoint findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc);
/// Returns the alloca if it was successfully inserted, otherwise {}
std::optional<fir::AllocaOp>
insertAlloca(fir::AllocMemOp &oldAlloc,
mlir::PatternRewriter &rewriter) const;
/// Inserts a stacksave before oldAlloc and a stackrestore before each freemem
/// which uses it
void insertStackSaveRestore(fir::AllocMemOp &oldAlloc,
mlir::PatternRewriter &rewriter) const;
};
/// Module pass which visits every func.func in the module and replaces the
/// candidate fir.allocmem operations found by StackArraysAnalysisWrapper with
/// fir.alloca operations.
class StackArraysPass : public fir::impl::StackArraysBase<StackArraysPass> {
public:
StackArraysPass() = default;
StackArraysPass(const StackArraysPass &pass);
llvm::StringRef getDescription() const override;
/// Entry point: runs runOnFunc on each function in the module
void runOnOperation() override;
/// Analyse and rewrite a single function. \p func must be a func.func
void runOnFunc(mlir::Operation *func);
private:
// Incremented by the number of candidate operations of each processed
// function
Statistic runCount{this, "stackArraysRunCount",
"Number of heap allocations moved to the stack"};
};
} // namespace
/// Write the name of \p state to \p os. Nothing is written for a value which
/// is not one of the named enumerators.
static void print(llvm::raw_ostream &os, AllocationState state) {
  if (state == AllocationState::Unknown) {
    os << "Unknown";
    return;
  }
  if (state == AllocationState::Freed) {
    os << "Freed";
    return;
  }
  if (state == AllocationState::Allocated)
    os << "Allocated";
}
/// Merge the states recorded for one value on two different control-flow
/// paths. Agreeing states are kept; any disagreement yields Unknown:
///
///           | Allocated | Freed     | Unknown
/// ========= | ========= | ========= | =========
/// Allocated | Allocated | Unknown   | Unknown
/// Freed     | Unknown   | Freed     | Unknown
/// Unknown   | Unknown   | Unknown   | Unknown
static AllocationState join(AllocationState lhs, AllocationState rhs) {
  return lhs == rhs ? lhs : AllocationState::Unknown;
}
/// Merge every entry of \p lattice into this lattice point. Values only known
/// to \p lattice are copied over; values known to both sides have their states
/// combined with ::join. Reports whether this lattice point was modified.
mlir::ChangeResult LatticePoint::join(const AbstractDenseLattice &lattice) {
  const auto &other = static_cast<const LatticePoint &>(lattice);
  bool modified = false;

  for (const auto &entry : other.stateMap) {
    auto [pos, isNew] = stateMap.try_emplace(entry.first, entry.second);
    if (isNew) {
      // the value was unknown on this side: it has just been added
      modified = true;
      continue;
    }

    // the value is tracked on both sides: combine the two states
    const AllocationState current = pos->second;
    const AllocationState merged = ::join(current, entry.second);
    if (merged != current) {
      pos->second = merged;
      modified = true;
    }
  }

  return modified ? mlir::ChangeResult::Change : mlir::ChangeResult::NoChange;
}
/// Print every tracked value together with its state, one entry per line, so
/// that lattices holding several values stay readable in debug output.
void LatticePoint::print(llvm::raw_ostream &os) const {
  for (const auto &[value, state] : stateMap) {
    // start each entry on its own line: without a separator consecutive
    // entries would run into each other
    os << "\n * " << value << ": ";
    ::print(os, state);
  }
}
/// Forget every tracked value. Reports a change only if there was something
/// to forget.
mlir::ChangeResult LatticePoint::reset() {
  if (!stateMap.empty()) {
    stateMap.clear();
    return mlir::ChangeResult::Change;
  }
  return mlir::ChangeResult::NoChange;
}
/// Record \p state for \p value, adding the value if it is not tracked yet.
/// Reports whether the stored state is different from what it was before.
mlir::ChangeResult LatticePoint::set(mlir::Value value, AllocationState state) {
  auto pos = stateMap.find(value);
  if (pos == stateMap.end()) {
    // first time this value is seen
    stateMap.insert({value, state});
    return mlir::ChangeResult::Change;
  }

  if (pos->second == state)
    return mlir::ChangeResult::NoChange;

  pos->second = state;
  return mlir::ChangeResult::Change;
}
/// Add to \p out every value whose state at this lattice point is Freed.
/// Values in any other state are left out.
void LatticePoint::appendFreedValues(llvm::DenseSet<mlir::Value> &out) const {
  for (const auto &entry : stateMap) {
    if (entry.second != AllocationState::Freed)
      continue;
    out.insert(entry.first);
  }
}
/// Look up the state stored for \p val. The result is empty when the value is
/// not tracked at this lattice point.
std::optional<AllocationState> LatticePoint::get(mlir::Value val) const {
  std::optional<AllocationState> found;
  auto pos = stateMap.find(val);
  if (pos != stateMap.end())
    found = pos->second;
  return found;
}
/// Transfer function of the analysis. The state \p before \p op is first
/// carried over into \p after, then adjusted depending on the operation:
///  - fir.allocmem of an array which is not marked fir.must_be_heap: its
///    result becomes Allocated
///  - fir.freemem of a value which is currently Allocated: the value becomes
///    Freed
///  - fir.result: the state is also joined into the lattice of the parent
///    operation
/// Every path ends by notifying dependents if \p after changed.
void AllocationAnalysis::visitOperation(mlir::Operation *op,
                                        const LatticePoint &before,
                                        LatticePoint *after) {
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op
                          << "\n");
  LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");

  // propagate before -> after
  mlir::ChangeResult changed = after->join(before);

  if (auto allocmem = mlir::dyn_cast<fir::AllocMemOp>(op)) {
    assert(op->getNumResults() == 1 && "fir.allocmem has one result");
    auto attr = op->getAttrOfType<fir::MustBeHeapAttr>(
        fir::MustBeHeapAttr::getAttrName());
    auto retTy = allocmem.getAllocatedType();
    // Skipped allocations must not return early: `after` has already been
    // joined with `before`, and that change still has to be propagated below.
    if (attr && attr.getValue()) {
      // skip allocation marked not to be moved
      LLVM_DEBUG(llvm::dbgs() << "--Found fir.must_be_heap: skipping\n");
    } else if (!retTy.isa<fir::SequenceType>()) {
      LLVM_DEBUG(llvm::dbgs()
                 << "--Allocation is not for an array: skipping\n");
    } else {
      mlir::Value result = op->getResult(0);
      changed |= after->set(result, AllocationState::Allocated);
    }
  } else if (mlir::isa<fir::FreeMemOp>(op)) {
    assert(op->getNumOperands() == 1 && "fir.freemem has one operand");
    mlir::Value operand = op->getOperand(0);
    std::optional<AllocationState> operandState = before.get(operand);
    if (operandState && *operandState == AllocationState::Allocated) {
      // don't tag things not allocated in this function as freed, so that we
      // don't think they are candidates for moving to the stack
      changed |= after->set(operand, AllocationState::Freed);
    }
  } else if (mlir::isa<fir::ResultOp>(op)) {
    // make the state at the end of the region visible on the parent operation
    mlir::Operation *parent = op->getParentOp();
    LatticePoint *parentLattice = getLattice(parent);
    assert(parentLattice);
    mlir::ChangeResult parentChanged = parentLattice->join(*after);
    propagateIfChanged(parentLattice, parentChanged);
  }

  // we pass lattices straight through fir.call because called functions should
  // not deallocate flang-generated array temporaries
  LLVM_DEBUG(llvm::dbgs() << "--Lattice out: " << *after << "\n");
  propagateIfChanged(after, changed);
}
/// Entry state: clear \p lattice so that no value is tracked, and notify
/// dependents if that changed it.
void AllocationAnalysis::setToEntryState(LatticePoint *lattice) {
propagateIfChanged(lattice, lattice->reset());
}
/// Mostly a copy of the base class processOperation - the difference being
/// that call operations are passed through to the transfer function instead
/// of being handled separately.
void AllocationAnalysis::processOperation(mlir::Operation *op) {
  // Operations in a block which is not executable are ignored.
  const bool blockIsLive =
      getOrCreateFor<mlir::dataflow::Executable>(op, op->getBlock())->isLive();
  if (!blockIsLive)
    return;

  // The dense lattice which this operation updates
  mlir::dataflow::AbstractDenseLattice *after = getLattice(op);

  // Region control-flow operations get their transfer function from the
  // control flow itself.
  if (auto branch = mlir::dyn_cast<mlir::RegionBranchOpInterface>(op)) {
    visitRegionBranchOperation(op, branch, after);
    return;
  }

  // Everything else, calls included, goes through the transfer function. The
  // incoming state is that of the previous operation or, for the first
  // operation of a block, that of the block.
  mlir::Operation *prev = op->getPrevNode();
  const mlir::dataflow::AbstractDenseLattice *before =
      prev ? getLatticeFor(op, prev) : getLatticeFor(op, op->getBlock());

  visitOperationImpl(op, *before, after);
}
/// Run the allocation data flow analysis over \p func and record, in funcMaps,
/// every fir.allocmem which is freed on all routes to a return together with
/// the place where its replacement alloca should go. On solver failure
/// gotError is set and the entry for \p func stays empty.
void StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
  assert(mlir::isa<mlir::func::FuncOp>(func));

  mlir::DataFlowSolver solver;
  // mlir dense dfa only visits blocks which have been marked live. That is
  // done by dead code analysis, which itself requires constant propagation.
  solver.load<mlir::dataflow::SparseConstantPropagation>();
  solver.load<mlir::dataflow::DeadCodeAnalysis>();
  solver.load<AllocationAnalysis>();

  // Create the (initially empty) result for this function before anything
  // can fail so that later lookups always find an entry.
  AllocMemMap &candidateOps = funcMaps.try_emplace(func).first->second;

  if (failed(solver.initializeAndRun(func))) {
    llvm::errs() << "DataFlowSolver failed!";
    gotError = true;
    return;
  }

  // Combine the states reached at every return of the function
  LatticePoint exitState{func};
  func->walk([&](mlir::func::ReturnOp ret) {
    // there will be no lattice for an unreachable block
    if (const LatticePoint *lattice = solver.lookupState<LatticePoint>(ret))
      exitState.join(*lattice);
  });

  llvm::DenseSet<mlir::Value> freed;
  exitState.appendFreedValues(freed);

  // Only allocations which are definitely freed on all routes through the
  // function are replaced. Anything else may be intended to live longer than
  // the current stack frame (e.g. a heap allocation which is then freed by
  // another function).
  for (mlir::Value value : freed) {
    fir::AllocMemOp allocmem = value.getDefiningOp<fir::AllocMemOp>();
    InsertionPoint where =
        AllocMemConversion::findAllocaInsertionPoint(allocmem);
    if (where)
      candidateOps.insert({allocmem, where});
  }

  LLVM_DEBUG(for (const auto &candidate
                  : candidateOps) {
    llvm::dbgs() << "StackArrays: Found candidate op: " << *candidate.first
                 << '\n';
  });
}
/// Whether the data flow solver has failed for any function analysed so far.
bool StackArraysAnalysisWrapper::hasErrors() const { return gotError; }
/// Get the candidate allocations of \p func. The function is analysed the
/// first time it is asked for; later calls return the cached result.
const StackArraysAnalysisWrapper::AllocMemMap &
StackArraysAnalysisWrapper::getCandidateOps(mlir::Operation *func) {
  const bool alreadyAnalysed = funcMaps.find(func) != funcMaps.end();
  if (!alreadyAnalysed)
    analyseFunction(func);
  return funcMaps[func];
}
/// Build the pattern. \p candidateOps is kept by reference, not copied, so it
/// has to stay alive for as long as the pattern is used.
AllocMemConversion::AllocMemConversion(
mlir::MLIRContext *ctx,
const llvm::DenseMap<mlir::Operation *, InsertionPoint> &candidateOps)
: OpRewritePattern(ctx), candidateOps(candidateOps) {}
/// Replace \p allocmem by a fir.alloca: insert the alloca, erase every
/// fir.freemem which uses the old allocation, redirect the remaining uses to
/// the alloca and finally erase the allocmem itself. Fails, without modifying
/// anything, when no alloca could be inserted.
mlir::LogicalResult
AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
                                    mlir::PatternRewriter &rewriter) const {
  auto oldInsertionPt = rewriter.saveInsertionPoint();
  // add alloca operation
  std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter);
  rewriter.restoreInsertionPoint(oldInsertionPt);
  if (!alloca)
    return mlir::failure();

  // Collect the freemem operations first: erasing a user while walking the
  // use list of the allocmem result would invalidate the iteration whenever
  // the rewriter erases operations immediately.
  llvm::SmallVector<mlir::Operation *> freeOps;
  for (mlir::Operation *user : allocmem.getOperation()->getUsers())
    if (mlir::isa<fir::FreeMemOp>(user))
      freeOps.push_back(user);

  // remove freemem operations
  for (mlir::Operation *freeOp : freeOps)
    rewriter.eraseOp(freeOp);

  // replace references to heap allocation with references to stack allocation
  rewriter.replaceAllUsesWith(allocmem.getResult(), alloca->getResult());

  // remove allocmem operation
  rewriter.eraseOp(allocmem.getOperation());

  return mlir::success();
}
// TODO: use mlir::blockIsInLoop once D141401 is merged
/// Whether \p block may be executed more than once: it is nested in a
/// loop-like operation, nested in a block which is itself in a loop, or part
/// of a cycle in the control flow graph of its region.
static bool isInLoop(mlir::Block *block) {
  mlir::Operation *owner = block->getParentOp();

  // Directly or transitively inside a loop-like operation
  const bool insideLoopLikeOp =
      mlir::isa<mlir::LoopLikeOpInterface>(owner) ||
      owner->getParentOfType<mlir::LoopLikeOpInterface>();
  if (insideLoopLikeOp)
    return true;

  // Below function level the block containing the owning operation might be
  // in a loop
  const bool ownerIsFunction = mlir::isa<mlir::FunctionOpInterface>(owner);
  if (!ownerIsFunction && isInLoop(owner->getBlock()))
    return true;

  // Control flow graph loop: follow successors from the block and see whether
  // the traversal gets back to where it started
  llvm::DenseSet<mlir::Block *> seen;
  llvm::SmallVector<mlir::Block *> worklist;
  worklist.push_back(block);
  while (!worklist.empty()) {
    mlir::Block *current = worklist.pop_back_val();
    if (!seen.insert(current).second) {
      // reached a block for the second time: only a loop through the starting
      // block counts
      if (current == block)
        return true;
      continue;
    }

    worklist.reserve(worklist.size() + current->getNumSuccessors());
    for (mlir::Block *next : current->getSuccessors())
      worklist.push_back(next);
  }

  return false;
}
/// Whether \p op is in a loop: either its block is, or one of its ancestors is
/// a loop-like operation.
static bool isInLoop(mlir::Operation *op) {
  if (isInLoop(op->getBlock()))
    return true;
  return static_cast<bool>(op->getParentOfType<mlir::LoopLikeOpInterface>());
}
/// Choose where the fir.alloca replacing \p oldAlloc should be created.
/// Candidates are, in order of preference: just after the operation defining
/// the last operand of the allocmem, the alloca block of the enclosing
/// outlineable OpenMP operation, or the function entry block. A candidate which
/// is in a loop is replaced by the position of the allocmem itself; if that is
/// in a loop too the result of findAllocaLoopInsertionPoint is returned, which
/// may be an empty InsertionPoint.
InsertionPoint
AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) {
// Ideally the alloca should be inserted at the end of the function entry
// block so that we do not allocate stack space in a loop. However,
// the operands to the alloca may not be available that early, so insert it
// after the last operand becomes available
// If the old allocmem op was in an openmp region then it should not be moved
// outside of that
LLVM_DEBUG(llvm::dbgs() << "StackArrays: findAllocaInsertionPoint: "
<< oldAlloc << "\n");
// check that an Operation or Block we are about to return is not in a loop
auto checkReturn = [&](auto *point) -> InsertionPoint {
if (isInLoop(point)) {
mlir::Operation *oldAllocOp = oldAlloc.getOperation();
if (isInLoop(oldAllocOp)) {
// where we want to put it is in a loop, and even the old location is in
// a loop. As a last resort see whether the alloca can stay at the old
// location, wrapped in a stack save/restore.
return findAllocaLoopInsertionPoint(oldAlloc);
}
// the old location is not in a loop: keep the alloca there
return {oldAllocOp};
}
return {point};
};
auto oldOmpRegion =
oldAlloc->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
// Find when the last operand value becomes available
mlir::Block *operandsBlock = nullptr;
mlir::Operation *lastOperand = nullptr;
for (mlir::Value operand : oldAlloc.getOperands()) {
LLVM_DEBUG(llvm::dbgs() << "--considering operand " << operand << "\n");
mlir::Operation *op = operand.getDefiningOp();
// an operand without a defining operation: stay at the allocmem
if (!op)
return checkReturn(oldAlloc.getOperation());
if (!operandsBlock)
operandsBlock = op->getBlock();
else if (operandsBlock != op->getBlock()) {
LLVM_DEBUG(llvm::dbgs()
<< "----operand declared in a different block!\n");
// Operation::isBeforeInBlock requires the operations to be in the same
// block. The best we can do is the location of the allocmem.
return checkReturn(oldAlloc.getOperation());
}
if (!lastOperand || lastOperand->isBeforeInBlock(op))
lastOperand = op;
}
if (lastOperand) {
// there were value operands to the allocmem so insert after the last one
LLVM_DEBUG(llvm::dbgs()
<< "--Placing after last operand: " << *lastOperand << "\n");
// check we aren't moving out of an omp region
auto lastOpOmpRegion =
lastOperand->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
if (lastOpOmpRegion == oldOmpRegion)
return checkReturn(lastOperand);
// Presumably this happened because the operands became ready before the
// start of this openmp region. (lastOpOmpRegion != oldOmpRegion) should
// imply that oldOmpRegion comes after lastOpOmpRegion.
// NOTE(review): this relies on oldOmpRegion being non-null here - confirm
return checkReturn(oldOmpRegion.getAllocaBlock());
}
// There were no value operands to the allocmem so we are safe to insert it
// as early as we want
// handle openmp case
if (oldOmpRegion)
return checkReturn(oldOmpRegion.getAllocaBlock());
// fall back to the function entry block
mlir::func::FuncOp func = oldAlloc->getParentOfType<mlir::func::FuncOp>();
assert(func && "This analysis is run on func.func");
mlir::Block &entryBlock = func.getBlocks().front();
LLVM_DEBUG(llvm::dbgs() << "--Placing at the start of func entry block\n");
return checkReturn(&entryBlock);
}
/// Last resort for an allocation inside a loop: keep the alloca at the
/// location of \p oldAlloc and bracket it with llvm.stacksave and
/// llvm.stackrestore. An empty InsertionPoint is returned when a freemem is in
/// a different block from the allocation, or when a fir.alloca is found
/// between the allocation and one of its freemem operations.
InsertionPoint
AllocMemConversion::findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc) {
  mlir::Operation *allocOp = oldAlloc;

  // Gather the freemem operations which release this allocation
  llvm::SmallVector<mlir::Operation *, 1> freeOps;
  for (mlir::Operation *user : allocOp->getUsers()) {
    if (!mlir::isa<fir::FreeMemOp>(user))
      continue;
    freeOps.push_back(user);
  }
  assert(freeOps.size() && "DFA should only return freed memory");

  // A stacksave/stackrestore pair spanning different blocks is not reasoned
  // about
  mlir::Block *allocBlock = allocOp->getBlock();
  for (mlir::Operation *freeOp : freeOps)
    if (freeOp->getBlock() != allocBlock)
      return {nullptr};

  // No other stack allocation may sit between the stack save and the stack
  // restore
  // note: for flang generated temporaries there should only be one free op
  for (mlir::Operation *freeOp : freeOps) {
    mlir::Operation *cursor = allocOp;
    while (cursor && cursor != freeOp) {
      if (mlir::isa<fir::AllocaOp>(cursor))
        return {nullptr};
      cursor = cursor->getNextNode();
    }
  }

  return InsertionPoint{allocOp, /*saveRestoreStack=*/true};
}
/// Create the fir.alloca which replaces \p oldAlloc at the insertion point
/// recorded in candidateOps, adding a stack save/restore first when the
/// insertion point asks for it. Nothing is created, and an empty optional is
/// returned, when \p oldAlloc is not a candidate or has no valid insertion
/// point.
std::optional<fir::AllocaOp>
AllocMemConversion::insertAlloca(fir::AllocMemOp &oldAlloc,
                                 mlir::PatternRewriter &rewriter) const {
  auto candidate = candidateOps.find(oldAlloc.getOperation());
  if (candidate == candidateOps.end())
    return std::nullopt;

  InsertionPoint where = candidate->second;
  if (!where)
    return std::nullopt;

  if (where.shouldSaveRestoreStack())
    insertStackSaveRestore(oldAlloc, rewriter);

  mlir::Location loc = oldAlloc.getLoc();
  mlir::Type varTy = oldAlloc.getInType();

  // after the anchor operation if there is one, otherwise at the start of the
  // anchor block
  mlir::Operation *anchorOp = where.tryGetOperation();
  if (anchorOp) {
    rewriter.setInsertionPointAfter(anchorOp);
  } else {
    mlir::Block *block = where.tryGetBlock();
    assert(block && "There must be a valid insertion point");
    rewriter.setInsertionPointToStart(block);
  }

  // a missing name is passed on as an empty string
  auto nameOrEmpty =
      [](std::optional<llvm::StringRef> name) -> llvm::StringRef {
    return name ? *name : llvm::StringRef{};
  };
  llvm::StringRef uniqName = nameOrEmpty(oldAlloc.getUniqName());
  llvm::StringRef bindcName = nameOrEmpty(oldAlloc.getBindcName());

  return rewriter.create<fir::AllocaOp>(loc, varTy, uniqName, bindcName,
                                        oldAlloc.getTypeparams(),
                                        oldAlloc.getShape());
}
/// Insert a call to the stack save function immediately before \p oldAlloc and
/// a call to the stack restore function immediately before every fir.freemem
/// which uses \p oldAlloc. The insertion point of \p rewriter is the same on
/// return as it was on entry.
void AllocMemConversion::insertStackSaveRestore(
fir::AllocMemOp &oldAlloc, mlir::PatternRewriter &rewriter) const {
auto oldPoint = rewriter.saveInsertionPoint();
// a FirOpBuilder is needed for the fir::factory helpers used below
auto mod = oldAlloc->getParentOfType<mlir::ModuleOp>();
fir::KindMapping kindMap = fir::getKindMapping(mod);
fir::FirOpBuilder builder{rewriter, kindMap};
// save the stack pointer just before the old allocation
mlir::func::FuncOp stackSaveFn = fir::factory::getLlvmStackSave(builder);
mlir::SymbolRefAttr stackSaveSym =
builder.getSymbolRefAttr(stackSaveFn.getName());
builder.setInsertionPoint(oldAlloc);
mlir::Value sp =
builder
.create<fir::CallOp>(oldAlloc.getLoc(),
stackSaveFn.getFunctionType().getResults(),
stackSaveSym, mlir::ValueRange{})
.getResult(0);
// restore the saved stack pointer at each place the memory used to be freed
mlir::func::FuncOp stackRestoreFn =
fir::factory::getLlvmStackRestore(builder);
mlir::SymbolRefAttr stackRestoreSym =
builder.getSymbolRefAttr(stackRestoreFn.getName());
for (mlir::Operation *user : oldAlloc->getUsers()) {
if (mlir::isa<fir::FreeMemOp>(user)) {
builder.setInsertionPoint(user);
builder.create<fir::CallOp>(user->getLoc(),
stackRestoreFn.getFunctionType().getResults(),
stackRestoreSym, mlir::ValueRange{sp});
}
}
rewriter.restoreInsertionPoint(oldPoint);
}
/// Copy constructor: copies the generated base class. runCount is not listed
/// here, so it is set up by its default member initializer.
StackArraysPass::StackArraysPass(const StackArraysPass &pass)
: fir::impl::StackArraysBase<StackArraysPass>(pass) {}
/// One-line description of what the pass does.
llvm::StringRef StackArraysPass::getDescription() const {
  const llvm::StringRef description =
      "Move heap allocated array temporaries to the stack";
  return description;
}
void StackArraysPass::runOnOperation() {
mlir::ModuleOp mod = getOperation();
mod.walk([this](mlir::func::FuncOp func) { runOnFunc(func); });
}
/// Move the candidate allocations of one function to the stack. The pass is
/// marked as failed if the analysis reported an error or if the conversion
/// fails. Functions without candidates are left alone.
void StackArraysPass::runOnFunc(mlir::Operation *func) {
  assert(mlir::isa<mlir::func::FuncOp>(func));

  StackArraysAnalysisWrapper &analysis =
      getAnalysis<StackArraysAnalysisWrapper>();
  const StackArraysAnalysisWrapper::AllocMemMap &candidateOps =
      analysis.getCandidateOps(func);
  if (analysis.hasErrors()) {
    signalPassFailure();
    return;
  }

  if (candidateOps.empty())
    return;
  runCount += candidateOps.size();

  mlir::MLIRContext &context = getContext();

  // Only the candidate allocmem operations are illegal: everything else may
  // stay as it is
  mlir::ConversionTarget target(context);
  target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect,
                         mlir::func::FuncDialect>();
  target.addDynamicallyLegalOp<fir::AllocMemOp>([&](fir::AllocMemOp alloc) {
    return candidateOps.count(alloc.getOperation()) == 0;
  });

  mlir::RewritePatternSet patterns(&context);
  patterns.insert<AllocMemConversion>(&context, candidateOps);

  const mlir::LogicalResult converted =
      mlir::applyPartialConversion(func, target, std::move(patterns));
  if (mlir::failed(converted)) {
    mlir::emitError(func->getLoc(), "error in stack arrays optimization\n");
    signalPassFailure();
  }
}
/// Factory for the stack arrays pass.
std::unique_ptr<mlir::Pass> fir::createStackArraysPass() {
  std::unique_ptr<mlir::Pass> pass = std::make_unique<StackArraysPass>();
  return pass;
}

View File

@@ -20,7 +20,7 @@ subroutine allocation(x)
! CHECK: %[[VAL_12:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_12]] : index
! CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_11]], %[[VAL_12]] : index
! CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[VAL_2]] : index), %[[VAL_14]] {uniq_name = "_QFallocationEx.alloc"}
! CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[VAL_2]] : index), %[[VAL_14]] {fir.must_be_heap = true, uniq_name = "_QFallocationEx.alloc"}
! CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_17:.*]] = fir.embox %[[VAL_15]](%[[VAL_16]]) typeparams %[[VAL_2]] : (!fir.heap<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>
! CHECK: fir.store %[[VAL_17]] to %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>
@@ -84,7 +84,7 @@ subroutine alloc_comp(x)
! CHECK: %[[VAL_9:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_9]] : index
! CHECK: %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_8]], %[[VAL_9]] : index
! CHECK: %[[VAL_12:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_11]] {uniq_name = "_QEa.alloc"}
! CHECK: %[[VAL_12:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_11]] {fir.must_be_heap = true, uniq_name = "_QEa.alloc"}
! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_14:.*]] = fir.embox %[[VAL_12]](%[[VAL_13]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
! CHECK: fir.store %[[VAL_14]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>

View File

@@ -177,7 +177,7 @@ end
! CHECK: %[[VAL_2:.*]] = fir.zero_bits !fir.ptr<i32>
! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<!fir.ptr<i32>>
! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "ptr", uniq_name = "_QFc_loc_non_save_pointer_scalarEptr"}
! CHECK: %[[VAL_4:.*]] = fir.allocmem i32 {uniq_name = "_QFc_loc_non_save_pointer_scalarEi.alloc"}
! CHECK: %[[VAL_4:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFc_loc_non_save_pointer_scalarEi.alloc"}
! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.heap<i32>) -> !fir.ptr<i32>
! CHECK: fir.store %[[VAL_5]] to %[[VAL_1]] : !fir.ref<!fir.ptr<i32>>
! CHECK: %[[VAL_6:.*]] = arith.constant 10 : i32

View File

@@ -43,7 +43,7 @@ subroutine ss(count)
! CHECK: %[[V_6:[0-9]+]] = fir.alloca i64 {bindc_name = "count_rate_", fir.target, uniq_name = "_QFssEcount_rate_"}
! CHECK: %[[V_7:[0-9]+]] = fir.convert %[[V_6]] : (!fir.ref<i64>) -> !fir.ptr<i64>
! CHECK: fir.store %[[V_7]] to %[[V_4]] : !fir.ref<!fir.ptr<i64>>
! CHECK: %[[V_8:[0-9]+]] = fir.allocmem i64 {uniq_name = "_QFssEcount_max.alloc"}
! CHECK: %[[V_8:[0-9]+]] = fir.allocmem i64 {fir.must_be_heap = true, uniq_name = "_QFssEcount_max.alloc"}
! CHECK: fir.store %[[V_8]] to %[[V_1]] : !fir.ref<!fir.heap<i64>>
! CHECK: %[[V_9:[0-9]+]] = fir.load %[[V_4]] : !fir.ref<!fir.ptr<i64>>
! CHECK: %[[V_10:[0-9]+]] = fir.load %[[V_1]] : !fir.ref<!fir.heap<i64>>

View File

@@ -0,0 +1,140 @@
! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --array-value-copy | fir-opt --stack-arrays | FileCheck %s
! check simple array value copy case
subroutine array_value_copy_simple(arr)
integer, intent(inout) :: arr(4)
! source and destination are both sections of the same array; the
! expectations below require a stack fir.alloca of !fir.array<4xi32> and no
! heap allocation or free
arr(3:4) = arr(1:2)
end subroutine
! CHECK-LABEL: func.func @_QParray_value_copy_simple(%arg0: !fir.ref<!fir.array<4xi32>>
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: fir.alloca !fir.array<4xi32>
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }
! check complex array value copy case
module stuff
! element type with an allocatable array component
type DerivedWithAllocatable
integer, dimension(:), allocatable :: dat
end type
contains
subroutine array_value_copy_complex(arr)
! assumed-shape array of the derived type above
type(DerivedWithAllocatable), intent(inout) :: arr(:)
! source and destination are both sections of the same array; the
! expectations below require a stack fir.alloca with a dynamic extent and no
! heap allocation or free
arr(3:4) = arr(1:2)
end subroutine
end module
! CHECK: func.func
! CHECK-SAME: array_value_copy_complex
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: fir.alloca !fir.array<?x!fir.type<_QMstuffTderivedwithallocatable
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }
! Passes a 100-element named constant (parameter) array to use_p. The
! expectations below require a stack fir.alloca of !fir.array<100xi32> and no
! heap allocation or free in the output.
subroutine parameter_array_init
integer, parameter :: p(100) = 42
call use_p(p)
end subroutine
! CHECK: func.func
! CHECK-SAME: parameter_array_init
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: fir.alloca !fir.array<100xi32>
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }
! Passes the vector-subscripted section x(v) to a dummy argument declared with
! assumed shape. The expectations below require a stack fir.alloca of
! !fir.array<?xf32> before the call to takes_box and no fir.freemem after it.
subroutine test_vector_subscripted_section_to_box(v, x)
interface
subroutine takes_box(y)
real :: y(:)
end subroutine
end interface
integer :: v(:)
real :: x(:)
! v supplies the subscripts used to select elements of x
call takes_box(x(v))
end subroutine
! CHECK: func.func
! CHECK-SAME: test_vector_subscripted_section_to_box
! CHECK-NOT: fir.allocmem
! CHECK: fir.alloca !fir.array<?xf32>
! CHECK-NOT: fir.allocmem
! CHECK: fir.call @_QPtakes_box
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }
! Passes the parenthesized expression (x), rather than the variable x itself,
! to bar. The expectations below require a stack fir.alloca of
! !fir.array<100xi32> before the call and no fir.freemem after it.
subroutine call_parenthesized_arg(x)
integer :: x(100)
call bar((x))
end subroutine
! CHECK: func.func
! CHECK-SAME: call_parenthesized_arg
! CHECK-NOT: fir.allocmem
! CHECK: fir.alloca !fir.array<100xi32>
! CHECK-NOT: fir.allocmem
! CHECK: fir.call @_QPbar
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }
! Masked (where/elsewhere) assignments to the allocatable array b. Only the
! presence of the function and its return are verified below; the TODO that
! follows this subroutine records why nothing stronger is expected yet.
subroutine where_allocatable_assignments(a, b)
integer :: a(:)
integer, allocatable :: b(:)
where(b > 0)
b = a
elsewhere
b(:) = 0
end where
end subroutine
! TODO: broken: passing allocation through fir.result
! CHECK: func.func
! CHECK-SAME: where_allocatable_assignments
! CHECK: return
! CHECK-NEXT: }
! Assigns an array constructor built from five calls to the external function
! f. Only the presence of the function and its return are verified below; the
! TODO that follows this subroutine records why nothing stronger is expected
! yet.
subroutine array_constructor(a, b)
real :: a(5), b
real, external :: f
a = [f(b), f(b+1), f(b+2), f(b+5), f(b+11)]
end subroutine
! TODO: broken: realloc
! CHECK: func.func
! CHECK-SAME: array_constructor
! CHECK: return
! CHECK-NEXT: }
! Assigns an implied-do array constructor whose length is the run-time value
! n. Only the presence of the function and its return are verified below; the
! TODO that follows this subroutine records why nothing stronger is expected
! yet.
subroutine sequence(seq, n)
integer :: n, seq(n)
seq = [(i,i=1,n)]
end subroutine
! TODO: broken: realloc
! CHECK: func.func
! CHECK-SAME: sequence
! CHECK: return
! CHECK-NEXT: }
! Reverses each k-element chunk of x in place inside a do loop that also
! contains a conditional stop. The expectations below require a fir.alloca of
! !fir.array<100000000xi32> (n = k*m elements) on the line directly after the
! function signature, and no heap allocation or free.
! NOTE(review): the name suggests this loop is lowered to branches between
! blocks rather than a structured loop op - confirm.
subroutine CFGLoop(x)
integer, parameter :: k = 100, m=1000000, n = k*m
integer :: x(n)
logical :: has_error
do i=0,m-1
! the right-hand side reads the same k elements that are written, in reverse
x(k*i+1:k*(i+1)) = x(k*(i+1):k*i+1:-1)
if (has_error(x, k)) stop
end do
end subroutine
! CHECK: func.func
! CHECK-SAME: cfgloop
! CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<100000000xi32>
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }

View File

@@ -0,0 +1,309 @@
// RUN: fir-opt --stack-arrays %s | FileCheck %s
// Simplest transformation
func.func @simple() {
// Constant-size heap allocation that is freed in the same block, before the
// only return.
%0 = fir.allocmem !fir.array<42xi32>
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
return
}
// CHECK: func.func @simple() {
// CHECK-NEXT: fir.alloca !fir.array<42xi32>
// CHECK-NEXT: return
// CHECK-NEXT: }
// Check fir.must_be_heap allocations are not moved
func.func @must_be_heap() {
// Same shape as a trivially movable allocation (constant size, freed before
// the only return), except that it carries the fir.must_be_heap attribute.
%0 = fir.allocmem !fir.array<42xi32> {fir.must_be_heap = true}
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
return
}
// CHECK: func.func @must_be_heap() {
// CHECK-NEXT: %[[ALLOC:.*]] = fir.allocmem !fir.array<42xi32> {fir.must_be_heap = true}
// CHECK-NEXT: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<42xi32>>
// CHECK-NEXT: return
// CHECK-NEXT: }
// Check the data-flow-analysis can detect cases where we aren't sure if memory
// is freed by the end of the function
func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
%7 = arith.constant 42 : index
%8 = fir.allocmem !fir.array<?xi32>, %7 {uniq_name = "_QFdfa1Earr.alloc"}
// load the condition and convert it to i1 for fir.if
%9 = fir.load %arg0 : !fir.ref<!fir.logical<4>>
%10 = fir.convert %9 : (!fir.logical<4>) -> i1
fir.if %10 {
// %8 is freed only when the condition is true
fir.freemem %8 : !fir.heap<!fir.array<?xi32>>
} else {
// %8 is not freed on this path
}
return
}
// CHECK: func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
// CHECK-NEXT: %[[C42:.*]] = arith.constant 42 : index
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[C42]] {uniq_name = "_QFdfa1Earr.alloc"}
// CHECK-NEXT: %[[LOGICAL:.*]] = fir.load %arg0 : !fir.ref<!fir.logical<4>>
// CHECK-NEXT: %[[BOOL:.*]] = fir.convert %[[LOGICAL]] : (!fir.logical<4>) -> i1
// CHECK-NEXT: fir.if %[[BOOL]] {
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
// CHECK-NEXT: } else {
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// Check scf.if (fir.if is not considered a branch operation)
func.func @dfa2(%arg0: i1) {
%a = fir.allocmem !fir.array<1xi8>
scf.if %arg0 {
// %a is freed only when %arg0 is true
fir.freemem %a : !fir.heap<!fir.array<1xi8>>
} else {
// %a is not freed on this path
}
return
}
// CHECK: func.func @dfa2(%arg0: i1) {
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<1xi8>
// CHECK-NEXT: scf.if %arg0 {
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<1xi8>>
// CHECK-NEXT: } else {
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// check the alloca is placed after all operands become available
func.func @placement1() {
// do some stuff with other ssa values
%1 = arith.constant 1 : index
%2 = arith.constant 2 : index
// %3 is the dynamic extent used by the allocation below
%3 = arith.addi %1, %2 : index
// operand is now available
%4 = fir.allocmem !fir.array<?xi32>, %3
// ...
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
return
}
// CHECK: func.func @placement1() {
// CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[TWO:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[ARG:.*]] = arith.addi %[[ONE]], %[[TWO]] : index
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<?xi32>, %[[ARG]]
// CHECK-NEXT: return
// CHECK-NEXT: }
// check that if there are no operands, then the alloca is placed early
func.func @placement2() {
// do some stuff with other ssa values
%1 = arith.constant 1 : index
%2 = arith.constant 2 : index
%3 = arith.addi %1, %2 : index
// constant-size allocation: it takes no operands, so it does not use %1-%3
%4 = fir.allocmem !fir.array<42xi32>
// ...
fir.freemem %4 : !fir.heap<!fir.array<42xi32>>
return
}
// CHECK: func.func @placement2() {
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<42xi32>
// CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[TWO:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[ONE]], %[[TWO]] : index
// CHECK-NEXT: return
// CHECK-NEXT: }
// check that stack allocations which must be placed in loops use stacksave
func.func @placement3() {
%c1 = arith.constant 1 : index
%c1_i32 = fir.convert %c1 : (index) -> i32
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
%0:2 = fir.do_loop %arg0 = %c1 to %c10 step %c1 iter_args(%arg1 = %c1_i32) -> (index, i32) {
// the extent is computed inside the loop body
%3 = arith.addi %c1, %c2 : index
// operand is now available
%4 = fir.allocmem !fir.array<?xi32>, %3
// ...
// freed in the same block as the allocation, with no ops in between
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
fir.result %3, %c1_i32 : index, i32
}
return
}
// CHECK: func.func @placement3() {
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[C1_I32:.*]] = fir.convert %[[C1]] : (index) -> i32
// CHECK-NEXT: %[[C2:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
// CHECK-NEXT: fir.do_loop
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[C1]], %[[C2]] : index
// CHECK-NEXT: %[[SP:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref<i8>
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<?xi32>, %[[SUM]]
// CHECK-NEXT: fir.call @llvm.stackrestore(%[[SP]])
// CHECK-NEXT: fir.result
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// check that stack save/restore are used in CFG loops
func.func @placement4(%arg0 : i1) {
%c1 = arith.constant 1 : index
%c1_i32 = fir.convert %c1 : (index) -> i32
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
cf.br ^bb1
^bb1:
// the extent is computed inside the block that loops back on itself
%3 = arith.addi %c1, %c2 : index
// operand is now available
%4 = fir.allocmem !fir.array<?xi32>, %3
// ...
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
// ^bb1 branches back to itself when %arg0 is true
cf.cond_br %arg0, ^bb1, ^bb2
^bb2:
return
}
// CHECK: func.func @placement4(%arg0: i1) {
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[C1_I32:.*]] = fir.convert %[[C1]] : (index) -> i32
// CHECK-NEXT: %[[C2:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
// CHECK-NEXT: cf.br ^bb1
// CHECK-NEXT: ^bb1:
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[C1]], %[[C2]] : index
// CHECK-NEXT: %[[SP:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref<i8>
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<?xi32>, %[[SUM]]
// CHECK-NEXT: fir.call @llvm.stackrestore(%[[SP]]) : (!fir.ref<i8>) -> ()
// CHECK-NEXT: cf.cond_br %arg0, ^bb1, ^bb2
// CHECK-NEXT: ^bb2:
// CHECK-NEXT: return
// CHECK-NEXT: }
// check that stacksave is not used when there is an intervening alloca
func.func @placement5() {
%c1 = arith.constant 1 : index
%c1_i32 = fir.convert %c1 : (index) -> i32
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
%0:2 = fir.do_loop %arg0 = %c1 to %c10 step %c1 iter_args(%arg1 = %c1_i32) -> (index, i32) {
%3 = arith.addi %c1, %c2 : index
// operand is now available
%4 = fir.allocmem !fir.array<?xi32>, %3
// unrelated stack allocation between the allocmem and its freemem
%5 = fir.alloca i32
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
fir.result %3, %c1_i32 : index, i32
}
return
}
// CHECK: func.func @placement5() {
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[C1_I32:.*]] = fir.convert %[[C1]] : (index) -> i32
// CHECK-NEXT: %[[C2:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
// CHECK-NEXT: fir.do_loop
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[C1]], %[[C2]] : index
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[SUM]]
// CHECK-NEXT: %[[IDX:.*]] = fir.alloca i32
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
// CHECK-NEXT: fir.result
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
// check that stack save/restore are not used when the allocmem and freemem are
// in different blocks
func.func @placement6(%arg0: i1) {
%c1 = arith.constant 1 : index
%c1_i32 = fir.convert %c1 : (index) -> i32
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
cf.br ^bb1
^bb1:
%3 = arith.addi %c1, %c2 : index
// operand is now available
%4 = fir.allocmem !fir.array<?xi32>, %3
// ...
// %4 is not freed in this block; each successor frees it instead
cf.cond_br %arg0, ^bb2, ^bb3
^bb2:
// ...
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
// loops back to the allocating block
cf.br ^bb1
^bb3:
// ...
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
// also loops back: this function has no return
cf.br ^bb1
}
// CHECK: func.func @placement6(%arg0: i1) {
// CHECK-NEXT: %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[c1_i32:.*]] = fir.convert %[[c1]] : (index) -> i32
// CHECK-NEXT: %[[c2:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[c10:.*]] = arith.constant 10 : index
// CHECK-NEXT: cf.br ^bb1
// CHECK-NEXT: ^bb1:
// CHECK-NEXT: %[[ADD:.*]] = arith.addi %[[c1]], %[[c2]] : index
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[ADD]]
// CHECK-NEXT: cf.cond_br %arg0, ^bb2, ^bb3
// CHECK-NEXT: ^bb2:
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
// CHECK-NEXT: cf.br ^bb1
// CHECK-NEXT: ^bb3:
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
// CHECK-NEXT: cf.br ^bb1
// CHECK-NEXT: }
// Check multiple returns, where the memory is always freed
func.func @returns(%arg0: i1) {
%0 = fir.allocmem !fir.array<42xi32>
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
// first return path: %0 is freed before returning
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
return
^bb2:
// second return path: %0 is also freed before returning
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
return
}
// CHECK: func.func @returns(%[[COND:.*]]: i1) {
// CHECK-NEXT: %[[ALLOC:.*]] = fir.alloca !fir.array<42xi32>
// CHECK-NEXT: cf.cond_br %[[COND]], ^bb1, ^bb2
// CHECK-NEXT: ^bb1:
// CHECK-NEXT: return
// CHECK-NEXT: ^bb2:
// CHECK-NEXT: return
// CHECK-NEXT: }
// Check multiple returns, where the memory is not freed on one branch
func.func @returns2(%arg0: i1) {
%0 = fir.allocmem !fir.array<42xi32>
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
// first return path: %0 is freed before returning
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
return
^bb2:
// second return path: returns without freeing %0
return
}
// CHECK: func.func @returns2(%[[COND:.*]]: i1) {
// CHECK-NEXT: %[[ALLOC:.*]] = fir.allocmem !fir.array<42xi32>
// CHECK-NEXT: cf.cond_br %[[COND]], ^bb1, ^bb2
// CHECK-NEXT: ^bb1:
// CHECK-NEXT: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<42xi32>>
// CHECK-NEXT: return
// CHECK-NEXT: ^bb2:
// CHECK-NEXT: return
// CHECK-NEXT: }
// Check allocations are not moved outside of an omp region
func.func @omp_placement1() {
omp.sections {
omp.section {
// the allocation and its free are both nested inside the omp.section region
%mem = fir.allocmem !fir.array<42xi32>
fir.freemem %mem : !fir.heap<!fir.array<42xi32>>
omp.terminator
}
omp.terminator
}
return
}
// CHECK: func.func @omp_placement1() {
// CHECK-NEXT: omp.sections {
// CHECK-NEXT: omp.section {
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<42xi32>
// TODO: this allocation should be moved to the stack. Unfortunately, the data
// flow analysis fails to propagate the lattice out of the omp region to the
// return statement.
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<42xi32>>
// CHECK-NEXT: omp.terminator
// CHECK-NEXT: }
// CHECK-NEXT: omp.terminator
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }