mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 12:26:52 +08:00
[flang] add a pass to move array temporaries to the stack
This pass implements the `-fstack-arrays` flag. See the RFC in `flang/docs/fstack-arrays.md` for more information. Differential revision: https://reviews.llvm.org/D140415
This commit is contained in:
@@ -2299,6 +2299,7 @@ flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
|
||||
flang/lib/Optimizer/Transforms/MemRefDataFlowOpt.cpp
|
||||
flang/lib/Optimizer/Transforms/PassDetail.h
|
||||
flang/lib/Optimizer/Transforms/RewriteLoop.cpp
|
||||
flang/lib/Optimizer/Transforms/StackArrays.cpp
|
||||
flang/lib/Parser/basic-parsers.h
|
||||
flang/lib/Parser/char-block.cpp
|
||||
flang/lib/Parser/char-buffer.cpp
|
||||
|
||||
@@ -127,8 +127,8 @@ void genFinalization(fir::FirOpBuilder &builder, mlir::Location loc,
|
||||
void genInlinedAllocation(fir::FirOpBuilder &builder, mlir::Location loc,
|
||||
const fir::MutableBoxValue &box,
|
||||
mlir::ValueRange lbounds, mlir::ValueRange extents,
|
||||
mlir::ValueRange lenParams,
|
||||
llvm::StringRef allocName);
|
||||
mlir::ValueRange lenParams, llvm::StringRef allocName,
|
||||
bool mustBeHeap = false);
|
||||
|
||||
void genInlinedDeallocate(fir::FirOpBuilder &builder, mlir::Location loc,
|
||||
const fir::MutableBoxValue &box);
|
||||
|
||||
@@ -57,6 +57,15 @@ public:
|
||||
mlir::Type getType() const;
|
||||
};
|
||||
|
||||
/// Attribute which can be applied to a fir.allocmem operation, specifying that
|
||||
/// the allocation must stay on the heap: passes may not move it to the stack
|
||||
class MustBeHeapAttr : public mlir::BoolAttr {
|
||||
public:
|
||||
using BoolAttr::BoolAttr;
|
||||
|
||||
static constexpr llvm::StringRef getAttrName() { return "fir.must_be_heap"; }
|
||||
};
|
||||
|
||||
// Attributes for building SELECT CASE multiway branches
|
||||
|
||||
/// A closed interval (including the bound values) is an interval with both an
|
||||
|
||||
@@ -55,6 +55,7 @@ std::unique_ptr<mlir::Pass> createExternalNameConversionPass();
|
||||
std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
|
||||
std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
|
||||
std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
|
||||
std::unique_ptr<mlir::Pass> createStackArraysPass();
|
||||
std::unique_ptr<mlir::Pass> createSimplifyIntrinsicsPass();
|
||||
std::unique_ptr<mlir::Pass> createAddDebugFoundationPass();
|
||||
|
||||
|
||||
@@ -235,6 +235,16 @@ def MemoryAllocationOpt : Pass<"memory-allocation-opt", "mlir::func::FuncOp"> {
|
||||
let constructor = "::fir::createMemoryAllocationPass()";
|
||||
}
|
||||
|
||||
def StackArrays : Pass<"stack-arrays", "mlir::ModuleOp"> {
|
||||
let summary = "Move local array allocations from heap memory into stack memory";
|
||||
let description = [{
|
||||
Convert heap allocations for arrays, even those of unknown size, into stack
|
||||
allocations.
|
||||
}];
|
||||
let dependentDialects = [ "fir::FIROpsDialect" ];
|
||||
let constructor = "::fir::createStackArraysPass()";
|
||||
}
|
||||
|
||||
def SimplifyRegionLite : Pass<"simplify-region-lite", "mlir::ModuleOp"> {
|
||||
let summary = "Region simplification";
|
||||
let description = [{
|
||||
|
||||
@@ -428,7 +428,8 @@ private:
|
||||
}
|
||||
}
|
||||
fir::factory::genInlinedAllocation(builder, loc, box, lbounds, extents,
|
||||
lenParams, mangleAlloc(alloc));
|
||||
lenParams, mangleAlloc(alloc),
|
||||
/*mustBeHeap=*/true);
|
||||
}
|
||||
|
||||
void genSimpleAllocation(const Allocation &alloc,
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "flang/Optimizer/Builder/Runtime/Derived.h"
|
||||
#include "flang/Optimizer/Builder/Runtime/Stop.h"
|
||||
#include "flang/Optimizer/Builder/Todo.h"
|
||||
#include "flang/Optimizer/Dialect/FIRAttr.h"
|
||||
#include "flang/Optimizer/Dialect/FIROps.h"
|
||||
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
|
||||
#include "flang/Optimizer/Support/FatalError.h"
|
||||
@@ -719,13 +720,11 @@ static mlir::Value allocateAndInitNewStorage(fir::FirOpBuilder &builder,
|
||||
return newStorage;
|
||||
}
|
||||
|
||||
void fir::factory::genInlinedAllocation(fir::FirOpBuilder &builder,
|
||||
mlir::Location loc,
|
||||
const fir::MutableBoxValue &box,
|
||||
mlir::ValueRange lbounds,
|
||||
mlir::ValueRange extents,
|
||||
mlir::ValueRange lenParams,
|
||||
llvm::StringRef allocName) {
|
||||
void fir::factory::genInlinedAllocation(
|
||||
fir::FirOpBuilder &builder, mlir::Location loc,
|
||||
const fir::MutableBoxValue &box, mlir::ValueRange lbounds,
|
||||
mlir::ValueRange extents, mlir::ValueRange lenParams,
|
||||
llvm::StringRef allocName, bool mustBeHeap) {
|
||||
auto lengths = getNewLengths(builder, loc, box, lenParams);
|
||||
llvm::SmallVector<mlir::Value> safeExtents;
|
||||
for (mlir::Value extent : extents)
|
||||
@@ -742,6 +741,9 @@ void fir::factory::genInlinedAllocation(fir::FirOpBuilder &builder,
|
||||
mlir::Value irBox = fir::factory::getMutableIRBox(builder, loc, box);
|
||||
fir::runtime::genDerivedTypeInitialize(builder, loc, irBox);
|
||||
}
|
||||
|
||||
heap->setAttr(fir::MustBeHeapAttr::getAttrName(),
|
||||
fir::MustBeHeapAttr::get(builder.getContext(), mustBeHeap));
|
||||
}
|
||||
|
||||
void fir::factory::genInlinedDeallocate(fir::FirOpBuilder &builder,
|
||||
|
||||
@@ -8,6 +8,7 @@ add_flang_library(FIRTransforms
|
||||
ArrayValueCopy.cpp
|
||||
ExternalNameConversion.cpp
|
||||
MemoryAllocation.cpp
|
||||
StackArrays.cpp
|
||||
MemRefDataFlowOpt.cpp
|
||||
SimplifyRegionLite.cpp
|
||||
AlgebraicSimplification.cpp
|
||||
|
||||
773
flang/lib/Optimizer/Transforms/StackArrays.cpp
Normal file
773
flang/lib/Optimizer/Transforms/StackArrays.cpp
Normal file
@@ -0,0 +1,773 @@
|
||||
//===- StackArrays.cpp ----------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
||||
#include "flang/Optimizer/Builder/LowLevelIntrinsics.h"
|
||||
#include "flang/Optimizer/Dialect/FIRAttr.h"
|
||||
#include "flang/Optimizer/Dialect/FIRDialect.h"
|
||||
#include "flang/Optimizer/Dialect/FIROps.h"
|
||||
#include "flang/Optimizer/Dialect/FIRType.h"
|
||||
#include "flang/Optimizer/Support/FIRContext.h"
|
||||
#include "flang/Optimizer/Transforms/Passes.h"
|
||||
#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h"
|
||||
#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
|
||||
#include "mlir/Analysis/DataFlow/DenseAnalysis.h"
|
||||
#include "mlir/Analysis/DataFlowFramework.h"
|
||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
#include "mlir/IR/Diagnostics.h"
|
||||
#include "mlir/IR/Value.h"
|
||||
#include "mlir/Interfaces/LoopLikeInterface.h"
|
||||
#include "mlir/Pass/Pass.h"
|
||||
#include "mlir/Support/LogicalResult.h"
|
||||
#include "mlir/Transforms/DialectConversion.h"
|
||||
#include "mlir/Transforms/Passes.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/PointerUnion.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <optional>
|
||||
|
||||
namespace fir {
|
||||
#define GEN_PASS_DEF_STACKARRAYS
|
||||
#include "flang/Optimizer/Transforms/Passes.h.inc"
|
||||
} // namespace fir
|
||||
|
||||
#define DEBUG_TYPE "stack-arrays"
|
||||
|
||||
namespace {
|
||||
|
||||
/// Allocation status tracked for an SSA value at a single program point.
enum class AllocationState {
  /// The status cannot be pinned down here, for example when only one arm of
  /// a conditional freed the value. This is an explicit "known unknown" and is
  /// distinct from a value that simply has no AllocationState recorded.
  Unknown,
  /// The value was heap-allocated inside this function and has since been
  /// freed.
  Freed,
  /// The value was heap-allocated inside this function and is still live; it
  /// is a candidate for being moved to the stack.
  Allocated,
};
|
||||
|
||||
/// Stores where an alloca should be inserted. If the PointerUnion is an
|
||||
/// Operation the alloca should be inserted /after/ the operation. If it is a
|
||||
/// block, the alloca can be placed anywhere in that block.
|
||||
class InsertionPoint {
|
||||
llvm::PointerUnion<mlir::Operation *, mlir::Block *> location;
|
||||
bool saveRestoreStack;
|
||||
|
||||
/// Get contained pointer type or nullptr
|
||||
template <class T>
|
||||
T *tryGetPtr() const {
|
||||
if (location.is<T *>())
|
||||
return location.get<T *>();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
public:
|
||||
template <class T>
|
||||
InsertionPoint(T *ptr, bool saveRestoreStack = false)
|
||||
: location(ptr), saveRestoreStack{saveRestoreStack} {}
|
||||
InsertionPoint(std::nullptr_t null)
|
||||
: location(null), saveRestoreStack{false} {}
|
||||
|
||||
/// Get contained operation, or nullptr
|
||||
mlir::Operation *tryGetOperation() const {
|
||||
return tryGetPtr<mlir::Operation>();
|
||||
}
|
||||
|
||||
/// Get contained block, or nullptr
|
||||
mlir::Block *tryGetBlock() const { return tryGetPtr<mlir::Block>(); }
|
||||
|
||||
/// Get whether the stack should be saved/restored. If yes, an llvm.stacksave
|
||||
/// intrinsic should be added before the alloca, and an llvm.stackrestore
|
||||
/// intrinsic should be added where the freemem is
|
||||
bool shouldSaveRestoreStack() const { return saveRestoreStack; }
|
||||
|
||||
operator bool() const { return tryGetOperation() || tryGetBlock(); }
|
||||
|
||||
bool operator==(const InsertionPoint &rhs) const {
|
||||
return (location == rhs.location) &&
|
||||
(saveRestoreStack == rhs.saveRestoreStack);
|
||||
}
|
||||
|
||||
bool operator!=(const InsertionPoint &rhs) const { return !(*this == rhs); }
|
||||
};
|
||||
|
||||
/// Maps SSA values to their AllocationState at a particular program point.
|
||||
/// Also caches the insertion points for the new alloca operations
|
||||
class LatticePoint : public mlir::dataflow::AbstractDenseLattice {
|
||||
// Maps all values we are interested in to states
|
||||
llvm::SmallDenseMap<mlir::Value, AllocationState, 1> stateMap;
|
||||
|
||||
public:
|
||||
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LatticePoint)
|
||||
using AbstractDenseLattice::AbstractDenseLattice;
|
||||
|
||||
bool operator==(const LatticePoint &rhs) const {
|
||||
return stateMap == rhs.stateMap;
|
||||
}
|
||||
|
||||
/// Join the lattice accross control-flow edges
|
||||
mlir::ChangeResult join(const AbstractDenseLattice &lattice) override;
|
||||
|
||||
void print(llvm::raw_ostream &os) const override;
|
||||
|
||||
/// Clear all modifications
|
||||
mlir::ChangeResult reset();
|
||||
|
||||
/// Set the state of an SSA value
|
||||
mlir::ChangeResult set(mlir::Value value, AllocationState state);
|
||||
|
||||
/// Get fir.allocmem ops which were allocated in this function and always
|
||||
/// freed before the function returns, plus whre to insert replacement
|
||||
/// fir.alloca ops
|
||||
void appendFreedValues(llvm::DenseSet<mlir::Value> &out) const;
|
||||
|
||||
std::optional<AllocationState> get(mlir::Value val) const;
|
||||
};
|
||||
|
||||
class AllocationAnalysis
|
||||
: public mlir::dataflow::DenseDataFlowAnalysis<LatticePoint> {
|
||||
public:
|
||||
using DenseDataFlowAnalysis::DenseDataFlowAnalysis;
|
||||
|
||||
void visitOperation(mlir::Operation *op, const LatticePoint &before,
|
||||
LatticePoint *after) override;
|
||||
|
||||
/// At an entry point, the last modifications of all memory resources are
|
||||
/// yet to be determined
|
||||
void setToEntryState(LatticePoint *lattice) override;
|
||||
|
||||
protected:
|
||||
/// Visit control flow operations and decide whether to call visitOperation
|
||||
/// to apply the transfer function
|
||||
void processOperation(mlir::Operation *op) override;
|
||||
};
|
||||
|
||||
/// Drives analysis to find candidate fir.allocmem operations which could be
|
||||
/// moved to the stack. Intended to be used with mlir::Pass::getAnalysis
|
||||
class StackArraysAnalysisWrapper {
|
||||
public:
|
||||
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(StackArraysAnalysisWrapper)
|
||||
|
||||
// Maps fir.allocmem -> place to insert alloca
|
||||
using AllocMemMap = llvm::DenseMap<mlir::Operation *, InsertionPoint>;
|
||||
|
||||
StackArraysAnalysisWrapper(mlir::Operation *op) {}
|
||||
|
||||
bool hasErrors() const;
|
||||
|
||||
const AllocMemMap &getCandidateOps(mlir::Operation *func);
|
||||
|
||||
private:
|
||||
llvm::DenseMap<mlir::Operation *, AllocMemMap> funcMaps;
|
||||
bool gotError = false;
|
||||
|
||||
void analyseFunction(mlir::Operation *func);
|
||||
};
|
||||
|
||||
/// Converts a fir.allocmem to a fir.alloca
|
||||
class AllocMemConversion : public mlir::OpRewritePattern<fir::AllocMemOp> {
|
||||
public:
|
||||
using OpRewritePattern::OpRewritePattern;
|
||||
|
||||
AllocMemConversion(
|
||||
mlir::MLIRContext *ctx,
|
||||
const llvm::DenseMap<mlir::Operation *, InsertionPoint> &candidateOps);
|
||||
|
||||
mlir::LogicalResult
|
||||
matchAndRewrite(fir::AllocMemOp allocmem,
|
||||
mlir::PatternRewriter &rewriter) const override;
|
||||
|
||||
/// Determine where to insert the alloca operation. The returned value should
|
||||
/// be checked to see if it is inside a loop
|
||||
static InsertionPoint findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc);
|
||||
|
||||
private:
|
||||
/// allocmem operations that DFA has determined are safe to move to the stack
|
||||
/// mapping to where to insert replacement freemem operations
|
||||
const llvm::DenseMap<mlir::Operation *, InsertionPoint> &candidateOps;
|
||||
|
||||
/// If we failed to find an insertion point not inside a loop, see if it would
|
||||
/// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop
|
||||
static InsertionPoint findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc);
|
||||
|
||||
/// Returns the alloca if it was successfully inserted, otherwise {}
|
||||
std::optional<fir::AllocaOp>
|
||||
insertAlloca(fir::AllocMemOp &oldAlloc,
|
||||
mlir::PatternRewriter &rewriter) const;
|
||||
|
||||
/// Inserts a stacksave before oldAlloc and a stackrestore after each freemem
|
||||
void insertStackSaveRestore(fir::AllocMemOp &oldAlloc,
|
||||
mlir::PatternRewriter &rewriter) const;
|
||||
};
|
||||
|
||||
/// The stack-arrays pass, built on the generated StackArraysBase
class StackArraysPass : public fir::impl::StackArraysBase<StackArraysPass> {
public:
  StackArraysPass() = default;
  StackArraysPass(const StackArraysPass &pass);

  llvm::StringRef getDescription() const override;

  /// Pass entry point
  void runOnOperation() override;
  /// Process the single function \p func
  void runOnFunc(mlir::Operation *func);

private:
  // Pass statistic; see its description string for what it counts
  Statistic runCount{this, "stackArraysRunCount",
                     "Number of heap allocations moved to the stack"};
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Write the name of \p state to \p os. Nothing is written for a value that
/// is not one of the enumerators.
static void print(llvm::raw_ostream &os, AllocationState state) {
  const char *name = nullptr;
  // Keep a default-free switch so the compiler can warn about new enumerators
  switch (state) {
  case AllocationState::Unknown:
    name = "Unknown";
    break;
  case AllocationState::Freed:
    name = "Freed";
    break;
  case AllocationState::Allocated:
    name = "Allocated";
    break;
  }
  if (name)
    os << name;
}
|
||||
|
||||
/// Join two AllocationStates for the same value coming from different CFG
|
||||
/// blocks
|
||||
static AllocationState join(AllocationState lhs, AllocationState rhs) {
|
||||
// | Allocated | Freed | Unknown
|
||||
// ========= | ========= | ========= | =========
|
||||
// Allocated | Allocated | Unknown | Unknown
|
||||
// Freed | Unknown | Freed | Unknown
|
||||
// Unknown | Unknown | Unknown | Unknown
|
||||
if (lhs == rhs)
|
||||
return lhs;
|
||||
return AllocationState::Unknown;
|
||||
}
|
||||
|
||||
/// Merge \p lattice into this lattice point. Values only known to \p lattice
/// are copied over; values known to both have their states joined. Reports
/// Change if any entry was added or modified.
mlir::ChangeResult LatticePoint::join(const AbstractDenseLattice &lattice) {
  const auto &other = static_cast<const LatticePoint &>(lattice);
  bool modified = false;

  for (const auto &[value, otherState] : other.stateMap) {
    // One lookup: either adds the entry or finds the existing one
    auto [entry, isNew] = stateMap.try_emplace(value, otherState);
    if (isNew) {
      modified = true;
      continue;
    }

    // Tracked on both sides: combine the two states
    const AllocationState current = entry->second;
    const AllocationState merged = ::join(current, otherState);
    if (merged != current) {
      entry->second = merged;
      modified = true;
    }
  }

  return modified ? mlir::ChangeResult::Change : mlir::ChangeResult::NoChange;
}
|
||||
|
||||
/// Write every tracked value and its state to \p os, one entry per line.
/// Each entry starts on a new line so that consecutive entries do not run
/// together in the output.
void LatticePoint::print(llvm::raw_ostream &os) const {
  for (const auto &[value, state] : stateMap) {
    os << "\n * " << value << ": ";
    ::print(os, state);
  }
}
|
||||
|
||||
/// Drop every tracked value. Reports Change only when there was something to
/// drop.
mlir::ChangeResult LatticePoint::reset() {
  const bool hadEntries = !stateMap.empty();
  if (hadEntries)
    stateMap.clear();
  return hadEntries ? mlir::ChangeResult::Change
                    : mlir::ChangeResult::NoChange;
}
|
||||
|
||||
/// Record \p state for \p value. Reports Change when the value was not
/// tracked before or its state differs from \p state, NoChange otherwise.
mlir::ChangeResult LatticePoint::set(mlir::Value value, AllocationState state) {
  // One lookup: either adds the entry or finds the existing one
  auto [entry, isNew] = stateMap.try_emplace(value, state);
  if (isNew)
    return mlir::ChangeResult::Change;

  if (entry->second == state)
    return mlir::ChangeResult::NoChange;

  entry->second = state;
  return mlir::ChangeResult::Change;
}
|
||||
|
||||
/// Get values which were allocated in this function and always freed before
|
||||
/// the function returns
|
||||
void LatticePoint::appendFreedValues(llvm::DenseSet<mlir::Value> &out) const {
|
||||
for (auto &[value, state] : stateMap) {
|
||||
if (state == AllocationState::Freed)
|
||||
out.insert(value);
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the state recorded for \p val, or an empty optional when \p val is
/// not tracked at this lattice point.
std::optional<AllocationState> LatticePoint::get(mlir::Value val) const {
  auto entry = stateMap.find(val);
  if (entry != stateMap.end())
    return entry->second;
  return std::nullopt;
}
|
||||
|
||||
/// Transfer function of the analysis.
///
/// The state \p before \p op is first joined into \p after. Then:
///  - a fir.allocmem of an array type that is not marked fir.must_be_heap
///    marks its result as Allocated;
///  - a fir.freemem of a value that is Allocated in \p before marks that
///    value as Freed;
///  - a fir.result additionally joins \p after into the lattice of its parent
///    operation.
/// Every path ends by propagating \p after if it changed, including the paths
/// that skip an allocmem: the initial join may already have modified \p after.
void AllocationAnalysis::visitOperation(mlir::Operation *op,
                                        const LatticePoint &before,
                                        LatticePoint *after) {
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: Visiting operation: " << *op
                          << "\n");
  LLVM_DEBUG(llvm::dbgs() << "--Lattice in: " << before << "\n");

  // propagate before -> after
  mlir::ChangeResult changed = after->join(before);

  if (auto allocmem = mlir::dyn_cast<fir::AllocMemOp>(op)) {
    assert(op->getNumResults() == 1 && "fir.allocmem has one result");
    auto attr = op->getAttrOfType<fir::MustBeHeapAttr>(
        fir::MustBeHeapAttr::getAttrName());
    if (attr && attr.getValue()) {
      // skip allocation marked not to be moved
      LLVM_DEBUG(llvm::dbgs() << "--Found fir.must_be_heap: skipping\n");
    } else if (!allocmem.getAllocatedType().isa<fir::SequenceType>()) {
      // only array allocations are tracked
      LLVM_DEBUG(llvm::dbgs()
                 << "--Allocation is not for an array: skipping\n");
    } else {
      mlir::Value result = op->getResult(0);
      changed |= after->set(result, AllocationState::Allocated);
    }
  } else if (mlir::isa<fir::FreeMemOp>(op)) {
    assert(op->getNumOperands() == 1 && "fir.freemem has one operand");
    mlir::Value operand = op->getOperand(0);
    std::optional<AllocationState> operandState = before.get(operand);
    if (operandState && *operandState == AllocationState::Allocated) {
      // don't tag things not allocated in this function as freed, so that we
      // don't think they are candidates for moving to the stack
      changed |= after->set(operand, AllocationState::Freed);
    }
  } else if (mlir::isa<fir::ResultOp>(op)) {
    mlir::Operation *parent = op->getParentOp();
    LatticePoint *parentLattice = getLattice(parent);
    assert(parentLattice);
    mlir::ChangeResult parentChanged = parentLattice->join(*after);
    propagateIfChanged(parentLattice, parentChanged);
  }

  // we pass lattices straight through fir.call because called functions should
  // not deallocate flang-generated array temporaries

  LLVM_DEBUG(llvm::dbgs() << "--Lattice out: " << *after << "\n");
  propagateIfChanged(after, changed);
}
|
||||
|
||||
/// Clear \p lattice and notify the solver if that changed anything.
void AllocationAnalysis::setToEntryState(LatticePoint *lattice) {
  const mlir::ChangeResult cleared = lattice->reset();
  propagateIfChanged(lattice, cleared);
}
|
||||
|
||||
/// Mostly a copy of AbstractDenseLattice::processOperation - the difference
|
||||
/// being that call operations are passed through to the transfer function
|
||||
void AllocationAnalysis::processOperation(mlir::Operation *op) {
|
||||
// If the containing block is not executable, bail out.
|
||||
if (!getOrCreateFor<mlir::dataflow::Executable>(op, op->getBlock())->isLive())
|
||||
return;
|
||||
|
||||
// Get the dense lattice to update
|
||||
mlir::dataflow::AbstractDenseLattice *after = getLattice(op);
|
||||
|
||||
// If this op implements region control-flow, then control-flow dictates its
|
||||
// transfer function.
|
||||
if (auto branch = mlir::dyn_cast<mlir::RegionBranchOpInterface>(op))
|
||||
return visitRegionBranchOperation(op, branch, after);
|
||||
|
||||
// pass call operations through to the transfer function
|
||||
|
||||
// Get the dense state before the execution of the op.
|
||||
const mlir::dataflow::AbstractDenseLattice *before;
|
||||
if (mlir::Operation *prev = op->getPrevNode())
|
||||
before = getLatticeFor(op, prev);
|
||||
else
|
||||
before = getLatticeFor(op, op->getBlock());
|
||||
|
||||
/// Invoke the operation transfer function
|
||||
visitOperationImpl(op, *before, after);
|
||||
}
|
||||
|
||||
/// Analyse \p func (which must be a func.func) and record, in funcMaps, the
/// fir.allocmem operations that can be moved to the stack together with the
/// insertion point for each replacement alloca. If the solver fails, gotError
/// is set and the entry for \p func is left empty.
void StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
  assert(mlir::isa<mlir::func::FuncOp>(func));
  mlir::DataFlowSolver solver;
  // constant propagation is required for dead code analysis, dead code analysis
  // is required to mark blocks live (required for mlir dense dfa)
  solver.load<mlir::dataflow::SparseConstantPropagation>();
  solver.load<mlir::dataflow::DeadCodeAnalysis>();

  // Create the entry for this function before running the solver
  AllocMemMap &candidateOps = funcMaps.try_emplace(func).first->second;

  solver.load<AllocationAnalysis>();
  if (failed(solver.initializeAndRun(func))) {
    llvm::errs() << "DataFlowSolver failed!";
    gotError = true;
    return;
  }

  // Join the lattices found at every return of the function
  LatticePoint atReturns{func};
  func->walk([&](mlir::func::ReturnOp child) {
    // there will be no lattice for an unreachable block
    if (const LatticePoint *lattice = solver.lookupState<LatticePoint>(child))
      atReturns.join(*lattice);
  });
  llvm::DenseSet<mlir::Value> freedValues;
  atReturns.appendFreedValues(freedValues);

  // We only replace allocations which are definitely freed on all routes
  // through the function because otherwise the allocation may have an intended
  // lifetime longer than the current stack frame (e.g. a heap allocation which
  // is then freed by another function).
  for (mlir::Value freedValue : freedValues) {
    fir::AllocMemOp allocmem = freedValue.getDefiningOp<fir::AllocMemOp>();
    InsertionPoint insertionPoint =
        AllocMemConversion::findAllocaInsertionPoint(allocmem);
    // Allocations with no usable insertion point are not candidates
    if (!insertionPoint)
      continue;
    candidateOps.insert({allocmem, insertionPoint});
  }

  LLVM_DEBUG(for (auto [allocMemOp, _]
                  : candidateOps) {
    llvm::dbgs() << "StackArrays: Found candidate op: " << *allocMemOp << '\n';
  });
}
|
||||
|
||||
/// True once analysing any function has failed.
bool StackArraysAnalysisWrapper::hasErrors() const {
  return gotError;
}
|
||||
|
||||
/// Return the candidate allocations for \p func, running the analysis on
/// first use.
const StackArraysAnalysisWrapper::AllocMemMap &
StackArraysAnalysisWrapper::getCandidateOps(mlir::Operation *func) {
  const bool alreadyAnalysed = funcMaps.find(func) != funcMaps.end();
  if (!alreadyAnalysed)
    analyseFunction(func);
  return funcMaps[func];
}
|
||||
|
||||
/// Build the pattern. \p candidateOps is kept by reference, not copied.
AllocMemConversion::AllocMemConversion(
    mlir::MLIRContext *ctx,
    const llvm::DenseMap<mlir::Operation *, InsertionPoint> &candidateOps)
    : OpRewritePattern(ctx), candidateOps(candidateOps) {
}
|
||||
|
||||
/// Replace \p allocmem with a fir.alloca.
///
/// Fails without changing anything when insertAlloca cannot create the
/// alloca. Otherwise every fir.freemem user of the allocation is erased, all
/// remaining uses are redirected to the new alloca, and the allocmem itself
/// is erased.
mlir::LogicalResult
AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
                                    mlir::PatternRewriter &rewriter) const {
  auto oldInsertionPt = rewriter.saveInsertionPoint();
  // add alloca operation
  std::optional<fir::AllocaOp> alloca = insertAlloca(allocmem, rewriter);
  rewriter.restoreInsertionPoint(oldInsertionPt);
  if (!alloca)
    return mlir::failure();

  // remove freemem operations
  // Erasing a user removes it from the use-list being walked, so collect the
  // freemem users first and erase them once the iteration is over.
  llvm::SmallVector<mlir::Operation *> freeOps;
  for (mlir::Operation *user : allocmem.getOperation()->getUsers())
    if (mlir::isa<fir::FreeMemOp>(user))
      freeOps.push_back(user);
  for (mlir::Operation *freeOp : freeOps)
    rewriter.eraseOp(freeOp);

  // replace references to heap allocation with references to stack allocation
  rewriter.replaceAllUsesWith(allocmem.getResult(), alloca->getResult());

  // remove allocmem operation
  rewriter.eraseOp(allocmem.getOperation());

  return mlir::success();
}
|
||||
|
||||
// TODO: use mlir::blockIsInLoop once D141401 is merged
|
||||
static bool isInLoop(mlir::Block *block) {
|
||||
mlir::Operation *parent = block->getParentOp();
|
||||
|
||||
// The block could be inside a loop-like operation
|
||||
if (mlir::isa<mlir::LoopLikeOpInterface>(parent) ||
|
||||
parent->getParentOfType<mlir::LoopLikeOpInterface>())
|
||||
return true;
|
||||
|
||||
// This block might be nested inside another block, which is in a loop
|
||||
if (!mlir::isa<mlir::FunctionOpInterface>(parent))
|
||||
if (isInLoop(parent->getBlock()))
|
||||
return true;
|
||||
|
||||
// Or the block could be inside a control flow graph loop:
|
||||
// A block is in a control flow graph loop if it can reach itself in a graph
|
||||
// traversal
|
||||
llvm::DenseSet<mlir::Block *> visited;
|
||||
llvm::SmallVector<mlir::Block *> stack;
|
||||
stack.push_back(block);
|
||||
while (!stack.empty()) {
|
||||
mlir::Block *current = stack.pop_back_val();
|
||||
auto [it, inserted] = visited.insert(current);
|
||||
if (!inserted) {
|
||||
// loop detected
|
||||
if (current == block)
|
||||
return true;
|
||||
continue;
|
||||
}
|
||||
|
||||
stack.reserve(stack.size() + current->getNumSuccessors());
|
||||
for (mlir::Block *successor : current->getSuccessors())
|
||||
stack.push_back(successor);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns true when \p op is inside a loop: either its block is in a loop or
/// \p op is nested in a loop-like operation.
static bool isInLoop(mlir::Operation *op) {
  if (isInLoop(op->getBlock()))
    return true;
  return static_cast<bool>(op->getParentOfType<mlir::LoopLikeOpInterface>());
}
|
||||
|
||||
/// Choose where the alloca replacing \p oldAlloc should be inserted.
///
/// Ideally the alloca would go in the function entry block so that stack
/// space is not allocated in a loop. The operands of the alloca may not be
/// available that early, so it is placed after the last operand becomes
/// available instead. An allocmem inside an OpenMP region is not moved out of
/// that region. The result may be an empty InsertionPoint.
InsertionPoint
AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) {
  LLVM_DEBUG(llvm::dbgs() << "StackArrays: findAllocaInsertionPoint: "
                          << oldAlloc << "\n");

  // Vet an Operation or Block we are about to return: it must not be in a loop
  auto checkReturn = [&](auto *point) -> InsertionPoint {
    if (!isInLoop(point))
      return {point};

    // The preferred point is in a loop: fall back to the old location
    mlir::Operation *oldAllocOp = oldAlloc.getOperation();
    if (!isInLoop(oldAllocOp))
      return {oldAllocOp};

    // Even the old location is in a loop: last resort is a stack save/restore
    // at the old location, which may still yield no insertion point
    return findAllocaLoopInsertionPoint(oldAlloc);
  };

  auto oldOmpRegion =
      oldAlloc->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();

  // Find when the last operand value becomes available
  mlir::Block *operandsBlock = nullptr;
  mlir::Operation *lastOperand = nullptr;
  for (mlir::Value operand : oldAlloc.getOperands()) {
    LLVM_DEBUG(llvm::dbgs() << "--considering operand " << operand << "\n");
    mlir::Operation *definer = operand.getDefiningOp();
    // No defining op for this operand: use the location of the allocmem
    if (!definer)
      return checkReturn(oldAlloc.getOperation());

    if (!operandsBlock) {
      operandsBlock = definer->getBlock();
    } else if (operandsBlock != definer->getBlock()) {
      LLVM_DEBUG(llvm::dbgs()
                 << "----operand declared in a different block!\n");
      // Operation::isBeforeInBlock requires the operations to be in the same
      // block. The best we can do is the location of the allocmem.
      return checkReturn(oldAlloc.getOperation());
    }

    if (!lastOperand || lastOperand->isBeforeInBlock(definer))
      lastOperand = definer;
  }

  if (lastOperand) {
    // there were value operands to the allocmem so insert after the last one
    LLVM_DEBUG(llvm::dbgs()
               << "--Placing after last operand: " << *lastOperand << "\n");
    // check we aren't moving out of an omp region
    auto lastOpOmpRegion =
        lastOperand->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
    if (lastOpOmpRegion == oldOmpRegion)
      return checkReturn(lastOperand);
    // Presumably this happened because the operands became ready before the
    // start of this openmp region. (lastOpOmpRegion != oldOmpRegion) should
    // imply that oldOmpRegion comes after lastOpOmpRegion.
    return checkReturn(oldOmpRegion.getAllocaBlock());
  }

  // There were no value operands to the allocmem so we are safe to insert it
  // as early as we want

  // handle openmp case
  if (oldOmpRegion)
    return checkReturn(oldOmpRegion.getAllocaBlock());

  // fall back to the function entry block
  mlir::func::FuncOp func = oldAlloc->getParentOfType<mlir::func::FuncOp>();
  assert(func && "This analysis is run on func.func");
  mlir::Block &entryBlock = func.getBlocks().front();
  LLVM_DEBUG(llvm::dbgs() << "--Placing at the start of func entry block\n");
  return checkReturn(&entryBlock);
}
|
||||
|
||||
/// Last-resort placement: try to insert at the location of the old
/// allocation, which is inside of a loop, using
/// llvm.stacksave/llvm.stackrestore.
///
/// Returns an empty InsertionPoint when a fir.freemem of the allocation lives
/// in a different block, or when a fir.alloca is found while scanning forward
/// from the allocmem towards a fir.freemem.
InsertionPoint
AllocMemConversion::findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc) {
  mlir::Operation *oldAllocOp = oldAlloc;

  // Gather the freemem users of the allocation
  llvm::SmallVector<mlir::Operation *, 1> freeOps;
  for (mlir::Operation *user : oldAllocOp->getUsers()) {
    if (!mlir::isa<fir::FreeMemOp>(user))
      continue;
    freeOps.push_back(user);
  }
  assert(freeOps.size() && "DFA should only return freed memory");

  // note: for flang generated temporaries there should only be one free op
  for (mlir::Operation *free : freeOps) {
    // Don't attempt to reason about a stacksave/stackrestore between
    // different blocks
    if (free->getBlock() != oldAllocOp->getBlock())
      return {nullptr};

    // Check that there aren't any other stack allocations in between the
    // stack save and stack restore
    mlir::Operation *cursor = oldAlloc;
    while (cursor && cursor != free) {
      if (mlir::isa<fir::AllocaOp>(cursor))
        return {nullptr};
      cursor = cursor->getNextNode();
    }
  }

  return InsertionPoint{oldAllocOp, /*saveRestoreStack=*/true};
}
|
||||
|
||||
std::optional<fir::AllocaOp>
|
||||
AllocMemConversion::insertAlloca(fir::AllocMemOp &oldAlloc,
|
||||
mlir::PatternRewriter &rewriter) const {
|
||||
auto it = candidateOps.find(oldAlloc.getOperation());
|
||||
if (it == candidateOps.end())
|
||||
return {};
|
||||
InsertionPoint insertionPoint = it->second;
|
||||
if (!insertionPoint)
|
||||
return {};
|
||||
|
||||
if (insertionPoint.shouldSaveRestoreStack())
|
||||
insertStackSaveRestore(oldAlloc, rewriter);
|
||||
|
||||
mlir::Location loc = oldAlloc.getLoc();
|
||||
mlir::Type varTy = oldAlloc.getInType();
|
||||
if (mlir::Operation *op = insertionPoint.tryGetOperation()) {
|
||||
rewriter.setInsertionPointAfter(op);
|
||||
} else {
|
||||
mlir::Block *block = insertionPoint.tryGetBlock();
|
||||
assert(block && "There must be a valid insertion point");
|
||||
rewriter.setInsertionPointToStart(block);
|
||||
}
|
||||
|
||||
auto unpackName = [](std::optional<llvm::StringRef> opt) -> llvm::StringRef {
|
||||
if (opt)
|
||||
return *opt;
|
||||
return {};
|
||||
};
|
||||
|
||||
llvm::StringRef uniqName = unpackName(oldAlloc.getUniqName());
|
||||
llvm::StringRef bindcName = unpackName(oldAlloc.getBindcName());
|
||||
return rewriter.create<fir::AllocaOp>(loc, varTy, uniqName, bindcName,
|
||||
oldAlloc.getTypeparams(),
|
||||
oldAlloc.getShape());
|
||||
}
|
||||
|
||||
/// Surround the lifetime of \p oldAlloc with stack save/restore calls.
///
/// A call to the function returned by fir::factory::getLlvmStackSave is
/// created immediately before \p oldAlloc, and a call to the function returned
/// by fir::factory::getLlvmStackRestore (taking the saved stack pointer) is
/// created immediately before every fir.freemem user of \p oldAlloc.
///
/// \param oldAlloc the heap allocation being converted.
/// \param rewriter the pattern rewriter; its insertion point is saved on
/// entry and restored before returning.
void AllocMemConversion::insertStackSaveRestore(
    fir::AllocMemOp &oldAlloc, mlir::PatternRewriter &rewriter) const {
  auto savedInsertionPoint = rewriter.saveInsertionPoint();

  auto module = oldAlloc->getParentOfType<mlir::ModuleOp>();
  fir::KindMapping kinds = fir::getKindMapping(module);
  fir::FirOpBuilder builder{rewriter, kinds};

  // Save the stack pointer right before the original allocation.
  mlir::func::FuncOp saveFn = fir::factory::getLlvmStackSave(builder);
  mlir::SymbolRefAttr saveSym = builder.getSymbolRefAttr(saveFn.getName());
  builder.setInsertionPoint(oldAlloc);
  auto saveCall = builder.create<fir::CallOp>(
      oldAlloc.getLoc(), saveFn.getFunctionType().getResults(), saveSym,
      mlir::ValueRange{});
  mlir::Value sp = saveCall.getResult(0);

  // Restore the stack pointer in front of every free of the allocation.
  mlir::func::FuncOp restoreFn = fir::factory::getLlvmStackRestore(builder);
  mlir::SymbolRefAttr restoreSym =
      builder.getSymbolRefAttr(restoreFn.getName());
  for (mlir::Operation *use : oldAlloc->getUsers()) {
    if (!mlir::isa<fir::FreeMemOp>(use))
      continue;
    builder.setInsertionPoint(use);
    builder.create<fir::CallOp>(use->getLoc(),
                                restoreFn.getFunctionType().getResults(),
                                restoreSym, mlir::ValueRange{sp});
  }

  rewriter.restoreInsertionPoint(savedInsertionPoint);
}
|
||||
|
||||
/// Copy constructor: forwards \p other to the generated
/// fir::impl::StackArraysBase base class.
StackArraysPass::StackArraysPass(const StackArraysPass &other)
    : fir::impl::StackArraysBase<StackArraysPass>(other) {
}
|
||||
|
||||
/// Return the one-line, human-readable summary of this pass.
llvm::StringRef StackArraysPass::getDescription() const {
  llvm::StringRef description =
      "Move heap allocated array temporaries to the stack";
  return description;
}
|
||||
|
||||
void StackArraysPass::runOnOperation() {
|
||||
mlir::ModuleOp mod = getOperation();
|
||||
|
||||
mod.walk([this](mlir::func::FuncOp func) { runOnFunc(func); });
|
||||
}
|
||||
|
||||
/// Convert the candidate heap allocations of one function.
///
/// The candidates come from StackArraysAnalysisWrapper. If the analysis
/// reports errors the pass is marked as failed. If there are no candidates
/// nothing is done. Otherwise a partial conversion is run in which a
/// fir.allocmem is illegal exactly when it is one of the candidates, with
/// AllocMemConversion as the only pattern.
///
/// \param func the operation to process; must be a func.func.
void StackArraysPass::runOnFunc(mlir::Operation *func) {
  assert(mlir::isa<mlir::func::FuncOp>(func));

  auto &analysis = getAnalysis<StackArraysAnalysisWrapper>();
  const auto &candidateOps = analysis.getCandidateOps(func);
  if (analysis.hasErrors()) {
    signalPassFailure();
    return;
  }

  if (candidateOps.empty())
    return;
  // Accumulate the number of candidates into the pass counter.
  runCount += candidateOps.size();

  mlir::MLIRContext &context = getContext();

  // Everything is legal except the allocations selected by the analysis.
  mlir::ConversionTarget target(context);
  target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect,
                         mlir::func::FuncDialect>();
  auto isNotCandidate = [&](fir::AllocMemOp alloc) {
    return candidateOps.count(alloc.getOperation()) == 0;
  };
  target.addDynamicallyLegalOp<fir::AllocMemOp>(isNotCandidate);

  mlir::RewritePatternSet patterns(&context);
  patterns.insert<AllocMemConversion>(&context, candidateOps);

  const bool conversionFailed = mlir::failed(
      mlir::applyPartialConversion(func, target, std::move(patterns)));
  if (!conversionFailed)
    return;

  mlir::emitError(func->getLoc(), "error in stack arrays optimization\n");
  signalPassFailure();
}
|
||||
|
||||
std::unique_ptr<mlir::Pass> fir::createStackArraysPass() {
|
||||
return std::make_unique<StackArraysPass>();
|
||||
}
|
||||
@@ -20,7 +20,7 @@ subroutine allocation(x)
|
||||
! CHECK: %[[VAL_12:.*]] = arith.constant 0 : index
|
||||
! CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_12]] : index
|
||||
! CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_11]], %[[VAL_12]] : index
|
||||
! CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[VAL_2]] : index), %[[VAL_14]] {uniq_name = "_QFallocationEx.alloc"}
|
||||
! CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[VAL_2]] : index), %[[VAL_14]] {fir.must_be_heap = true, uniq_name = "_QFallocationEx.alloc"}
|
||||
! CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
|
||||
! CHECK: %[[VAL_17:.*]] = fir.embox %[[VAL_15]](%[[VAL_16]]) typeparams %[[VAL_2]] : (!fir.heap<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>
|
||||
! CHECK: fir.store %[[VAL_17]] to %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>
|
||||
@@ -84,7 +84,7 @@ subroutine alloc_comp(x)
|
||||
! CHECK: %[[VAL_9:.*]] = arith.constant 0 : index
|
||||
! CHECK: %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_9]] : index
|
||||
! CHECK: %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_8]], %[[VAL_9]] : index
|
||||
! CHECK: %[[VAL_12:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_11]] {uniq_name = "_QEa.alloc"}
|
||||
! CHECK: %[[VAL_12:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_11]] {fir.must_be_heap = true, uniq_name = "_QEa.alloc"}
|
||||
! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1>
|
||||
! CHECK: %[[VAL_14:.*]] = fir.embox %[[VAL_12]](%[[VAL_13]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
|
||||
! CHECK: fir.store %[[VAL_14]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
|
||||
|
||||
@@ -177,7 +177,7 @@ end
|
||||
! CHECK: %[[VAL_2:.*]] = fir.zero_bits !fir.ptr<i32>
|
||||
! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<!fir.ptr<i32>>
|
||||
! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "ptr", uniq_name = "_QFc_loc_non_save_pointer_scalarEptr"}
|
||||
! CHECK: %[[VAL_4:.*]] = fir.allocmem i32 {uniq_name = "_QFc_loc_non_save_pointer_scalarEi.alloc"}
|
||||
! CHECK: %[[VAL_4:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFc_loc_non_save_pointer_scalarEi.alloc"}
|
||||
! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.heap<i32>) -> !fir.ptr<i32>
|
||||
! CHECK: fir.store %[[VAL_5]] to %[[VAL_1]] : !fir.ref<!fir.ptr<i32>>
|
||||
! CHECK: %[[VAL_6:.*]] = arith.constant 10 : i32
|
||||
|
||||
@@ -43,7 +43,7 @@ subroutine ss(count)
|
||||
! CHECK: %[[V_6:[0-9]+]] = fir.alloca i64 {bindc_name = "count_rate_", fir.target, uniq_name = "_QFssEcount_rate_"}
|
||||
! CHECK: %[[V_7:[0-9]+]] = fir.convert %[[V_6]] : (!fir.ref<i64>) -> !fir.ptr<i64>
|
||||
! CHECK: fir.store %[[V_7]] to %[[V_4]] : !fir.ref<!fir.ptr<i64>>
|
||||
! CHECK: %[[V_8:[0-9]+]] = fir.allocmem i64 {uniq_name = "_QFssEcount_max.alloc"}
|
||||
! CHECK: %[[V_8:[0-9]+]] = fir.allocmem i64 {fir.must_be_heap = true, uniq_name = "_QFssEcount_max.alloc"}
|
||||
! CHECK: fir.store %[[V_8]] to %[[V_1]] : !fir.ref<!fir.heap<i64>>
|
||||
! CHECK: %[[V_9:[0-9]+]] = fir.load %[[V_4]] : !fir.ref<!fir.ptr<i64>>
|
||||
! CHECK: %[[V_10:[0-9]+]] = fir.load %[[V_1]] : !fir.ref<!fir.heap<i64>>
|
||||
|
||||
140
flang/test/Transforms/stack-arrays.f90
Normal file
140
flang/test/Transforms/stack-arrays.f90
Normal file
@@ -0,0 +1,140 @@
|
||||
! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --array-value-copy | fir-opt --stack-arrays | FileCheck %s
|
||||
|
||||
! check simple array value copy case
|
||||
subroutine array_value_copy_simple(arr)
|
||||
integer, intent(inout) :: arr(4)
|
||||
arr(3:4) = arr(1:2)
|
||||
end subroutine
|
||||
! CHECK-LABEL: func.func @_QParray_value_copy_simple(%arg0: !fir.ref<!fir.array<4xi32>>
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK-NOT: fir.freemem
|
||||
! CHECK: fir.alloca !fir.array<4xi32>
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK-NOT: fir.freemem
|
||||
! CHECK: return
|
||||
! CHECK-NEXT: }
|
||||
|
||||
! check complex array value copy case
|
||||
module stuff
|
||||
type DerivedWithAllocatable
|
||||
integer, dimension(:), allocatable :: dat
|
||||
end type
|
||||
|
||||
contains
|
||||
subroutine array_value_copy_complex(arr)
|
||||
type(DerivedWithAllocatable), intent(inout) :: arr(:)
|
||||
arr(3:4) = arr(1:2)
|
||||
end subroutine
|
||||
end module
|
||||
! CHECK: func.func
|
||||
! CHECK-SAME: array_value_copy_complex
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK-NOT: fir.freemem
|
||||
! CHECK: fir.alloca !fir.array<?x!fir.type<_QMstuffTderivedwithallocatable
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK-NOT: fir.freemem
|
||||
! CHECK: return
|
||||
! CHECK-NEXT: }
|
||||
|
||||
subroutine parameter_array_init
|
||||
integer, parameter :: p(100) = 42
|
||||
call use_p(p)
|
||||
end subroutine
|
||||
! CHECK: func.func
|
||||
! CHECK-SAME: parameter_array_init
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK-NOT: fir.freemem
|
||||
! CHECK: fir.alloca !fir.array<100xi32>
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK-NOT: fir.freemem
|
||||
! CHECK: return
|
||||
! CHECK-NEXT: }
|
||||
|
||||
subroutine test_vector_subscripted_section_to_box(v, x)
|
||||
interface
|
||||
subroutine takes_box(y)
|
||||
real :: y(:)
|
||||
end subroutine
|
||||
end interface
|
||||
|
||||
integer :: v(:)
|
||||
real :: x(:)
|
||||
call takes_box(x(v))
|
||||
end subroutine
|
||||
! CHECK: func.func
|
||||
! CHECK-SAME: test_vector_subscripted_section_to_box
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK: fir.alloca !fir.array<?xf32>
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK: fir.call @_QPtakes_box
|
||||
! CHECK-NOT: fir.freemem
|
||||
! CHECK: return
|
||||
! CHECK-NEXT: }
|
||||
|
||||
subroutine call_parenthesized_arg(x)
|
||||
integer :: x(100)
|
||||
call bar((x))
|
||||
end subroutine
|
||||
! CHECK: func.func
|
||||
! CHECK-SAME: call_parenthesized_arg
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK: fir.alloca !fir.array<100xi32>
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK: fir.call @_QPbar
|
||||
! CHECK-NOT: fir.freemem
|
||||
! CHECK: return
|
||||
! CHECK-NEXT: }
|
||||
|
||||
subroutine where_allocatable_assignments(a, b)
|
||||
integer :: a(:)
|
||||
integer, allocatable :: b(:)
|
||||
where(b > 0)
|
||||
b = a
|
||||
elsewhere
|
||||
b(:) = 0
|
||||
end where
|
||||
end subroutine
|
||||
! TODO: broken: passing allocation through fir.result
|
||||
! CHECK: func.func
|
||||
! CHECK-SAME: where_allocatable_assignments
|
||||
! CHECK: return
|
||||
! CHECK-NEXT: }
|
||||
|
||||
subroutine array_constructor(a, b)
|
||||
real :: a(5), b
|
||||
real, external :: f
|
||||
a = [f(b), f(b+1), f(b+2), f(b+5), f(b+11)]
|
||||
end subroutine
|
||||
! TODO: broken: realloc
|
||||
! CHECK: func.func
|
||||
! CHECK-SAME: array_constructor
|
||||
! CHECK: return
|
||||
! CHECK-NEXT: }
|
||||
|
||||
subroutine sequence(seq, n)
|
||||
integer :: n, seq(n)
|
||||
seq = [(i,i=1,n)]
|
||||
end subroutine
|
||||
! TODO: broken: realloc
|
||||
! CHECK: func.func
|
||||
! CHECK-SAME: sequence
|
||||
! CHECK: return
|
||||
! CHECK-NEXT: }
|
||||
|
||||
subroutine CFGLoop(x)
|
||||
integer, parameter :: k = 100, m=1000000, n = k*m
|
||||
integer :: x(n)
|
||||
logical :: has_error
|
||||
|
||||
do i=0,m-1
|
||||
x(k*i+1:k*(i+1)) = x(k*(i+1):k*i+1:-1)
|
||||
if (has_error(x, k)) stop
|
||||
end do
|
||||
end subroutine
|
||||
! CHECK: func.func
|
||||
! CHECK-SAME: cfgloop
|
||||
! CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<100000000xi32>
|
||||
! CHECK-NOT: fir.allocmem
|
||||
! CHECK-NOT: fir.freemem
|
||||
! CHECK: return
|
||||
! CHECK-NEXT: }
|
||||
309
flang/test/Transforms/stack-arrays.fir
Normal file
309
flang/test/Transforms/stack-arrays.fir
Normal file
@@ -0,0 +1,309 @@
|
||||
// RUN: fir-opt --stack-arrays %s | FileCheck %s
|
||||
|
||||
// Simplest transformation
|
||||
func.func @simple() {
|
||||
%0 = fir.allocmem !fir.array<42xi32>
|
||||
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @simple() {
|
||||
// CHECK-NEXT: fir.alloca !fir.array<42xi32>
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Check fir.must_be_heap allocations are not moved
|
||||
func.func @must_be_heap() {
|
||||
%0 = fir.allocmem !fir.array<42xi32> {fir.must_be_heap = true}
|
||||
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @must_be_heap() {
|
||||
// CHECK-NEXT: %[[ALLOC:.*]] = fir.allocmem !fir.array<42xi32> {fir.must_be_heap = true}
|
||||
// CHECK-NEXT: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<42xi32>>
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Check the data-flow-analysis can detect cases where we aren't sure if memory
|
||||
// is freed by the end of the function
|
||||
func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
|
||||
%7 = arith.constant 42 : index
|
||||
%8 = fir.allocmem !fir.array<?xi32>, %7 {uniq_name = "_QFdfa1Earr.alloc"}
|
||||
%9 = fir.load %arg0 : !fir.ref<!fir.logical<4>>
|
||||
%10 = fir.convert %9 : (!fir.logical<4>) -> i1
|
||||
fir.if %10 {
|
||||
fir.freemem %8 : !fir.heap<!fir.array<?xi32>>
|
||||
} else {
|
||||
}
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
|
||||
// CHECK-NEXT: %[[C42:.*]] = arith.constant 42 : index
|
||||
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[C42]] {uniq_name = "_QFdfa1Earr.alloc"}
|
||||
// CHECK-NEXT: %[[LOGICAL:.*]] = fir.load %arg0 : !fir.ref<!fir.logical<4>>
|
||||
// CHECK-NEXT: %[[BOOL:.*]] = fir.convert %[[LOGICAL]] : (!fir.logical<4>) -> i1
|
||||
// CHECK-NEXT: fir.if %[[BOOL]] {
|
||||
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
|
||||
// CHECK-NEXT: } else {
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Check scf.if (fir.if is not considered a branch operation)
|
||||
func.func @dfa2(%arg0: i1) {
|
||||
%a = fir.allocmem !fir.array<1xi8>
|
||||
scf.if %arg0 {
|
||||
fir.freemem %a : !fir.heap<!fir.array<1xi8>>
|
||||
} else {
|
||||
}
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @dfa2(%arg0: i1) {
|
||||
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<1xi8>
|
||||
// CHECK-NEXT: scf.if %arg0 {
|
||||
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<1xi8>>
|
||||
// CHECK-NEXT: } else {
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// check the alloca is placed after all operands become available
|
||||
func.func @placement1() {
|
||||
// do some stuff with other ssa values
|
||||
%1 = arith.constant 1 : index
|
||||
%2 = arith.constant 2 : index
|
||||
%3 = arith.addi %1, %2 : index
|
||||
// operand is now available
|
||||
%4 = fir.allocmem !fir.array<?xi32>, %3
|
||||
// ...
|
||||
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @placement1() {
|
||||
// CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : index
|
||||
// CHECK-NEXT: %[[TWO:.*]] = arith.constant 2 : index
|
||||
// CHECK-NEXT: %[[ARG:.*]] = arith.addi %[[ONE]], %[[TWO]] : index
|
||||
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<?xi32>, %[[ARG]]
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// check that if there are no operands, then the alloca is placed early
|
||||
func.func @placement2() {
|
||||
// do some stuff with other ssa values
|
||||
%1 = arith.constant 1 : index
|
||||
%2 = arith.constant 2 : index
|
||||
%3 = arith.addi %1, %2 : index
|
||||
%4 = fir.allocmem !fir.array<42xi32>
|
||||
// ...
|
||||
fir.freemem %4 : !fir.heap<!fir.array<42xi32>>
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @placement2() {
|
||||
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<42xi32>
|
||||
// CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : index
|
||||
// CHECK-NEXT: %[[TWO:.*]] = arith.constant 2 : index
|
||||
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[ONE]], %[[TWO]] : index
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// check that stack allocations which must be placed in loops use stacksave
|
||||
func.func @placement3() {
|
||||
%c1 = arith.constant 1 : index
|
||||
%c1_i32 = fir.convert %c1 : (index) -> i32
|
||||
%c2 = arith.constant 2 : index
|
||||
%c10 = arith.constant 10 : index
|
||||
%0:2 = fir.do_loop %arg0 = %c1 to %c10 step %c1 iter_args(%arg1 = %c1_i32) -> (index, i32) {
|
||||
%3 = arith.addi %c1, %c2 : index
|
||||
// operand is now available
|
||||
%4 = fir.allocmem !fir.array<?xi32>, %3
|
||||
// ...
|
||||
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
|
||||
fir.result %3, %c1_i32 : index, i32
|
||||
}
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @placement3() {
|
||||
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
|
||||
// CHECK-NEXT: %[[C1_I32:.*]] = fir.convert %[[C1]] : (index) -> i32
|
||||
// CHECK-NEXT: %[[C2:.*]] = arith.constant 2 : index
|
||||
// CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
|
||||
// CHECK-NEXT: fir.do_loop
|
||||
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[C1]], %[[C2]] : index
|
||||
// CHECK-NEXT: %[[SP:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref<i8>
|
||||
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<?xi32>, %[[SUM]]
|
||||
// CHECK-NEXT: fir.call @llvm.stackrestore(%[[SP]])
|
||||
// CHECK-NEXT: fir.result
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// check that stack save/restore are used in CFG loops
|
||||
func.func @placement4(%arg0 : i1) {
|
||||
%c1 = arith.constant 1 : index
|
||||
%c1_i32 = fir.convert %c1 : (index) -> i32
|
||||
%c2 = arith.constant 2 : index
|
||||
%c10 = arith.constant 10 : index
|
||||
cf.br ^bb1
|
||||
^bb1:
|
||||
%3 = arith.addi %c1, %c2 : index
|
||||
// operand is now available
|
||||
%4 = fir.allocmem !fir.array<?xi32>, %3
|
||||
// ...
|
||||
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
|
||||
cf.cond_br %arg0, ^bb1, ^bb2
|
||||
^bb2:
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @placement4(%arg0: i1) {
|
||||
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
|
||||
// CHECK-NEXT: %[[C1_I32:.*]] = fir.convert %[[C1]] : (index) -> i32
|
||||
// CHECK-NEXT: %[[C2:.*]] = arith.constant 2 : index
|
||||
// CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
|
||||
// CHECK-NEXT: cf.br ^bb1
|
||||
// CHECK-NEXT: ^bb1:
|
||||
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[C1]], %[[C2]] : index
|
||||
// CHECK-NEXT: %[[SP:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref<i8>
|
||||
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<?xi32>, %[[SUM]]
|
||||
// CHECK-NEXT: fir.call @llvm.stackrestore(%[[SP]]) : (!fir.ref<i8>) -> ()
|
||||
// CHECK-NEXT: cf.cond_br %arg0, ^bb1, ^bb2
|
||||
// CHECK-NEXT: ^bb2:
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// check that stacksave is not used when there is an intervening alloca
|
||||
func.func @placement5() {
|
||||
%c1 = arith.constant 1 : index
|
||||
%c1_i32 = fir.convert %c1 : (index) -> i32
|
||||
%c2 = arith.constant 2 : index
|
||||
%c10 = arith.constant 10 : index
|
||||
%0:2 = fir.do_loop %arg0 = %c1 to %c10 step %c1 iter_args(%arg1 = %c1_i32) -> (index, i32) {
|
||||
%3 = arith.addi %c1, %c2 : index
|
||||
// operand is now available
|
||||
%4 = fir.allocmem !fir.array<?xi32>, %3
|
||||
%5 = fir.alloca i32
|
||||
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
|
||||
fir.result %3, %c1_i32 : index, i32
|
||||
}
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @placement5() {
|
||||
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
|
||||
// CHECK-NEXT: %[[C1_I32:.*]] = fir.convert %[[C1]] : (index) -> i32
|
||||
// CHECK-NEXT: %[[C2:.*]] = arith.constant 2 : index
|
||||
// CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
|
||||
// CHECK-NEXT: fir.do_loop
|
||||
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[C1]], %[[C2]] : index
|
||||
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[SUM]]
|
||||
// CHECK-NEXT: %[[IDX:.*]] = fir.alloca i32
|
||||
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
|
||||
// CHECK-NEXT: fir.result
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// check that stack save/restore are not used when the allocmem and freemem are
|
||||
// in different blocks
|
||||
func.func @placement6(%arg0: i1) {
|
||||
%c1 = arith.constant 1 : index
|
||||
%c1_i32 = fir.convert %c1 : (index) -> i32
|
||||
%c2 = arith.constant 2 : index
|
||||
%c10 = arith.constant 10 : index
|
||||
cf.br ^bb1
|
||||
^bb1:
|
||||
%3 = arith.addi %c1, %c2 : index
|
||||
// operand is now available
|
||||
%4 = fir.allocmem !fir.array<?xi32>, %3
|
||||
// ...
|
||||
cf.cond_br %arg0, ^bb2, ^bb3
|
||||
^bb2:
|
||||
// ...
|
||||
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
|
||||
cf.br ^bb1
|
||||
^bb3:
|
||||
// ...
|
||||
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
|
||||
cf.br ^bb1
|
||||
}
|
||||
// CHECK: func.func @placement6(%arg0: i1) {
|
||||
// CHECK-NEXT: %[[c1:.*]] = arith.constant 1 : index
|
||||
// CHECK-NEXT: %[[c1_i32:.*]] = fir.convert %[[c1]] : (index) -> i32
|
||||
// CHECK-NEXT: %[[c2:.*]] = arith.constant 2 : index
|
||||
// CHECK-NEXT: %[[c10:.*]] = arith.constant 10 : index
|
||||
// CHECK-NEXT: cf.br ^bb1
|
||||
// CHECK-NEXT: ^bb1:
|
||||
// CHECK-NEXT: %[[ADD:.*]] = arith.addi %[[c1]], %[[c2]] : index
|
||||
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[ADD]]
|
||||
// CHECK-NEXT: cf.cond_br %arg0, ^bb2, ^bb3
|
||||
// CHECK-NEXT: ^bb2:
|
||||
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
|
||||
// CHECK-NEXT: cf.br ^bb1
|
||||
// CHECK-NEXT: ^bb3:
|
||||
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
|
||||
// CHECK-NEXT: cf.br ^bb1
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Check multiple returns, where the memory is always freed
|
||||
func.func @returns(%arg0: i1) {
|
||||
%0 = fir.allocmem !fir.array<42xi32>
|
||||
cf.cond_br %arg0, ^bb1, ^bb2
|
||||
^bb1:
|
||||
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
|
||||
return
|
||||
^bb2:
|
||||
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @returns(%[[COND:.*]]: i1) {
|
||||
// CHECK-NEXT: %[[ALLOC:.*]] = fir.alloca !fir.array<42xi32>
|
||||
// CHECK-NEXT: cf.cond_br %[[COND]], ^bb1, ^bb2
|
||||
// CHECK-NEXT: ^bb1:
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: ^bb2:
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Check multiple returns, where the memory is not freed on one branch
|
||||
func.func @returns2(%arg0: i1) {
|
||||
%0 = fir.allocmem !fir.array<42xi32>
|
||||
cf.cond_br %arg0, ^bb1, ^bb2
|
||||
^bb1:
|
||||
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
|
||||
return
|
||||
^bb2:
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @returns2(%[[COND:.*]]: i1) {
|
||||
// CHECK-NEXT: %[[ALLOC:.*]] = fir.allocmem !fir.array<42xi32>
|
||||
// CHECK-NEXT: cf.cond_br %[[COND]], ^bb1, ^bb2
|
||||
// CHECK-NEXT: ^bb1:
|
||||
// CHECK-NEXT: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<42xi32>>
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: ^bb2:
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// Check allocations are not moved outside of an omp region
|
||||
func.func @omp_placement1() {
|
||||
omp.sections {
|
||||
omp.section {
|
||||
%mem = fir.allocmem !fir.array<42xi32>
|
||||
fir.freemem %mem : !fir.heap<!fir.array<42xi32>>
|
||||
omp.terminator
|
||||
}
|
||||
omp.terminator
|
||||
}
|
||||
return
|
||||
}
|
||||
// CHECK: func.func @omp_placement1() {
|
||||
// CHECK-NEXT: omp.sections {
|
||||
// CHECK-NEXT: omp.section {
|
||||
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<42xi32>
|
||||
// TODO: this allocation should be moved to the stack. Unfortunately, the data
|
||||
// flow analysis fails to propagate the lattice out of the omp region to the
|
||||
// return statement.
|
||||
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<42xi32>>
|
||||
// CHECK-NEXT: omp.terminator
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: omp.terminator
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
// CHECK-NEXT: }
|
||||
Reference in New Issue
Block a user