diff --git a/mlir/include/mlir/Analysis/MLFunctionMatcher.h b/mlir/include/mlir/Analysis/MLFunctionMatcher.h
index 0c6917a07496..5de1f6d729b2 100644
--- a/mlir/include/mlir/Analysis/MLFunctionMatcher.h
+++ b/mlir/include/mlir/Analysis/MLFunctionMatcher.h
@@ -63,6 +63,8 @@ struct MLFunctionMatches {
   iterator begin();
   iterator end();
+  EntryType &front();
+  EntryType &back();
   unsigned size() { return end() - begin(); }
   unsigned empty() { return size() == 0; }
 
diff --git a/mlir/include/mlir/Analysis/VectorAnalysis.h b/mlir/include/mlir/Analysis/VectorAnalysis.h
index c7f9e8a6155c..001bb4f99348 100644
--- a/mlir/include/mlir/Analysis/VectorAnalysis.h
+++ b/mlir/include/mlir/Analysis/VectorAnalysis.h
@@ -24,10 +24,17 @@
 namespace mlir {
 
+class AffineApplyOp;
 class AffineMap;
 class ForInst;
+class FuncBuilder;
+class Instruction;
+class Location;
 class MemRefType;
 class OperationInst;
+template <typename ObjectType, typename ElementType> class OperandIterator;
+template <typename OpType> class OpPointer;
+class Value;
 class VectorType;
 
 /// Computes and returns the multi-dimensional ratio of `superShape` to
@@ -121,6 +128,23 @@ AffineMap makePermutationMap(OperationInst *opInst,
     const llvm::DenseMap<ForInst *, unsigned> &loopToVectorDim);
 
+/// Creates an AffineApplyOp that is normalized for super-vectorization. That
+/// is, an AffineApplyOp with a single result and an unbounded AffineMap. The
+/// operands of the AffineApplyOp are either dims, symbols or constants but can
+/// never be obtained from another AffineApplyOp.
+/// This is achieved by performing a composition at the single-result AffineMap
+/// level.
+///
+/// Prerequisites:
+///   1. `map` is a single-result, unbounded AffineMap;
+///   2. `operands` can involve at most a length-1 chain of AffineApplyOp. The
+///      affine map for each of these AffineApplyOp is itself single-result and
+///      unbounded. Essentially, all ancestor AffineApplyOp must have been
+///      constructed as single-result, unbounded AffineMaps.
+OpPointer<AffineApplyOp>
+makeNormalizedAffineApply(FuncBuilder *b, Location loc, AffineMap map,
+                          ArrayRef<Value *> operands);
+
 namespace matcher {
 
 /// Matches vector_transfer_read, vector_transfer_write and ops that return a
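For context, a minimal caller-side sketch of the new entry point (not part of the patch; `opInst`, `iv0`, and `iv1` are assumed to be in scope, and only APIs visible elsewhere in this diff are used):

```c++
// Hypothetical usage sketch: build (d0 + d1) as a single-result, unbounded
// map and let makeNormalizedAffineApply compose away any AffineApplyOp that
// may have produced the operands.
FuncBuilder b(opInst);  // opInst: assumed insertion point (Instruction *)
AffineExpr d0 = b.getAffineDimExpr(0);
AffineExpr d1 = b.getAffineDimExpr(1);
AffineMap map = AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0,
                               /*results=*/{d0 + d1}, /*rangeSizes=*/{});
SmallVector<Value *, 4> operands{iv0, iv1};  // assumed index-typed values
auto normalized =
    makeNormalizedAffineApply(&b, opInst->getLoc(), map, operands);
```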
diff --git a/mlir/include/mlir/IR/AffineExpr.h b/mlir/include/mlir/IR/AffineExpr.h
index fd218ebf6292..8723e1954512 100644
--- a/mlir/include/mlir/IR/AffineExpr.h
+++ b/mlir/include/mlir/IR/AffineExpr.h
@@ -24,7 +24,7 @@
 #define MLIR_IR_AFFINE_EXPR_H
 
 #include "mlir/Support/LLVM.h"
-#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Casting.h"
 
 #include <type_traits>
 
@@ -202,22 +202,6 @@ AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context);
 AffineExpr getAffineBinaryExpr(AffineExprKind kind, AffineExpr lhs,
                                AffineExpr rhs);
 
-/// This auxiliary free function allows conveniently capturing the LHS, RHS and
-/// AffineExprBinaryOp in an AffineBinaryOpExpr.
-/// In particular it is used to elegantly write compositions as such:
-/// ```c++
-///    AffineMap g = /* Some affine map */;
-///    if (auto binExpr = e.template dyn_cast<AffineBinaryOpExpr>()) {
-///      AffineExpr lhs, rhs;
-///      AffineExprBinaryOp binOp;
-///      std::tie(lhs, rhs, binOp) = matchBinaryOpExpr(binExpr);
-///      return binOp(compose(lhs, g), compose(rhs, g));
-///    }
-/// ```
-using AffineExprBinaryOp = std::function<AffineExpr(AffineExpr, AffineExpr)>;
-std::tuple<AffineExpr, AffineExpr, AffineExprBinaryOp>
-matchBinaryOpExpr(AffineBinaryOpExpr e);
-
 raw_ostream &operator<<(raw_ostream &os, AffineExpr &expr);
 
 template <typename U> bool AffineExpr::isa() const {
diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp
index 9275cc26b5f2..b1c4e154016d 100644
--- a/mlir/lib/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Analysis/AffineAnalysis.cpp
@@ -367,10 +367,9 @@ AffineExpr mlir::simplifyAffineExpr(AffineExpr expr, unsigned numDims,
 /// `exprs.size()`.
 static AffineExpr substExprs(AffineExpr e, llvm::ArrayRef<AffineExpr> exprs) {
   if (auto binExpr = e.dyn_cast<AffineBinaryOpExpr>()) {
-    AffineExpr lhs, rhs;
-    AffineExprBinaryOp binOp;
-    std::tie(lhs, rhs, binOp) = matchBinaryOpExpr(binExpr);
-    return binOp(substExprs(lhs, exprs), substExprs(rhs, exprs));
+    return getAffineBinaryExpr(binExpr.getKind(),
+                               substExprs(binExpr.getLHS(), exprs),
+                               substExprs(binExpr.getRHS(), exprs));
   }
   if (auto dim = e.dyn_cast<AffineDimExpr>()) {
     assert(dim.getPosition() < exprs.size() &&
diff --git a/mlir/lib/Analysis/MLFunctionMatcher.cpp b/mlir/lib/Analysis/MLFunctionMatcher.cpp
index 5bb4548e6705..f2bbcd2a5661 100644
--- a/mlir/lib/Analysis/MLFunctionMatcher.cpp
+++ b/mlir/lib/Analysis/MLFunctionMatcher.cpp
@@ -79,7 +79,14 @@ MLFunctionMatches::iterator MLFunctionMatches::begin() {
 MLFunctionMatches::iterator MLFunctionMatches::end() {
   return storage ? storage->matches.end() : nullptr;
 }
-
+MLFunctionMatches::EntryType &MLFunctionMatches::front() {
+  assert(storage && "null storage");
+  return *storage->matches.begin();
+}
+MLFunctionMatches::EntryType &MLFunctionMatches::back() {
+  assert(storage && "null storage");
+  return *(storage->matches.begin() + size() - 1);
+}
 /// Return the combination of multiple MLFunctionMatches as a new object.
 static MLFunctionMatches combine(ArrayRef<MLFunctionMatches> matches) {
   MLFunctionMatches res;
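A small sketch, not part of the patch, of how the new `front()`/`back()` accessors compose with the matcher API used later in VectorizerTestPass.cpp (`pattern` and `f` are assumed to be in scope):

```c++
// Hypothetical usage sketch: inspect the first and last match produced by an
// MLFunctionMatcher pattern. Each EntryType pairs the matched Instruction*
// with its nested matches, as the m.first accesses below confirm.
auto matches = pattern.match(f);
if (!matches.empty()) {
  Instruction *first = matches.front().first;
  Instruction *last = matches.back().first;
  (void)first;
  (void)last;
}
```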
diff --git a/mlir/lib/Analysis/VectorAnalysis.cpp b/mlir/lib/Analysis/VectorAnalysis.cpp
index f00ab4c4c93d..b496f4a9f7f5 100644
--- a/mlir/lib/Analysis/VectorAnalysis.cpp
+++ b/mlir/lib/Analysis/VectorAnalysis.cpp
@@ -16,6 +16,7 @@
 // =============================================================================
 
 #include "mlir/Analysis/VectorAnalysis.h"
+#include "mlir/Analysis/AffineAnalysis.h"
 #include "mlir/Analysis/LoopAnalysis.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
@@ -27,6 +28,8 @@
 
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 ///
 /// Implements Analysis functions specific to vectors which support
@@ -35,6 +38,9 @@
 
 using namespace mlir;
 
+#define DEBUG_TYPE "vector-analysis"
+
+using llvm::dbgs;
 using llvm::SetVector;
 
 Optional<SmallVector<unsigned, 4>> mlir::shapeRatio(ArrayRef<int64_t> superShape,
@@ -237,3 +243,200 @@ bool mlir::matcher::operatesOnSuperVectors(const OperationInst &opInst,
 
   return true;
 }
+
+namespace {
+
+/// A `SingleResultAffineNormalizer` is a helper class that is not visible to
+/// the user and supports renumbering operands of single-result AffineApplyOp.
+/// This operates on the assumption that only single-result, unbounded
+/// AffineMaps are used for all operands.
+/// This acts as a reindexing map of Value* to positional dims or symbols and
+/// allows simplifications such as:
+///
+/// ```mlir
+///    %1 = affine_apply (d0, d1) -> (d0 - d1) (%0, %0)
+/// ```
+///
+/// into:
+///
+/// ```mlir
+///    %1 = affine_apply () -> (0)
+/// ```
+struct SingleResultAffineNormalizer {
+  SingleResultAffineNormalizer(AffineMap map, ArrayRef<Value *> operands);
+
+  /// Returns the single-result, unbounded AffineMap resulting from
+  /// normalization.
+  AffineMap getAffineMap() {
+    return AffineMap::get(reorderedDims.size(), reorderedSymbols.size(),
+                          {expr}, {});
+  }
+
+  SmallVector<Value *, 8> getOperands() {
+    SmallVector<Value *, 8> res(reorderedDims);
+    res.append(reorderedSymbols.begin(), reorderedSymbols.end());
+    return res;
+  }
+
+private:
+  /// Helper function to insert `v` into the coordinate system of the current
+  /// SingleResultAffineNormalizer (i.e. in the proper `xxxValueToPosition` and
+  /// the proper `reorderedXXX`).
+  /// Returns the AffineDimExpr or AffineSymbolExpr with the corresponding
+  /// renumbered position.
+  template <typename DimOrSymbol> DimOrSymbol renumberOneIndex(Value *v);
+
+  /// Given an `other` normalizer, this rewrites `other.expr` in the coordinate
+  /// system of the current SingleResultAffineNormalizer.
+  /// Returns the rewritten AffineExpr.
+  AffineExpr renumber(const SingleResultAffineNormalizer &other);
+
+  /// Given an `app` with single result and unbounded AffineMap, this rewrites
+  /// the app's map single result AffineExpr in the coordinate system of the
+  /// current SingleResultAffineNormalizer.
+  /// Returns the rewritten AffineExpr.
+  AffineExpr renumber(AffineApplyOp *app);
+
+  /// Maps of Value* to position in `expr`.
+  DenseMap<Value *, unsigned> dimValueToPosition;
+  DenseMap<Value *, unsigned> symValueToPosition;
+
+  /// Ordered dims and symbols matching positional dims and symbols in `expr`.
+  SmallVector<Value *, 8> reorderedDims;
+  SmallVector<Value *, 8> reorderedSymbols;
+
+  AffineExpr expr;
+};
+
+} // namespace
+
+template <typename DimOrSymbol>
+static DimOrSymbol make(unsigned position, MLIRContext *context);
+
+template <>
+AffineDimExpr make<AffineDimExpr>(unsigned position, MLIRContext *context) {
+  return getAffineDimExpr(position, context).cast<AffineDimExpr>();
+}
+
+template <>
+AffineSymbolExpr make<AffineSymbolExpr>(unsigned position,
+                                        MLIRContext *context) {
+  return getAffineSymbolExpr(position, context).cast<AffineSymbolExpr>();
+}
+
+template <typename DimOrSymbol>
+DimOrSymbol SingleResultAffineNormalizer::renumberOneIndex(Value *v) {
+  static_assert(std::is_same<DimOrSymbol, AffineSymbolExpr>::value ||
+                    std::is_same<DimOrSymbol, AffineDimExpr>::value,
+                "renumber<AffineDimExpr>(...) or "
+                "renumber<AffineSymbolExpr>(...) required");
+  DenseMap<Value *, unsigned> &pos =
+      std::is_same<DimOrSymbol, AffineSymbolExpr>::value ? symValueToPosition
+                                                         : dimValueToPosition;
+  DenseMap<Value *, unsigned>::iterator iterPos;
+  bool inserted = false;
+  std::tie(iterPos, inserted) = pos.insert(std::make_pair(v, pos.size()));
+  if (inserted) {
+    std::is_same<DimOrSymbol, AffineDimExpr>::value
+        ? reorderedDims.push_back(v)
+        : reorderedSymbols.push_back(v);
+  }
+  return make<DimOrSymbol>(iterPos->second, v->getFunction()->getContext());
+}
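For reference (not part of the patch), the remapping primitive that the `renumber` overload below relies on is `AffineExpr::replaceDimsAndSymbols`, which is also what it calls on `other.expr`. A self-contained sketch, assuming an `MLIRContext *ctx` is in scope:

```c++
// Sketch: renumber d0 -> d2 and s0 -> s1 in a single substitution, the same
// mechanism renumber(const SingleResultAffineNormalizer &) uses below.
AffineExpr e = getAffineDimExpr(0, ctx) - getAffineSymbolExpr(0, ctx);
AffineExpr remapped = e.replaceDimsAndSymbols(
    /*dimReplacements=*/{getAffineDimExpr(2, ctx)},
    /*symReplacements=*/{getAffineSymbolExpr(1, ctx)});
// remapped == (d2 - s1)
```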
+AffineExpr SingleResultAffineNormalizer::renumber(
+    const SingleResultAffineNormalizer &other) {
+  SmallVector<AffineExpr, 8> dimRemapping, symRemapping;
+  for (auto kvp : other.dimValueToPosition) {
+    if (dimRemapping.size() <= kvp.second)
+      dimRemapping.resize(kvp.second + 1);
+    dimRemapping[kvp.second] = renumberOneIndex<AffineDimExpr>(kvp.first);
+  }
+  for (auto kvp : other.symValueToPosition) {
+    if (symRemapping.size() <= kvp.second)
+      symRemapping.resize(kvp.second + 1);
+    symRemapping[kvp.second] = renumberOneIndex<AffineSymbolExpr>(kvp.first);
+  }
+  return other.expr.replaceDimsAndSymbols(dimRemapping, symRemapping);
+}
+
+AffineExpr SingleResultAffineNormalizer::renumber(AffineApplyOp *app) {
+  // Sanity check: this helper requires a single-result AffineApplyOp.
+  assert(app->getNumResults() == 1 && "Not a single result AffineApplyOp");
+  assert(app->getAffineMap().getRangeSizes().empty() &&
+         "Non-empty range sizes");
+
+  // Create the SingleResultAffineNormalizer for the operands of this
+  // AffineApplyOp and combine it with the current
+  // SingleResultAffineNormalizer.
+  using ValueTy = decltype(*(app->getOperands().begin()));
+  SingleResultAffineNormalizer normalizer(
+      app->getAffineMap(),
+      functional::map([](ValueTy v) { return static_cast<Value *>(v); },
+                      app->getOperands()));
+
+  // We know this is a single-result AffineMap; we just need to append a
+  // renumbered AffineExpr.
+  return renumber(normalizer);
+}
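Not in the patch: a worked view of the composition step the constructor below ends with. As the `substExprs` helper above suggests, `composeWithUnboundedMap` substitutes the i-th result of the second map for `di` in the first expression; this is what collapses `(d0 - d1)(%0, %0)` to the zero map, as the new test checks. Assuming an `MLIRContext *ctx`:

```c++
// Sketch: compose (d0 - d1) with the renumbering map (d0) -> (d0, d0), i.e.
// both operands were renumbered to the same position. The result folds to 0.
AffineExpr d0 = getAffineDimExpr(0, ctx);
AffineExpr d1 = getAffineDimExpr(1, ctx);
AffineMap exprsMap = AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0,
                                    /*results=*/{d0, d0}, /*rangeSizes=*/{});
AffineExpr folded = composeWithUnboundedMap(d0 - d1, exprsMap);
// folded == getAffineConstantExpr(0, ctx)
```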
+SingleResultAffineNormalizer::SingleResultAffineNormalizer(
+    AffineMap map, ArrayRef<Value *> operands) {
+  assert(map.getNumResults() == 1 && "Single-result map expected");
+  assert(map.getRangeSizes().empty() && "Unbounded map expected");
+  assert(map.getNumInputs() == operands.size() &&
+         "number of operands does not match the number of map inputs");
+
+  if (operands.empty()) {
+    return;
+  }
+
+  auto *context = operands[0]->getFunction()->getContext();
+  SmallVector<AffineExpr, 8> exprs;
+  for (auto en : llvm::enumerate(operands)) {
+    auto *t = en.value();
+    assert(t->getType().isIndex());
+    if (auto inst = t->getDefiningInst()) {
+      if (auto app = inst->dyn_cast<AffineApplyOp>()) {
+        // Sanity check: AffineApplyOp must always be composed by construction
+        // and there can only ever be a dependence chain of 1 AffineApplyOp.
+        // So we can never get a second AffineApplyOp.
+        // This also guarantees we can build another
+        // SingleResultAffineNormalizer here that does not recurse a second
+        // time.
+        for (auto *pred : app->getOperands()) {
+          assert(!pred->getDefiningInst() ||
+                 !pred->getDefiningInst()->isa<AffineApplyOp>() &&
+                     "AffineApplyOp chain of length > 1");
+        }
+        exprs.push_back(renumber(app));
+      } else if (auto constant = inst->dyn_cast<ConstantOp>()) {
+        // Constants remain constants.
+        auto affineConstant = inst->cast<ConstantIndexOp>();
+        exprs.push_back(
+            getAffineConstantExpr(affineConstant->getValue(), context));
+      } else {
+        // DimOp results and other top-level values are all symbols.
+        exprs.push_back(renumberOneIndex<AffineSymbolExpr>(t));
+      }
+    } else if (en.index() < map.getNumDims()) {
+      assert(isa<ForInst>(t) && "ForInst expected for AffineDimExpr");
+      exprs.push_back(renumberOneIndex<AffineDimExpr>(t));
+    } else {
+      assert(!isa<ForInst>(t) && "unexpected ForInst for an AffineSymbolExpr");
+      exprs.push_back(renumberOneIndex<AffineSymbolExpr>(t));
+    }
+  }
+  auto exprsMap = AffineMap::get(dimValueToPosition.size(),
+                                 symValueToPosition.size(), exprs, {});
+
+  expr = composeWithUnboundedMap(map.getResult(0), exprsMap);
+
+  LLVM_DEBUG(map.getResult(0).print(dbgs() << "\nCompose expr: "));
+  LLVM_DEBUG(exprsMap.print(dbgs() << "\nWith map: "));
+  LLVM_DEBUG(expr.print(dbgs() << "\nResult: "));
+}
+
+OpPointer<AffineApplyOp>
+mlir::makeNormalizedAffineApply(FuncBuilder *b, Location loc, AffineMap map,
+                                ArrayRef<Value *> operands) {
+  SingleResultAffineNormalizer normalizer(map, operands);
+  return b->create<AffineApplyOp>(loc, normalizer.getAffineMap(),
+                                  normalizer.getOperands());
+}
diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp
index e660d55e02df..8343275b69ae 100644
--- a/mlir/lib/IR/AffineExpr.cpp
+++ b/mlir/lib/IR/AffineExpr.cpp
@@ -19,6 +19,7 @@
 #include "AffineExprDetail.h"
 #include "mlir/IR/AffineExprVisitor.h"
 #include "mlir/Support/STLExtras.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 
 using namespace mlir;
@@ -284,39 +285,6 @@ AffineExpr AffineExpr::operator%(AffineExpr other) const {
   return AffineBinaryOpExprStorage::get(AffineExprKind::Mod, expr, other.expr);
 }
 
-std::tuple<AffineExpr, AffineExpr, AffineExprBinaryOp>
-mlir::matchBinaryOpExpr(AffineBinaryOpExpr e) {
-  switch (e.getKind()) {
-  case AffineExprKind::Add:
-    return std::make_tuple(
-        e.getLHS(), e.getRHS(),
-        [](AffineExpr e1, AffineExpr e2) { return e1 + e2; });
-  case AffineExprKind::Mul:
-    return std::make_tuple(
-        e.getLHS(), e.getRHS(),
-        [](AffineExpr e1, AffineExpr e2) { return e1 * e2; });
-  case AffineExprKind::Mod:
-    return std::make_tuple(
-        e.getLHS(), e.getRHS(),
-        [](AffineExpr e1, AffineExpr e2) { return e1 % e2; });
-  case AffineExprKind::FloorDiv:
-    return std::make_tuple(
-        e.getLHS(), e.getRHS(),
-        [](AffineExpr e1, AffineExpr e2) { return e1.floorDiv(e2); });
-  case AffineExprKind::CeilDiv:
-    return std::make_tuple(
-        e.getLHS(), e.getRHS(),
-        [](AffineExpr e1, AffineExpr e2) { return e1.ceilDiv(e2); });
-  case AffineExprKind::DimId:
-  case AffineExprKind::SymbolId:
-  case AffineExprKind::Constant:
-    assert(false && "Not a binary expr");
-  }
-  return std::make_tuple(
-      AffineExpr(), AffineExpr(),
-      [](AffineExpr e1, AffineExpr e2) { return AffineExpr(); });
-}
-
 raw_ostream &operator<<(raw_ostream &os, AffineExpr &expr) {
   expr.print(os);
   return os;
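With `matchBinaryOpExpr` gone, the generic constructor `getAffineBinaryExpr` declared in AffineExpr.h above is the way to rebuild a binary expression from its kind. A self-contained sketch, not part of the patch, assuming an `MLIRContext *ctx`:

```c++
// Sketch: deconstruct and rebuild (d0 + s0) without the removed tuple helper.
// AffineExprs are uniqued in the MLIRContext, so the rebuilt expression
// compares equal to the original.
AffineExpr sum = getAffineDimExpr(0, ctx) + getAffineSymbolExpr(0, ctx);
auto bin = sum.cast<AffineBinaryOpExpr>();
AffineExpr rebuilt =
    getAffineBinaryExpr(bin.getKind(), bin.getLHS(), bin.getRHS());
assert(rebuilt == sum);
```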
diff --git a/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp b/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp
index afacc6be9f28..0ca24b908e2f 100644
--- a/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp
+++ b/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp
@@ -23,6 +23,7 @@
 #include "mlir/Analysis/MLFunctionMatcher.h"
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Analysis/VectorAnalysis.h"
+#include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/StandardTypes.h"
 #include "mlir/Pass.h"
@@ -66,6 +67,12 @@ static llvm::cl::opt<bool> clTestComposeMaps(
     "Specify to enable testing the composition of AffineMap where each "
     "AffineMap in the composition is specified as the affine_map attribute "
     "in a constant op."));
+static llvm::cl::opt<bool> clTestNormalizeMaps(
+    "normalize-maps",
+    llvm::cl::desc(
+        "Specify to enable testing the normalization of AffineApplyOp "
+        "where each AffineApplyOp in the composition is a single-result "
+        "instruction."));
 
 namespace {
 
@@ -80,6 +87,7 @@ struct VectorizerTestPass : public FunctionPass {
   void testBackwardSlicing(Function *f);
   void testSlicing(Function *f);
   void testComposeMaps(Function *f);
+  void testNormalizeMaps(Function *f);
 
   // Thread-safe RAII contexts local to pass, BumpPtrAllocator freed on exit.
   MLFunctionMatcherContext MLContext;
@@ -219,6 +227,47 @@ void VectorizerTestPass::testComposeMaps(Function *f) {
   res.print(outs() << "\nComposed map: ");
 }
 
+bool affineApplyOp(const Instruction &inst) {
+  const auto &opInst = cast<OperationInst>(inst);
+  return opInst.isa<AffineApplyOp>();
+}
+
+bool singleResultAffineApplyOpWithoutUses(const Instruction &inst) {
+  const auto &opInst = cast<OperationInst>(inst);
+  auto app = opInst.dyn_cast<AffineApplyOp>();
+  return app && (app->getNumResults() == 1) &&
+         app->getResult(0)->getUses().end() ==
+             app->getResult(0)->getUses().begin();
+}
+
+void VectorizerTestPass::testNormalizeMaps(Function *f) {
+  using matcher::Op;
+
+  // Save the matched AffineApplyOps; they all need to be erased at the end.
+  auto pattern = Op(affineApplyOp);
+  auto toErase = pattern.match(f);
+  std::reverse(toErase.begin(), toErase.end());
+  {
+    // Compose maps.
+    auto pattern = Op(singleResultAffineApplyOpWithoutUses);
+    for (auto m : pattern.match(f)) {
+      auto app = cast<OperationInst>(m.first)->cast<AffineApplyOp>();
+      FuncBuilder b(m.first);
+
+      using ValueTy = decltype(*(app->getOperands().begin()));
+      SmallVector<Value *, 8> operands = functional::map(
+          [](ValueTy v) { return static_cast<Value *>(v); },
+          app->getOperands().begin(), app->getOperands().end());
+      makeNormalizedAffineApply(&b, app->getLoc(), app->getAffineMap(),
+                                operands);
+    }
+  }
+  // We should now be able to erase everything in reverse order in this test.
+  for (auto m : toErase) {
+    m.first->erase();
+  }
+}
+
 PassResult VectorizerTestPass::runOnFunction(Function *f) {
   // Only support single block functions at this point.
   if (f->getBlocks().size() != 1)
@@ -239,6 +288,9 @@ PassResult VectorizerTestPass::runOnFunction(Function *f) {
   if (clTestComposeMaps) {
     testComposeMaps(f);
   }
+  if (clTestNormalizeMaps) {
+    testNormalizeMaps(f);
+  }
 
   return PassResult::Success;
 }
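Side note, not in the patch: the emptiness test in `singleResultAffineApplyOpWithoutUses` compares use-list iterators directly. Assuming this API revision exposes the usual `use_empty()` helper on `Value`, an equivalent predicate would read:

```c++
// Sketch of an equivalent predicate; Value::use_empty() is assumed to be
// available and to mean "the use list is empty".
bool singleResultAffineApplyOpWithoutUsesAlt(const Instruction &inst) {
  const auto &opInst = cast<OperationInst>(inst);
  auto app = opInst.dyn_cast<AffineApplyOp>();
  return app && app->getNumResults() == 1 && app->getResult(0)->use_empty();
}
```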
diff --git a/mlir/test/Transforms/Vectorize/normalize_maps.mlir b/mlir/test/Transforms/Vectorize/normalize_maps.mlir
new file mode 100644
index 000000000000..a8554dd40079
--- /dev/null
+++ b/mlir/test/Transforms/Vectorize/normalize_maps.mlir
@@ -0,0 +1,61 @@
+// RUN: mlir-opt %s -vectorizer-test -normalize-maps | FileCheck %s
+
+// CHECK-DAG: #[[ZERO:[a-zA-Z0-9]+]] = (d0) -> (0)
+// CHECK-DAG: #[[ID1:[a-zA-Z0-9]+]] = (d0) -> (d0)
+// CHECK-DAG: #[[D0TIMES2:[a-zA-Z0-9]+]] = (d0) -> (d0 * 2)
+// CHECK-DAG: #[[D0PLUSD1:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 + d1)
+// CHECK-DAG: #[[MINUSD0PLUSD1:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 * -1 + d1)
+// CHECK-DAG: #[[D0MINUSD1:[a-zA-Z0-9]+]] = (d0, d1) -> (d0 - d1)
+// CHECK-DAG: #[[D0D1D2TOD0:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d0)
+// CHECK-DAG: #[[D0D1D2TOD1:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d1)
+// CHECK-DAG: #[[D0D1D2TOD2:[a-zA-Z0-9]+]] = (d0, d1, d2) -> (d2)
+
+// CHECK-LABEL: func @simple()
+func @simple() {
+  for %i0 = 0 to 7 {
+    %0 = affine_apply (d0) -> (d0) (%i0)
+    %1 = affine_apply (d0) -> (d0) (%0)
+    %2 = affine_apply (d0, d1) -> (d0 + d1) (%0, %0)
+    %3 = affine_apply (d0, d1) -> (d0 - d1) (%0, %0)
+  }
+  // CHECK-NEXT: for %i0 = 0 to 7
+  // CHECK-NEXT:   {{.*}} affine_apply #[[ID1]](%i0)
+  // CHECK-NEXT:   {{.*}} affine_apply #[[D0TIMES2]](%i0)
+  // CHECK-NEXT:   {{.*}} affine_apply #[[ZERO]](%i0)
+
+  for %i1 = 0 to 7 {
+    for %i2 = 0 to 42 {
+      %20 = affine_apply (d0, d1) -> (d1) (%i1, %i2)
+      %21 = affine_apply (d0, d1) -> (d0) (%i1, %i2)
+      %22 = affine_apply (d0, d1) -> (d0 + d1) (%20, %21)
+      %23 = affine_apply (d0, d1) -> (d0 - d1) (%20, %21)
+      %24 = affine_apply (d0, d1) -> (-d0 + d1) (%20, %21)
+    }
+  }
+  // CHECK:      for %i1 = 0 to 7
+  // CHECK-NEXT:   for %i2 = 0 to 42
+  // CHECK-NEXT:     {{.*}} affine_apply #[[D0PLUSD1]](%i2, %i1)
+  // CHECK-NEXT:     {{.*}} affine_apply #[[D0MINUSD1]](%i2, %i1)
+  // CHECK-NEXT:     {{.*}} affine_apply #[[MINUSD0PLUSD1]](%i2, %i1)
+
+  for %i3 = 0 to 16 {
+    for %i4 = 0 to 47 step 2 {
+      for %i5 = 0 to 78 step 16 {
+        %50 = affine_apply (d0) -> (d0) (%i3)
+        %51 = affine_apply (d0) -> (d0) (%i4)
+        %52 = affine_apply (d0) -> (d0) (%i5)
+        %53 = affine_apply (d0, d1, d2) -> (d0) (%50, %51, %52)
+        %54 = affine_apply (d0, d1, d2) -> (d1) (%50, %51, %52)
+        %55 = affine_apply (d0, d1, d2) -> (d2) (%50, %51, %52)
+      }
+    }
+  }
+  // CHECK:      for %i3 = 0 to 16
+  // CHECK-NEXT:   for %i4 = 0 to 47 step 2
+  // CHECK-NEXT:     for %i5 = 0 to 78 step 16
+  // CHECK-NEXT:       {{.*}} affine_apply #[[D0D1D2TOD0]](%i3, %i4, %i5)
+  // CHECK-NEXT:       {{.*}} affine_apply #[[D0D1D2TOD1]](%i3, %i4, %i5)
+  // CHECK-NEXT:       {{.*}} affine_apply #[[D0D1D2TOD2]](%i3, %i4, %i5)
+
+  return
+}