//===- AffineStructures.cpp - MLIR Affine Structures Class ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Structures for affine/polyhedral analysis of affine dialect ops.
//
//===----------------------------------------------------------------------===//

#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/LinearTransform.h"
#include "mlir/Analysis/Presburger/Simplex.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Support/MathExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "affine-structures"

using namespace mlir;
using llvm::SmallDenseMap;
using llvm::SmallDenseSet;

namespace {

// See comments for SimpleAffineExprFlattener.
// An AffineExprFlattener extends a SimpleAffineExprFlattener by recording
// constraint information associated with mod's, floordiv's, and ceildiv's
// in FlatAffineConstraints 'localVarCst'.
struct AffineExprFlattener : public SimpleAffineExprFlattener {
public:
  // Constraints connecting newly introduced local variables (for mod's and
  // div's) to existing (dimensional and symbolic) ones. These are always
  // inequalities.
  FlatAffineConstraints localVarCst;

  AffineExprFlattener(unsigned nDims, unsigned nSymbols, MLIRContext *ctx)
      : SimpleAffineExprFlattener(nDims, nSymbols) {
    localVarCst.reset(nDims, nSymbols, /*numLocals=*/0);
  }

private:
  // Adds a local identifier (needed to flatten a mod, floordiv, or ceildiv
  // expr). The local identifier added is always a floordiv of a pure add/mul
  // affine function of other identifiers, whose coefficients are specified in
  // `dividend` and with respect to the positive constant `divisor`. `localExpr`
  // is the simplified tree expression (AffineExpr) corresponding to the
  // quantifier.
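  // For example, flattening d0 mod 4 (a hypothetical input) records a local q
  // with q = d0 floordiv 4 here, i.e., the inequalities d0 - 4*q >= 0 and
  // -d0 + 4*q + 3 >= 0 are added to localVarCst.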
  void addLocalFloorDivId(ArrayRef<int64_t> dividend, int64_t divisor,
                          AffineExpr localExpr) override {
    SimpleAffineExprFlattener::addLocalFloorDivId(dividend, divisor, localExpr);
    // Update localVarCst.
    localVarCst.addLocalFloorDiv(dividend, divisor);
  }
};

} // end anonymous namespace

// Flattens the expressions in 'exprs'. Returns failure if any expression was
// unable to be flattened (i.e., semi-affine expressions are not handled yet).
static LogicalResult
getFlattenedAffineExprs(ArrayRef<AffineExpr> exprs, unsigned numDims,
                        unsigned numSymbols,
                        std::vector<SmallVector<int64_t, 8>> *flattenedExprs,
                        FlatAffineConstraints *localVarCst) {
  if (exprs.empty()) {
    localVarCst->reset(numDims, numSymbols);
    return success();
  }

  AffineExprFlattener flattener(numDims, numSymbols, exprs[0].getContext());
  // Use the same flattener to simplify each expression successively. This way
  // local identifiers / expressions are shared.
  for (auto expr : exprs) {
    if (!expr.isPureAffine())
      return failure();

    flattener.walkPostOrder(expr);
  }

  assert(flattener.operandExprStack.size() == exprs.size());
  flattenedExprs->clear();
  flattenedExprs->assign(flattener.operandExprStack.begin(),
                         flattener.operandExprStack.end());

  if (localVarCst)
    localVarCst->clearAndCopyFrom(flattener.localVarCst);

  return success();
}

// Flattens 'expr' into 'flattenedExpr'. Returns failure if 'expr' was unable
// to be flattened (semi-affine expressions are not handled yet).
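// For example (hypothetical values), flattening 16*d0 + d1 + 5 with
// numDims = 2 and numSymbols = 0 yields the coefficient vector [16, 1, 5],
// laid out as [dims..., symbols..., locals..., constant].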
LogicalResult
mlir::getFlattenedAffineExpr(AffineExpr expr, unsigned numDims,
                             unsigned numSymbols,
                             SmallVectorImpl<int64_t> *flattenedExpr,
                             FlatAffineConstraints *localVarCst) {
  std::vector<SmallVector<int64_t, 8>> flattenedExprs;
  LogicalResult ret = ::getFlattenedAffineExprs({expr}, numDims, numSymbols,
                                                &flattenedExprs, localVarCst);
  *flattenedExpr = flattenedExprs[0];
  return ret;
}

/// Flattens the expressions in 'map'. Returns failure if any expression was
/// unable to be flattened (i.e., semi-affine expressions are not handled yet).
LogicalResult mlir::getFlattenedAffineExprs(
    AffineMap map, std::vector<SmallVector<int64_t, 8>> *flattenedExprs,
    FlatAffineConstraints *localVarCst) {
  if (map.getNumResults() == 0) {
    localVarCst->reset(map.getNumDims(), map.getNumSymbols());
    return success();
  }
  return ::getFlattenedAffineExprs(map.getResults(), map.getNumDims(),
                                   map.getNumSymbols(), flattenedExprs,
                                   localVarCst);
}

LogicalResult mlir::getFlattenedAffineExprs(
    IntegerSet set, std::vector<SmallVector<int64_t, 8>> *flattenedExprs,
    FlatAffineConstraints *localVarCst) {
  if (set.getNumConstraints() == 0) {
    localVarCst->reset(set.getNumDims(), set.getNumSymbols());
    return success();
  }
  return ::getFlattenedAffineExprs(set.getConstraints(), set.getNumDims(),
                                   set.getNumSymbols(), flattenedExprs,
                                   localVarCst);
}

//===----------------------------------------------------------------------===//
// FlatAffineConstraints.
//===----------------------------------------------------------------------===//

// Clones this object.
std::unique_ptr<FlatAffineConstraints> FlatAffineConstraints::clone() const {
  return std::make_unique<FlatAffineConstraints>(*this);
}

// Construct from an IntegerSet.
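// For example (hypothetical usage):
//   IntegerSet set = ...; // (d0)[s0] : (d0 >= 0, -d0 + s0 == 0)
//   FlatAffineConstraints cst(set);
// yields a system with one dimension, one symbol, one inequality, and one
// equality.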
FlatAffineConstraints::FlatAffineConstraints(IntegerSet set)
    : numIds(set.getNumDims() + set.getNumSymbols()), numDims(set.getNumDims()),
      numSymbols(set.getNumSymbols()),
      equalities(0, numIds + 1, set.getNumEqualities(), numIds + 1),
      inequalities(0, numIds + 1, set.getNumInequalities(), numIds + 1) {
  ids.resize(numIds, None);

  // Flatten expressions and add them to the constraint system.
  std::vector<SmallVector<int64_t, 8>> flatExprs;
  FlatAffineConstraints localVarCst;
  if (failed(getFlattenedAffineExprs(set, &flatExprs, &localVarCst))) {
    assert(false && "flattening unimplemented for semi-affine integer sets");
    return;
  }
  assert(flatExprs.size() == set.getNumConstraints());
  for (unsigned l = 0, e = localVarCst.getNumLocalIds(); l < e; l++) {
    addLocalId(getNumLocalIds());
  }

  for (unsigned i = 0, e = flatExprs.size(); i < e; ++i) {
    const auto &flatExpr = flatExprs[i];
    assert(flatExpr.size() == getNumCols());
    if (set.getEqFlags()[i]) {
      addEquality(flatExpr);
    } else {
      addInequality(flatExpr);
    }
  }
  // Add the other constraints involving local id's from flattening.
  append(localVarCst);
}

void FlatAffineConstraints::reset(unsigned numReservedInequalities,
                                  unsigned numReservedEqualities,
                                  unsigned newNumReservedCols,
                                  unsigned newNumDims, unsigned newNumSymbols,
                                  unsigned newNumLocals,
                                  ArrayRef<Value> idArgs) {
  assert(newNumReservedCols >= newNumDims + newNumSymbols + newNumLocals + 1 &&
         "minimum 1 column");
  SmallVector<Optional<Value>, 8> newIds;
  if (!idArgs.empty())
    newIds.assign(idArgs.begin(), idArgs.end());

  *this = FlatAffineConstraints(numReservedInequalities, numReservedEqualities,
                                newNumReservedCols, newNumDims, newNumSymbols,
                                newNumLocals, newIds);
}

void FlatAffineConstraints::reset(unsigned newNumDims, unsigned newNumSymbols,
                                  unsigned newNumLocals,
                                  ArrayRef<Value> idArgs) {
  reset(0, 0, newNumDims + newNumSymbols + newNumLocals + 1, newNumDims,
        newNumSymbols, newNumLocals, idArgs);
}

void FlatAffineConstraints::append(const FlatAffineConstraints &other) {
  assert(other.getNumCols() == getNumCols());
  assert(other.getNumDimIds() == getNumDimIds());
  assert(other.getNumSymbolIds() == getNumSymbolIds());

  inequalities.reserveRows(inequalities.getNumRows() +
                           other.getNumInequalities());
  equalities.reserveRows(equalities.getNumRows() + other.getNumEqualities());

  for (unsigned r = 0, e = other.getNumInequalities(); r < e; r++) {
    addInequality(other.getInequality(r));
  }
  for (unsigned r = 0, e = other.getNumEqualities(); r < e; r++) {
    addEquality(other.getEquality(r));
  }
}

void FlatAffineConstraints::addLocalId(unsigned pos) {
  addId(IdKind::Local, pos);
}

void FlatAffineConstraints::addDimId(unsigned pos, Value id) {
  addId(IdKind::Dimension, pos, id);
}

void FlatAffineConstraints::addSymbolId(unsigned pos, Value id) {
  addId(IdKind::Symbol, pos, id);
}

/// Adds an identifier of the specified kind at position 'pos'. The added
/// column is initialized to zero.
void FlatAffineConstraints::addId(IdKind kind, unsigned pos, Value id) {
  if (kind == IdKind::Dimension)
    assert(pos <= getNumDimIds());
  else if (kind == IdKind::Symbol)
    assert(pos <= getNumSymbolIds());
  else
    assert(pos <= getNumLocalIds());

  int absolutePos;
  if (kind == IdKind::Dimension) {
    absolutePos = pos;
    numDims++;
  } else if (kind == IdKind::Symbol) {
    absolutePos = pos + getNumDimIds();
    numSymbols++;
  } else {
    absolutePos = pos + getNumDimIds() + getNumSymbolIds();
  }
  numIds++;

  inequalities.insertColumn(absolutePos);
  equalities.insertColumn(absolutePos);

  // If an 'id' is provided, insert it; otherwise use None.
  if (id)
    ids.insert(ids.begin() + absolutePos, id);
  else
    ids.insert(ids.begin() + absolutePos, None);
  assert(ids.size() == getNumIds());
}

/// Checks if two constraint systems are in the same space, i.e., if they are
/// associated with the same set of identifiers, appearing in the same order.
static bool areIdsAligned(const FlatAffineConstraints &a,
                          const FlatAffineConstraints &b) {
  return a.getNumDimIds() == b.getNumDimIds() &&
         a.getNumSymbolIds() == b.getNumSymbolIds() &&
         a.getNumIds() == b.getNumIds() && a.getIds().equals(b.getIds());
}

/// Calls areIdsAligned to check if two constraint systems have the same set
/// of identifiers in the same order.
bool FlatAffineConstraints::areIdsAlignedWithOther(
    const FlatAffineConstraints &other) {
  return areIdsAligned(*this, other);
}

/// Checks if the SSA values associated with `cst`'s identifiers are unique.
static bool LLVM_ATTRIBUTE_UNUSED
areIdsUnique(const FlatAffineConstraints &cst) {
  SmallPtrSet<Value, 8> uniqueIds;
  for (auto id : cst.getIds()) {
    if (id.hasValue() && !uniqueIds.insert(id.getValue()).second)
      return false;
  }
  return true;
}

/// Merge and align the identifiers of A and B starting at 'offset', so that
/// both constraint systems get the union of the contained identifiers that is
/// dimension-wise and symbol-wise unique; both constraint systems are updated
/// so that they have the union of all identifiers, with A's original
/// identifiers appearing first followed by any of B's identifiers that didn't
/// appear in A. Local identifiers of each system are by design separate/local
/// and are placed one after the other (A's followed by B's).
// E.g.: Input: A has ((%i, %j) [%M, %N]) and B has ((%k, %j) [%P, %N, %M]).
// Output: both A, B have (%i, %j, %k) [%M, %N, %P].
//
static void mergeAndAlignIds(unsigned offset, FlatAffineConstraints *a,
                             FlatAffineConstraints *b) {
  assert(offset <= a->getNumDimIds() && offset <= b->getNumDimIds());
  // A merge/align isn't meaningful if a cst's ids aren't distinct.
  assert(areIdsUnique(*a) && "A's id values aren't unique");
  assert(areIdsUnique(*b) && "B's id values aren't unique");

  assert(std::all_of(a->getIds().begin() + offset,
                     a->getIds().begin() + a->getNumDimAndSymbolIds(),
                     [](Optional<Value> id) { return id.hasValue(); }));

  assert(std::all_of(b->getIds().begin() + offset,
                     b->getIds().begin() + b->getNumDimAndSymbolIds(),
                     [](Optional<Value> id) { return id.hasValue(); }));

  // Place local id's of A after local id's of B.
  for (unsigned l = 0, e = a->getNumLocalIds(); l < e; l++) {
    b->addLocalId(0);
  }
  for (unsigned t = 0, e = b->getNumLocalIds() - a->getNumLocalIds(); t < e;
       t++) {
    a->addLocalId(a->getNumLocalIds());
  }

  SmallVector<Value, 4> aDimValues, aSymValues;
  a->getIdValues(offset, a->getNumDimIds(), &aDimValues);
  a->getIdValues(a->getNumDimIds(), a->getNumDimAndSymbolIds(), &aSymValues);
  {
    // Merge dims from A into B.
    unsigned d = offset;
    for (auto aDimValue : aDimValues) {
      unsigned loc;
      if (b->findId(aDimValue, &loc)) {
        assert(loc >= offset && "A's dim appears in B's aligned range");
        assert(loc < b->getNumDimIds() &&
               "A's dim appears in B's non-dim position");
        b->swapId(d, loc);
      } else {
        b->addDimId(d);
        b->setIdValue(d, aDimValue);
      }
      d++;
    }

    // Dimensions that are in B, but not in A, are added at the end.
    for (unsigned t = a->getNumDimIds(), e = b->getNumDimIds(); t < e; t++) {
      a->addDimId(a->getNumDimIds());
      a->setIdValue(a->getNumDimIds() - 1, b->getIdValue(t));
    }
  }
  {
    // Merge symbols: merge A's symbols into B first.
    unsigned s = b->getNumDimIds();
    for (auto aSymValue : aSymValues) {
      unsigned loc;
      if (b->findId(aSymValue, &loc)) {
        assert(loc >= b->getNumDimIds() && loc < b->getNumDimAndSymbolIds() &&
               "A's symbol appears in B's non-symbol position");
        b->swapId(s, loc);
      } else {
        b->addSymbolId(s - b->getNumDimIds());
        b->setIdValue(s, aSymValue);
      }
      s++;
    }
    // Symbols that are in B, but not in A, are added at the end.
    for (unsigned t = a->getNumDimAndSymbolIds(),
                  e = b->getNumDimAndSymbolIds();
         t < e; t++) {
      a->addSymbolId(a->getNumSymbolIds());
      a->setIdValue(a->getNumDimAndSymbolIds() - 1, b->getIdValue(t));
    }
  }
  assert(areIdsAligned(*a, *b) && "IDs expected to be aligned");
}

// Call 'mergeAndAlignIds' to align constraint systems of 'this' and 'other'.
void FlatAffineConstraints::mergeAndAlignIdsWithOther(
    unsigned offset, FlatAffineConstraints *other) {
  mergeAndAlignIds(offset, this, other);
}

LogicalResult FlatAffineConstraints::composeMap(const AffineValueMap *vMap) {
  return composeMatchingMap(
      computeAlignedMap(vMap->getAffineMap(), vMap->getOperands()));
}

// Similar to `composeMap` except that no Values need be associated with the
// constraint system nor are they looked at -- the dimensions and symbols of
// `other` are expected to correspond 1:1 to `this` system.
LogicalResult FlatAffineConstraints::composeMatchingMap(AffineMap other) {
  assert(other.getNumDims() == getNumDimIds() && "dim mismatch");
  assert(other.getNumSymbols() == getNumSymbolIds() && "symbol mismatch");

  std::vector<SmallVector<int64_t, 8>> flatExprs;
  FlatAffineConstraints localCst;
  if (failed(getFlattenedAffineExprs(other, &flatExprs, &localCst))) {
    LLVM_DEBUG(llvm::dbgs()
               << "composition unimplemented for semi-affine maps\n");
    return failure();
  }
  assert(flatExprs.size() == other.getNumResults());

  // Add localCst information.
  if (localCst.getNumLocalIds() > 0) {
    unsigned numLocalIds = getNumLocalIds();
    // Insert local dims of localCst at the beginning.
    for (unsigned l = 0, e = localCst.getNumLocalIds(); l < e; ++l)
      addLocalId(0);
    // Insert local dims of `this` at the end of localCst.
    for (unsigned l = 0; l < numLocalIds; ++l)
      localCst.addLocalId(localCst.getNumLocalIds());
    // Dimensions of localCst and this constraint set match. Append localCst to
    // this constraint set.
    append(localCst);
  }

  // Add dimensions corresponding to the map's results.
  for (unsigned t = 0, e = other.getNumResults(); t < e; t++) {
    addDimId(0);
  }

  // We add one equality for each result connecting the result dim of the map
  // to the other identifiers.
  // E.g., if the expression is 16*i0 + i1, and this is the r^th
  // iteration/result of the value map, we are adding the equality:
  // d_r - 16*i0 - i1 = 0. Hence, when flattening say (i0 + 1, i0 + 8*i2), we
  // add two equalities overall: d_0 - i0 - 1 == 0, d_1 - i0 - 8*i2 == 0.
  for (unsigned r = 0, e = flatExprs.size(); r < e; r++) {
    const auto &flatExpr = flatExprs[r];
    assert(flatExpr.size() >= other.getNumInputs() + 1);

    // eqToAdd is the equality corresponding to the flattened affine
    // expression.
    SmallVector<int64_t, 8> eqToAdd(getNumCols(), 0);
    // Set the coefficient for this result to one.
    eqToAdd[r] = 1;

    // Dims and symbols.
    for (unsigned i = 0, f = other.getNumInputs(); i < f; i++) {
      // Negate 'eq[r]' since the newly added dimension will be set to this
      // one.
      eqToAdd[e + i] = -flatExpr[i];
    }
    // Local vars common to eq and localCst are at the beginning.
    unsigned j = getNumDimIds() + getNumSymbolIds();
    unsigned end = flatExpr.size() - 1;
    for (unsigned i = other.getNumInputs(); i < end; i++, j++) {
      eqToAdd[j] = -flatExpr[i];
    }

    // Constant term.
    eqToAdd[getNumCols() - 1] = -flatExpr[flatExpr.size() - 1];

    // Add the equality connecting the result of the map to this constraint
    // set.
    addEquality(eqToAdd);
  }

  return success();
}

// Turn a symbol into a dimension.
static void turnSymbolIntoDim(FlatAffineConstraints *cst, Value id) {
  unsigned pos;
  if (cst->findId(id, &pos) && pos >= cst->getNumDimIds() &&
      pos < cst->getNumDimAndSymbolIds()) {
    cst->swapId(pos, cst->getNumDimIds());
    cst->setDimSymbolSeparation(cst->getNumSymbolIds() - 1);
  }
}

// Changes all symbol identifiers which are loop IVs to dim identifiers.
void FlatAffineConstraints::convertLoopIVSymbolsToDims() {
  // Gather all symbols which are loop IVs.
  SmallVector<Value, 4> loopIVs;
  for (unsigned i = getNumDimIds(), e = getNumDimAndSymbolIds(); i < e; i++) {
    if (ids[i].hasValue() && getForInductionVarOwner(ids[i].getValue()))
      loopIVs.push_back(ids[i].getValue());
  }
  // Turn each symbol in 'loopIVs' into a dim identifier.
  for (auto iv : loopIVs) {
    turnSymbolIntoDim(this, iv);
  }
}

void FlatAffineConstraints::addInductionVarOrTerminalSymbol(Value id) {
  if (containsId(id))
    return;

  // Caller is expected to fully compose map/operands if necessary.
  assert((isTopLevelValue(id) || isForInductionVar(id)) &&
         "non-terminal symbol / loop IV expected");
  // Outer loop IVs could be used in forOp's bounds.
  if (auto loop = getForInductionVarOwner(id)) {
    addDimId(getNumDimIds(), id);
    if (failed(this->addAffineForOpDomain(loop)))
      LLVM_DEBUG(
          loop.emitWarning("failed to add domain info to constraint system"));
    return;
  }
  // Add top level symbol.
  addSymbolId(getNumSymbolIds(), id);
  // Check if the symbol is a constant.
  if (auto constOp = id.getDefiningOp<ConstantIndexOp>())
    setIdToConstant(id, constOp.getValue());
}

LogicalResult FlatAffineConstraints::addAffineForOpDomain(AffineForOp forOp) {
  unsigned pos;
  // Pre-condition for this method.
  if (!findId(forOp.getInductionVar(), &pos)) {
    assert(false && "Value not found");
    return failure();
  }

  int64_t step = forOp.getStep();
  if (step != 1) {
    if (!forOp.hasConstantLowerBound())
      forOp.emitWarning("domain conservatively approximated");
    else {
      // Add constraints for the stride.
      // (iv - lb) % step = 0 can be written as:
      // (iv - lb) - step * q = 0 where q = (iv - lb) / step.
      // Add local variable 'q' and add the above equality.
      // The first constraint is q = (iv - lb) floordiv step.
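      // For example, for a hypothetical loop 'affine.for %i = 0 to 128 step
      // 4', this adds q = %i floordiv 4 together with the equality
      // %i - 4*q = 0.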
      SmallVector<int64_t, 8> dividend(getNumCols(), 0);
      int64_t lb = forOp.getConstantLowerBound();
      dividend[pos] = 1;
      dividend.back() -= lb;
      addLocalFloorDiv(dividend, step);
      // Second constraint: (iv - lb) - step * q = 0.
      SmallVector<int64_t, 8> eq(getNumCols(), 0);
      eq[pos] = 1;
      eq.back() -= lb;
      // For the local var just added above.
      eq[getNumCols() - 2] = -step;
      addEquality(eq);
    }
  }

  if (forOp.hasConstantLowerBound()) {
    addConstantLowerBound(pos, forOp.getConstantLowerBound());
  } else {
    // Non-constant lower bound case.
    if (failed(addLowerOrUpperBound(pos, forOp.getLowerBoundMap(),
                                    forOp.getLowerBoundOperands(),
                                    /*eq=*/false, /*lower=*/true)))
      return failure();
  }

  if (forOp.hasConstantUpperBound()) {
    addConstantUpperBound(pos, forOp.getConstantUpperBound() - 1);
    return success();
  }
  // Non-constant upper bound case.
  return addLowerOrUpperBound(pos, forOp.getUpperBoundMap(),
                              forOp.getUpperBoundOperands(),
                              /*eq=*/false, /*lower=*/false);
}

/// Adds constraints (lower and upper bounds) for each loop in the loop nest
/// described by the bound maps 'lbMaps' and 'ubMaps' of a computation slice.
/// Every pair ('lbMaps[i]', 'ubMaps[i]') describes the bounds of a loop in
/// the nest, sorted outer-to-inner. 'operands' contains the bound operands
/// for a single bound map. All the bound maps will use the same bound
/// operands. Note that some loops described by a computation slice might not
/// exist yet in the IR so the Value attached to those dimension identifiers
/// might be empty. For that reason, this method doesn't perform Value
/// look-ups to retrieve the dimension identifier positions. Instead, it
/// assumes the position of the dim identifiers in the constraint system is
/// the same as the position of the loop in the loop nest.
LogicalResult
FlatAffineConstraints::addDomainFromSliceMaps(ArrayRef<AffineMap> lbMaps,
                                              ArrayRef<AffineMap> ubMaps,
                                              ArrayRef<Value> operands) {
  assert(lbMaps.size() == ubMaps.size());
  assert(lbMaps.size() <= getNumDimIds());

  for (unsigned i = 0, e = lbMaps.size(); i < e; ++i) {
    AffineMap lbMap = lbMaps[i];
    AffineMap ubMap = ubMaps[i];
    assert(!lbMap || lbMap.getNumInputs() == operands.size());
    assert(!ubMap || ubMap.getNumInputs() == operands.size());

    // Check if this slice is just an equality along this dimension. If so,
    // retrieve the existing loop it equates to and add it to the system.
    if (lbMap && ubMap && lbMap.getNumResults() == 1 &&
        ubMap.getNumResults() == 1 &&
        lbMap.getResult(0) + 1 == ubMap.getResult(0) &&
        // The condition above will be true for maps describing a single
        // iteration (e.g., lbMap.getResult(0) = 0, ubMap.getResult(0) = 1).
        // Make sure we skip those cases by checking that the lb result is not
        // just a constant.
        !lbMap.getResult(0).isa<AffineConstantExpr>()) {
      // Limited support: we expect the lb result to be just a loop dimension.
      // Not supported otherwise for now.
      AffineDimExpr result = lbMap.getResult(0).dyn_cast<AffineDimExpr>();
      if (!result)
        return failure();

      AffineForOp loop =
          getForInductionVarOwner(operands[result.getPosition()]);
      if (!loop)
        return failure();

      if (failed(addAffineForOpDomain(loop)))
        return failure();
      continue;
    }

    // This slice refers to a loop that doesn't exist in the IR yet. Add its
    // bounds to the system assuming its dimension identifier position is the
    // same as the position of the loop in the loop nest.
    if (lbMap && failed(addLowerOrUpperBound(i, lbMap, operands, /*eq=*/false,
                                             /*lower=*/true)))
      return failure();

    if (ubMap && failed(addLowerOrUpperBound(i, ubMap, operands, /*eq=*/false,
                                             /*lower=*/false)))
      return failure();
  }
  return success();
}

void FlatAffineConstraints::addAffineIfOpDomain(AffineIfOp ifOp) {
  // Create the base constraints from the integer set attached to ifOp.
  FlatAffineConstraints cst(ifOp.getIntegerSet());

  // Bind ids in the constraints to ifOp operands.
  SmallVector<Value, 4> operands = ifOp.getOperands();
  cst.setIdValues(0, cst.getNumDimAndSymbolIds(), operands);

  // Merge the constraints from ifOp into the current domain. We first merge
  // and align the IDs from both constraint systems, and then append the
  // constraints from the ifOp into the current one.
  mergeAndAlignIdsWithOther(0, &cst);
  append(cst);
}

// Searches for a constraint with a non-zero coefficient at 'colIdx' in
// equality (isEq=true) or inequality (isEq=false) constraints.
// Returns true and sets the row found in 'rowIdx'; returns false otherwise.
static bool findConstraintWithNonZeroAt(const FlatAffineConstraints &cst,
                                        unsigned colIdx, bool isEq,
                                        unsigned *rowIdx) {
  assert(colIdx < cst.getNumCols() && "position out of bounds");
  auto at = [&](unsigned rowIdx) -> int64_t {
    return isEq ? cst.atEq(rowIdx, colIdx) : cst.atIneq(rowIdx, colIdx);
  };
  unsigned e = isEq ? cst.getNumEqualities() : cst.getNumInequalities();
  for (*rowIdx = 0; *rowIdx < e; ++(*rowIdx)) {
    if (at(*rowIdx) != 0) {
      return true;
    }
  }
  return false;
}

// Normalizes the coefficient values across all columns in 'rowIdx' by their
// GCD in equality or inequality constraints as specified by 'isEq'.
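// For example, the row for 2*d0 + 4*d1 - 6 >= 0 (a hypothetical constraint)
// is normalized to d0 + 2*d1 - 3 >= 0.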
template <bool isEq>
static void normalizeConstraintByGCD(FlatAffineConstraints *constraints,
                                     unsigned rowIdx) {
  auto at = [&](unsigned colIdx) -> int64_t {
    return isEq ? constraints->atEq(rowIdx, colIdx)
                : constraints->atIneq(rowIdx, colIdx);
  };
  uint64_t gcd = std::abs(at(0));
  for (unsigned j = 1, e = constraints->getNumCols(); j < e; ++j) {
    gcd = llvm::GreatestCommonDivisor64(gcd, std::abs(at(j)));
  }
  if (gcd > 0 && gcd != 1) {
    for (unsigned j = 0, e = constraints->getNumCols(); j < e; ++j) {
      int64_t v = at(j) / static_cast<int64_t>(gcd);
      isEq ? constraints->atEq(rowIdx, j) = v
           : constraints->atIneq(rowIdx, j) = v;
    }
  }
}

void FlatAffineConstraints::normalizeConstraintsByGCD() {
  for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) {
    normalizeConstraintByGCD</*isEq=*/true>(this, i);
  }
  for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) {
    normalizeConstraintByGCD</*isEq=*/false>(this, i);
  }
}

bool FlatAffineConstraints::hasConsistentState() const {
  if (!inequalities.hasConsistentState())
    return false;
  if (!equalities.hasConsistentState())
    return false;
  if (ids.size() != getNumIds())
    return false;

  // Catches errors where numDims, numSymbols, numIds aren't consistent.
  if (numDims > numIds || numSymbols > numIds || numDims + numSymbols > numIds)
    return false;

  return true;
}

/// Checks all rows of equality/inequality constraints for trivial
/// contradictions (for example: 1 == 0, 0 >= 1), which may have surfaced
/// after elimination. Returns 'true' if an invalid constraint is found;
/// 'false' otherwise.
bool FlatAffineConstraints::hasInvalidConstraint() const {
  assert(hasConsistentState());
  auto check = [&](bool isEq) -> bool {
    unsigned numCols = getNumCols();
    unsigned numRows = isEq ? getNumEqualities() : getNumInequalities();
    for (unsigned i = 0, e = numRows; i < e; ++i) {
      unsigned j;
      for (j = 0; j < numCols - 1; ++j) {
        int64_t v = isEq ? atEq(i, j) : atIneq(i, j);
        // Skip rows with non-zero variable coefficients.
        if (v != 0)
          break;
      }
      if (j < numCols - 1) {
        continue;
      }
      // Check validity of constant term at 'numCols - 1' w.r.t 'isEq'.
      // Example invalid constraints include: '1 == 0' or '-1 >= 0'.
      int64_t v = isEq ? atEq(i, numCols - 1) : atIneq(i, numCols - 1);
      if ((isEq && v != 0) || (!isEq && v < 0)) {
        return true;
      }
    }
    return false;
  };
  if (check(/*isEq=*/true))
    return true;
  return check(/*isEq=*/false);
}

// Eliminates the identifier from the constraint at 'rowIdx' based on the
// coefficient at pivotRow, pivotCol. Columns in range [elimColStart, pivotCol)
// will not be updated as they have already been eliminated.
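// For example (hypothetical values), with pivot coefficient 2 and a row
// coefficient 3 at 'pivotCol' (lcm 6), the row is updated to
// 2*row - 3*pivotRow, zeroing out the row's entry at 'pivotCol'.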
static void eliminateFromConstraint(FlatAffineConstraints *constraints,
                                    unsigned rowIdx, unsigned pivotRow,
                                    unsigned pivotCol, unsigned elimColStart,
                                    bool isEq) {
  // Skip if equality 'rowIdx' is the same as 'pivotRow'.
  if (isEq && rowIdx == pivotRow)
    return;
  auto at = [&](unsigned i, unsigned j) -> int64_t {
    return isEq ? constraints->atEq(i, j) : constraints->atIneq(i, j);
  };
  int64_t leadCoeff = at(rowIdx, pivotCol);
  // Skip if leading coefficient at 'rowIdx' is already zero.
  if (leadCoeff == 0)
    return;
  int64_t pivotCoeff = constraints->atEq(pivotRow, pivotCol);
  int64_t sign = (leadCoeff * pivotCoeff > 0) ? -1 : 1;
  int64_t lcm = mlir::lcm(pivotCoeff, leadCoeff);
  int64_t pivotMultiplier = sign * (lcm / std::abs(pivotCoeff));
  int64_t rowMultiplier = lcm / std::abs(leadCoeff);

  unsigned numCols = constraints->getNumCols();
  for (unsigned j = 0; j < numCols; ++j) {
    // Skip updating column 'j' if it was just eliminated.
    if (j >= elimColStart && j < pivotCol)
      continue;
    int64_t v = pivotMultiplier * constraints->atEq(pivotRow, j) +
                rowMultiplier * at(rowIdx, j);
    isEq ? constraints->atEq(rowIdx, j) = v
         : constraints->atIneq(rowIdx, j) = v;
  }
}

// Removes identifiers in column range [idStart, idLimit), copies any
// remaining valid data into place, and updates member variables.
void FlatAffineConstraints::removeIdRange(unsigned idStart, unsigned idLimit) {
  assert(idLimit < getNumCols() && "invalid id limit");

  if (idStart >= idLimit)
    return;

  // We are going to be removing one or more identifiers from the range.
  assert(idStart < numIds && "invalid idStart position");

  // TODO: Make 'removeIdRange' a lambda called from here.
  // Remove eliminated identifiers from the constraints.
  equalities.removeColumns(idStart, idLimit - idStart);
  inequalities.removeColumns(idStart, idLimit - idStart);

  // Update members numDims, numSymbols and numIds.
  unsigned numDimsEliminated = 0;
  unsigned numLocalsEliminated = 0;
  unsigned numColsEliminated = idLimit - idStart;
  if (idStart < numDims) {
    numDimsEliminated = std::min(numDims, idLimit) - idStart;
  }
  // Check how many local id's were removed. Note that our identifier order is
  // [dims, symbols, locals]. Local ids start at position numDims + numSymbols.
  if (idLimit > numDims + numSymbols) {
    numLocalsEliminated = std::min(
        idLimit - std::max(idStart, numDims + numSymbols), getNumLocalIds());
  }
  unsigned numSymbolsEliminated =
      numColsEliminated - numDimsEliminated - numLocalsEliminated;

  numDims -= numDimsEliminated;
  numSymbols -= numSymbolsEliminated;
  numIds = numIds - numColsEliminated;

  ids.erase(ids.begin() + idStart, ids.begin() + idLimit);
}

/// Returns the position of the identifier that has the minimum <number of
/// lower bounds> times <number of upper bounds> from the specified range of
/// identifiers [start, end). It is often best to eliminate in the increasing
/// order of these counts when doing Fourier-Motzkin elimination since FM adds
/// that many new constraints.
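/// (Eliminating an identifier with m lower and n upper bounds replaces the
/// m + n inequalities involving it with m * n new ones.)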
static unsigned getBestIdToEliminate(const FlatAffineConstraints &cst,
                                     unsigned start, unsigned end) {
  assert(start < cst.getNumIds() && end < cst.getNumIds() + 1);

  auto getProductOfNumLowerUpperBounds = [&](unsigned pos) {
    unsigned numLb = 0;
    unsigned numUb = 0;
    for (unsigned r = 0, e = cst.getNumInequalities(); r < e; r++) {
      if (cst.atIneq(r, pos) > 0) {
        ++numLb;
      } else if (cst.atIneq(r, pos) < 0) {
        ++numUb;
      }
    }
    return numLb * numUb;
  };

  unsigned minLoc = start;
  unsigned min = getProductOfNumLowerUpperBounds(start);
  for (unsigned c = start + 1; c < end; c++) {
    unsigned numLbUbProduct = getProductOfNumLowerUpperBounds(c);
    if (numLbUbProduct < min) {
      min = numLbUbProduct;
      minLoc = c;
    }
  }
  return minLoc;
}

// Checks for emptiness of the set by eliminating identifiers successively and
// using the GCD test (on all equality constraints) and checking for trivially
// invalid constraints. Returns 'true' if the constraint system is found to be
// empty; false otherwise.
bool FlatAffineConstraints::isEmpty() const {
  if (isEmptyByGCDTest() || hasInvalidConstraint())
    return true;

  // First, eliminate as many identifiers as possible using Gaussian
  // elimination.
  FlatAffineConstraints tmpCst(*this);
  unsigned currentPos = 0;
  while (currentPos < tmpCst.getNumIds()) {
    tmpCst.gaussianEliminateIds(currentPos, tmpCst.getNumIds());
    ++currentPos;
    // We check emptiness through trivial checks after eliminating each ID to
    // detect emptiness early. Since the checks isEmptyByGCDTest() and
    // hasInvalidConstraint() are linear time and a single sweep on the
    // constraint buffer, this appears reasonable, but it can be optimized in
    // the future.
    if (tmpCst.hasInvalidConstraint() || tmpCst.isEmptyByGCDTest())
      return true;
  }

  // Eliminate the remaining using FM.
  for (unsigned i = 0, e = tmpCst.getNumIds(); i < e; i++) {
    tmpCst.fourierMotzkinEliminate(
        getBestIdToEliminate(tmpCst, 0, tmpCst.getNumIds()));
    // Check for a constraint explosion. This rarely happens in practice, but
    // this check exists as a safeguard against improperly constructed
    // constraint systems or artificially created arbitrarily complex systems
    // that aren't the intended use case for FlatAffineConstraints. This is
    // needed since FM has a worst case exponential complexity in theory.
    if (tmpCst.getNumConstraints() >= kExplosionFactor * getNumIds()) {
      LLVM_DEBUG(llvm::dbgs() << "FM constraint explosion detected\n");
      return false;
    }

    // FM wouldn't have modified the equalities in any way, so there is no
    // need to run the GCD test again. Check for trivial invalid constraints.
    if (tmpCst.hasInvalidConstraint())
      return true;
  }
  return false;
}

// Runs the GCD test on all equality constraints. Returns 'true' if this test
// fails on any equality. Returns 'false' otherwise.
// This test can be used to disprove the existence of a solution. If it returns
// true, no integer solution to the equality constraints can exist.
//
// GCD test definition:
//
// The equality constraint:
//
//   c_1*x_1 + c_2*x_2 + ... + c_n*x_n = c_0
//
// has an integer solution iff:
//
//   GCD of c_1, c_2, ..., c_n divides c_0.
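//
// For example, 2*x_1 + 4*x_2 = 5 has no integer solution, since
// gcd(2, 4) = 2 does not divide 5.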
//
bool FlatAffineConstraints::isEmptyByGCDTest() const {
  assert(hasConsistentState());
  unsigned numCols = getNumCols();
  for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) {
    uint64_t gcd = std::abs(atEq(i, 0));
    for (unsigned j = 1; j < numCols - 1; ++j) {
      gcd = llvm::GreatestCommonDivisor64(gcd, std::abs(atEq(i, j)));
    }
    int64_t v = std::abs(atEq(i, numCols - 1));
    if (gcd > 0 && (v % gcd != 0)) {
      return true;
    }
  }
  return false;
}

// Returns a matrix where each row is a vector along which the polytope is
// bounded. The span of the returned vectors is guaranteed to contain all
// such vectors. The returned vectors are NOT guaranteed to be linearly
// independent. This function should not be called on empty sets.
//
// It is sufficient to check the perpendiculars of the constraints, as the set
// of perpendiculars which are bounded must span all bounded directions.
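// For example, for the strip 0 <= d0 <= 5 in two dimensions (a hypothetical
// set), the returned rows span the d0 axis; the set is unbounded along d1.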
|
|
|
|
|
Matrix FlatAffineConstraints::getBoundedDirections() const {
  // Note that it is necessary to add the equalities too (which the constructor
  // does) even though we don't need to check if they are bounded; whether an
  // inequality is bounded or not depends on what other constraints, including
  // equalities, are present.
  Simplex simplex(*this);

  assert(!simplex.isEmpty() && "It is not meaningful to ask whether a "
                               "direction is bounded in an empty set.");

  SmallVector<unsigned, 8> boundedIneqs;
  // The constructor adds the inequalities to the simplex first, so this
  // processes all the inequalities.
  for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) {
    if (simplex.isBoundedAlongConstraint(i))
      boundedIneqs.push_back(i);
  }

  // The direction vector is given by the coefficients and does not include the
  // constant term, so the matrix has one fewer column.
  unsigned dirsNumCols = getNumCols() - 1;
  Matrix dirs(boundedIneqs.size() + getNumEqualities(), dirsNumCols);

  // Copy the bounded inequalities.
  unsigned row = 0;
  for (unsigned i : boundedIneqs) {
    for (unsigned col = 0; col < dirsNumCols; ++col)
      dirs(row, col) = atIneq(i, col);
    ++row;
  }

  // Copy the equalities. All the equalities' perpendiculars are bounded.
  for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) {
    for (unsigned col = 0; col < dirsNumCols; ++col)
      dirs(row, col) = atEq(i, col);
    ++row;
  }

  return dirs;
}

// Returns true if the equality at `eqIndex` involves any of the last `numDims`
// identifiers of `fac`.
bool eqInvolvesSuffixDims(const FlatAffineConstraints &fac, unsigned eqIndex,
                          unsigned numDims) {
  for (unsigned e = fac.getNumIds(), j = e - numDims; j < e; ++j)
    if (fac.atEq(eqIndex, j) != 0)
      return true;
  return false;
}

// Returns true if the inequality at `ineqIndex` involves any of the last
// `numDims` identifiers of `fac`.
bool ineqInvolvesSuffixDims(const FlatAffineConstraints &fac,
                            unsigned ineqIndex, unsigned numDims) {
  for (unsigned e = fac.getNumIds(), j = e - numDims; j < e; ++j)
    if (fac.atIneq(ineqIndex, j) != 0)
      return true;
  return false;
}

// Removes all equalities and inequalities of `fac` that involve any of its
// last `unboundedDims` identifiers.
void removeConstraintsInvolvingSuffixDims(FlatAffineConstraints &fac,
                                          unsigned unboundedDims) {
  // We iterate backwards so that whether we remove constraint i - 1 or not,
  // the next constraint to be tested is always i - 2.
  for (unsigned i = fac.getNumEqualities(); i > 0; i--)
    if (eqInvolvesSuffixDims(fac, i - 1, unboundedDims))
      fac.removeEquality(i - 1);
  for (unsigned i = fac.getNumInequalities(); i > 0; i--)
    if (ineqInvolvesSuffixDims(fac, i - 1, unboundedDims))
      fac.removeInequality(i - 1);
}

bool FlatAffineConstraints::isIntegerEmpty() const {
  return !findIntegerSample().hasValue();
}

/// Let this set be S. If S is bounded then we directly call into the GBR
/// sampling algorithm. Otherwise, there are some unbounded directions, i.e.,
/// vectors v such that S extends to infinity along v or -v. In this case we
/// use an algorithm described in the integer set library (isl) manual and used
/// by the isl_set_sample function in that library. The algorithm is:
///
/// 1) Apply a unimodular transform T to S to obtain S*T, such that all
/// dimensions in which S*T is bounded lie in the linear span of a prefix of
/// the dimensions.
///
/// 2) Construct a set B by removing all constraints that involve
/// the unbounded dimensions and then deleting the unbounded dimensions. Note
/// that B is a bounded set.
///
/// 3) Try to obtain a sample from B using the GBR sampling
/// algorithm. If no sample is found, return that S is empty.
///
/// 4) Otherwise, substitute the obtained sample into S*T to obtain a set
/// C. C is a full-dimensional cone and always contains a sample.
///
/// 5) Obtain an integer sample from C.
///
/// 6) Return T*v, where v is the concatenation of the samples from B and C.
///
/// The following is a sketch of a proof that
/// a) If the algorithm returns empty, then S is empty.
/// b) If the algorithm returns a sample, it is a valid sample in S.
///
/// The algorithm returns empty only if B is empty, in which case S*T is
/// certainly empty since B was obtained by removing constraints and then
/// deleting unconstrained dimensions from S*T. Since T is unimodular, a vector
/// v is in S*T iff T*v is in S. So in this case, since S*T is empty, S is
/// empty too.
///
/// Otherwise, the algorithm substitutes the sample from B into S*T. All the
/// constraints of S*T that did not involve unbounded dimensions are satisfied
/// by this substitution. All dimensions in the linear span of the dimensions
/// outside the prefix are unbounded in S*T (step 1). Substituting values for
/// the bounded dimensions cannot make these dimensions bounded, and these are
/// the only remaining dimensions in C, so C is unbounded along every vector
/// (in the positive or negative direction, or both). C is hence a
/// full-dimensional cone and therefore always contains an integer point.
///
/// Concatenating the samples from B and C gives a sample v in S*T, so the
/// returned sample T*v is a sample in S.
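///
/// A small worked example (hypothetical): let S = {(x, y) : 0 <= x <= 5},
/// with y unconstrained. The bounded directions span only the x-axis, so T
/// can be the identity and the bounded prefix is just x. Then
/// B = {x : 0 <= x <= 5}; suppose GBR returns the sample x = 2. Substituting
/// x = 2 into S*T leaves C = the entire y-axis, which trivially contains the
/// integer sample y = 0, so the algorithm returns (2, 0).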
Optional<SmallVector<int64_t, 8>>
FlatAffineConstraints::findIntegerSample() const {
  // First, try the GCD test heuristic.
  if (isEmptyByGCDTest())
    return {};

  Simplex simplex(*this);
  if (simplex.isEmpty())
    return {};

  // For a bounded set, we directly call into the GBR sampling algorithm.
  if (!simplex.isUnbounded())
    return simplex.findIntegerSample();

  // The set is unbounded. We cannot directly use the GBR algorithm.
  //
  // m is a matrix containing, in each row, a vector along which S is bounded,
  // such that the linear span of all these vectors contains all bounded
  // directions in S.
  Matrix m = getBoundedDirections();
  // In column echelon form, each row of m occupies only the first rank(m)
  // columns and has zeros on the other columns. The transform T that brings S
  // to column echelon form is unimodular as well, so this is a suitable
  // transform to use in step 1 of the algorithm.
  std::pair<unsigned, LinearTransform> result =
      LinearTransform::makeTransformToColumnEchelon(std::move(m));
  const LinearTransform &transform = result.second;
  // 1) Apply T to S to obtain S*T.
  FlatAffineConstraints transformedSet = transform.applyTo(*this);

  // 2) Remove the unbounded dimensions and constraints involving them to
  // obtain a bounded set.
  FlatAffineConstraints boundedSet = transformedSet;
  unsigned numBoundedDims = result.first;
  unsigned numUnboundedDims = getNumIds() - numBoundedDims;
  removeConstraintsInvolvingSuffixDims(boundedSet, numUnboundedDims);
  boundedSet.removeIdRange(numBoundedDims, boundedSet.getNumIds());

  // 3) Try to obtain a sample from the bounded set.
  Optional<SmallVector<int64_t, 8>> boundedSample =
      Simplex(boundedSet).findIntegerSample();
  if (!boundedSample)
    return {};
  assert(boundedSet.containsPoint(*boundedSample) &&
         "Simplex returned an invalid sample!");

  // 4) Substitute the values of the bounded dimensions into S*T to obtain a
  // full-dimensional cone, which necessarily contains an integer sample.
  transformedSet.setAndEliminate(0, *boundedSample);
  FlatAffineConstraints &cone = transformedSet;

  // 5) Obtain an integer sample from the cone.
  //
  // We shrink the cone such that for any rational point in the shrunken cone,
  // rounding up each of the point's coordinates produces a point that still
  // lies in the original cone.
  //
  // Rounding up a point x adds a number e_i in [0, 1) to each coordinate x_i.
  // For each inequality sum_i a_i x_i + c >= 0 in the original cone, the
  // shrunken cone will have the inequality tightened by some amount s, such
  // that if x satisfies the shrunken cone's tightened inequality, then x + e
  // satisfies the original inequality, i.e.,
  //
  // sum_i a_i x_i + c + s >= 0 implies sum_i a_i (x_i + e_i) + c >= 0
  //
  // for any e_i values in [0, 1). In fact, we will handle the slightly more
  // general case where e_i can be in [0, 1]. For example, consider the
  // inequality 2x_1 - 3x_2 - 7x_3 - 6 >= 0, and let x = (3, 0, 0). How low
  // could the LHS go if we added a number in [0, 1] to each coordinate? The
  // LHS is minimized when we add 1 to the x_i with negative coefficient a_i
  // and keep the other x_i the same. In the example, we would get
  // x = (3, 1, 1), changing the value of the LHS by -3 + -7 = -10.
  //
  // In general, the value of the LHS can change by at most the sum of the
  // negative a_i, so we accommodate this by shifting the inequality by this
  // amount for the shrunken cone.
  for (unsigned i = 0, e = cone.getNumInequalities(); i < e; ++i) {
    for (unsigned j = 0; j < cone.numIds; ++j) {
      int64_t coeff = cone.atIneq(i, j);
      if (coeff < 0)
        cone.atIneq(i, cone.numIds) += coeff;
    }
  }

  // Obtain an integer sample in the cone by rounding up a rational point from
  // the shrunken cone. Shrinking the cone amounts to shifting its apex
  // "inwards" without changing its "shape"; the shrunken cone is still a
  // full-dimensional cone and is hence non-empty.
  Simplex shrunkenConeSimplex(cone);
  assert(!shrunkenConeSimplex.isEmpty() && "Shrunken cone cannot be empty!");
  SmallVector<Fraction, 8> shrunkenConeSample =
      shrunkenConeSimplex.getRationalSample();

  SmallVector<int64_t, 8> coneSample(llvm::map_range(shrunkenConeSample, ceil));

  // 6) Return transform * concat(boundedSample, coneSample).
  SmallVector<int64_t, 8> &sample = boundedSample.getValue();
  sample.append(coneSample.begin(), coneSample.end());
  return transform.preMultiplyColumn(sample);
}

/// Helper to evaluate an affine expression at a point.
/// The expression is a list of coefficients for the dimensions followed by the
/// constant term.
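/// For example, with the (hypothetical) coefficients expr = {2, 3, 5}
/// encoding 2*x + 3*y + 5 and the point (1, 4), this evaluates to
/// 2*1 + 3*4 + 5 = 19.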
static int64_t valueAt(ArrayRef<int64_t> expr, ArrayRef<int64_t> point) {
  assert(expr.size() == 1 + point.size() &&
         "Dimensionalities of point and expression don't match!");
  int64_t value = expr.back();
  for (unsigned i = 0; i < point.size(); ++i)
    value += expr[i] * point[i];
  return value;
}

/// A point satisfies an equality iff the value of the equality at that point
/// is zero, and it satisfies an inequality iff the value of the inequality at
/// that point is non-negative.
bool FlatAffineConstraints::containsPoint(ArrayRef<int64_t> point) const {
  for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) {
    if (valueAt(getEquality(i), point) != 0)
      return false;
  }
  for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) {
    if (valueAt(getInequality(i), point) < 0)
      return false;
  }
  return true;
}

/// Tightens inequalities given that we are dealing with integer spaces. This
/// is analogous to the GCD test but applied to inequalities. The constant term
/// can be reduced to the preceding multiple of the GCD of the coefficients,
/// i.e., 64*i - 100 >= 0 => 64*i - 128 >= 0 (since 'i' is an integer). This is
/// a fast method: linear in the number of coefficients.
// Example of how this affects practical cases: consider the scenario
// 64*i >= 100, j = 64*i; without tightening, elimination of i would yield
// j >= 100 instead of the tighter (exact) j >= 128.
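//
// Concretely, for 64*i - 100 >= 0 the loop below computes gcd = 64 and
// replaces the constant term by floorDiv(-100, 64) = -2; dividing the
// remaining coefficients by 64 then yields the normalized row i - 2 >= 0,
// i.e., i >= 2.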
void FlatAffineConstraints::gcdTightenInequalities() {
  unsigned numCols = getNumCols();
  for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) {
    uint64_t gcd = std::abs(atIneq(i, 0));
    for (unsigned j = 1; j < numCols - 1; ++j) {
      gcd = llvm::GreatestCommonDivisor64(gcd, std::abs(atIneq(i, j)));
    }
    if (gcd > 0 && gcd != 1) {
      int64_t gcdI = static_cast<int64_t>(gcd);
      // Tighten the constant term and normalize the constraint by the GCD.
      atIneq(i, numCols - 1) = mlir::floorDiv(atIneq(i, numCols - 1), gcdI);
      for (unsigned j = 0, e = numCols - 1; j < e; ++j)
        atIneq(i, j) /= gcdI;
    }
  }
}

// Eliminates all identifier variables in column range [posStart, posLimit).
// Returns the number of variables eliminated.
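//
// For example (hypothetical): eliminating x from {x - 2*y - 1 = 0,
// -x - y + 10 >= 0} uses the equality as the pivot row to substitute
// x = 2*y + 1 into the inequality, yielding -3*y + 9 >= 0; the pivot
// equality and the column for x are then removed.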
unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart,
                                                     unsigned posLimit) {
  // Identifier positions to eliminate must be in range.
  assert(posLimit <= numIds);
  assert(hasConsistentState());

  if (posStart >= posLimit)
    return 0;

  gcdTightenInequalities();

  unsigned pivotCol = 0;
  for (pivotCol = posStart; pivotCol < posLimit; ++pivotCol) {
    // Find a row which has a non-zero coefficient in column 'pivotCol'.
    unsigned pivotRow;
    if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/true,
                                     &pivotRow)) {
      // No pivot row in equalities with non-zero at 'pivotCol'.
      if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/false,
                                       &pivotRow)) {
        // The identifier at 'pivotCol' appears in no equality or inequality,
        // so it is trivially eliminated; continue to the next column.
        continue;
      }
      break;
    }

    // Eliminate identifier at 'pivotCol' from each equality row.
    for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) {
      eliminateFromConstraint(this, i, pivotRow, pivotCol, posStart,
                              /*isEq=*/true);
      normalizeConstraintByGCD</*isEq=*/true>(this, i);
    }

    // Eliminate identifier at 'pivotCol' from each inequality row.
    for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) {
      eliminateFromConstraint(this, i, pivotRow, pivotCol, posStart,
                              /*isEq=*/false);
      normalizeConstraintByGCD</*isEq=*/false>(this, i);
    }
    removeEquality(pivotRow);
    gcdTightenInequalities();
  }
  // Update position limit based on number eliminated.
  posLimit = pivotCol;
  // Remove eliminated columns from all constraints.
  removeIdRange(posStart, posLimit);
  return posLimit - posStart;
}

// Determine whether the identifier at 'pos' (say id_r) can be expressed as
// modulo of another known identifier (say id_n) w.r.t a constant. For example,
// if the following constraints hold true:
// ```
// 0 <= id_r <= divisor - 1
// id_n - (divisor * q_expr) = id_r
// ```
// where `id_n` is a known identifier (called the dividend), and `q_expr` is an
// `AffineExpr` (called the quotient expression), `id_r` can be written as:
//
// `id_r = id_n mod divisor`.
//
// Additionally, in a special case of the above constraints where `q_expr` is
// an identifier itself that is not yet known (say `id_q`), it can be written
// as a floordiv in the following way:
//
// `id_q = id_n floordiv divisor`.
//
// Returns true if the above mod or floordiv are detected, updating 'memo' with
// these new expressions. Returns false otherwise.
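//
// For instance (hypothetical): with constraints 0 <= r <= 3 and
// n - 4*q - r = 0, the divisor is 4, so r = n mod 4; and because the quotient
// term q is a single yet-unknown identifier, q = n floordiv 4 as well.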
static bool detectAsMod(const FlatAffineConstraints &cst, unsigned pos,
                        int64_t lbConst, int64_t ubConst,
                        SmallVectorImpl<AffineExpr> &memo,
                        MLIRContext *context) {
  assert(pos < cst.getNumIds() && "invalid position");

  // Check if a divisor satisfying the condition `0 <= id_r <= divisor - 1` can
  // be determined.
  if (lbConst != 0 || ubConst < 1)
    return false;
  int64_t divisor = ubConst + 1;

  // Check for the aforementioned conditions in each equality.
  for (unsigned curEquality = 0, numEqualities = cst.getNumEqualities();
       curEquality < numEqualities; curEquality++) {
    int64_t coefficientAtPos = cst.atEq(curEquality, pos);
    // If the current equality does not involve `id_r`, continue to the next
    // equality.
    if (coefficientAtPos == 0)
      continue;

    // The constant term should be 0 in this equality.
    if (cst.atEq(curEquality, cst.getNumCols() - 1) != 0)
      continue;

    // Traverse through the equality and construct the dividend expression
    // `dividendExpr`, to contain all the identifiers which are known and are
    // not divisible by `(coefficientAtPos * divisor)`. The hope here is that
    // `dividendExpr` gets simplified into a single identifier `id_n` discussed
    // above.
    auto dividendExpr = getAffineConstantExpr(0, context);

    // Track the terms that go into the quotient expression, later used to
    // detect an additional floordiv.
    unsigned quotientCount = 0;
    int quotientPosition = -1;
    int quotientSign = 1;

    // Consider each term in the current equality.
    unsigned curId, e;
    for (curId = 0, e = cst.getNumDimAndSymbolIds(); curId < e; ++curId) {
      // Ignore id_r.
      if (curId == pos)
        continue;
      int64_t coefficientOfCurId = cst.atEq(curEquality, curId);
      // Ignore ids that do not contribute to the current equality.
      if (coefficientOfCurId == 0)
        continue;
      // Check if the current id goes into the quotient expression.
      if (coefficientOfCurId % (divisor * coefficientAtPos) == 0) {
        quotientCount++;
        quotientPosition = curId;
        quotientSign = (coefficientOfCurId * coefficientAtPos) > 0 ? 1 : -1;
        continue;
      }
      // Identifiers that are part of dividendExpr should be known.
      if (!memo[curId])
        break;
      // Append the current identifier to the dividend expression.
      dividendExpr = dividendExpr + memo[curId] * coefficientOfCurId;
    }

    // Can't construct the expression as it depends on a yet uncomputed id.
    if (curId < e)
      continue;

    // Express `id_r` in terms of the other ids collected so far.
    if (coefficientAtPos > 0)
      dividendExpr = (-dividendExpr).floorDiv(coefficientAtPos);
    else
      dividendExpr = dividendExpr.floorDiv(-coefficientAtPos);

    // Simplify the expression.
    dividendExpr = simplifyAffineExpr(dividendExpr, cst.getNumDimIds(),
                                      cst.getNumSymbolIds());
    // Only if the final dividend expression is just a single id (which we call
    // `id_n`) can we proceed.
    // TODO: Handle AffineSymbolExpr as well. There is no reason to restrict it
    // to dims themselves.
    auto dimExpr = dividendExpr.dyn_cast<AffineDimExpr>();
    if (!dimExpr)
      continue;

    // Express `id_r` as `id_n % divisor` and store the expression in `memo`.
    if (quotientCount >= 1) {
      auto ub = cst.getConstantUpperBound(dimExpr.getPosition());
      // If `id_n` has an upper bound that is less than the divisor, the mod
      // can be eliminated altogether.
      if (ub.hasValue() && ub.getValue() < divisor)
        memo[pos] = dimExpr;
      else
        memo[pos] = dimExpr % divisor;
      // If a unique quotient `id_q` was seen, it can be expressed as
      // `id_n floordiv divisor`.
      if (quotientCount == 1 && !memo[quotientPosition])
        memo[quotientPosition] = dimExpr.floorDiv(divisor) * quotientSign;

      return true;
    }
  }
  return false;
}

/// Gather all lower and upper bounds of the identifier at `pos`, and
/// optionally any equalities on it. In addition, the bounds are to be
/// independent of identifiers in position range [`offset`, `offset` + `num`).
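///
/// For illustration (hypothetical rows), for an identifier x at `pos`: the
/// inequality x - 5 >= 0 has coefficient +1 at `pos` and is recorded as a
/// lower bound (x >= 5), while -x + 10 >= 0 has coefficient -1 and is
/// recorded as an upper bound (x <= 10).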
void FlatAffineConstraints::getLowerAndUpperBoundIndices(
    unsigned pos, SmallVectorImpl<unsigned> *lbIndices,
    SmallVectorImpl<unsigned> *ubIndices, SmallVectorImpl<unsigned> *eqIndices,
    unsigned offset, unsigned num) const {
  assert(pos < getNumIds() && "invalid position");
  assert(offset + num < getNumCols() && "invalid range");

  // Checks for a constraint that has a non-zero coeff for the identifiers in
  // the position range [offset, offset + num) while ignoring `pos`.
  auto containsConstraintDependentOnRange = [&](unsigned r, bool isEq) {
    unsigned c, f;
    auto cst = isEq ? getEquality(r) : getInequality(r);
    for (c = offset, f = offset + num; c < f; ++c) {
      if (c == pos)
        continue;
      if (cst[c] != 0)
        break;
    }
    return c < f;
  };

  // Gather all lower bounds and upper bounds of the variable. Since the
  // canonical form is c_1*x_1 + c_2*x_2 + ... + c_0 >= 0, a constraint is a
  // lower bound for x_i if c_i >= 1, and an upper bound if c_i <= -1.
  for (unsigned r = 0, e = getNumInequalities(); r < e; r++) {
    // The bounds are to be independent of [offset, offset + num) columns.
    if (containsConstraintDependentOnRange(r, /*isEq=*/false))
      continue;
    if (atIneq(r, pos) >= 1) {
      // Lower bound.
      lbIndices->push_back(r);
    } else if (atIneq(r, pos) <= -1) {
      // Upper bound.
      ubIndices->push_back(r);
    }
  }

  // An equality is both a lower and upper bound. Record any equalities
  // involving the pos^th identifier.
  if (!eqIndices)
    return;

  for (unsigned r = 0, e = getNumEqualities(); r < e; r++) {
    if (atEq(r, pos) == 0)
      continue;
    if (containsConstraintDependentOnRange(r, /*isEq=*/true))
      continue;
    eqIndices->push_back(r);
  }
}

/// Check if the pos^th identifier can be expressed as a floordiv of an affine
/// function of other identifiers (where the divisor is a positive constant)
/// given the initial set of expressions in `exprs`. If it can be, the
/// corresponding position in `exprs` is set as the detected affine expr. For
/// eg: 4q <= i + j <= 4q + 3 <=> q = (i + j) floordiv 4. An equality can
/// also yield a floordiv: eg. 4q = i + j <=> q = (i + j) floordiv 4.
/// 32q + 28 <= i <= 32q + 31 => q = i floordiv 32.
static bool detectAsFloorDiv(const FlatAffineConstraints &cst, unsigned pos,
                             MLIRContext *context,
                             SmallVectorImpl<AffineExpr> &exprs) {
  assert(pos < cst.getNumIds() && "invalid position");

  SmallVector<unsigned, 4> lbIndices, ubIndices;
  cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices);

  // Check if any lower bound, upper bound pair is of the form:
  //     divisor * id >= expr - (divisor - 1)    <-- Lower bound for 'id'
  //     divisor * id <= expr                    <-- Upper bound for 'id'
  // Then, 'id' is equivalent to 'expr floordiv divisor' (where divisor > 1).
  //
  // For example:
  //     32*k >= 16*i + j - 31        <-- Lower bound for 'k'
  //     32*k <= 16*i + j             <-- Upper bound for 'k'
  //     expr = 16*i + j, divisor = 32
  //     k = (16*i + j) floordiv 32
  //
  //     4q >= i + j - 2              <-- Lower bound for 'q'
  //     4q <= i + j + 1              <-- Upper bound for 'q'
  //     expr = i + j + 1, divisor = 4
  //     q = (i + j + 1) floordiv 4
  for (auto ubPos : ubIndices) {
    for (auto lbPos : lbIndices) {
      // Due to the form of the inequalities, the sum of the constant terms of
      // the upper bound and the lower bound is divisor - 1. The 'divisor' here
      // is cst.atIneq(lbPos, pos), and we already know that it's positive
      // (since cst.atIneq(lbPos, ...) is a lower bound expr for 'pos').
      // Check if this sum of constants is divisor - 1.
      int64_t divisor = cst.atIneq(lbPos, pos);
      int64_t constantSum = cst.atIneq(lbPos, cst.getNumCols() - 1) +
                            cst.atIneq(ubPos, cst.getNumCols() - 1);
      if (constantSum != divisor - 1)
        continue;
      // For the remaining part, check if the lower bound expr's coeff's are
      // negations of corresponding upper bound ones'.
      unsigned c, f;
      for (c = 0, f = cst.getNumCols() - 1; c < f; ++c)
        if (cst.atIneq(lbPos, c) != -cst.atIneq(ubPos, c))
          break;
      // Lb coeff's aren't negative of ub coeff's (for the non-constant term
      // part).
      if (c < f)
        continue;
      // Due to the form of the upper bound inequality, the constant term of
      // `expr` is the constant term of the upper bound inequality.
      int64_t divConstantTerm = cst.atIneq(ubPos, cst.getNumCols() - 1);
      // Construct the dividend expression.
      auto dividendExpr = getAffineConstantExpr(divConstantTerm, context);
      for (c = 0, f = cst.getNumCols() - 1; c < f; ++c) {
        if (c == pos)
          continue;
        int64_t ubVal = cst.atIneq(ubPos, c);
        if (ubVal == 0)
          continue;
        if (!exprs[c])
          break;
        dividendExpr = dividendExpr + ubVal * exprs[c];
      }
      // The expression can't be constructed as it depends on a yet unknown
      // identifier.
      // TODO: Visit/compute the identifiers in an order so that this doesn't
      // happen. More complex but much more efficient.
      if (c < f)
        continue;
      // Successfully detected the floordiv.
      exprs[pos] = dividendExpr.floorDiv(divisor);
      return true;
    }
  }
  return false;
}

// Fills an inequality row with the value 'val'.
static inline void fillInequality(FlatAffineConstraints *cst, unsigned r,
                                  int64_t val) {
  for (unsigned c = 0, f = cst->getNumCols(); c < f; c++) {
    cst->atIneq(r, c) = val;
  }
}

// Negates an inequality.
static inline void negateInequality(FlatAffineConstraints *cst, unsigned r) {
  for (unsigned c = 0, f = cst->getNumCols(); c < f; c++) {
    cst->atIneq(r, c) = -cst->atIneq(r, c);
  }
}

// A more complex check to eliminate redundant inequalities. Uses
// Fourier-Motzkin to check if a constraint is redundant.
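//
// For example (hypothetical): in {i >= 0, i + 5 >= 0}, replacing the second
// row by its complement -i - 6 >= 0 (i.e., i <= -6) yields a system that also
// contains i >= 0 and is therefore empty, so i + 5 >= 0 is redundant.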
void FlatAffineConstraints::removeRedundantInequalities() {
  SmallVector<bool, 32> redun(getNumInequalities(), false);
  // To check if an inequality is redundant, we replace the inequality by its
  // complement (for eg., i - 1 >= 0 by i <= 0), and check if the resulting
  // system is empty. If it is, the inequality is redundant.
  FlatAffineConstraints tmpCst(*this);
  for (unsigned r = 0, e = getNumInequalities(); r < e; r++) {
    // Change the inequality to its complement.
    negateInequality(&tmpCst, r);
    tmpCst.atIneq(r, tmpCst.getNumCols() - 1)--;
    if (tmpCst.isEmpty()) {
      redun[r] = true;
      // Zero fill the redundant inequality.
      fillInequality(this, r, /*val=*/0);
      fillInequality(&tmpCst, r, /*val=*/0);
    } else {
      // Reverse the change (to avoid recreating tmpCst each time).
      tmpCst.atIneq(r, tmpCst.getNumCols() - 1)++;
      negateInequality(&tmpCst, r);
    }
  }

  // Scan to get rid of all rows marked redundant, in-place.
  auto copyRow = [&](unsigned src, unsigned dest) {
    if (src == dest)
      return;
    for (unsigned c = 0, e = getNumCols(); c < e; c++) {
      atIneq(dest, c) = atIneq(src, c);
    }
  };
  unsigned pos = 0;
  for (unsigned r = 0, e = getNumInequalities(); r < e; r++) {
    if (!redun[r])
      copyRow(r, pos++);
  }
  inequalities.resizeVertically(pos);
}

// A more complex check to eliminate redundant inequalities and equalities.
// Uses Simplex to check if a constraint is redundant.
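//
// For example (hypothetical): in {x = 5, x >= 0, x <= 10}, the Simplex marks
// both x >= 0 and x <= 10 as redundant since each is implied by x = 5, while
// the pair of inequalities encoding x = 5 itself is kept.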
void FlatAffineConstraints::removeRedundantConstraints() {
  // First, we run gcdTightenInequalities. This allows us to catch some
  // constraints which are not redundant when considering rational solutions
  // but are redundant in terms of integer solutions.
  gcdTightenInequalities();
  Simplex simplex(*this);
  simplex.detectRedundant();

  auto copyInequality = [&](unsigned src, unsigned dest) {
    if (src == dest)
      return;
    for (unsigned c = 0, e = getNumCols(); c < e; c++)
      atIneq(dest, c) = atIneq(src, c);
  };
  unsigned pos = 0;
  unsigned numIneqs = getNumInequalities();
  // Scan to get rid of all inequalities marked redundant, in-place. In
  // Simplex, the first constraints added are the inequalities.
  for (unsigned r = 0; r < numIneqs; r++) {
    if (!simplex.isMarkedRedundant(r))
      copyInequality(r, pos++);
  }
  inequalities.resizeVertically(pos);

  // Scan to get rid of all equalities marked redundant, in-place. In Simplex,
  // after the inequalities, a pair of constraints for each equality is added.
  // An equality is redundant if both the inequalities in its pair are
  // redundant.
  auto copyEquality = [&](unsigned src, unsigned dest) {
    if (src == dest)
      return;
    for (unsigned c = 0, e = getNumCols(); c < e; c++)
      atEq(dest, c) = atEq(src, c);
  };
  pos = 0;
  for (unsigned r = 0, e = getNumEqualities(); r < e; r++) {
    if (!(simplex.isMarkedRedundant(numIneqs + 2 * r) &&
          simplex.isMarkedRedundant(numIneqs + 2 * r + 1)))
      copyEquality(r, pos++);
  }
  equalities.resizeVertically(pos);
}

std::pair<AffineMap, AffineMap> FlatAffineConstraints::getLowerAndUpperBound(
    unsigned pos, unsigned offset, unsigned num, unsigned symStartPos,
    ArrayRef<AffineExpr> localExprs, MLIRContext *context) const {
  assert(pos + offset < getNumDimIds() && "invalid dim start pos");
  assert(symStartPos >= (pos + offset) && "invalid sym start pos");
  assert(getNumLocalIds() == localExprs.size() &&
         "incorrect local exprs count");

  SmallVector<unsigned, 4> lbIndices, ubIndices, eqIndices;
  getLowerAndUpperBoundIndices(pos + offset, &lbIndices, &ubIndices, &eqIndices,
                               offset, num);

  /// Add to 'b' from 'a' in set [0, offset) U [offset + num, symStartPos).
  auto addCoeffs = [&](ArrayRef<int64_t> a, SmallVectorImpl<int64_t> &b) {
    b.clear();
    for (unsigned i = 0, e = a.size(); i < e; ++i) {
      if (i < offset || i >= offset + num)
        b.push_back(a[i]);
    }
  };

  SmallVector<int64_t, 8> lb, ub;
  SmallVector<AffineExpr, 4> lbExprs;
  unsigned dimCount = symStartPos - num;
  unsigned symCount = getNumDimAndSymbolIds() - symStartPos;
  lbExprs.reserve(lbIndices.size() + eqIndices.size());
  // Lower bound expressions.
  for (auto idx : lbIndices) {
    auto ineq = getInequality(idx);
    // Extract the lower bound (in terms of other coeff's + const), i.e., if
    // i - j + 1 >= 0 is the constraint and 'pos' is for i, the lower bound is
    // j - 1.
    addCoeffs(ineq, lb);
    std::transform(lb.begin(), lb.end(), lb.begin(), std::negate<int64_t>());
    auto expr =
        getAffineExprFromFlatForm(lb, dimCount, symCount, localExprs, context);
    // expr ceildiv divisor is (expr + divisor - 1) floordiv divisor.
int64_t divisor = std::abs(ineq[pos + offset]);
|
|
|
|
|
expr = (expr + divisor - 1).floorDiv(divisor);
|
|
|
|
|
lbExprs.push_back(expr);
|
2019-02-19 18:17:19 -08:00
|
|
|
}

  SmallVector<AffineExpr, 4> ubExprs;
  ubExprs.reserve(ubIndices.size() + eqIndices.size());
  // Upper bound expressions.
  for (auto idx : ubIndices) {
    auto ineq = getInequality(idx);
    // Extract the upper bound (in terms of other coeff's + const).
    addCoeffs(ineq, ub);
    auto expr =
        getAffineExprFromFlatForm(ub, dimCount, symCount, localExprs, context);
    expr = expr.floorDiv(std::abs(ineq[pos + offset]));
    // Upper bound is exclusive.
    ubExprs.push_back(expr + 1);
  }

  // Equalities. Each one is both a lower and an upper bound.
  SmallVector<int64_t, 4> b;
  for (auto idx : eqIndices) {
    auto eq = getEquality(idx);
    addCoeffs(eq, b);
    if (eq[pos + offset] > 0)
      std::transform(b.begin(), b.end(), b.begin(), std::negate<int64_t>());

    // Extract the upper bound (in terms of other coeff's + const).
    auto expr =
        getAffineExprFromFlatForm(b, dimCount, symCount, localExprs, context);
    expr = expr.floorDiv(std::abs(eq[pos + offset]));
    // Upper bound is exclusive.
    ubExprs.push_back(expr + 1);
    // Lower bound.
    expr =
        getAffineExprFromFlatForm(b, dimCount, symCount, localExprs, context);
    expr = expr.ceilDiv(std::abs(eq[pos + offset]));
    lbExprs.push_back(expr);
  }

  auto lbMap = AffineMap::get(dimCount, symCount, lbExprs, context);
  auto ubMap = AffineMap::get(dimCount, symCount, ubExprs, context);

  return {lbMap, ubMap};
}
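
// E.g. (an illustrative sketch): with one dim and one symbol, the constraints
// d0 >= 0 and -d0 + s0 - 1 >= 0 on the dim at 'pos' yield
// lbMap = ()[s0] -> (0) and the exclusive ubMap = ()[s0] -> (s0).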

/// Computes the lower and upper bounds of the first 'num' dimensional
/// identifiers (starting at 'offset') as affine maps of the remaining
/// identifiers (dimensional and symbolic identifiers). Local identifiers are
/// themselves explicitly computed as affine functions of other identifiers in
/// this process if needed.
void FlatAffineConstraints::getSliceBounds(unsigned offset, unsigned num,
                                           MLIRContext *context,
                                           SmallVectorImpl<AffineMap> *lbMaps,
                                           SmallVectorImpl<AffineMap> *ubMaps) {
  assert(num < getNumDimIds() && "invalid range");

  // Basic simplification.
  normalizeConstraintsByGCD();

  LLVM_DEBUG(llvm::dbgs() << "getSliceBounds for first " << num
                          << " identifiers\n");
  LLVM_DEBUG(dump());

  // Record computed/detected identifiers.
  SmallVector<AffineExpr, 8> memo(getNumIds());
  // Initialize dimensional and symbolic identifiers.
  for (unsigned i = 0, e = getNumDimIds(); i < e; i++) {
    if (i < offset)
      memo[i] = getAffineDimExpr(i, context);
    else if (i >= offset + num)
      memo[i] = getAffineDimExpr(i - num, context);
  }
  for (unsigned i = getNumDimIds(), e = getNumDimAndSymbolIds(); i < e; i++)
    memo[i] = getAffineSymbolExpr(i - getNumDimIds(), context);

  bool changed;
  do {
    changed = false;
    // Identify yet unknown identifiers as constants or mod's / floordiv's of
    // other identifiers if possible.
    for (unsigned pos = 0; pos < getNumIds(); pos++) {
      if (memo[pos])
        continue;

      auto lbConst = getConstantLowerBound(pos);
      auto ubConst = getConstantUpperBound(pos);
      if (lbConst.hasValue() && ubConst.hasValue()) {
        // Detect equality to a constant.
        if (lbConst.getValue() == ubConst.getValue()) {
          memo[pos] = getAffineConstantExpr(lbConst.getValue(), context);
          changed = true;
          continue;
        }

        // Detect an identifier as modulo of another identifier w.r.t. a
        // constant.
        if (detectAsMod(*this, pos, lbConst.getValue(), ubConst.getValue(),
                        memo, context)) {
          changed = true;
          continue;
        }
      }

      // Detect an identifier as a floordiv of an affine function of other
      // identifiers (divisor is a positive constant).
      if (detectAsFloorDiv(*this, pos, context, memo)) {
        changed = true;
        continue;
      }

      // Detect an identifier as an expression of other identifiers.
      unsigned idx;
      if (!findConstraintWithNonZeroAt(*this, pos, /*isEq=*/true, &idx)) {
        continue;
      }

      // Build AffineExpr solving for identifier 'pos' in terms of all others.
      auto expr = getAffineConstantExpr(0, context);
      unsigned j, e;
      for (j = 0, e = getNumIds(); j < e; ++j) {
        if (j == pos)
          continue;
        int64_t c = atEq(idx, j);
        if (c == 0)
          continue;
        // If any of the involved IDs hasn't been found yet, we can't proceed.
        if (!memo[j])
          break;
        expr = expr + memo[j] * c;
      }
      if (j < e)
        // Can't construct expression as it depends on a yet uncomputed
        // identifier.
        continue;

      // Add constant term to AffineExpr.
      expr = expr + atEq(idx, getNumIds());
      int64_t vPos = atEq(idx, pos);
      assert(vPos != 0 && "expected non-zero here");
      if (vPos > 0)
        expr = (-expr).floorDiv(vPos);
      else
        // vPos < 0.
        expr = expr.floorDiv(-vPos);
      // Successfully constructed expression.
      memo[pos] = expr;
      changed = true;
    }
    // This loop is guaranteed to reach a fixed point - since once an
    // identifier's explicit form is computed (in memo[pos]), it's not updated
    // again.
  } while (changed);

  // Set the lower and upper bound maps for all the identifiers that were
  // computed as affine expressions of the rest as the "detected expr" and
  // "detected expr + 1" respectively; set the undetected ones to null.
  Optional<FlatAffineConstraints> tmpClone;
  for (unsigned pos = 0; pos < num; pos++) {
    unsigned numMapDims = getNumDimIds() - num;
    unsigned numMapSymbols = getNumSymbolIds();
    AffineExpr expr = memo[pos + offset];
    if (expr)
      expr = simplifyAffineExpr(expr, numMapDims, numMapSymbols);

    AffineMap &lbMap = (*lbMaps)[pos];
    AffineMap &ubMap = (*ubMaps)[pos];

    if (expr) {
      lbMap = AffineMap::get(numMapDims, numMapSymbols, expr);
      ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + 1);
    } else {
      // TODO: Whenever there are local identifiers in the dependence
      // constraints, we'll conservatively over-approximate, since we don't
      // always explicitly compute them above (in the while loop).
      if (getNumLocalIds() == 0) {
        // Work on a copy so that we don't update this constraint system.
        if (!tmpClone) {
          tmpClone.emplace(FlatAffineConstraints(*this));
          // Removing redundant inequalities is necessary so that we don't get
          // redundant loop bounds.
          tmpClone->removeRedundantInequalities();
        }
        std::tie(lbMap, ubMap) = tmpClone->getLowerAndUpperBound(
            pos, offset, num, getNumDimIds(), /*localExprs=*/{}, context);
      }

      // If the above fails, we'll just use the constant lower bound and the
      // constant upper bound (if they exist) as the slice bounds.
      // TODO: being conservative for the moment in cases that
      // lead to multiple bounds - until getConstDifference in LoopFusion.cpp is
      // fixed (b/126426796).
      if (!lbMap || lbMap.getNumResults() > 1) {
        LLVM_DEBUG(llvm::dbgs()
                   << "WARNING: Potentially over-approximating slice lb\n");
        auto lbConst = getConstantLowerBound(pos + offset);
        if (lbConst.hasValue()) {
          lbMap = AffineMap::get(
              numMapDims, numMapSymbols,
              getAffineConstantExpr(lbConst.getValue(), context));
        }
      }
      if (!ubMap || ubMap.getNumResults() > 1) {
        LLVM_DEBUG(llvm::dbgs()
                   << "WARNING: Potentially over-approximating slice ub\n");
        auto ubConst = getConstantUpperBound(pos + offset);
        if (ubConst.hasValue()) {
          ubMap = AffineMap::get(
              numMapDims, numMapSymbols,
              getAffineConstantExpr(ubConst.getValue() + 1, context));
        }
      }
    }
    LLVM_DEBUG(llvm::dbgs()
               << "lb map for pos = " << Twine(pos + offset) << ", expr: ");
    LLVM_DEBUG(lbMap.dump(););
    LLVM_DEBUG(llvm::dbgs()
               << "ub map for pos = " << Twine(pos + offset) << ", expr: ");
    LLVM_DEBUG(ubMap.dump(););
  }
}
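
// E.g. (an illustrative sketch): for the system {d0 == 4 * d1 + 1},
// getSliceBounds(/*offset=*/0, /*num=*/1, ...) detects d0's explicit form
// 4 * d1 + 1 and sets lbMaps[0] = (d0) -> (4 * d0 + 1) and the exclusive
// ubMaps[0] = (d0) -> (4 * d0 + 2), with d1 remapped to the maps' d0.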

LogicalResult FlatAffineConstraints::addLowerOrUpperBound(unsigned pos,
                                                          AffineMap boundMap,
                                                          bool eq, bool lower) {
  assert(boundMap.getNumDims() == getNumDimIds() && "dim mismatch");
  assert(boundMap.getNumSymbols() == getNumSymbolIds() && "symbol mismatch");
  assert(pos < getNumDimAndSymbolIds() && "invalid position");

  // Equality follows the logic of lower bound except that we add an equality
  // instead of an inequality.
  assert((!eq || boundMap.getNumResults() == 1) && "single result expected");
  if (eq)
    lower = true;

  std::vector<SmallVector<int64_t, 8>> flatExprs;
  FlatAffineConstraints localCst;
  if (failed(getFlattenedAffineExprs(boundMap, &flatExprs, &localCst))) {
    LLVM_DEBUG(llvm::dbgs()
               << "composition unimplemented for semi-affine maps\n");
    return failure();
  }
  assert(flatExprs.size() == boundMap.getNumResults());

  // Add localCst information.
  if (localCst.getNumLocalIds() > 0) {
    unsigned numLocalIds = getNumLocalIds();
    // Insert local dims of localCst at the beginning.
    for (unsigned l = 0, e = localCst.getNumLocalIds(); l < e; ++l)
      addLocalId(0);
    // Insert local dims of `this` at the end of localCst.
    for (unsigned l = 0; l < numLocalIds; ++l)
      localCst.addLocalId(localCst.getNumLocalIds());
    // Dimensions of localCst and this constraint set match. Append localCst to
    // this constraint set.
    append(localCst);
  }

  // Add one (in)equality for each result.
  for (const auto &flatExpr : flatExprs) {
    SmallVector<int64_t> ineq(getNumCols(), 0);
    // Dims and symbols.
    for (unsigned j = 0, e = boundMap.getNumInputs(); j < e; j++) {
      ineq[j] = lower ? -flatExpr[j] : flatExpr[j];
    }
    // Invalid bound: pos appears in `boundMap`.
    // TODO: This should be an assertion. Fix `addDomainFromSliceMaps` and/or
    // its callers to prevent invalid bounds from being added.
    if (ineq[pos] != 0)
      continue;
    ineq[pos] = lower ? 1 : -1;
    // Local vars common to eq and localCst are at the beginning.
    unsigned j = getNumDimIds() + getNumSymbolIds();
    unsigned end = flatExpr.size() - 1;
    for (unsigned i = boundMap.getNumInputs(); i < end; i++, j++) {
      ineq[j] = lower ? -flatExpr[i] : flatExpr[i];
    }
    // Constant term.
    ineq[getNumCols() - 1] =
        lower ? -flatExpr[flatExpr.size() - 1]
              // Upper bound in flattenedExpr is an exclusive one.
              : flatExpr[flatExpr.size() - 1] - 1;
    eq ? addEquality(ineq) : addInequality(ineq);
  }

  return success();
}
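
// E.g. (an illustrative sketch): with one dim d0 and one symbol s0, adding the
// bound map (d0)[s0] -> (s0) as an upper bound on d0 (pos 0, eq = false,
// lower = false) produces the inequality -d0 + s0 - 1 >= 0, i.e. d0 <= s0 - 1,
// since upper bounds in the flattened form are exclusive.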

AffineMap FlatAffineConstraints::computeAlignedMap(AffineMap map,
                                                   ValueRange operands) const {
  assert(map.getNumInputs() == operands.size() && "number of inputs mismatch");

  SmallVector<Value> dims, syms;
#ifndef NDEBUG
  SmallVector<Value> newSyms;
  SmallVector<Value> *newSymsPtr = &newSyms;
#else
  SmallVector<Value> *newSymsPtr = nullptr;
#endif // NDEBUG

  dims.reserve(numDims);
  syms.reserve(numSymbols);
  for (unsigned i = 0; i < numDims; ++i)
    dims.push_back(ids[i] ? *ids[i] : Value());
  for (unsigned i = numDims, e = numDims + numSymbols; i < e; ++i)
    syms.push_back(ids[i] ? *ids[i] : Value());

  AffineMap alignedMap =
      alignAffineMapWithValues(map, operands, dims, syms, newSymsPtr);
  // All symbols are already part of this FlatAffineConstraints.
  assert(syms.size() == newSymsPtr->size() && "unexpected new/missing symbols");
  assert(std::equal(syms.begin(), syms.end(), newSymsPtr->begin()) &&
         "unexpected new/missing symbols");
  return alignedMap;
}

LogicalResult
FlatAffineConstraints::addLowerOrUpperBound(unsigned pos, AffineMap boundMap,
                                            ValueRange boundOperands, bool eq,
                                            bool lower) {
  // Fully compose map and operands; canonicalize and simplify so that we
  // transitively get to terminal symbols or loop IVs.
  auto map = boundMap;
  SmallVector<Value, 4> operands(boundOperands.begin(), boundOperands.end());
  fullyComposeAffineMapAndOperands(&map, &operands);
  map = simplifyAffineMap(map);
  canonicalizeMapAndOperands(&map, &operands);
  for (auto operand : operands)
    addInductionVarOrTerminalSymbol(operand);
  return addLowerOrUpperBound(pos, computeAlignedMap(map, operands), eq, lower);
}

// Adds slice lower bounds represented by lower bounds in 'lbMaps' and upper
// bounds in 'ubMaps' to each value in 'values' that appears in the constraint
// system. Note that both lower/upper bounds share the same operand list
// 'operands'.
// This function assumes 'values.size' == 'lbMaps.size' == 'ubMaps.size', and
// skips any null AffineMaps in 'lbMaps' or 'ubMaps'.
// Returns failure for unimplemented cases such as semi-affine expressions or
// expressions with mod/floordiv.
LogicalResult FlatAffineConstraints::addSliceBounds(ArrayRef<Value> values,
                                                    ArrayRef<AffineMap> lbMaps,
                                                    ArrayRef<AffineMap> ubMaps,
                                                    ArrayRef<Value> operands) {
  assert(values.size() == lbMaps.size());
  assert(lbMaps.size() == ubMaps.size());

  for (unsigned i = 0, e = lbMaps.size(); i < e; ++i) {
    unsigned pos;
    if (!findId(values[i], &pos))
      continue;

    AffineMap lbMap = lbMaps[i];
    AffineMap ubMap = ubMaps[i];
    assert(!lbMap || lbMap.getNumInputs() == operands.size());
    assert(!ubMap || ubMap.getNumInputs() == operands.size());

    // Check if this slice is just an equality along this dimension.
    if (lbMap && ubMap && lbMap.getNumResults() == 1 &&
        ubMap.getNumResults() == 1 &&
        lbMap.getResult(0) + 1 == ubMap.getResult(0)) {
      if (failed(addLowerOrUpperBound(pos, lbMap, operands, /*eq=*/true,
                                      /*lower=*/true)))
        return failure();
      continue;
    }

    // If lower or upper bound maps are null or provide no results, it implies
    // that the source loop was not at all sliced, and the entire loop will be
    // a part of the slice.
    if (lbMap && lbMap.getNumResults() != 0 && ubMap &&
        ubMap.getNumResults() != 0) {
      if (failed(addLowerOrUpperBound(pos, lbMap, operands, /*eq=*/false,
                                      /*lower=*/true)))
        return failure();
      if (failed(addLowerOrUpperBound(pos, ubMap, operands, /*eq=*/false,
                                      /*lower=*/false)))
        return failure();
    } else {
      auto loop = getForInductionVarOwner(values[i]);
      if (failed(this->addAffineForOpDomain(loop)))
        return failure();
    }
  }
  return success();
}
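
// E.g. (an illustrative sketch): if lbMaps[i] = (d0) -> (d0) and
// ubMaps[i] = (d0) -> (d0 + 1), the slice pins values[i] to a single
// iteration, so the equality fast path above adds one equality instead of a
// pair of inequalities.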

void FlatAffineConstraints::addEquality(ArrayRef<int64_t> eq) {
  assert(eq.size() == getNumCols());
  unsigned row = equalities.appendExtraRow();
  for (unsigned i = 0, e = eq.size(); i < e; ++i)
    equalities(row, i) = eq[i];
}

void FlatAffineConstraints::addInequality(ArrayRef<int64_t> inEq) {
  assert(inEq.size() == getNumCols());
  unsigned row = inequalities.appendExtraRow();
  for (unsigned i = 0, e = inEq.size(); i < e; ++i)
    inequalities(row, i) = inEq[i];
}

void FlatAffineConstraints::addConstantLowerBound(unsigned pos, int64_t lb) {
  assert(pos < getNumCols());
  unsigned row = inequalities.appendExtraRow();
  inequalities(row, pos) = 1;
  inequalities(row, getNumCols() - 1) = -lb;
}

void FlatAffineConstraints::addConstantUpperBound(unsigned pos, int64_t ub) {
  assert(pos < getNumCols());
  unsigned row = inequalities.appendExtraRow();
  inequalities(row, pos) = -1;
  inequalities(row, getNumCols() - 1) = ub;
}
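
// E.g. (an illustrative sketch): in a system with columns (d0, d1, const),
// addConstantLowerBound(0, 5) appends the row (1, 0, -5), i.e. d0 - 5 >= 0,
// while addConstantUpperBound(0, 7) appends (-1, 0, 7), i.e. -d0 + 7 >= 0.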

void FlatAffineConstraints::addConstantLowerBound(ArrayRef<int64_t> expr,
                                                  int64_t lb) {
  addInequality(expr);
  inequalities(inequalities.getNumRows() - 1, getNumCols() - 1) += -lb;
}

void FlatAffineConstraints::addConstantUpperBound(ArrayRef<int64_t> expr,
                                                  int64_t ub) {
  assert(expr.size() == getNumCols());
  unsigned row = inequalities.appendExtraRow();
  for (unsigned i = 0, e = expr.size(); i < e; ++i)
    inequalities(row, i) = -expr[i];
  inequalities(inequalities.getNumRows() - 1, getNumCols() - 1) += ub;
}

/// Adds a new local identifier as the floordiv of an affine function of other
/// identifiers, the coefficients of which are provided in 'dividend' and with
/// respect to a positive constant 'divisor'. Two constraints are added to the
/// system to capture equivalence with the floordiv.
/// q = expr floordiv c <=> c*q <= expr <= c*q + c - 1.
void FlatAffineConstraints::addLocalFloorDiv(ArrayRef<int64_t> dividend,
                                             int64_t divisor) {
  assert(dividend.size() == getNumCols() && "incorrect dividend size");
  assert(divisor > 0 && "positive divisor expected");

  addLocalId(getNumLocalIds());

  // Add two constraints for this new identifier 'q'.
  SmallVector<int64_t, 8> bound(dividend.size() + 1);

  // dividend - q * divisor >= 0
  std::copy(dividend.begin(), dividend.begin() + dividend.size() - 1,
            bound.begin());
  bound.back() = dividend.back();
  bound[getNumIds() - 1] = -divisor;
  addInequality(bound);

  // -dividend + divisor * q + divisor - 1 >= 0
  std::transform(bound.begin(), bound.end(), bound.begin(),
                 std::negate<int64_t>());
  bound[bound.size() - 1] += divisor - 1;
  addInequality(bound);
}
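
// E.g. (an illustrative sketch): for q = d0 floordiv 4 in a system whose
// columns were (d0, const), the dividend is (1, 0) and the two inequalities
// added are d0 - 4 * q >= 0 and -d0 + 4 * q + 3 >= 0.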

bool FlatAffineConstraints::findId(Value id, unsigned *pos) const {
  unsigned i = 0;
  for (const auto &mayBeId : ids) {
    if (mayBeId.hasValue() && mayBeId.getValue() == id) {
      *pos = i;
      return true;
    }
    i++;
  }
  return false;
}

bool FlatAffineConstraints::containsId(Value id) const {
  return llvm::any_of(ids, [&](const Optional<Value> &mayBeId) {
    return mayBeId.hasValue() && mayBeId.getValue() == id;
  });
}

void FlatAffineConstraints::swapId(unsigned posA, unsigned posB) {
  assert(posA < getNumIds() && "invalid position A");
  assert(posB < getNumIds() && "invalid position B");

  if (posA == posB)
    return;

  for (unsigned r = 0, e = getNumInequalities(); r < e; r++)
    std::swap(atIneq(r, posA), atIneq(r, posB));
  for (unsigned r = 0, e = getNumEqualities(); r < e; r++)
    std::swap(atEq(r, posA), atEq(r, posB));
  std::swap(getId(posA), getId(posB));
}

void FlatAffineConstraints::setDimSymbolSeparation(unsigned newSymbolCount) {
  assert(newSymbolCount <= numDims + numSymbols &&
         "invalid separation position");
  numDims = numDims + numSymbols - newSymbolCount;
  numSymbols = newSymbolCount;
}

/// Sets the specified identifier to a constant value.
void FlatAffineConstraints::setIdToConstant(unsigned pos, int64_t val) {
  equalities.resizeVertically(equalities.getNumRows() + 1);
  unsigned row = equalities.getNumRows() - 1;
  equalities(row, pos) = 1;
  equalities(row, getNumCols() - 1) = -val;
}

/// Sets the specified identifier to a constant value; asserts if the id is not
/// found.
void FlatAffineConstraints::setIdToConstant(Value id, int64_t val) {
  unsigned pos;
  if (!findId(id, &pos))
    // This is a pre-condition for this method.
    assert(0 && "id not found");
  setIdToConstant(pos, val);
}

void FlatAffineConstraints::removeEquality(unsigned pos) {
  equalities.removeRow(pos);
}

void FlatAffineConstraints::removeInequality(unsigned pos) {
  inequalities.removeRow(pos);
}

/// Finds an equality that equates the specified identifier to a constant.
/// Returns the position of the equality row. If 'symbolic' is set to true,
/// symbols are also treated like a constant, i.e., an affine function of the
/// symbols is also treated like a constant. Returns -1 if such an equality
/// could not be found.
static int findEqualityToConstant(const FlatAffineConstraints &cst,
                                  unsigned pos, bool symbolic = false) {
  assert(pos < cst.getNumIds() && "invalid position");
  for (unsigned r = 0, e = cst.getNumEqualities(); r < e; r++) {
    int64_t v = cst.atEq(r, pos);
    if (v * v != 1)
      continue;
    unsigned c;
    unsigned f = symbolic ? cst.getNumDimIds() : cst.getNumIds();
    // This checks for zeros in all positions other than 'pos' in [0, f).
    for (c = 0; c < f; c++) {
      if (c == pos)
        continue;
      if (cst.atEq(r, c) != 0) {
        // Dependent on another identifier.
        break;
      }
    }
    if (c == f)
      // Equality is free of other identifiers.
      return r;
  }
  return -1;
}

void FlatAffineConstraints::setAndEliminate(unsigned pos,
                                            ArrayRef<int64_t> values) {
  if (values.empty())
    return;
  assert(pos + values.size() <= getNumIds() &&
         "invalid position or too many values");
  // Setting x_j = p in sum_i a_i x_i + c is equivalent to adding p*a_j to the
  // constant term and removing the id x_j. We do this for all the ids
  // pos, pos + 1, ... pos + values.size() - 1.
  for (unsigned r = 0, e = getNumInequalities(); r < e; r++)
    for (unsigned i = 0, numVals = values.size(); i < numVals; ++i)
      atIneq(r, getNumCols() - 1) += atIneq(r, pos + i) * values[i];
  for (unsigned r = 0, e = getNumEqualities(); r < e; r++)
    for (unsigned i = 0, numVals = values.size(); i < numVals; ++i)
      atEq(r, getNumCols() - 1) += atEq(r, pos + i) * values[i];
  removeIdRange(pos, pos + values.size());
}
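
// E.g. (an illustrative sketch): setting d0 = 3 in 2 * d0 + d1 - 5 >= 0 folds
// the row to d1 + 1 >= 0: the constant term absorbs 2 * 3 and the d0 column
// is removed.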

LogicalResult FlatAffineConstraints::constantFoldId(unsigned pos) {
  assert(pos < getNumIds() && "invalid position");
  int rowIdx;
  if ((rowIdx = findEqualityToConstant(*this, pos)) == -1)
    return failure();

  // atEq(rowIdx, pos) is either -1 or 1.
  assert(atEq(rowIdx, pos) * atEq(rowIdx, pos) == 1);
  int64_t constVal = -atEq(rowIdx, getNumCols() - 1) / atEq(rowIdx, pos);
  setAndEliminate(pos, constVal);
  return success();
}

void FlatAffineConstraints::constantFoldIdRange(unsigned pos, unsigned num) {
  for (unsigned s = pos, t = pos, e = pos + num; s < e; s++) {
    if (failed(constantFoldId(t)))
      t++;
  }
}

/// Returns a non-negative constant bound on the extent (upper bound - lower
/// bound) of the specified identifier if it is found to be a constant; returns
/// None if it's not a constant. This method treats symbolic identifiers
/// specially, i.e., it looks for constant differences between affine
/// expressions involving only the symbolic identifiers. See comments at
/// function definition for example. 'lb', if provided, is set to the lower
/// bound associated with the constant difference. Note that 'lb' is purely
/// symbolic and thus will contain the coefficients of the symbolic identifiers
/// and the constant coefficient.
// E.g.: 0 <= i <= 15, returns 16.
// s0 + 2 <= i <= s0 + 17, returns 16. (s0 has to be a symbol)
// s0 + s1 + 16 <= d0 <= s0 + s1 + 31, returns 16.
// s0 - 7 <= 8*j <= s0 returns 1 with lb = s0, lbDivisor = 8 (since lb =
// ceil((s0 - 7) / 8) = floor(s0 / 8)).
Optional<int64_t> FlatAffineConstraints::getConstantBoundOnDimSize(
    unsigned pos, SmallVectorImpl<int64_t> *lb, int64_t *boundFloorDivisor,
    SmallVectorImpl<int64_t> *ub, unsigned *minLbPos,
    unsigned *minUbPos) const {
  assert(pos < getNumDimIds() && "Invalid identifier position");

  // Find an equality for 'pos'^th identifier that equates it to some function
  // of the symbolic identifiers (+ constant).
  int eqPos = findEqualityToConstant(*this, pos, /*symbolic=*/true);
  if (eqPos != -1) {
    auto eq = getEquality(eqPos);
    // If the equality involves a local var, punt for now.
    // TODO: this can be handled in the future by using the explicit
    // representation of the local vars.
    if (!std::all_of(eq.begin() + getNumDimAndSymbolIds(), eq.end() - 1,
                     [](int64_t coeff) { return coeff == 0; }))
      return None;

    // This identifier can only take a single value.
    if (lb) {
      // Set lb to that symbolic value.
      lb->resize(getNumSymbolIds() + 1);
      if (ub)
        ub->resize(getNumSymbolIds() + 1);
      for (unsigned c = 0, f = getNumSymbolIds() + 1; c < f; c++) {
        int64_t v = atEq(eqPos, pos);
        // atEq(eqRow, pos) is either -1 or 1.
        assert(v * v == 1);
        (*lb)[c] = v < 0 ? atEq(eqPos, getNumDimIds() + c) / -v
                         : -atEq(eqPos, getNumDimIds() + c) / v;
        // Since this is an equality, ub = lb.
        if (ub)
          (*ub)[c] = (*lb)[c];
      }
      assert(boundFloorDivisor &&
             "both lb and divisor or none should be provided");
      *boundFloorDivisor = 1;
    }
    if (minLbPos)
      *minLbPos = eqPos;
    if (minUbPos)
      *minUbPos = eqPos;
    return 1;
  }

  // Check if the identifier appears at all in any of the inequalities.
  unsigned r, e;
  for (r = 0, e = getNumInequalities(); r < e; r++) {
    if (atIneq(r, pos) != 0)
      break;
  }
  if (r == e)
    // If it doesn't, there isn't a bound on it.
    return None;

  // Positions of constraints that are lower/upper bounds on the variable.
  SmallVector<unsigned, 4> lbIndices, ubIndices;

  // Gather all symbolic lower bounds and upper bounds of the variable, i.e.,
  // the bounds can only involve symbolic (and local) identifiers. Since the
  // canonical form is c_1*x_1 + c_2*x_2 + ... + c_0 >= 0, a constraint is a
  // lower bound for x_i if c_i >= 1, and an upper bound if c_i <= -1.
  getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices,
                               /*eqIndices=*/nullptr, /*offset=*/0,
                               /*num=*/getNumDimIds());

  Optional<int64_t> minDiff = None;
  unsigned minLbPosition = 0, minUbPosition = 0;
  for (auto ubPos : ubIndices) {
    for (auto lbPos : lbIndices) {
      // Look for a lower bound and an upper bound that only differ by a
      // constant, i.e., pairs of the form 0 <= c_pos - f(c_i's) <= diffConst.
      // For example, if ii is the pos^th variable, we are looking for
      // constraints like ii >= i, ii <= i + 50, 50 being the difference. The
      // minimum among all such constant differences is kept since that's the
      // constant bounding the extent of the pos^th variable.
      unsigned j, e;
      for (j = 0, e = getNumCols() - 1; j < e; j++)
        if (atIneq(ubPos, j) != -atIneq(lbPos, j)) {
          break;
        }
      if (j < getNumCols() - 1)
        continue;
      int64_t diff = ceilDiv(atIneq(ubPos, getNumCols() - 1) +
                                 atIneq(lbPos, getNumCols() - 1) + 1,
                             atIneq(lbPos, pos));
      // This bound is non-negative by definition.
      diff = std::max<int64_t>(diff, 0);
      if (minDiff == None || diff < minDiff) {
        minDiff = diff;
        minLbPosition = lbPos;
        minUbPosition = ubPos;
      }
    }
  }
  if (lb && minDiff.hasValue()) {
    // Set lb to the symbolic lower bound.
    lb->resize(getNumSymbolIds() + 1);
    if (ub)
      ub->resize(getNumSymbolIds() + 1);
    // The lower bound is the ceildiv of the lb constraint over the coefficient
    // of the variable at 'pos'. We express the ceildiv equivalently as a floor
    // for uniformity. E.g., if the lower bound constraint was: 32*d0 - N +
    // 31 >= 0, the lower bound for d0 is ceil(N - 31, 32), i.e., floor(N, 32).
    *boundFloorDivisor = atIneq(minLbPosition, pos);
    assert(*boundFloorDivisor == -atIneq(minUbPosition, pos));
    for (unsigned c = 0, e = getNumSymbolIds() + 1; c < e; c++) {
      (*lb)[c] = -atIneq(minLbPosition, getNumDimIds() + c);
    }
    if (ub) {
      for (unsigned c = 0, e = getNumSymbolIds() + 1; c < e; c++)
        (*ub)[c] = atIneq(minUbPosition, getNumDimIds() + c);
    }
    // The lower bound leads to a ceildiv while the upper bound is a floordiv
    // whenever the coefficient at pos != 1. ceildiv(val, d) = floordiv(val +
    // d - 1, d); hence, the addition of 'atIneq(minLbPosition, pos) - 1' to
    // the constant term for the lower bound.
    (*lb)[getNumSymbolIds()] += atIneq(minLbPosition, pos) - 1;
  }
  if (minLbPos)
    *minLbPos = minLbPosition;
  if (minUbPos)
    *minUbPos = minUbPosition;
  return minDiff;
}

template <bool isLower>
Optional<int64_t>
FlatAffineConstraints::computeConstantLowerOrUpperBound(unsigned pos) {
  assert(pos < getNumIds() && "invalid position");
  // Project to 'pos'.
  projectOut(0, pos);
  projectOut(1, getNumIds() - 1);
  // Check if there's an equality equating the '0'^th identifier to a constant.
  int eqRowIdx = findEqualityToConstant(*this, 0, /*symbolic=*/false);
  if (eqRowIdx != -1)
    // atEq(rowIdx, 0) is either -1 or 1.
    return -atEq(eqRowIdx, getNumCols() - 1) / atEq(eqRowIdx, 0);

  // Check if the identifier appears at all in any of the inequalities.
  unsigned r, e;
  for (r = 0, e = getNumInequalities(); r < e; r++) {
    if (atIneq(r, 0) != 0)
      break;
  }
  if (r == e)
    // If it doesn't, there isn't a bound on it.
    return None;

  Optional<int64_t> minOrMaxConst = None;

  // Take the max across all const lower bounds (or min across all constant
  // upper bounds).
  for (unsigned r = 0, e = getNumInequalities(); r < e; r++) {
    if (isLower) {
      if (atIneq(r, 0) <= 0)
        // Not a lower bound.
        continue;
    } else if (atIneq(r, 0) >= 0) {
      // Not an upper bound.
      continue;
    }
    unsigned c, f;
    for (c = 0, f = getNumCols() - 1; c < f; c++)
      if (c != 0 && atIneq(r, c) != 0)
        break;
    if (c < getNumCols() - 1)
      // Not a constant bound.
      continue;

    int64_t boundConst =
        isLower ? mlir::ceilDiv(-atIneq(r, getNumCols() - 1), atIneq(r, 0))
                : mlir::floorDiv(atIneq(r, getNumCols() - 1), -atIneq(r, 0));
    if (isLower) {
      if (minOrMaxConst == None || boundConst > minOrMaxConst)
        minOrMaxConst = boundConst;
    } else {
      if (minOrMaxConst == None || boundConst < minOrMaxConst)
        minOrMaxConst = boundConst;
    }
  }
  return minOrMaxConst;
}
|
|
|
|
|
|
|
|
|
|
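// Worked example (an illustration, not from the original source) of the
// ceildiv/floordiv step above. The inequality 2*x0 - 5 >= 0 yields the
// constant lower bound ceilDiv(5, 2) = 3, while -2*x0 + 5 >= 0 yields the
// constant upper bound floorDiv(5, 2) = 2. The helper name
// `constantBoundArithmeticSketch` is hypothetical.
static void constantBoundArithmeticSketch() {
  // Lower bound row (2, -5): boundConst = ceilDiv(-(-5), 2) = 3.
  assert(mlir::ceilDiv(5, 2) == 3);
  // Upper bound row (-2, 5): boundConst = floorDiv(5, -(-2)) = 2.
  assert(mlir::floorDiv(5, 2) == 2);
}
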
Optional<int64_t>
FlatAffineConstraints::getConstantLowerBound(unsigned pos) const {
  FlatAffineConstraints tmpCst(*this);
  return tmpCst.computeConstantLowerOrUpperBound</*isLower=*/true>(pos);
}

Optional<int64_t>
FlatAffineConstraints::getConstantUpperBound(unsigned pos) const {
  FlatAffineConstraints tmpCst(*this);
  return tmpCst.computeConstantLowerOrUpperBound</*isLower=*/false>(pos);
}

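// Illustrative sketch (not part of the original source): combining the two
// constant-bound queries above to compute a constant extent (number of
// integer values) for identifier `pos`. Both bounds are inclusive, so the
// extent is ub - lb + 1. The helper name `getConstantExtentSketch` is
// hypothetical.
static Optional<int64_t>
getConstantExtentSketch(const FlatAffineConstraints &cst, unsigned pos) {
  Optional<int64_t> lb = cst.getConstantLowerBound(pos);
  Optional<int64_t> ub = cst.getConstantUpperBound(pos);
  if (!lb.hasValue() || !ub.hasValue())
    return None;
  return ub.getValue() - lb.getValue() + 1;
}
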
// A simple (naive and conservative) check for hyper-rectangularity.
bool FlatAffineConstraints::isHyperRectangular(unsigned pos,
                                               unsigned num) const {
  assert(pos < getNumCols() - 1);
  // Check for two non-zero coefficients in the range [pos, pos + num).
  for (unsigned r = 0, e = getNumInequalities(); r < e; r++) {
    unsigned sum = 0;
    for (unsigned c = pos; c < pos + num; c++) {
      if (atIneq(r, c) != 0)
        sum++;
    }
    if (sum > 1)
      return false;
  }
  for (unsigned r = 0, e = getNumEqualities(); r < e; r++) {
    unsigned sum = 0;
    for (unsigned c = pos; c < pos + num; c++) {
      if (atEq(r, c) != 0)
        sum++;
    }
    if (sum > 1)
      return false;
  }
  return true;
}

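// Illustrative sketch (an assumption, not part of the original file): the
// hyper-rectangularity check on a concrete system. {0 <= i <= 7, 0 <= j <= 7}
// passes since every constraint involves at most one of the two dimensions;
// adding i <= j couples them and defeats the check. The helper name
// `hyperRectSketch` is hypothetical.
static void hyperRectSketch() {
  FlatAffineConstraints cst;
  cst.reset(/*numDims=*/2, /*numSymbols=*/0, /*numLocals=*/0);
  cst.addInequality({1, 0, 0});  // i >= 0
  cst.addInequality({-1, 0, 7}); // i <= 7
  cst.addInequality({0, 1, 0});  // j >= 0
  cst.addInequality({0, -1, 7}); // j <= 7
  assert(cst.isHyperRectangular(/*pos=*/0, /*num=*/2));
  cst.addInequality({-1, 1, 0}); // j - i >= 0, i.e., i <= j.
  assert(!cst.isHyperRectangular(/*pos=*/0, /*num=*/2));
}
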
void FlatAffineConstraints::print(raw_ostream &os) const {
  assert(hasConsistentState());
  os << "\nConstraints (" << getNumDimIds() << " dims, " << getNumSymbolIds()
     << " symbols, " << getNumLocalIds() << " locals), (" << getNumConstraints()
     << " constraints)\n";
  os << "(";
  for (unsigned i = 0, e = getNumIds(); i < e; i++) {
    if (ids[i] == None)
      os << "None ";
    else
      os << "Value ";
  }
  os << " const)\n";
  for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) {
    for (unsigned j = 0, f = getNumCols(); j < f; ++j) {
      os << atEq(i, j) << " ";
    }
    os << "= 0\n";
  }
  for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) {
    for (unsigned j = 0, f = getNumCols(); j < f; ++j) {
      os << atIneq(i, j) << " ";
    }
    os << ">= 0\n";
  }
  os << '\n';
}

void FlatAffineConstraints::dump() const { print(llvm::errs()); }

/// Removes duplicate constraints, trivially true constraints, and constraints
/// that can be detected as redundant as a result of differing only in their
/// constant term part. A constraint of the form <non-negative constant> >= 0
/// is considered trivially true.
// Uses a DenseSet to hash and detect duplicates followed by a linear scan to
// remove duplicates in place.
void FlatAffineConstraints::removeTrivialRedundancy() {
  gcdTightenInequalities();
  normalizeConstraintsByGCD();

  // A map used to detect redundancy stemming from constraints that only differ
  // in their constant term. The value stored is <row position, const term>
  // for a given row.
  SmallDenseMap<ArrayRef<int64_t>, std::pair<unsigned, int64_t>>
      rowsWithoutConstTerm;
  // To unique rows.
  SmallDenseSet<ArrayRef<int64_t>, 8> rowSet;

  // Check if constraint is of the form <non-negative-constant> >= 0.
  auto isTriviallyValid = [&](unsigned r) -> bool {
    for (unsigned c = 0, e = getNumCols() - 1; c < e; c++) {
      if (atIneq(r, c) != 0)
        return false;
    }
    return atIneq(r, getNumCols() - 1) >= 0;
  };

  // Detect and mark redundant constraints.
  SmallVector<bool, 256> redunIneq(getNumInequalities(), false);
  for (unsigned r = 0, e = getNumInequalities(); r < e; r++) {
    int64_t *rowStart = &inequalities(r, 0);
    auto row = ArrayRef<int64_t>(rowStart, getNumCols());
    if (isTriviallyValid(r) || !rowSet.insert(row).second) {
      redunIneq[r] = true;
      continue;
    }

    // Among constraints that only differ in the constant term part, mark
    // everything other than the one with the smallest constant term redundant.
    // (eg: among i - 16j - 5 >= 0, i - 16j - 1 >= 0, i - 16j - 7 >= 0, the
    // former two are redundant).
    int64_t constTerm = atIneq(r, getNumCols() - 1);
    auto rowWithoutConstTerm = ArrayRef<int64_t>(rowStart, getNumCols() - 1);
    const auto &ret =
        rowsWithoutConstTerm.insert({rowWithoutConstTerm, {r, constTerm}});
    if (!ret.second) {
      // Check if the other constraint has a higher constant term.
      auto &val = ret.first->second;
      if (val.second > constTerm) {
        // The stored row is redundant. Mark it so, and update with this one.
        redunIneq[val.first] = true;
        val = {r, constTerm};
      } else {
        // The one stored makes this one redundant.
        redunIneq[r] = true;
      }
    }
  }

  // Scan to get rid of all rows marked redundant, in-place.
  unsigned pos = 0;
  for (unsigned r = 0, e = getNumInequalities(); r < e; r++)
    if (!redunIneq[r])
      inequalities.copyRow(r, pos++);

  inequalities.resizeVertically(pos);

  // TODO: consider doing this for equalities as well, but probably not worth
  // the savings.
}

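// Illustrative sketch (not from the original source): the constant-term
// deduplication above on the example from the comment. Of i - 16j - 5 >= 0,
// i - 16j - 1 >= 0, and i - 16j - 7 >= 0, only the one with the smallest
// constant term (the tightest, i - 16j >= 7) survives. The helper name
// `redundancySketch` is hypothetical.
static void redundancySketch() {
  FlatAffineConstraints cst;
  cst.reset(/*numDims=*/2, /*numSymbols=*/0, /*numLocals=*/0);
  cst.addInequality({1, -16, -5});
  cst.addInequality({1, -16, -1});
  cst.addInequality({1, -16, -7});
  cst.removeTrivialRedundancy();
  assert(cst.getNumInequalities() == 1);
  assert(cst.atIneq(0, 2) == -7);
}
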
void FlatAffineConstraints::clearAndCopyFrom(
    const FlatAffineConstraints &other) {
  FlatAffineConstraints copy(other);
  std::swap(*this, copy);
  assert(copy.getNumIds() == copy.getIds().size());
}

void FlatAffineConstraints::removeId(unsigned pos) {
  removeIdRange(pos, pos + 1);
}

static std::pair<unsigned, unsigned>
getNewNumDimsSymbols(unsigned pos, const FlatAffineConstraints &cst) {
  unsigned numDims = cst.getNumDimIds();
  unsigned numSymbols = cst.getNumSymbolIds();
  unsigned newNumDims, newNumSymbols;
  if (pos < numDims) {
    newNumDims = numDims - 1;
    newNumSymbols = numSymbols;
  } else if (pos < numDims + numSymbols) {
    assert(numSymbols >= 1);
    newNumDims = numDims;
    newNumSymbols = numSymbols - 1;
  } else {
    newNumDims = numDims;
    newNumSymbols = numSymbols;
  }
  return {newNumDims, newNumSymbols};
}

#undef DEBUG_TYPE
#define DEBUG_TYPE "fm"

/// Eliminates the identifier at the specified position using Fourier-Motzkin
/// variable elimination. This technique is exact for rational spaces but
/// conservative (in "rare" cases) for integer spaces. The operation
/// corresponds to a projection operation yielding the (convex) set of integer
/// points contained in the rational shadow of the set. An emptiness test that
/// relies on this method will guarantee emptiness, i.e., it disproves the
/// existence of a solution if it says it's empty.
/// If a non-null isResultIntegerExact is passed, it is set to true if the
/// result is also integer exact. If it's set to false, the obtained solution
/// *may* not be exact, i.e., it may contain integer points that do not have an
/// integer pre-image in the original set.
///
/// Eg:
/// j >= 0, j <= i + 1
/// i >= 0, i <= N + 1
/// Eliminating i yields,
/// j >= 0, 0 <= N + 1, j - 1 <= N + 1
///
/// If darkShadow = true, this method computes the dark shadow on elimination;
/// the dark shadow is a convex integer subset of the exact integer shadow. A
/// non-empty dark shadow proves the existence of an integer solution. The
/// elimination in such a case could however be an under-approximation, and
/// thus should not be used for scanning sets or used by itself for dependence
/// checking.
///
/// Eg: 2-d set, * represents grid points, 'o' represents a point in the set.
///            ^
///            |
///            | * * * * o o
///         i  | * * o o o o
///            | o * * * * *
///            --------------->
///                 j ->
///
/// Eliminating i from this system (projecting on the j dimension):
/// rational shadow / integer light shadow:  1 <= j <= 6
/// dark shadow:                             3 <= j <= 6
/// exact integer shadow:                    j = 1 \union 3 <= j <= 6
/// holes/splinters:                         j = 2
///
/// darkShadow = false, isResultIntegerExact = nullptr are default values.
// TODO: a slight modification to yield dark shadow version of FM (tightened),
// which can prove the existence of a solution if there is one.
void FlatAffineConstraints::fourierMotzkinEliminate(
    unsigned pos, bool darkShadow, bool *isResultIntegerExact) {
  LLVM_DEBUG(llvm::dbgs() << "FM input (eliminate pos " << pos << "):\n");
  LLVM_DEBUG(dump());
  assert(pos < getNumIds() && "invalid position");
  assert(hasConsistentState());

  // Check if this identifier can be eliminated through a substitution.
  for (unsigned r = 0, e = getNumEqualities(); r < e; r++) {
    if (atEq(r, pos) != 0) {
      // Use Gaussian elimination here (since we have an equality).
      LogicalResult ret = gaussianEliminateId(pos);
      (void)ret;
      assert(succeeded(ret) && "Gaussian elimination guaranteed to succeed");
      LLVM_DEBUG(llvm::dbgs()
                 << "FM output (through Gaussian elimination):\n");
      LLVM_DEBUG(dump());
      return;
    }
  }

  // A fast linear time tightening.
  gcdTightenInequalities();

  // Check if the identifier appears at all in any of the inequalities.
  unsigned r, e;
  for (r = 0, e = getNumInequalities(); r < e; r++) {
    if (atIneq(r, pos) != 0)
      break;
  }
  if (r == getNumInequalities()) {
    // If it doesn't appear, just remove the column and return.
    // TODO: refactor removeColumns to use it from here.
    removeId(pos);
    LLVM_DEBUG(llvm::dbgs() << "FM output:\n");
    LLVM_DEBUG(dump());
    return;
  }

  // Positions of constraints that are lower bounds on the variable.
  SmallVector<unsigned, 4> lbIndices;
  // Positions of constraints that are upper bounds on the variable.
  SmallVector<unsigned, 4> ubIndices;
  // Positions of constraints that do not involve the variable.
  std::vector<unsigned> nbIndices;
  nbIndices.reserve(getNumInequalities());

  // Gather all lower bounds and upper bounds of the variable. Since the
  // canonical form is c_1*x_1 + c_2*x_2 + ... + c_0 >= 0, a constraint is a
  // lower bound for x_i if c_i >= 1, and an upper bound if c_i <= -1.
  for (unsigned r = 0, e = getNumInequalities(); r < e; r++) {
    if (atIneq(r, pos) == 0) {
      // Id does not appear in bound.
      nbIndices.push_back(r);
    } else if (atIneq(r, pos) >= 1) {
      // Lower bound.
      lbIndices.push_back(r);
    } else {
      // Upper bound.
      ubIndices.push_back(r);
    }
  }

  // Set the number of dimensions, symbols in the resulting system.
  const auto &dimsSymbols = getNewNumDimsSymbols(pos, *this);
  unsigned newNumDims = dimsSymbols.first;
  unsigned newNumSymbols = dimsSymbols.second;

  SmallVector<Optional<Value>, 8> newIds;
  newIds.reserve(numIds - 1);
  newIds.append(ids.begin(), ids.begin() + pos);
  newIds.append(ids.begin() + pos + 1, ids.end());

  /// Create the new system which has one identifier less.
  FlatAffineConstraints newFac(
      lbIndices.size() * ubIndices.size() + nbIndices.size(),
      getNumEqualities(), getNumCols() - 1, newNumDims, newNumSymbols,
      /*numLocals=*/getNumIds() - 1 - newNumDims - newNumSymbols, newIds);

  assert(newFac.getIds().size() == newFac.getNumIds());

  // This will be used to check if the elimination was integer exact.
  unsigned lcmProducts = 1;

  // Let x be the variable we are eliminating.
  // For each lower bound, lb <= c_l*x, and each upper bound c_u*x <= ub (note
  // that c_l, c_u >= 1), we have:
  //   lb*lcm(c_l, c_u)/c_l <= lcm(c_l, c_u)*x <= ub*lcm(c_l, c_u)/c_u
  // We thus generate a constraint:
  //   lcm(c_l, c_u)/c_l*lb <= lcm(c_l, c_u)/c_u*ub.
  // Note if c_l = c_u = 1, all integer points captured by the resulting
  // constraint correspond to integer points in the original system (i.e., they
  // have integer pre-images). Hence, if the lcm's are all 1, the elimination
  // is integer exact.
  for (auto ubPos : ubIndices) {
    for (auto lbPos : lbIndices) {
      SmallVector<int64_t, 4> ineq;
      ineq.reserve(newFac.getNumCols());
      int64_t lbCoeff = atIneq(lbPos, pos);
      // Note that in the comments above, ubCoeff is the negation of the
      // coefficient in the canonical form since the view taken here is that of
      // the term being moved to the other side of '>='.
      int64_t ubCoeff = -atIneq(ubPos, pos);
      // TODO: refactor this loop to avoid all branches inside.
      for (unsigned l = 0, e = getNumCols(); l < e; l++) {
        if (l == pos)
          continue;
        assert(lbCoeff >= 1 && ubCoeff >= 1 && "bounds wrongly identified");
        int64_t lcm = mlir::lcm(lbCoeff, ubCoeff);
        ineq.push_back(atIneq(ubPos, l) * (lcm / ubCoeff) +
                       atIneq(lbPos, l) * (lcm / lbCoeff));
        lcmProducts *= lcm;
      }
      if (darkShadow) {
        // The dark shadow is a convex subset of the exact integer shadow. If
        // there is a point here, it proves the existence of a solution.
        ineq[ineq.size() - 1] += lbCoeff * ubCoeff - lbCoeff - ubCoeff + 1;
      }
      // TODO: we need to have a way to add inequalities in-place in
      // FlatAffineConstraints instead of creating and copying over.
      newFac.addInequality(ineq);
    }
  }

  LLVM_DEBUG(llvm::dbgs() << "FM isResultIntegerExact: " << (lcmProducts == 1)
                          << "\n");
  if (lcmProducts == 1 && isResultIntegerExact)
    *isResultIntegerExact = true;

  // Copy over the constraints not involving this variable.
  for (auto nbPos : nbIndices) {
    SmallVector<int64_t, 4> ineq;
    ineq.reserve(getNumCols() - 1);
    for (unsigned l = 0, e = getNumCols(); l < e; l++) {
      if (l == pos)
        continue;
      ineq.push_back(atIneq(nbPos, l));
    }
    newFac.addInequality(ineq);
  }

  assert(newFac.getNumConstraints() ==
         lbIndices.size() * ubIndices.size() + nbIndices.size());

  // Copy over the equalities.
  for (unsigned r = 0, e = getNumEqualities(); r < e; r++) {
    SmallVector<int64_t, 4> eq;
    eq.reserve(newFac.getNumCols());
    for (unsigned l = 0, e = getNumCols(); l < e; l++) {
      if (l == pos)
        continue;
      eq.push_back(atEq(r, l));
    }
    newFac.addEquality(eq);
  }

  // GCD tightening and normalization allow detection of more trivially
  // redundant constraints.
  newFac.gcdTightenInequalities();
  newFac.normalizeConstraintsByGCD();
  newFac.removeTrivialRedundancy();
  clearAndCopyFrom(newFac);
  LLVM_DEBUG(llvm::dbgs() << "FM output:\n");
  LLVM_DEBUG(dump());
}

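// Worked example (an illustration, not part of the original file) of the
// lb/ub pairing above. With the lower bound 2x - i >= 0 (i.e., i <= 2x) and
// the upper bound j - 3x >= 0 (i.e., 3x <= j), lcm(2, 3) = 6 sandwiches 6x as
// 3i <= 6x <= 2j, so eliminating x yields -3i + 2j >= 0. Since the lcm is not
// 1, the result is not guaranteed integer exact. The helper name
// `fourierMotzkinSketch` is hypothetical.
static void fourierMotzkinSketch() {
  FlatAffineConstraints cst;
  cst.reset(/*numDims=*/3, /*numSymbols=*/0, /*numLocals=*/0); // i, j, x
  cst.addInequality({-1, 0, 2, 0}); // 2x - i >= 0: lower bound on x.
  cst.addInequality({0, 1, -3, 0}); // j - 3x >= 0: upper bound on x.
  cst.fourierMotzkinEliminate(/*pos=*/2);
  // One combined inequality over (i, j) remains: -3i + 2j >= 0.
  assert(cst.getNumInequalities() == 1);
  assert(cst.atIneq(0, 0) == -3 && cst.atIneq(0, 1) == 2);
}
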
#undef DEBUG_TYPE
#define DEBUG_TYPE "affine-structures"

void FlatAffineConstraints::projectOut(unsigned pos, unsigned num) {
  if (num == 0)
    return;

  // 'pos' can be at most getNumCols() - 2 if num > 0.
  assert((getNumCols() < 2 || pos <= getNumCols() - 2) && "invalid position");
  assert(pos + num < getNumCols() && "invalid range");

  // Eliminate as many identifiers as possible using Gaussian elimination.
  unsigned currentPos = pos;
  unsigned numToEliminate = num;
  unsigned numGaussianEliminated = 0;

  while (currentPos < getNumIds()) {
    unsigned curNumEliminated =
        gaussianEliminateIds(currentPos, currentPos + numToEliminate);
    ++currentPos;
    numToEliminate -= curNumEliminated + 1;
    numGaussianEliminated += curNumEliminated;
  }

  // Eliminate the remaining using Fourier-Motzkin.
  for (unsigned i = 0; i < num - numGaussianEliminated; i++) {
    unsigned numToEliminate = num - numGaussianEliminated - i;
    fourierMotzkinEliminate(
        getBestIdToEliminate(*this, pos, pos + numToEliminate));
  }

  // Fast/trivial simplifications.
  gcdTightenInequalities();
  // Normalize constraints after tightening since the latter impacts this, but
  // not the other way round.
  normalizeConstraintsByGCD();
}

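// Illustrative sketch (an assumption, not from the original source):
// projecting the inner variable out of a triangular set. Eliminating j from
// {j >= 0, j <= i, i <= 10} combines j's two bounds into i >= 0, leaving
// {0 <= i <= 10}. The helper name `projectOutSketch` is hypothetical.
static void projectOutSketch() {
  FlatAffineConstraints cst;
  cst.reset(/*numDims=*/2, /*numSymbols=*/0, /*numLocals=*/0); // i, j
  cst.addInequality({0, 1, 0});   // j >= 0
  cst.addInequality({1, -1, 0});  // i - j >= 0, i.e., j <= i.
  cst.addInequality({-1, 0, 10}); // i <= 10
  cst.projectOut(/*pos=*/1, /*num=*/1);
  assert(cst.getNumDimIds() == 1);
  assert(cst.getConstantLowerBound(0).getValue() == 0);
  assert(cst.getConstantUpperBound(0).getValue() == 10);
}
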
void FlatAffineConstraints::projectOut(Value id) {
  unsigned pos;
  bool ret = findId(id, &pos);
  assert(ret);
  (void)ret;
  fourierMotzkinEliminate(pos);
}

void FlatAffineConstraints::clearConstraints() {
  equalities.resizeVertically(0);
  inequalities.resizeVertically(0);
}

namespace {

enum BoundCmpResult { Greater, Less, Equal, Unknown };

/// Compares two affine bounds whose coefficients are provided in 'a' and 'b'.
/// The last coefficient is the constant term.
static BoundCmpResult compareBounds(ArrayRef<int64_t> a, ArrayRef<int64_t> b) {
  assert(a.size() == b.size());

  // For the bounds to be comparable, their corresponding identifier
  // coefficients should be equal; the constant terms are then compared to
  // determine less/greater/equal.
  if (!std::equal(a.begin(), a.end() - 1, b.begin()))
    return Unknown;

  if (a.back() == b.back())
    return Equal;

  return a.back() < b.back() ? Less : Greater;
}
} // namespace

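// Illustrative sketch (not part of the original file): compareBounds on
// flattened bound coefficients, where the last entry is the constant term.
// The helper name `compareBoundsSketch` is hypothetical.
static void compareBoundsSketch() {
  // i + 4 vs. i + 7: identifier parts match; the constant terms decide: Less.
  assert(compareBounds({1, 4}, {1, 7}) == Less);
  // i + 4 vs. 2i + 1: identifier parts differ; the bounds are incomparable.
  assert(compareBounds({1, 4}, {2, 1}) == Unknown);
}
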
// Returns constraints that are common to both A & B.
static void getCommonConstraints(const FlatAffineConstraints &a,
                                 const FlatAffineConstraints &b,
                                 FlatAffineConstraints &c) {
  c.reset(a.getNumDimIds(), a.getNumSymbolIds(), a.getNumLocalIds());
  // A naive O(n^2) check should be enough here given the input sizes.
  for (unsigned r = 0, e = a.getNumInequalities(); r < e; ++r) {
    for (unsigned s = 0, f = b.getNumInequalities(); s < f; ++s) {
      if (a.getInequality(r) == b.getInequality(s)) {
        c.addInequality(a.getInequality(r));
        break;
      }
    }
  }
  for (unsigned r = 0, e = a.getNumEqualities(); r < e; ++r) {
    for (unsigned s = 0, f = b.getNumEqualities(); s < f; ++s) {
      if (a.getEquality(r) == b.getEquality(s)) {
        c.addEquality(a.getEquality(r));
        break;
      }
    }
  }
}

// Computes the bounding box with respect to 'other' by finding the min of the
// lower bounds and the max of the upper bounds along each of the dimensions.
LogicalResult
FlatAffineConstraints::unionBoundingBox(const FlatAffineConstraints &otherCst) {
  assert(otherCst.getNumDimIds() == numDims && "dims mismatch");
  assert(otherCst.getIds()
             .slice(0, getNumDimIds())
             .equals(getIds().slice(0, getNumDimIds())) &&
         "dim values mismatch");
  assert(otherCst.getNumLocalIds() == 0 && "local ids not supported here");
  assert(getNumLocalIds() == 0 && "local ids not supported yet here");

  // Align `other` to this.
  Optional<FlatAffineConstraints> otherCopy;
  if (!areIdsAligned(*this, otherCst)) {
    otherCopy.emplace(FlatAffineConstraints(otherCst));
    mergeAndAlignIds(/*offset=*/numDims, this, &otherCopy.getValue());
  }

  const auto &otherAligned = otherCopy ? *otherCopy : otherCst;

  // Get the constraints common to both systems; these will be added as is to
  // the union.
  FlatAffineConstraints commonCst;
  getCommonConstraints(*this, otherAligned, commonCst);

  std::vector<SmallVector<int64_t, 8>> boundingLbs;
  std::vector<SmallVector<int64_t, 8>> boundingUbs;
  boundingLbs.reserve(2 * getNumDimIds());
  boundingUbs.reserve(2 * getNumDimIds());

  // To hold lower and upper bounds for each dimension.
  SmallVector<int64_t, 4> lb, otherLb, ub, otherUb;
  // To compute min of lower bounds and max of upper bounds for each dimension.
  SmallVector<int64_t, 4> minLb(getNumSymbolIds() + 1);
  SmallVector<int64_t, 4> maxUb(getNumSymbolIds() + 1);
  // To compute final new lower and upper bounds for the union.
  SmallVector<int64_t, 8> newLb(getNumCols()), newUb(getNumCols());

  int64_t lbFloorDivisor, otherLbFloorDivisor;
  for (unsigned d = 0, e = getNumDimIds(); d < e; ++d) {
    auto extent = getConstantBoundOnDimSize(d, &lb, &lbFloorDivisor, &ub);
    if (!extent.hasValue())
      // TODO: symbolic extents when necessary.
      // TODO: handle union if a dimension is unbounded.
      return failure();

    auto otherExtent = otherAligned.getConstantBoundOnDimSize(
        d, &otherLb, &otherLbFloorDivisor, &otherUb);
    if (!otherExtent.hasValue() || lbFloorDivisor != otherLbFloorDivisor)
      // TODO: symbolic extents when necessary.
      return failure();

    assert(lbFloorDivisor > 0 && "divisor always expected to be positive");

    auto res = compareBounds(lb, otherLb);
    // Identify min.
    if (res == BoundCmpResult::Less || res == BoundCmpResult::Equal) {
      minLb = lb;
      // Since the divisor is for a floordiv, we need to convert to ceildiv,
      // i.e., i >= expr floordiv div <=> i >= (expr - div + 1) ceildiv div
      // <=> div * i >= expr - div + 1.
      minLb.back() -= lbFloorDivisor - 1;
    } else if (res == BoundCmpResult::Greater) {
      minLb = otherLb;
      minLb.back() -= otherLbFloorDivisor - 1;
    } else {
      // Uncomparable - check for constant lower/upper bounds.
      auto constLb = getConstantLowerBound(d);
      auto constOtherLb = otherAligned.getConstantLowerBound(d);
      if (!constLb.hasValue() || !constOtherLb.hasValue())
        return failure();
      std::fill(minLb.begin(), minLb.end(), 0);
      minLb.back() = std::min(constLb.getValue(), constOtherLb.getValue());
    }

    // Do the same for the ub's, but identify the max of the upper bounds.
    auto uRes = compareBounds(ub, otherUb);
    if (uRes == BoundCmpResult::Greater || uRes == BoundCmpResult::Equal) {
      maxUb = ub;
    } else if (uRes == BoundCmpResult::Less) {
      maxUb = otherUb;
    } else {
      // Uncomparable - check for constant lower/upper bounds.
      auto constUb = getConstantUpperBound(d);
      auto constOtherUb = otherAligned.getConstantUpperBound(d);
      if (!constUb.hasValue() || !constOtherUb.hasValue())
        return failure();
      std::fill(maxUb.begin(), maxUb.end(), 0);
      maxUb.back() = std::max(constUb.getValue(), constOtherUb.getValue());
    }

    std::fill(newLb.begin(), newLb.end(), 0);
    std::fill(newUb.begin(), newUb.end(), 0);

    // The divisor for lb, ub, otherLb, otherUb at this point is
    // lbFloorDivisor, and so it's the divisor for newLb and newUb as well.
    newLb[d] = lbFloorDivisor;
    newUb[d] = -lbFloorDivisor;
    // Copy over the symbolic part + constant term.
    std::copy(minLb.begin(), minLb.end(), newLb.begin() + getNumDimIds());
    std::transform(newLb.begin() + getNumDimIds(), newLb.end(),
                   newLb.begin() + getNumDimIds(), std::negate<int64_t>());
    std::copy(maxUb.begin(), maxUb.end(), newUb.begin() + getNumDimIds());

    boundingLbs.push_back(newLb);
    boundingUbs.push_back(newUb);
  }

  // Clear all constraints and add the lower/upper bounds for the bounding box.
  clearConstraints();
  for (unsigned d = 0, e = getNumDimIds(); d < e; ++d) {
    addInequality(boundingLbs[d]);
    addInequality(boundingUbs[d]);
  }

  // Add the constraints that were common to both systems.
  append(commonCst);
  removeTrivialRedundancy();

  // TODO: copy over pure symbolic constraints from this and 'other' to the
  // union (since the above are just the union along dimensions); we shouldn't
  // be discarding any other constraints on the symbols.

  return success();
}

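// Illustrative sketch (an assumption, not from the original source):
// unionBoundingBox over two constant ranges on one dimension. The bounding
// box of {0 <= i <= 10} and {5 <= i <= 15} is {0 <= i <= 15}. The helper name
// `unionBoundingBoxSketch` is hypothetical.
static void unionBoundingBoxSketch() {
  FlatAffineConstraints a, b;
  a.reset(/*numDims=*/1, /*numSymbols=*/0, /*numLocals=*/0);
  b.reset(/*numDims=*/1, /*numSymbols=*/0, /*numLocals=*/0);
  a.addInequality({1, 0});   // i >= 0
  a.addInequality({-1, 10}); // i <= 10
  b.addInequality({1, -5});  // i >= 5
  b.addInequality({-1, 15}); // i <= 15
  if (succeeded(a.unionBoundingBox(b))) {
    assert(a.getConstantLowerBound(0).getValue() == 0);
    assert(a.getConstantUpperBound(0).getValue() == 15);
  }
}
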
/// Compute an explicit representation for local vars. For all systems coming
/// from MLIR integer sets, maps, or expressions where local vars were
/// introduced to model floordivs and mods, this always succeeds.
static LogicalResult computeLocalVars(const FlatAffineConstraints &cst,
                                      SmallVectorImpl<AffineExpr> &memo,
                                      MLIRContext *context) {
  unsigned numDims = cst.getNumDimIds();
  unsigned numSyms = cst.getNumSymbolIds();

  // Initialize dimensional and symbolic identifiers.
  for (unsigned i = 0; i < numDims; i++)
    memo[i] = getAffineDimExpr(i, context);
  for (unsigned i = numDims, e = numDims + numSyms; i < e; i++)
    memo[i] = getAffineSymbolExpr(i - numDims, context);

  bool changed;
  do {
    // Each time `changed` is true at the end of this iteration, one or more
    // local vars would have been detected as floordivs and set in memo; so the
    // number of null entries in memo[...] strictly reduces; hence, this
    // converges.
    changed = false;
    for (unsigned i = 0, e = cst.getNumLocalIds(); i < e; ++i)
      if (!memo[numDims + numSyms + i] &&
          detectAsFloorDiv(cst, /*pos=*/numDims + numSyms + i, context, memo))
        changed = true;
  } while (changed);

  ArrayRef<AffineExpr> localExprs =
      ArrayRef<AffineExpr>(memo).take_back(cst.getNumLocalIds());
  return success(
      llvm::all_of(localExprs, [](AffineExpr expr) { return expr; }));
}


void FlatAffineConstraints::getIneqAsAffineValueMap(
    unsigned pos, unsigned ineqPos, AffineValueMap &vmap,
    MLIRContext *context) const {
  unsigned numDims = getNumDimIds();
  unsigned numSyms = getNumSymbolIds();

  assert(pos < numDims && "invalid position");
  assert(ineqPos < getNumInequalities() && "invalid inequality position");

  // Get expressions for local vars.
  SmallVector<AffineExpr, 8> memo(getNumIds(), AffineExpr());
  if (failed(computeLocalVars(*this, memo, context)))
    assert(false &&
           "one or more local exprs do not have an explicit representation");
  auto localExprs = ArrayRef<AffineExpr>(memo).take_back(getNumLocalIds());

  // Compute the AffineExpr lower/upper bound for this inequality.
  ArrayRef<int64_t> inequality = getInequality(ineqPos);
  SmallVector<int64_t, 8> bound;
  bound.reserve(getNumCols() - 1);
  // Everything other than the coefficient at `pos`.
  bound.append(inequality.begin(), inequality.begin() + pos);
  bound.append(inequality.begin() + pos + 1, inequality.end());

  if (inequality[pos] > 0)
    // Lower bound.
    std::transform(bound.begin(), bound.end(), bound.begin(),
                   std::negate<int64_t>());
  else
    // Upper bound (which is exclusive).
    bound.back() += 1;

  // Convert to AffineExpr (tree) form.
  auto boundExpr = getAffineExprFromFlatForm(bound, numDims - 1, numSyms,
                                             localExprs, context);

  // Get the values to bind to this affine expr (all dims and symbols).
  SmallVector<Value, 4> operands;
  getIdValues(0, pos, &operands);
  SmallVector<Value, 4> trailingOperands;
  getIdValues(pos + 1, getNumDimAndSymbolIds(), &trailingOperands);
  operands.append(trailingOperands.begin(), trailingOperands.end());
  vmap.reset(AffineMap::get(numDims - 1, numSyms, boundExpr), operands);
}
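
// A minimal usage sketch (the setup is assumed, not from the upstream
// source): for `cst` over (d0, d1)[s0] whose inequality at position 0 is
// d1 - d0 >= 0, the call
//
//   AffineValueMap vmap;
//   cst.getIneqAsAffineValueMap(/*pos=*/1, /*ineqPos=*/0, vmap, context);
//
// treats the inequality as a lower bound on d1 (its coefficient is
// positive): the remaining coefficients are negated, giving the map
// (d0)[s0] -> (d0), bound to the values of the other ids of `cst`.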

/// Returns true if the pos^th column is all zero for both inequalities and
/// equalities.
static bool isColZero(const FlatAffineConstraints &cst, unsigned pos) {
  unsigned rowPos;
  return !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/false, &rowPos) &&
         !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/true, &rowPos);
}
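
// For instance (an illustrative note, not upstream code): in a system over
// (d0)[s0] with one local q and the single constraint d0 + s0 >= 0, the
// column of q is zero in every row, so isColZero(cst, /*pos=*/2) is true.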

IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const {
  if (getNumConstraints() == 0)
    // Return universal set (always true): 0 == 0.
    return IntegerSet::get(getNumDimIds(), getNumSymbolIds(),
                           getAffineConstantExpr(/*constant=*/0, context),
                           /*eqFlags=*/true);

  // Construct local references.
  SmallVector<AffineExpr, 8> memo(getNumIds(), AffineExpr());

  if (failed(computeLocalVars(*this, memo, context))) {
    // Check whether the local variables that lack an explicit representation
    // have zero coefficients everywhere; if so, they appear in no constraint
    // and can simply be ignored.
    for (unsigned i = getNumDimAndSymbolIds(), e = getNumIds(); i < e; ++i) {
      if (!memo[i] && !isColZero(*this, /*pos=*/i)) {
        LLVM_DEBUG(llvm::dbgs() << "one or more local exprs do not have an "
                                   "explicit representation");
        return IntegerSet();
      }
    }
  }

  ArrayRef<AffineExpr> localExprs =
      ArrayRef<AffineExpr>(memo).take_back(getNumLocalIds());

  // Construct the IntegerSet from the equalities/inequalities.
  unsigned numDims = getNumDimIds();
  unsigned numSyms = getNumSymbolIds();

  SmallVector<bool, 16> eqFlags(getNumConstraints());
  std::fill(eqFlags.begin(), eqFlags.begin() + getNumEqualities(), true);
  std::fill(eqFlags.begin() + getNumEqualities(), eqFlags.end(), false);

  SmallVector<AffineExpr, 8> exprs;
  exprs.reserve(getNumConstraints());

  for (unsigned i = 0, e = getNumEqualities(); i < e; ++i)
    exprs.push_back(getAffineExprFromFlatForm(getEquality(i), numDims, numSyms,
                                              localExprs, context));
  for (unsigned i = 0, e = getNumInequalities(); i < e; ++i)
    exprs.push_back(getAffineExprFromFlatForm(getInequality(i), numDims,
                                              numSyms, localExprs, context));
  return IntegerSet::get(numDims, numSyms, exprs, eqFlags);
}
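
// A minimal usage sketch (variable names are assumed, not from the upstream
// source): build 0 <= d0 <= s0 - 1 and round-trip it into an IntegerSet.
//
//   FlatAffineConstraints cst(/*numDims=*/1, /*numSymbols=*/1);
//   cst.addInequality({1, 0, 0});   // d0 >= 0
//   cst.addInequality({-1, 1, -1}); // s0 - d0 - 1 >= 0
//   IntegerSet set = cst.getAsIntegerSet(ctx);
//   // set: affine_set<(d0)[s0] : (d0 >= 0, -d0 + s0 - 1 >= 0)>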

/// Finds the positions of the inequalities and equalities whose coefficients
/// are zero for all identifiers in [pos, pos + num), i.e., the constraints
/// that are independent of those identifiers.
static void getIndependentConstraints(const FlatAffineConstraints &cst,
                                      unsigned pos, unsigned num,
                                      SmallVectorImpl<unsigned> &nbIneqIndices,
                                      SmallVectorImpl<unsigned> &nbEqIndices) {
  assert(pos < cst.getNumIds() && "invalid start position");
  assert(pos + num <= cst.getNumIds() && "invalid limit");

  for (unsigned r = 0, e = cst.getNumInequalities(); r < e; r++) {
    // The bounds are to be independent of [pos, pos + num) columns.
    unsigned c;
    for (c = pos; c < pos + num; ++c) {
      if (cst.atIneq(r, c) != 0)
        break;
    }
    if (c == pos + num)
      nbIneqIndices.push_back(r);
  }

  for (unsigned r = 0, e = cst.getNumEqualities(); r < e; r++) {
    // The bounds are to be independent of [pos, pos + num) columns.
    unsigned c;
    for (c = pos; c < pos + num; ++c) {
      if (cst.atEq(r, c) != 0)
        break;
    }
    if (c == pos + num)
      nbEqIndices.push_back(r);
  }
}
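
// For example (illustrative, not upstream code): over (d0, d1) with the
// inequalities d0 >= 0, d1 >= 0, and -d0 - d1 + 10 >= 0, calling
// getIndependentConstraints(cst, /*pos=*/0, /*num=*/1, ...) collects only
// d1 >= 0, the one constraint with a zero coefficient for d0.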

void FlatAffineConstraints::removeIndependentConstraints(unsigned pos,
                                                         unsigned num) {
  assert(pos + num <= getNumIds() && "invalid range");

  // Remove constraints that are independent of these identifiers.
  SmallVector<unsigned, 4> nbIneqIndices, nbEqIndices;
  getIndependentConstraints(*this, /*pos=*/0, num, nbIneqIndices, nbEqIndices);

  // Iterate in reverse so that indices don't have to be updated.
  // TODO: This method can be made more efficient (because removal of each
  // inequality leads to much shifting/copying in the underlying buffer).
  for (auto nbIndex : llvm::reverse(nbIneqIndices))
    removeInequality(nbIndex);
  for (auto nbIndex : llvm::reverse(nbEqIndices))
    removeEquality(nbIndex);
}
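
// A usage sketch (illustrative, not upstream code): with the inequalities
// d0 >= 0, d1 >= 0, and -d0 - d1 + 10 >= 0 over (d0, d1),
//
//   cst.removeIndependentConstraints(/*pos=*/0, /*num=*/1);
//
// drops d1 >= 0 and keeps the two constraints that involve d0.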

AffineMap mlir::alignAffineMapWithValues(AffineMap map, ValueRange operands,
                                         ValueRange dims, ValueRange syms,
                                         SmallVector<Value> *newSyms) {
  assert(operands.size() == map.getNumInputs() &&
         "expected same number of operands and map inputs");
  MLIRContext *ctx = map.getContext();
  Builder builder(ctx);
  SmallVector<AffineExpr> dimReplacements(map.getNumDims(), {});
  unsigned numSymbols = syms.size();
  SmallVector<AffineExpr> symReplacements(map.getNumSymbols(), {});
  if (newSyms) {
    newSyms->clear();
    newSyms->append(syms.begin(), syms.end());
  }

  for (auto operand : llvm::enumerate(operands)) {
    // Compute replacement dim/sym of operand.
    AffineExpr replacement;
    auto dimIt = std::find(dims.begin(), dims.end(), operand.value());
    auto symIt = std::find(syms.begin(), syms.end(), operand.value());
    if (dimIt != dims.end()) {
      replacement =
          builder.getAffineDimExpr(std::distance(dims.begin(), dimIt));
    } else if (symIt != syms.end()) {
      replacement =
          builder.getAffineSymbolExpr(std::distance(syms.begin(), symIt));
    } else {
      // This operand is neither a dimension nor a symbol. Add it as a new
      // symbol.
      replacement = builder.getAffineSymbolExpr(numSymbols++);
      if (newSyms)
        newSyms->push_back(operand.value());
    }
    // Add to corresponding replacements vector.
    if (operand.index() < map.getNumDims()) {
      dimReplacements[operand.index()] = replacement;
    } else {
      symReplacements[operand.index() - map.getNumDims()] = replacement;
    }
  }

  return map.replaceDimsAndSymbols(dimReplacements, symReplacements,
                                   dims.size(), numSymbols);
}
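
// A minimal usage sketch (the values %a, %b, %c are assumed, not from the
// upstream source): align map = (d0)[s0] -> (d0 + s0) with operands
// (%a, %b), where dims = (%a, %c) and syms is empty. %a maps to d0; %b is
// neither a dim nor a symbol, so it becomes a new trailing symbol:
//
//   SmallVector<Value> newSyms;
//   AffineMap aligned = alignAffineMapWithValues(
//       map, /*operands=*/ValueRange{a, b}, /*dims=*/ValueRange{a, c},
//       /*syms=*/ValueRange(), &newSyms);
//   // aligned: (d0, d1)[s0] -> (d0 + s0), with newSyms == {b}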