mirror of
https://github.com/intel/llvm.git
synced 2026-02-08 17:28:30 +08:00
[mlir][vector] Remove ExtractMap/InsertMap operations
As discussed on Discourse (https://discourse.llvm.org/t/vector-vector-distribution-large-vector-to-small-vector/1983/22), remove the insert_map/extract_map ops, since vector distribution now uses the warp_execute_on_lane_0 op. Differential Revision: https://reviews.llvm.org/D134000
This commit is contained in:
@@ -119,11 +119,6 @@ void populateVectorMaskMaterializationPatterns(RewritePatternSet &patterns,
|
||||
bool force32BitVectorIndices,
|
||||
PatternBenefit benefit = 1);
|
||||
|
||||
/// Collect a set of patterns to propagate insert_map/extract_map in the SSA
|
||||
/// chain.
|
||||
void populatePropagateVectorDistributionPatterns(RewritePatternSet &patterns,
|
||||
PatternBenefit benefit = 1);
|
||||
|
||||
/// Collects patterns to progressively lower vector.broadcast ops on high-D
|
||||
/// vectors to low-D vector ops.
|
||||
void populateVectorBroadcastLoweringPatterns(RewritePatternSet &patterns,
|
||||
|
||||
@@ -599,75 +599,6 @@ def Vector_ExtractOp :
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def Vector_ExtractMapOp :
|
||||
Vector_Op<"extract_map", [NoSideEffect]>,
|
||||
Arguments<(ins AnyVector:$vector, Variadic<Index>:$ids)>,
|
||||
Results<(outs AnyVector)> {
|
||||
let summary = "vector extract map operation";
|
||||
let description = [{
|
||||
Takes an N-D vector and extracts a sub-part of the vector starting at id
|
||||
along each dimension.
|
||||
|
||||
The dimension associated to each element of `ids` used to extract are
|
||||
implicitly deduced from the destination type. For each dimension the
|
||||
multiplicity is the source dimension size divided by the destination
|
||||
dimension size, each dimension with a multiplicity greater than 1 is
|
||||
associated to the next id, following ids order.
|
||||
For example if the source type is `vector<64x4x32xf32>` and the destination
|
||||
type is `vector<4x4x2xf32>`, the first id maps to dimension 0 and the second
|
||||
id to dimension 2.
|
||||
|
||||
Similarly to vector.tuple_get, this operation is used for progressive
|
||||
lowering and should be folded away before converting to LLVM.
|
||||
|
||||
It is different than `vector.extract_slice` and
|
||||
`vector.extract_strided_slice` as it takes a Value as index instead of an
|
||||
attribute. Also in the future it is meant to support extracting along any
|
||||
dimensions and not only the most major ones.
|
||||
|
||||
For instance:
|
||||
```
|
||||
// dynamic computation producing the value 0 of index type
|
||||
%idx0 = ... : index
|
||||
// dynamic computation producing the value 1 of index type
|
||||
%idx1 = ... : index
|
||||
%0 = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
|
||||
// extracts values [0, 1]
|
||||
%1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
|
||||
// extracts values [1, 2]
|
||||
%2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
|
||||
```
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%ev = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
|
||||
%ev1 = vector.extract_map %v1[%id1, %id2] : vector<64x4x32xf32>
|
||||
to vector<4x4x2xf32>
|
||||
```
|
||||
}];
|
||||
let builders = [
|
||||
OpBuilder<(ins "Value":$vector, "ValueRange":$ids,
|
||||
"ArrayRef<int64_t>":$multiplicity,
|
||||
"AffineMap":$map)>];
|
||||
let extraClassDeclaration = [{
|
||||
VectorType getSourceVectorType() {
|
||||
return getVector().getType().cast<VectorType>();
|
||||
}
|
||||
VectorType getResultType() {
|
||||
return getResult().getType().cast<VectorType>();
|
||||
}
|
||||
void getMultiplicity(SmallVectorImpl<int64_t> &multiplicity);
|
||||
AffineMap map();
|
||||
}];
|
||||
let assemblyFormat = [{
|
||||
$vector `[` $ids `]` attr-dict `:` type($vector) `to` type(results)
|
||||
}];
|
||||
|
||||
let hasFolder = 1;
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def Vector_FMAOp :
|
||||
Op<Vector_Dialect, "fma", [
|
||||
NoSideEffect, AllTypesMatch<["lhs", "rhs", "acc", "result"]>,
|
||||
@@ -790,72 +721,6 @@ def Vector_InsertOp :
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def Vector_InsertMapOp :
|
||||
Vector_Op<"insert_map", [NoSideEffect, AllTypesMatch<["dest", "result"]>]>,
|
||||
Arguments<(ins AnyVector:$vector, AnyVector:$dest, Variadic<Index>:$ids)>,
|
||||
Results<(outs AnyVector:$result)> {
|
||||
let summary = "vector insert map operation";
|
||||
let description = [{
|
||||
Inserts an N-D vector within a larger vector starting at id. The new
|
||||
vector created will have the same size as the destination operand vector.
|
||||
|
||||
The dimension associated to each element of `ids` used to insert is
|
||||
implicitly deduced from the source type (see `ExtractMapOp` for details).
|
||||
For example if source type is `vector<4x4x2xf32>` and the destination type
|
||||
is `vector<64x4x32xf32>`, the first id maps to dimension 0 and the second id
|
||||
to dimension 2.
|
||||
|
||||
Similarly to vector.tuple_get, this operation is used for progressive
|
||||
lowering and should be folded away before converting to LLVM.
|
||||
|
||||
It is different than `vector.insert` and `vector.insert_strided_slice` as it
|
||||
takes a Value as index instead of an attribute. Also in the future it is
|
||||
meant to support inserting along any dimensions and not only the most major
|
||||
ones.
|
||||
|
||||
This operation is meant to be used in combination with vector.extract_map.
|
||||
|
||||
For instance:
|
||||
```
|
||||
// dynamic computation producing the value 0 of index type
|
||||
%idx0 = ... : index
|
||||
// dynamic computation producing the value 1 of index type
|
||||
%idx1 = ... : index
|
||||
%0 = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
|
||||
// extracts values [0, 1]
|
||||
%1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
|
||||
// extracts values [1, 2]
|
||||
%2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
|
||||
// insert [0, 1] into [x, x, x, x] and produce [0, 1, x, x]
|
||||
%3 = vector.insert_map %1, %0[%idx0] : vector<2xi32> into vector<4xi32>
|
||||
// insert [1, 2] into [x, x, x, x] and produce [x, 1, 2, x]
|
||||
%4 = vector.insert_map %2, %0[%idx1] : vector<2xi32> into vector<4xi32>
|
||||
```
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
%v = vector.insert_map %ev, %v[%id] : vector<1xf32> into vector<32xf32>
|
||||
%v1 = vector.insert_map %ev1, %v1[%arg0, %arg1] : vector<2x4x1xf32>
|
||||
into vector<64x4x32xf32>
|
||||
```
|
||||
}];
|
||||
let extraClassDeclaration = [{
|
||||
VectorType getSourceVectorType() {
|
||||
return getVector().getType().cast<VectorType>();
|
||||
}
|
||||
VectorType getResultType() {
|
||||
return getResult().getType().cast<VectorType>();
|
||||
}
|
||||
// Return a map indicating the dimension mapping to the given ids.
|
||||
AffineMap map();
|
||||
}];
|
||||
let assemblyFormat = [{
|
||||
$vector `,` $dest `[` $ids `]` attr-dict
|
||||
`:` type($vector) `into` type($result)
|
||||
}];
|
||||
let hasVerifier = 1;
|
||||
}
|
||||
|
||||
def Vector_InsertStridedSliceOp :
|
||||
Vector_Op<"insert_strided_slice", [NoSideEffect,
|
||||
PredOpTrait<"operand #0 and result have same element type",
|
||||
|
||||
@@ -65,28 +65,6 @@ LogicalResult splitFullAndPartialTransfer(
|
||||
VectorTransformsOptions options = VectorTransformsOptions(),
|
||||
scf::IfOp *ifOp = nullptr);
|
||||
|
||||
/// Pair of operations returned by distributPointwiseVectorOp.
struct DistributeOps {
|
||||
/// The vector.extract_map op created on the distributed result.
ExtractMapOp extract;
|
||||
/// The vector.insert_map op created from `extract`.
InsertMapOp insert;
|
||||
};
|
||||
|
||||
/// Distribute a N-D vector pointwise operation over a range of given ids taking
|
||||
/// *all* values in [0 .. multiplicity - 1] (e.g. loop induction variable or
|
||||
/// SPMD id). This transformation only inserts
|
||||
/// vector.extract_map/vector.insert_map. It is meant to be used with
|
||||
/// canonicalizations pattern to propagate and fold the vector
|
||||
/// insert_map/extract_map operations.
|
||||
/// Transforms:
|
||||
/// %v = arith.addf %a, %b : vector<32xf32>
|
||||
/// to:
|
||||
/// %v = arith.addf %a, %b : vector<32xf32>
|
||||
/// %ev = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
|
||||
/// %nv = vector.insert_map %ev, %v[%id] : vector<1xf32> into vector<32xf32>
|
||||
Optional<DistributeOps>
|
||||
distributPointwiseVectorOp(OpBuilder &builder, Operation *op,
|
||||
ArrayRef<Value> id, ArrayRef<int64_t> multiplicity,
|
||||
const AffineMap &map);
|
||||
|
||||
/// Implements transfer op write to read forwarding and dead transfer write
|
||||
/// optimizations.
|
||||
void transferOpflowOpt(Operation *rootOp);
|
||||
|
||||
@@ -1630,81 +1630,6 @@ static void populateFromInt64AttrArray(ArrayAttr arrayAttr,
|
||||
results.push_back(attr.cast<IntegerAttr>().getInt());
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ExtractMapOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Builds an ExtractMapOp whose result type is derived from the type of
/// `vector`: every dimension named by a result of `permutationMap` is divided
/// by the matching entry of `multiplicity`; all other dimensions are kept.
///
/// `ids`, `multiplicity` and the results of `permutationMap` must have the same
/// length, and `permutationMap` must be a projected permutation (asserted).
void ExtractMapOp::build(OpBuilder &builder, OperationState &result,
                         Value vector, ValueRange ids,
                         ArrayRef<int64_t> multiplicity,
                         AffineMap permutationMap) {
  assert(ids.size() == multiplicity.size() &&
         ids.size() == permutationMap.getNumResults());
  assert(permutationMap.isProjectedPermutation());

  VectorType sourceType = vector.getType().cast<VectorType>();
  SmallVector<int64_t, 4> distributedShape(sourceType.getShape().begin(),
                                           sourceType.getShape().end());
  // Shrink each distributed dimension by its multiplicity.
  unsigned resultIdx = 0;
  for (AffineExpr expr : permutationMap.getResults()) {
    unsigned pos = expr.cast<AffineDimExpr>().getPosition();
    distributedShape[pos] = distributedShape[pos] / multiplicity[resultIdx];
    ++resultIdx;
  }

  VectorType resultType =
      VectorType::get(distributedShape, sourceType.getElementType());
  ExtractMapOp::build(builder, result, resultType, vector, ids);
}
|
||||
|
||||
/// Verifies that the source and result vectors have the same rank, that every
/// source dimension is a multiple of the matching result dimension, and that
/// the number of ids equals the number of dimensions whose size differs.
LogicalResult ExtractMapOp::verify() {
  VectorType srcType = getSourceVectorType();
  VectorType dstType = getResultType();
  if (srcType.getRank() != dstType.getRank())
    return emitOpError("expected source and destination vectors of same rank");

  // Count the dimensions that are actually distributed (size changes).
  unsigned distributedDims = 0;
  for (unsigned dim = 0, rank = srcType.getRank(); dim < rank; ++dim) {
    int64_t srcSize = srcType.getDimSize(dim);
    int64_t dstSize = dstType.getDimSize(dim);
    if (srcSize % dstSize != 0)
      return emitOpError("source vector dimensions must be a multiple of "
                         "destination vector dimensions");
    if (srcSize != dstSize)
      ++distributedDims;
  }

  if (distributedDims != getIds().size())
    return emitOpError("expected number of ids must match the number of "
                       "dimensions distributed");
  return success();
}
|
||||
|
||||
/// Folds extract_map(insert_map(%v, ...)[ids])[ids] to %v when the extracted
/// type equals the type of the inserted vector and both ops use the same ids.
OpFoldResult ExtractMapOp::fold(ArrayRef<Attribute> operands) {
  auto insertOp = getVector().getDefiningOp<vector::InsertMapOp>();
  if (!insertOp)
    return {};

  Value inserted = insertOp.getVector();
  if (getType() != inserted.getType() || getIds() != insertOp.getIds())
    return {};
  return inserted;
}
|
||||
|
||||
/// Appends to `multiplicity` (which must be empty on entry) the ratio
/// source size / result size of every dimension whose size differs between
/// the source and result vector types, in dimension order.
void ExtractMapOp::getMultiplicity(SmallVectorImpl<int64_t> &multiplicity) {
  assert(multiplicity.empty());
  VectorType srcType = getSourceVectorType();
  VectorType dstType = getResultType();
  for (unsigned dim = 0, rank = srcType.getRank(); dim < rank; ++dim) {
    int64_t srcSize = srcType.getDimSize(dim);
    int64_t dstSize = dstType.getDimSize(dim);
    // Dimensions of equal size are not distributed.
    if (srcSize == dstSize)
      continue;
    multiplicity.push_back(srcSize / dstSize);
  }
}
|
||||
|
||||
template <typename MapOp>
|
||||
AffineMap calculateImplicitMap(MapOp op) {
|
||||
SmallVector<AffineExpr, 4> perm;
|
||||
// Check which dimension have a multiplicity greater than 1 and associated
|
||||
// them to the IDs in order.
|
||||
for (unsigned i = 0, e = op.getSourceVectorType().getRank(); i < e; i++) {
|
||||
if (op.getSourceVectorType().getDimSize(i) !=
|
||||
op.getResultType().getDimSize(i))
|
||||
perm.push_back(getAffineDimExpr(i, op.getContext()));
|
||||
}
|
||||
auto map = AffineMap::get(op.getSourceVectorType().getRank(), 0, perm,
|
||||
op.getContext());
|
||||
return map;
|
||||
}
|
||||
|
||||
/// Returns the map computed by calculateImplicitMap for this op.
AffineMap ExtractMapOp::map() {
  AffineMap implicitMap = calculateImplicitMap(*this);
  return implicitMap;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FmaOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -2133,30 +2058,6 @@ OpFoldResult vector::InsertOp::fold(ArrayRef<Attribute> operands) {
|
||||
return {};
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// InsertMapOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Verifies that the inserted vector and the result have the same rank, that
/// every result dimension is a multiple of the matching source dimension, and
/// that the number of ids equals the number of dimensions whose size differs.
LogicalResult InsertMapOp::verify() {
  VectorType srcType = getSourceVectorType();
  VectorType dstType = getResultType();
  if (srcType.getRank() != dstType.getRank())
    return emitOpError("expected source and destination vectors of same rank");

  // Count the dimensions that are actually distributed (size changes).
  unsigned distributedDims = 0;
  for (unsigned dim = 0, rank = dstType.getRank(); dim < rank; ++dim) {
    int64_t dstSize = dstType.getDimSize(dim);
    int64_t srcSize = srcType.getDimSize(dim);
    if (dstSize % srcSize != 0)
      return emitOpError(
          "destination vector size must be a multiple of source vector size");
    if (dstSize != srcSize)
      ++distributedDims;
  }

  if (distributedDims != getIds().size())
    return emitOpError("expected number of ids must match the number of "
                       "dimensions distributed");
  return success();
}
|
||||
|
||||
/// Returns the map computed by calculateImplicitMap for this op.
AffineMap InsertMapOp::map() {
  AffineMap implicitMap = calculateImplicitMap(*this);
  return implicitMap;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// InsertStridedSliceOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@@ -9,7 +9,7 @@ add_mlir_dialect_library(MLIRVectorTransforms
|
||||
VectorTransferSplitRewritePatterns.cpp
|
||||
VectorTransferPermutationMapRewritePatterns.cpp
|
||||
VectorTransforms.cpp
|
||||
VectorUnrollDistribute.cpp
|
||||
VectorUnroll.cpp
|
||||
|
||||
ADDITIONAL_HEADER_DIRS
|
||||
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Vector/Transforms
|
||||
|
||||
@@ -1998,37 +1998,6 @@ ContractionOpLowering::lowerReduction(vector::ContractionOp op,
|
||||
|
||||
} // namespace mlir
|
||||
|
||||
/// Creates, right after `op`, a vector.extract_map of the single result of
/// `op` followed by a vector.insert_map of that extracted value back into the
/// result, and returns both ops.
///
/// Returns llvm::None without creating anything when:
///  * `op` does not have exactly one result, or that result is not a vector;
///  * `ids`, `multiplicity` and the results of `map` differ in length;
///  * a result of `map` is not a dimension expression within the vector rank;
///  * a multiplicity is not strictly positive, or does not evenly divide the
///    size of the dimension it distributes (handling other sizes would
///    require masking).
Optional<mlir::vector::DistributeOps> mlir::vector::distributPointwiseVectorOp(
    OpBuilder &builder, Operation *op, ArrayRef<Value> ids,
    ArrayRef<int64_t> multiplicity, const AffineMap &map) {
  if (op->getNumResults() != 1)
    return {};
  Value result = op->getResult(0);
  VectorType type = result.getType().dyn_cast<VectorType>();
  if (!type || map.getNumResults() != multiplicity.size() ||
      ids.size() != multiplicity.size())
    return {};

  // Validate every distributed dimension before touching the IR.
  for (unsigned i = 0, e = map.getNumResults(); i < e; ++i) {
    auto dimExpr = map.getResult(i).dyn_cast<AffineDimExpr>();
    if (!dimExpr || dimExpr.getPosition() >= type.getRank())
      return {};
    // A zero multiplicity would make the modulo below undefined behaviour.
    int64_t factor = multiplicity[i];
    if (factor <= 0 || type.getDimSize(dimExpr.getPosition()) % factor != 0)
      return {};
  }

  // Restore the caller's insertion point on exit.
  OpBuilder::InsertionGuard guard(builder);
  builder.setInsertionPointAfter(op);
  Location loc = op->getLoc();

  DistributeOps ops;
  ops.extract =
      builder.create<vector::ExtractMapOp>(loc, result, ids, multiplicity, map);
  ops.insert =
      builder.create<vector::InsertMapOp>(loc, ops.extract, result, ids);
  return ops;
}
|
||||
|
||||
/// Progressive lowering of transfer_read. This pattern supports lowering of
|
||||
/// `vector.transfer_read` to a combination of `vector.load` and
|
||||
/// `vector.broadcast` if all of the following hold:
|
||||
|
||||
@@ -538,202 +538,6 @@ private:
|
||||
vector::UnrollVectorOptions options;
|
||||
};
|
||||
|
||||
/// Canonicalize an extract_map using the result of a pointwise operation.
|
||||
/// Transforms:
|
||||
/// %v = arith.addf %a, %b : vector32xf32>
|
||||
/// %dv = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
|
||||
/// to:
|
||||
/// %da = vector.extract_map %a[%id] : vector<32xf32> to vector<1xf32>
|
||||
/// %db = vector.extract_map %a[%id] : vector<32xf32> to vector<1xf32>
|
||||
/// %dv = arith.addf %da, %db : vector<1xf32>
|
||||
struct PointwiseExtractPattern : public OpRewritePattern<vector::ExtractMapOp> {
|
||||
using OpRewritePattern::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(vector::ExtractMapOp extract,
|
||||
PatternRewriter &rewriter) const override {
|
||||
Operation *definedOp = extract.getVector().getDefiningOp();
|
||||
if (!definedOp || !OpTrait::hasElementwiseMappableTraits(definedOp) ||
|
||||
definedOp->getNumResults() != 1)
|
||||
return failure();
|
||||
Location loc = extract.getLoc();
|
||||
SmallVector<Value, 4> extractOperands;
|
||||
for (OpOperand &operand : definedOp->getOpOperands()) {
|
||||
auto vecType = operand.get().getType().template dyn_cast<VectorType>();
|
||||
if (!vecType) {
|
||||
extractOperands.push_back(operand.get());
|
||||
continue;
|
||||
}
|
||||
extractOperands.push_back(rewriter.create<vector::ExtractMapOp>(
|
||||
loc,
|
||||
VectorType::get(extract.getResultType().getShape(),
|
||||
vecType.getElementType()),
|
||||
operand.get(), extract.getIds()));
|
||||
}
|
||||
Operation *newOp = cloneOpWithOperandsAndTypes(
|
||||
rewriter, loc, definedOp, extractOperands, extract.getResultType());
|
||||
rewriter.replaceOp(extract, newOp->getResult(0));
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
/// Canonicalize an extract_map using the result of a contract operation.
|
||||
/// This propagate the extract_map to operands.
|
||||
struct ContractExtractPattern : public OpRewritePattern<vector::ExtractMapOp> {
|
||||
using OpRewritePattern::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(vector::ExtractMapOp extract,
|
||||
PatternRewriter &rewriter) const override {
|
||||
Operation *definedOp = extract.getVector().getDefiningOp();
|
||||
auto contract = dyn_cast_or_null<vector::ContractionOp>(definedOp);
|
||||
if (!contract)
|
||||
return failure();
|
||||
Location loc = contract.getLoc();
|
||||
unsigned accIndex = vector::ContractionOp::getAccOperandIndex();
|
||||
AffineMap affineMap = contract.getIndexingMapsArray()[accIndex];
|
||||
// Create a map of the dimensions distributed based on the acc affine map.
|
||||
// Only parallel dimensions are being distributed, reduction dimensions are
|
||||
// untouched.
|
||||
DenseMap<int64_t, int64_t> map;
|
||||
for (unsigned i : llvm::seq(unsigned(0), affineMap.getNumResults()))
|
||||
map[affineMap.getDimPosition(i)] = extract.getResultType().getDimSize(i);
|
||||
SmallVector<Value, 4> extractOperands;
|
||||
for (const auto &it : llvm::enumerate(contract.getIndexingMapsArray())) {
|
||||
// For each operands calculate the new vector type after distribution.
|
||||
Value operand = contract->getOperand(it.index());
|
||||
auto vecType = operand.getType().cast<VectorType>();
|
||||
SmallVector<int64_t> operandShape(vecType.getShape().begin(),
|
||||
vecType.getShape().end());
|
||||
for (unsigned i : llvm::seq(unsigned(0), it.value().getNumResults())) {
|
||||
unsigned dim = it.value().getDimPosition(i);
|
||||
auto distributedDim = map.find(dim);
|
||||
// If the dimension is not in the map it means it is a reduction and
|
||||
// doesn't get distributed.
|
||||
if (distributedDim == map.end())
|
||||
continue;
|
||||
operandShape[i] = distributedDim->second;
|
||||
}
|
||||
VectorType newVecType =
|
||||
VectorType::get(operandShape, vecType.getElementType());
|
||||
extractOperands.push_back(rewriter.create<vector::ExtractMapOp>(
|
||||
loc, newVecType, operand, extract.getIds()));
|
||||
}
|
||||
Operation *newOp =
|
||||
cloneOpWithOperandsAndTypes(rewriter, loc, definedOp, extractOperands,
|
||||
extract.getResult().getType());
|
||||
rewriter.replaceOp(extract, newOp->getResult(0));
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
/// Converts TransferRead op used by ExtractMap op into a smaller dimension
|
||||
/// TransferRead.
|
||||
/// Example:
|
||||
/// ```
|
||||
/// %a = vector.transfer_read %A[%c0, %c0, %c0], %cf0:
|
||||
/// memref<64x64x64xf32>, vector<64x4x32xf32>
|
||||
/// %e = vector.extract_map %a[%id] : vector<64x4x32xf32> to vector<2x4x1xf32>
|
||||
/// ```
|
||||
/// to:
|
||||
/// ```
|
||||
/// %id1 = affine.apply affine_map<()[s0] -> (s0 * 2)> (%id)
|
||||
/// %e = vector.transfer_read %A[%id1, %c0, %id1], %cf0 :
|
||||
/// memref<64x64x64xf32>, vector<2x4x1xf32>
|
||||
/// ```
|
||||
struct TransferReadExtractPattern
|
||||
: public OpRewritePattern<vector::TransferReadOp> {
|
||||
using OpRewritePattern::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(vector::TransferReadOp read,
|
||||
PatternRewriter &rewriter) const override {
|
||||
// TODO: support 0-d corner case.
|
||||
if (read.getTransferRank() == 0)
|
||||
return failure();
|
||||
|
||||
if (!read.getResult().hasOneUse())
|
||||
return failure();
|
||||
auto extract =
|
||||
dyn_cast<vector::ExtractMapOp>(*read.getResult().getUsers().begin());
|
||||
if (!extract)
|
||||
return failure();
|
||||
if (read.getMask())
|
||||
return failure();
|
||||
|
||||
SmallVector<Value, 4> indices(read.getIndices().begin(),
|
||||
read.getIndices().end());
|
||||
AffineMap indexMap = extract.map().compose(read.getPermutationMap());
|
||||
unsigned idCount = 0;
|
||||
ImplicitLocOpBuilder lb(read.getLoc(), rewriter);
|
||||
for (auto it :
|
||||
llvm::zip(indexMap.getResults(), extract.map().getResults())) {
|
||||
AffineExpr d0, d1;
|
||||
bindDims(read.getContext(), d0, d1);
|
||||
auto indexExpr = std::get<0>(it).dyn_cast<AffineDimExpr>();
|
||||
if (!indexExpr)
|
||||
continue;
|
||||
unsigned indexPos = indexExpr.getPosition();
|
||||
unsigned vectorPos = std::get<1>(it).cast<AffineDimExpr>().getPosition();
|
||||
auto scale = getAffineConstantExpr(
|
||||
extract.getResultType().getDimSize(vectorPos), read.getContext());
|
||||
indices[indexPos] = makeComposedAffineApply(
|
||||
rewriter, read.getLoc(), d0 + scale * d1,
|
||||
{indices[indexPos], extract.getIds()[idCount++]});
|
||||
}
|
||||
Value newRead = lb.create<vector::TransferReadOp>(
|
||||
extract.getType(), read.getSource(), indices,
|
||||
read.getPermutationMapAttr(), read.getPadding(), read.getMask(),
|
||||
read.getInBoundsAttr());
|
||||
Value dest = lb.create<arith::ConstantOp>(
|
||||
read.getType(), rewriter.getZeroAttr(read.getType()));
|
||||
newRead = lb.create<vector::InsertMapOp>(newRead, dest, extract.getIds());
|
||||
rewriter.replaceOp(read, newRead);
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
struct TransferWriteInsertPattern
|
||||
: public OpRewritePattern<vector::TransferWriteOp> {
|
||||
using OpRewritePattern::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(vector::TransferWriteOp write,
|
||||
PatternRewriter &rewriter) const override {
|
||||
// TODO: support 0-d corner case.
|
||||
if (write.getTransferRank() == 0)
|
||||
return failure();
|
||||
|
||||
auto insert = write.getVector().getDefiningOp<vector::InsertMapOp>();
|
||||
if (!insert)
|
||||
return failure();
|
||||
if (write.getMask())
|
||||
return failure();
|
||||
SmallVector<Value, 4> indices(write.getIndices().begin(),
|
||||
write.getIndices().end());
|
||||
AffineMap indexMap = insert.map().compose(write.getPermutationMap());
|
||||
unsigned idCount = 0;
|
||||
Location loc = write.getLoc();
|
||||
for (auto it :
|
||||
llvm::zip(indexMap.getResults(), insert.map().getResults())) {
|
||||
AffineExpr d0, d1;
|
||||
bindDims(write.getContext(), d0, d1);
|
||||
auto indexExpr = std::get<0>(it).dyn_cast<AffineDimExpr>();
|
||||
if (!indexExpr)
|
||||
continue;
|
||||
unsigned indexPos = indexExpr.getPosition();
|
||||
unsigned vectorPos = std::get<1>(it).cast<AffineDimExpr>().getPosition();
|
||||
auto scale = getAffineConstantExpr(
|
||||
insert.getSourceVectorType().getDimSize(vectorPos),
|
||||
write.getContext());
|
||||
indices[indexPos] = makeComposedAffineApply(
|
||||
rewriter, loc, d0 + scale * d1,
|
||||
{indices[indexPos], insert.getIds()[idCount++]});
|
||||
}
|
||||
rewriter.create<vector::TransferWriteOp>(
|
||||
loc, insert.getVector(), write.getSource(), indices,
|
||||
write.getPermutationMapAttr(), write.getInBoundsAttr());
|
||||
rewriter.eraseOp(write);
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
struct UnrollReductionPattern : public OpRewritePattern<vector::ReductionOp> {
|
||||
UnrollReductionPattern(MLIRContext *context,
|
||||
const vector::UnrollVectorOptions &options,
|
||||
@@ -841,10 +645,3 @@ void mlir::vector::populateVectorUnrollPatterns(
|
||||
UnrollReductionPattern, UnrollMultiReductionPattern,
|
||||
UnrollTranposePattern>(patterns.getContext(), options, benefit);
|
||||
}
|
||||
|
||||
/// Adds the extract_map/insert_map propagation patterns (pointwise, contract,
/// transfer_read and transfer_write) to `patterns` with the given `benefit`.
void mlir::vector::populatePropagateVectorDistributionPatterns(
    RewritePatternSet &patterns, PatternBenefit benefit) {
  MLIRContext *ctx = patterns.getContext();
  patterns.add<PointwiseExtractPattern, ContractExtractPattern,
               TransferReadExtractPattern, TransferWriteInsertPattern>(
      ctx, benefit);
}
|
||||
@@ -1470,48 +1470,6 @@ func.func @compress_memref_mismatch(%base: memref<?x?xf32>, %mask: vector<16xi1>
|
||||
|
||||
// -----
|
||||
|
||||
func.func @extract_map_rank(%v: vector<32xf32>, %id : index) {
|
||||
// expected-error@+1 {{'vector.extract_map' op expected source and destination vectors of same rank}}
|
||||
%0 = vector.extract_map %v[%id] : vector<32xf32> to vector<2x1xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @extract_map_size(%v: vector<63xf32>, %id : index) {
|
||||
// expected-error@+1 {{'vector.extract_map' op source vector dimensions must be a multiple of destination vector dimensions}}
|
||||
%0 = vector.extract_map %v[%id] : vector<63xf32> to vector<2xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @extract_map_id(%v: vector<2x32xf32>, %id : index) {
|
||||
// expected-error@+1 {{'vector.extract_map' op expected number of ids must match the number of dimensions distributed}}
|
||||
%0 = vector.extract_map %v[%id] : vector<2x32xf32> to vector<1x1xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @insert_map_rank(%v: vector<2x1xf32>, %v1: vector<32xf32>, %id : index) {
|
||||
// expected-error@+1 {{'vector.insert_map' op expected source and destination vectors of same rank}}
|
||||
%0 = vector.insert_map %v, %v1[%id] : vector<2x1xf32> into vector<32xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @insert_map_size(%v: vector<3xf32>, %v1: vector<64xf32>, %id : index) {
|
||||
// expected-error@+1 {{'vector.insert_map' op destination vector size must be a multiple of source vector size}}
|
||||
%0 = vector.insert_map %v, %v1[%id] : vector<3xf32> into vector<64xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @insert_map_id(%v: vector<2x1xf32>, %v1: vector<4x32xf32>, %id : index) {
|
||||
// expected-error@+1 {{'vector.insert_map' op expected number of ids must match the number of dimensions distributed}}
|
||||
%0 = vector.insert_map %v, %v1[%id] : vector<2x1xf32> into vector<4x32xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @scan_reduction_dim_constraint(%arg0: vector<2x3xi32>, %arg1: vector<3xi32>) -> vector<3xi32> {
|
||||
// expected-error@+1 {{'vector.scan' op reduction dimension 5 has to be less than 2}}
|
||||
%0:2 = vector.scan <add>, %arg0, %arg1 {inclusive = true, reduction_dim = 5} :
|
||||
|
||||
@@ -754,21 +754,6 @@ func.func @expand_and_compress2d(%base: memref<?x?xf32>, %mask: vector<16xi1>, %
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @extract_insert_map
|
||||
func.func @extract_insert_map(%v: vector<32xf32>, %v2: vector<16x32xf32>,
|
||||
%id0 : index, %id1 : index) -> (vector<32xf32>, vector<16x32xf32>) {
|
||||
// CHECK: %[[V:.*]] = vector.extract_map %{{.*}}[%{{.*}}] : vector<32xf32> to vector<2xf32>
|
||||
%vd = vector.extract_map %v[%id0] : vector<32xf32> to vector<2xf32>
|
||||
// CHECK: %[[V1:.*]] = vector.extract_map %{{.*}}[%{{.*}}, %{{.*}}] : vector<16x32xf32> to vector<4x2xf32>
|
||||
%vd2 = vector.extract_map %v2[%id0, %id1] : vector<16x32xf32> to vector<4x2xf32>
|
||||
// CHECK: %[[R:.*]] = vector.insert_map %[[V]], %{{.*}}[%{{.*}}] : vector<2xf32> into vector<32xf32>
|
||||
%r = vector.insert_map %vd, %v[%id0] : vector<2xf32> into vector<32xf32>
|
||||
// CHECK: %[[R1:.*]] = vector.insert_map %[[V1]], %{{.*}}[%{{.*}}, %{{.*}}] : vector<4x2xf32> into vector<16x32xf32>
|
||||
%r2 = vector.insert_map %vd2, %v2[%id0, %id1] : vector<4x2xf32> into vector<16x32xf32>
|
||||
// CHECK: return %[[R]], %[[R1]] : vector<32xf32>, vector<16x32xf32>
|
||||
return %r, %r2 : vector<32xf32>, vector<16x32xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @multi_reduction
|
||||
func.func @multi_reduction(%0: vector<4x8x16x32xf32>, %acc0: vector<4x16xf32>,
|
||||
%acc1: f32) -> f32 {
|
||||
|
||||
@@ -1,204 +0,0 @@
|
||||
// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32,1,32 -split-input-file | FileCheck %s
|
||||
// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32,4 -split-input-file | FileCheck %s --check-prefix=CHECK2D
|
||||
|
||||
// CHECK-LABEL: func @distribute_vector_add
|
||||
// CHECK-SAME: (%[[ID:.*]]: index
|
||||
// CHECK-NEXT: %[[ADDV:.*]] = arith.addf %{{.*}}, %{{.*}} : vector<32xf32>
|
||||
// CHECK-NEXT: %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
|
||||
// CHECK-NEXT: %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
|
||||
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<1xf32>
|
||||
// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID]]] : vector<1xf32> into vector<32xf32>
|
||||
// CHECK-NEXT: return %[[INS]] : vector<32xf32>
|
||||
func.func @distribute_vector_add(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> {
|
||||
%0 = arith.addf %A, %B : vector<32xf32>
|
||||
return %0: vector<32xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @distribute_vector_add_exp
|
||||
// CHECK-SAME: (%[[ID:.*]]: index
|
||||
// CHECK-NEXT: %[[EXPV:.*]] = math.exp %{{.*}} : vector<32xf32>
|
||||
// CHECK-NEXT: %[[ADDV:.*]] = arith.addf %[[EXPV]], %{{.*}} : vector<32xf32>
|
||||
// CHECK-NEXT: %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
|
||||
// CHECK-NEXT: %[[EXC:.*]] = math.exp %[[EXA]] : vector<1xf32>
|
||||
// CHECK-NEXT: %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
|
||||
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXC]], %[[EXB]] : vector<1xf32>
|
||||
// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID]]] : vector<1xf32> into vector<32xf32>
|
||||
// CHECK-NEXT: return %[[INS]] : vector<32xf32>
|
||||
func.func @distribute_vector_add_exp(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> {
|
||||
%C = math.exp %A : vector<32xf32>
|
||||
%0 = arith.addf %C, %B : vector<32xf32>
|
||||
return %0: vector<32xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @vector_add_read_write
|
||||
// CHECK-SAME: (%[[ID:.*]]: index
|
||||
// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
|
||||
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
|
||||
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<1xf32>
|
||||
// CHECK-NEXT: %[[EXC:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
|
||||
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[ADD1]], %[[EXC]] : vector<1xf32>
|
||||
// CHECK-NEXT: vector.transfer_write %[[ADD2]], %{{.*}}[%[[ID]]] {{.*}} : vector<1xf32>, memref<32xf32>
|
||||
// CHECK-NEXT: return
|
||||
// Test input: reads 32-element f32 vectors from %A, %B and %C at offset 0
// (padding value 0.0), computes (a + b) + c, and writes the result to %D at
// offset 0. %id is not referenced by the body; the FileCheck expectations
// preceding this function expect single-element transfers indexed by the
// first argument.
func.func @vector_add_read_write(%id : index, %A: memref<32xf32>, %B: memref<32xf32>, %C: memref<32xf32>, %D: memref<32xf32>) {
|
||||
%c0 = arith.constant 0 : index
|
||||
%cf0 = arith.constant 0.0 : f32
|
||||
%a = vector.transfer_read %A[%c0], %cf0: memref<32xf32>, vector<32xf32>
|
||||
%b = vector.transfer_read %B[%c0], %cf0: memref<32xf32>, vector<32xf32>
|
||||
%acc = arith.addf %a, %b: vector<32xf32>
|
||||
%c = vector.transfer_read %C[%c0], %cf0: memref<32xf32>, vector<32xf32>
|
||||
%d = arith.addf %acc, %c: vector<32xf32>
|
||||
vector.transfer_write %d, %D[%c0]: vector<32xf32>, memref<32xf32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
|
||||
|
||||
// CHECK: func @vector_add_cycle
|
||||
// CHECK-SAME: (%[[ID:.*]]: index
|
||||
// CHECK: %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
|
||||
// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]]], %{{.*}} : memref<64xf32>, vector<2xf32>
|
||||
// CHECK-NEXT: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
|
||||
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]]], %{{.*}} : memref<64xf32>, vector<2xf32>
|
||||
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2xf32>
|
||||
// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
|
||||
// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]]] {{.*}} : vector<2xf32>, memref<64xf32>
|
||||
// CHECK-NEXT: return
|
||||
// Test input: reads 64-element f32 vectors from %A and %B at offset 0
// (padding value 0.0), adds them, and writes the sum to %C at offset 0.
// %id is not referenced by the body; the FileCheck expectations preceding
// this function expect 2-element transfers whose index is the first argument
// multiplied by 2.
func.func @vector_add_cycle(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {
|
||||
%c0 = arith.constant 0 : index
|
||||
%cf0 = arith.constant 0.0 : f32
|
||||
%a = vector.transfer_read %A[%c0], %cf0: memref<64xf32>, vector<64xf32>
|
||||
%b = vector.transfer_read %B[%c0], %cf0: memref<64xf32>, vector<64xf32>
|
||||
%acc = arith.addf %a, %b: vector<64xf32>
|
||||
vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref<64xf32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Negative test to make sure nothing is done in case the vector size is not a
|
||||
// multiple of multiplicity.
|
||||
// CHECK-LABEL: func @vector_negative_test
|
||||
// CHECK: %[[C0:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>
|
||||
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>
|
||||
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<16xf32>
|
||||
// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[C0]]] {{.*}} : vector<16xf32>, memref<64xf32>
|
||||
// CHECK-NEXT: return
|
||||
// Test input for the negative case: reads 16-element f32 vectors from the
// 64-element memrefs %A and %B at offset 0, adds them, and writes the sum to
// %C at offset 0. The FileCheck expectations preceding this function expect
// the same 16-element ops, still indexed by the constant 0, in the output.
func.func @vector_negative_test(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {
|
||||
%c0 = arith.constant 0 : index
|
||||
%cf0 = arith.constant 0.0 : f32
|
||||
%a = vector.transfer_read %A[%c0], %cf0: memref<64xf32>, vector<16xf32>
|
||||
%b = vector.transfer_read %B[%c0], %cf0: memref<64xf32>, vector<16xf32>
|
||||
%acc = arith.addf %a, %b: vector<16xf32>
|
||||
vector.transfer_write %acc, %C[%c0]: vector<16xf32>, memref<64xf32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @distribute_vector_add_3d
|
||||
// CHECK-SAME: (%[[ID0:.*]]: index, %[[ID1:.*]]: index
|
||||
// CHECK-NEXT: %[[ADDV:.*]] = arith.addf %{{.*}}, %{{.*}} : vector<64x4x32xf32>
|
||||
// CHECK-NEXT: %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID0]], %[[ID1]]] : vector<64x4x32xf32> to vector<2x4x1xf32>
|
||||
// CHECK-NEXT: %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID0]], %[[ID1]]] : vector<64x4x32xf32> to vector<2x4x1xf32>
|
||||
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
|
||||
// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID0]], %[[ID1]]] : vector<2x4x1xf32> into vector<64x4x32xf32>
|
||||
// CHECK-NEXT: return %[[INS]] : vector<64x4x32xf32>
|
||||
// Test input: element-wise f32 addition of two 64x4x32 vectors, returned as
// the function result. %id0 and %id1 are not referenced by the body; the
// FileCheck expectations preceding this function use the first two arguments
// as the index operands of the expected vector.extract_map /
// vector.insert_map ops on vector<2x4x1xf32>.
func.func @distribute_vector_add_3d(%id0 : index, %id1 : index,
|
||||
%A: vector<64x4x32xf32>, %B: vector<64x4x32xf32>) -> vector<64x4x32xf32> {
|
||||
%0 = arith.addf %A, %B : vector<64x4x32xf32>
|
||||
return %0: vector<64x4x32xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
|
||||
|
||||
// CHECK: func @vector_add_transfer_3d
|
||||
// CHECK-SAME: (%[[ID_0:.*]]: index, %[[ID_1:.*]]: index
|
||||
// CHECK: %[[C0:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
|
||||
// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]], %[[C0]], %[[ID_1]]], %{{.*}} : memref<64x64x64xf32>, vector<2x4x1xf32>
|
||||
// CHECK-NEXT: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
|
||||
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]], %[[C0]], %[[ID_1]]], %{{.*}} : memref<64x64x64xf32>, vector<2x4x1xf32>
|
||||
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
|
||||
// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
|
||||
// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]], %[[C0]], %[[ID_1]]] {{.*}} : vector<2x4x1xf32>, memref<64x64x64xf32>
|
||||
// CHECK-NEXT: return
|
||||
// Test input: reads 64x4x32 f32 vectors from the 64x64x64 memrefs %A and %B
// at the origin (padding value 0.0), adds them, and writes the sum to %C at
// the origin. %id0 and %id1 are not referenced by the body; the FileCheck
// expectations preceding this function expect 2x4x1 transfers indexed by
// (first argument * 2, 0, second argument).
func.func @vector_add_transfer_3d(%id0 : index, %id1 : index, %A: memref<64x64x64xf32>,
|
||||
%B: memref<64x64x64xf32>, %C: memref<64x64x64xf32>) {
|
||||
%c0 = arith.constant 0 : index
|
||||
%cf0 = arith.constant 0.0 : f32
|
||||
%a = vector.transfer_read %A[%c0, %c0, %c0], %cf0: memref<64x64x64xf32>, vector<64x4x32xf32>
|
||||
%b = vector.transfer_read %B[%c0, %c0, %c0], %cf0: memref<64x64x64xf32>, vector<64x4x32xf32>
|
||||
%acc = arith.addf %a, %b: vector<64x4x32xf32>
|
||||
vector.transfer_write %acc, %C[%c0, %c0, %c0]: vector<64x4x32xf32>, memref<64x64x64xf32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// Permutation maps from the 4-D memref index space to the 3-D vector used by
// @vector_add_transfer_permutation below: #map0 and #map1 are attached to its
// two transfer_read ops, #map2 to its transfer_write.
#map0 = affine_map<(d0, d1, d2, d3) -> (d3, 0, 0)>
|
||||
#map1 = affine_map<(d0, d1, d2, d3) -> (0, d3, d0)>
|
||||
#map2 = affine_map<(d0, d1, d2, d3) -> (d3, d2, d1)>
|
||||
|
||||
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
|
||||
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, 0, 0)>
|
||||
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2, d3) -> (0, d3, d0)>
|
||||
// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, d2, d1)>
|
||||
|
||||
// CHECK: func @vector_add_transfer_permutation
|
||||
// CHECK-SAME: (%[[ID_0:.*]]: index, %[[ID_1:.*]]: index
|
||||
// CHECK: %[[C0:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
|
||||
// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[ID2]]], %{{.*}} {permutation_map = #[[MAP1]]} : memref<?x?x?x?xf32>, vector<2x4x1xf32>
|
||||
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID_0]], %[[C0]], %[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP2]]} : memref<?x?x?x?xf32>, vector<2x4x1xf32>
|
||||
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
|
||||
// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
|
||||
// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[C0]], %[[ID_1]], %[[C0]], %[[ID3]]] {permutation_map = #[[MAP3]]} : vector<2x4x1xf32>, memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: return
|
||||
// Test input: reads 64x4x32 f32 vectors from the dynamically shaped 4-D
// memrefs %A (permutation #map0) and %B (permutation #map1) at the origin,
// adds them, and writes the sum to %C at the origin using permutation #map2.
// %id0 and %id1 are not referenced by the body; the FileCheck expectations
// preceding this function expect 2x4x1 transfers that keep the same
// permutation maps.
func.func @vector_add_transfer_permutation(%id0 : index, %id1 : index, %A: memref<?x?x?x?xf32>,
|
||||
%B: memref<?x?x?x?xf32>, %C: memref<?x?x?x?xf32>) {
|
||||
%c0 = arith.constant 0 : index
|
||||
%cf0 = arith.constant 0.0 : f32
|
||||
%a = vector.transfer_read %A[%c0, %c0, %c0, %c0], %cf0 {permutation_map = #map0} : memref<?x?x?x?xf32>, vector<64x4x32xf32>
|
||||
%b = vector.transfer_read %B[%c0, %c0, %c0, %c0], %cf0 {permutation_map = #map1}: memref<?x?x?x?xf32>, vector<64x4x32xf32>
|
||||
%acc = arith.addf %a, %b: vector<64x4x32xf32>
|
||||
vector.transfer_write %acc, %C[%c0, %c0, %c0, %c0] {permutation_map = #map2}: vector<64x4x32xf32>, memref<?x?x?x?xf32>
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK2D-LABEL: vector_add_contract
|
||||
// CHECK2D: %[[A:.+]] = vector.transfer_read %arg2[%0, %c0], %cst : memref<?x?xf32>, vector<2x4xf32>
|
||||
// CHECK2D: %[[B:.+]] = vector.transfer_read %arg3[%2, %c0], %cst : memref<?x?xf32>, vector<16x4xf32>
|
||||
// CHECK2D: %[[C:.+]] = vector.transfer_read %arg4[%4, %5], %cst : memref<?x?xf32>, vector<2x16xf32>
|
||||
// CHECK2D: %[[E:.+]] = vector.transfer_read %arg5[%7, %8], %cst : memref<?x?xf32>, vector<2x16xf32>
|
||||
// CHECK2D: %[[D:.+]] = vector.contract {{.*}} %[[A]], %[[B]], %[[C]] : vector<2x4xf32>, vector<16x4xf32> into vector<2x16xf32>
|
||||
// CHECK2D: %[[R:.+]] = arith.addf %[[D]], %[[E]] : vector<2x16xf32>
|
||||
// CHECK2D: vector.transfer_write %[[R]], {{.*}} : vector<2x16xf32>, memref<?x?xf32>
|
||||
// Test input: reads %a and %b (64x4) and the accumulator %c (64x64) from %A,
// %B and %C at the origin, then contracts them with kind "add". The indexing
// maps are (d0, d2) x (d1, d2) -> (d0, d1), with d2 as the reduction
// iterator. The contraction result is added to a 64x64 vector read from %D,
// and the sum is written back to %C (not %D) at the origin. %id0 and %id1 are
// not referenced by the body.
func.func @vector_add_contract(%id0 : index, %id1 : index, %A: memref<?x?xf32>,
|
||||
%B: memref<?x?xf32>, %C: memref<?x?xf32>, %D: memref<?x?xf32>) {
|
||||
%c0 = arith.constant 0 : index
|
||||
%cf0 = arith.constant 0.0 : f32
|
||||
%a = vector.transfer_read %A[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x4xf32>
|
||||
%b = vector.transfer_read %B[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x4xf32>
|
||||
%c = vector.transfer_read %C[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x64xf32>
|
||||
%d = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
|
||||
affine_map<(d0, d1, d2) -> (d1, d2)>,
|
||||
affine_map<(d0, d1, d2) -> (d0, d1)>],
|
||||
iterator_types = ["parallel", "parallel", "reduction"],
|
||||
kind = #vector.kind<add>}
|
||||
%a, %b, %c : vector<64x4xf32>, vector<64x4xf32> into vector<64x64xf32>
|
||||
%e = vector.transfer_read %D[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x64xf32>
|
||||
%r = arith.addf %d, %e : vector<64x64xf32>
|
||||
vector.transfer_write %r, %C[%c0, %c0] : vector<64x64xf32>, memref<?x?xf32>
|
||||
return
|
||||
}
|
||||
@@ -1,66 +0,0 @@
|
||||
// RUN: mlir-opt %s -pass-pipeline="func.func(test-vector-to-forloop,convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
|
||||
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
|
||||
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext | \
|
||||
// RUN: FileCheck %s
|
||||
|
||||
// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e main \
|
||||
// RUN: -entry-point-result=void \
|
||||
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext | \
|
||||
// RUN: FileCheck %s
|
||||
|
||||
// RUN: mlir-opt %s -pass-pipeline="func.func(test-vector-to-forloop)" | FileCheck %s -check-prefix=TRANSFORM
|
||||
|
||||
|
||||
// External (body-less) declaration of the printing helper called from @main
// with an unranked f32 memref.
// NOTE(review): presumably resolved from the runner-utils shared library
// passed via -shared-libs in the RUN lines; confirm.
func.func private @printMemrefF32(memref<*xf32>)
|
||||
|
||||
// Allocates a 1-D f32 memref with %arg0 elements and fills element i with
// float(i) + %arg1, where i is cast index -> i32 -> f32. Returns the
// allocated memref; this function does not deallocate it (@main deallocates
// the buffers it obtains from here).
func.func @alloc_1d_filled_inc_f32(%arg0: index, %arg1: f32) -> memref<?xf32> {
|
||||
%c0 = arith.constant 0 : index
|
||||
%c1 = arith.constant 1 : index
|
||||
%0 = memref.alloc(%arg0) : memref<?xf32>
|
||||
scf.for %arg2 = %c0 to %arg0 step %c1 {
|
||||
%tmp = arith.index_cast %arg2 : index to i32
|
||||
%tmp1 = arith.sitofp %tmp : i32 to f32
|
||||
%tmp2 = arith.addf %tmp1, %arg1 : f32
|
||||
memref.store %tmp2, %0[%arg2] : memref<?xf32>
|
||||
}
|
||||
return %0 : memref<?xf32>
|
||||
}
|
||||
|
||||
// Large vector addf that can be broken down into a loop of smaller vector addf.
|
||||
// Entry point of the integration test. Builds two 64-element inputs
// (in1[i] = i + 1, in2[i] = i + 2), adds them as 64-element vectors, writes
// the sum to %out, prints %out through @printMemrefF32, and frees all three
// buffers. The expected printed values are therefore 2*i + 3, i.e. 3 .. 129.
func.func @main() {
|
||||
%cf0 = arith.constant 0.0 : f32
|
||||
%cf1 = arith.constant 1.0 : f32
|
||||
%cf2 = arith.constant 2.0 : f32
|
||||
%c0 = arith.constant 0 : index
|
||||
%c1 = arith.constant 1 : index
|
||||
%c32 = arith.constant 32 : index
|
||||
%c64 = arith.constant 64 : index
|
||||
%out = memref.alloc(%c64) : memref<?xf32>
|
||||
%in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref<?xf32>
|
||||
%in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref<?xf32>
|
||||
// Check that the transformation correctly happened.
|
||||
// TRANSFORM: scf.for
|
||||
// TRANSFORM: vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
|
||||
// TRANSFORM: vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
|
||||
// TRANSFORM: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : vector<2xf32>
|
||||
// TRANSFORM: vector.transfer_write {{.*}} : vector<2xf32>, memref<?xf32>
|
||||
// TRANSFORM: }
|
||||
%a = vector.transfer_read %in1[%c0], %cf0: memref<?xf32>, vector<64xf32>
|
||||
%b = vector.transfer_read %in2[%c0], %cf0: memref<?xf32>, vector<64xf32>
|
||||
%acc = arith.addf %a, %b: vector<64xf32>
|
||||
vector.transfer_write %acc, %out[%c0]: vector<64xf32>, memref<?xf32>
|
||||
%converted = memref.cast %out : memref<?xf32> to memref<*xf32>
|
||||
call @printMemrefF32(%converted): (memref<*xf32>) -> ()
|
||||
// CHECK: Unranked{{.*}}data =
|
||||
// CHECK: [
|
||||
// CHECK-SAME: 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27,
|
||||
// CHECK-SAME: 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51,
|
||||
// CHECK-SAME: 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75,
|
||||
// CHECK-SAME: 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,
|
||||
// CHECK-SAME: 101, 103, 105, 107, 109, 111, 113, 115, 117, 119,
|
||||
// CHECK-SAME: 121, 123, 125, 127, 129]
|
||||
memref.dealloc %out : memref<?xf32>
|
||||
memref.dealloc %in1 : memref<?xf32>
|
||||
memref.dealloc %in2 : memref<?xf32>
|
||||
return
|
||||
}
|
||||
@@ -364,126 +364,6 @@ struct TestVectorUnrollingPatterns
|
||||
llvm::cl::init(false)};
|
||||
};
|
||||
|
||||
struct TestVectorDistributePatterns
|
||||
: public PassWrapper<TestVectorDistributePatterns,
|
||||
OperationPass<func::FuncOp>> {
|
||||
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorDistributePatterns)
|
||||
|
||||
StringRef getArgument() const final {
|
||||
return "test-vector-distribute-patterns";
|
||||
}
|
||||
StringRef getDescription() const final {
|
||||
return "Test lowering patterns to distribute vector ops in the vector "
|
||||
"dialect";
|
||||
}
|
||||
TestVectorDistributePatterns() = default;
|
||||
TestVectorDistributePatterns(const TestVectorDistributePatterns &pass)
|
||||
: PassWrapper(pass) {}
|
||||
void getDependentDialects(DialectRegistry ®istry) const override {
|
||||
registry.insert<VectorDialect>();
|
||||
registry.insert<AffineDialect>();
|
||||
}
|
||||
ListOption<int32_t> multiplicity{
|
||||
*this, "distribution-multiplicity",
|
||||
llvm::cl::desc("Set the multiplicity used for distributing vector")};
|
||||
|
||||
void runOnOperation() override {
|
||||
MLIRContext *ctx = &getContext();
|
||||
RewritePatternSet patterns(ctx);
|
||||
func::FuncOp func = getOperation();
|
||||
func.walk([&](arith::AddFOp op) {
|
||||
OpBuilder builder(op);
|
||||
if (auto vecType = op.getType().dyn_cast<VectorType>()) {
|
||||
SmallVector<int64_t, 2> mul;
|
||||
SmallVector<AffineExpr, 2> perm;
|
||||
SmallVector<Value, 2> ids;
|
||||
unsigned count = 0;
|
||||
// Remove the multiplicity of 1 and calculate the affine map based on
|
||||
// the multiplicity.
|
||||
SmallVector<int32_t, 4> m(multiplicity.begin(), multiplicity.end());
|
||||
for (unsigned i = 0, e = vecType.getRank(); i < e; i++) {
|
||||
if (i < m.size() && m[i] != 1 && vecType.getDimSize(i) % m[i] == 0) {
|
||||
mul.push_back(m[i]);
|
||||
ids.push_back(func.getArgument(count++));
|
||||
perm.push_back(getAffineDimExpr(i, ctx));
|
||||
}
|
||||
}
|
||||
auto map = AffineMap::get(op.getType().cast<VectorType>().getRank(), 0,
|
||||
perm, ctx);
|
||||
Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
|
||||
builder, op.getOperation(), ids, mul, map);
|
||||
if (ops) {
|
||||
SmallPtrSet<Operation *, 1> extractOp({ops->extract, ops->insert});
|
||||
op.getResult().replaceAllUsesExcept(ops->insert.getResult(),
|
||||
extractOp);
|
||||
}
|
||||
}
|
||||
});
|
||||
populatePropagateVectorDistributionPatterns(patterns);
|
||||
(void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
|
||||
}
|
||||
};
|
||||
|
||||
struct TestVectorToLoopPatterns
|
||||
: public PassWrapper<TestVectorToLoopPatterns,
|
||||
OperationPass<func::FuncOp>> {
|
||||
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorToLoopPatterns)
|
||||
|
||||
StringRef getArgument() const final { return "test-vector-to-forloop"; }
|
||||
StringRef getDescription() const final {
|
||||
return "Test lowering patterns to break up a vector op into a for loop";
|
||||
}
|
||||
TestVectorToLoopPatterns() = default;
|
||||
TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass)
|
||||
: PassWrapper(pass) {}
|
||||
void getDependentDialects(DialectRegistry ®istry) const override {
|
||||
registry.insert<VectorDialect>();
|
||||
registry.insert<AffineDialect>();
|
||||
}
|
||||
Option<int32_t> multiplicity{
|
||||
*this, "distribution-multiplicity",
|
||||
llvm::cl::desc("Set the multiplicity used for distributing vector"),
|
||||
llvm::cl::init(32)};
|
||||
void runOnOperation() override {
|
||||
MLIRContext *ctx = &getContext();
|
||||
RewritePatternSet patterns(ctx);
|
||||
func::FuncOp func = getOperation();
|
||||
func.walk([&](arith::AddFOp op) {
|
||||
// Check that the operation type can be broken down into a loop.
|
||||
VectorType type = op.getType().dyn_cast<VectorType>();
|
||||
if (!type || type.getRank() != 1 ||
|
||||
type.getNumElements() % multiplicity != 0)
|
||||
return mlir::WalkResult::advance();
|
||||
auto filterAlloc = [](Operation *op) {
|
||||
return !isa<arith::ConstantOp, memref::AllocOp, func::CallOp>(op);
|
||||
};
|
||||
auto dependentOps = getSlice(op, filterAlloc);
|
||||
// Create a loop and move instructions from the Op slice into the loop.
|
||||
OpBuilder builder(op);
|
||||
auto zero = builder.create<arith::ConstantIndexOp>(op.getLoc(), 0);
|
||||
auto one = builder.create<arith::ConstantIndexOp>(op.getLoc(), 1);
|
||||
auto numIter =
|
||||
builder.create<arith::ConstantIndexOp>(op.getLoc(), multiplicity);
|
||||
auto forOp = builder.create<scf::ForOp>(op.getLoc(), zero, numIter, one);
|
||||
for (Operation *it : dependentOps) {
|
||||
it->moveBefore(forOp.getBody()->getTerminator());
|
||||
}
|
||||
auto map = AffineMap::getMultiDimIdentityMap(1, ctx);
|
||||
// break up the original op and let the patterns propagate.
|
||||
Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
|
||||
builder, op.getOperation(), {forOp.getInductionVar()}, {multiplicity},
|
||||
map);
|
||||
if (ops) {
|
||||
SmallPtrSet<Operation *, 1> extractOp({ops->extract, ops->insert});
|
||||
op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp);
|
||||
}
|
||||
return mlir::WalkResult::interrupt();
|
||||
});
|
||||
populatePropagateVectorDistributionPatterns(patterns);
|
||||
(void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
|
||||
}
|
||||
};
|
||||
|
||||
struct TestVectorTransferUnrollingPatterns
|
||||
: public PassWrapper<TestVectorTransferUnrollingPatterns,
|
||||
OperationPass<func::FuncOp>> {
|
||||
@@ -918,10 +798,6 @@ void registerTestVectorLowerings() {
|
||||
|
||||
PassRegistration<TestVectorTransferFullPartialSplitPatterns>();
|
||||
|
||||
PassRegistration<TestVectorDistributePatterns>();
|
||||
|
||||
PassRegistration<TestVectorToLoopPatterns>();
|
||||
|
||||
PassRegistration<TestVectorTransferOpt>();
|
||||
|
||||
PassRegistration<TestVectorTransferLoweringPatterns>();
|
||||
|
||||
Reference in New Issue
Block a user