[mlir][vector] Remove ExtractMap/InsertMap operations

As discussed on Discourse (https://discourse.llvm.org/t/vector-vector-distribution-large-vector-to-small-vector/1983/22),
remove the insert_map/extract_map ops, as vector distribution now uses the
warp_execute_on_lane_0 op.
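
For reference, a minimal sketch (not part of this change) of how the same kind of pointwise distribution is now expressed with warp_execute_on_lane_0; the function and buffer names are illustrative and the op syntax follows the vector dialect at the time of this commit, so details may differ:

```mlir
// Sketch only: distribute a 32-wide addf over 32 lanes. After the
// warp-distribution patterns run, each lane ends up computing a
// vector<1xf32> slice instead of the full vector<32xf32>.
func.func @distribute_add(%laneid: index, %A: memref<32xf32>,
                          %B: memref<32xf32>, %C: memref<32xf32>) {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  vector.warp_execute_on_lane_0(%laneid)[32] {
    %a = vector.transfer_read %A[%c0], %cf0 : memref<32xf32>, vector<32xf32>
    %b = vector.transfer_read %B[%c0], %cf0 : memref<32xf32>, vector<32xf32>
    %sum = arith.addf %a, %b : vector<32xf32>
    vector.transfer_write %sum, %C[%c0] : vector<32xf32>, memref<32xf32>
  }
  return
}
```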

Differential Revision: https://reviews.llvm.org/D134000
Thomas Raoux
2022-09-16 00:39:15 +00:00
parent 587729c3ad
commit 54db8cc7b1
12 changed files with 1 addition and 947 deletions


@@ -119,11 +119,6 @@ void populateVectorMaskMaterializationPatterns(RewritePatternSet &patterns,
bool force32BitVectorIndices,
PatternBenefit benefit = 1);
/// Collect a set of patterns to propagate insert_map/extract_map in the SSA
/// chain.
void populatePropagateVectorDistributionPatterns(RewritePatternSet &patterns,
PatternBenefit benefit = 1);
/// Collects patterns to progressively lower vector.broadcast ops on high-D
/// vectors to low-D vector ops.
void populateVectorBroadcastLoweringPatterns(RewritePatternSet &patterns,


@@ -599,75 +599,6 @@ def Vector_ExtractOp :
let hasVerifier = 1;
}
def Vector_ExtractMapOp :
Vector_Op<"extract_map", [NoSideEffect]>,
Arguments<(ins AnyVector:$vector, Variadic<Index>:$ids)>,
Results<(outs AnyVector)> {
let summary = "vector extract map operation";
let description = [{
Takes an N-D vector and extracts a sub-part of the vector starting at id
along each dimension.
The dimension associated with each element of `ids` used to extract is
implicitly deduced from the destination type. For each dimension, the
multiplicity is the source dimension size divided by the destination
dimension size; each dimension with a multiplicity greater than 1 is
associated with the next id, following the order of ids.
For example, if the source type is `vector<64x4x32xf32>` and the destination
type is `vector<4x4x2xf32>`, the first id maps to dimension 0 and the second
id to dimension 2.
Similarly to vector.tuple_get, this operation is used for progressive
lowering and should be folded away before converting to LLVM.
It is different from `vector.extract_slice` and
`vector.extract_strided_slice` in that it takes a Value as the index instead
of an attribute. In the future it is also meant to support extracting along
any dimension, not only the most major ones.
For instance:
```
// dynamic computation producing the value 0 of index type
%idx0 = ... : index
// dynamic computation producing the value 1 of index type
%idx1 = ... : index
%0 = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
// extracts values [0, 1]
%1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
// extracts values [2, 3]
%2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
```
Example:
```mlir
%ev = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
%ev1 = vector.extract_map %v1[%id1, %id2] : vector<64x4x32xf32>
to vector<4x4x2xf32>
```
}];
let builders = [
OpBuilder<(ins "Value":$vector, "ValueRange":$ids,
"ArrayRef<int64_t>":$multiplicity,
"AffineMap":$map)>];
let extraClassDeclaration = [{
VectorType getSourceVectorType() {
return getVector().getType().cast<VectorType>();
}
VectorType getResultType() {
return getResult().getType().cast<VectorType>();
}
void getMultiplicity(SmallVectorImpl<int64_t> &multiplicity);
AffineMap map();
}];
let assemblyFormat = [{
$vector `[` $ids `]` attr-dict `:` type($vector) `to` type(results)
}];
let hasFolder = 1;
let hasVerifier = 1;
}
def Vector_FMAOp :
Op<Vector_Dialect, "fma", [
NoSideEffect, AllTypesMatch<["lhs", "rhs", "acc", "result"]>,
@@ -790,72 +721,6 @@ def Vector_InsertOp :
let hasVerifier = 1;
}
def Vector_InsertMapOp :
Vector_Op<"insert_map", [NoSideEffect, AllTypesMatch<["dest", "result"]>]>,
Arguments<(ins AnyVector:$vector, AnyVector:$dest, Variadic<Index>:$ids)>,
Results<(outs AnyVector:$result)> {
let summary = "vector insert map operation";
let description = [{
Inserts an N-D vector into a larger vector starting at id. The new
vector created will have the same size as the destination operand vector.
The dimension associated with each element of `ids` used to insert is
implicitly deduced from the source type (see `ExtractMapOp` for details).
For example, if the source type is `vector<4x4x2xf32>` and the destination
type is `vector<64x4x32xf32>`, the first id maps to dimension 0 and the
second id to dimension 2.
Similarly to vector.tuple_get, this operation is used for progressive
lowering and should be folded away before converting to LLVM.
It is different from `vector.insert` and `vector.insert_strided_slice` in
that it takes a Value as the index instead of an attribute. In the future it
is also meant to support inserting along any dimension, not only the most
major ones.
This operation is meant to be used in combination with vector.extract_map.
For instance:
```
// dynamic computation producing the value 0 of index type
%idx0 = ... : index
// dynamic computation producing the value 1 of index type
%idx1 = ... : index
%0 = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
// extracts values [0, 1]
%1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
// extracts values [2, 3]
%2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
// inserts [0, 1] into [x, x, x, x] and produces [0, 1, x, x]
%3 = vector.insert_map %1, %0[%idx0] : vector<2xi32> into vector<4xi32>
// inserts [2, 3] into [x, x, x, x] and produces [x, x, 2, 3]
%4 = vector.insert_map %2, %0[%idx1] : vector<2xi32> into vector<4xi32>
```
Example:
```mlir
%nv = vector.insert_map %ev, %v[%id] : vector<1xf32> into vector<32xf32>
%nv1 = vector.insert_map %ev1, %v1[%arg0, %arg1] : vector<2x4x1xf32>
into vector<64x4x32xf32>
```
}];
let extraClassDeclaration = [{
VectorType getSourceVectorType() {
return getVector().getType().cast<VectorType>();
}
VectorType getResultType() {
return getResult().getType().cast<VectorType>();
}
// Return a map indicating the dimension mapping to the given ids.
AffineMap map();
}];
let assemblyFormat = [{
$vector `,` $dest `[` $ids `]` attr-dict
`:` type($vector) `into` type($result)
}];
let hasVerifier = 1;
}
def Vector_InsertStridedSliceOp :
Vector_Op<"insert_strided_slice", [NoSideEffect,
PredOpTrait<"operand #0 and result have same element type",


@@ -65,28 +65,6 @@ LogicalResult splitFullAndPartialTransfer(
VectorTransformsOptions options = VectorTransformsOptions(),
scf::IfOp *ifOp = nullptr);
struct DistributeOps {
ExtractMapOp extract;
InsertMapOp insert;
};
/// Distribute an N-D vector pointwise operation over a range of given ids taking
/// *all* values in [0 .. multiplicity - 1] (e.g. a loop induction variable or an
/// SPMD id). This transformation only inserts
/// vector.extract_map/vector.insert_map. It is meant to be used with
/// canonicalization patterns to propagate and fold the vector
/// insert_map/extract_map operations.
/// Transforms:
/// %v = arith.addf %a, %b : vector<32xf32>
/// to:
/// %v = arith.addf %a, %b : vector<32xf32>
/// %ev = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
/// %nv = vector.insert_map %ev, %v[%id] : vector<1xf32> into vector<32xf32>
Optional<DistributeOps>
distributPointwiseVectorOp(OpBuilder &builder, Operation *op,
ArrayRef<Value> id, ArrayRef<int64_t> multiplicity,
const AffineMap &map);
/// Implements transfer op write to read forwarding and dead transfer write
/// optimizations.
void transferOpflowOpt(Operation *rootOp);


@@ -1630,81 +1630,6 @@ static void populateFromInt64AttrArray(ArrayAttr arrayAttr,
results.push_back(attr.cast<IntegerAttr>().getInt());
}
//===----------------------------------------------------------------------===//
// ExtractMapOp
//===----------------------------------------------------------------------===//
void ExtractMapOp::build(OpBuilder &builder, OperationState &result,
Value vector, ValueRange ids,
ArrayRef<int64_t> multiplicity,
AffineMap permutationMap) {
assert(ids.size() == multiplicity.size() &&
ids.size() == permutationMap.getNumResults());
assert(permutationMap.isProjectedPermutation());
VectorType type = vector.getType().cast<VectorType>();
SmallVector<int64_t, 4> newShape(type.getShape().begin(),
type.getShape().end());
for (unsigned i = 0, e = permutationMap.getNumResults(); i < e; i++) {
AffineExpr expr = permutationMap.getResult(i);
auto dim = expr.cast<AffineDimExpr>();
newShape[dim.getPosition()] = newShape[dim.getPosition()] / multiplicity[i];
}
VectorType resultType = VectorType::get(newShape, type.getElementType());
ExtractMapOp::build(builder, result, resultType, vector, ids);
}
LogicalResult ExtractMapOp::verify() {
if (getSourceVectorType().getRank() != getResultType().getRank())
return emitOpError("expected source and destination vectors of same rank");
unsigned numId = 0;
for (unsigned i = 0, e = getSourceVectorType().getRank(); i < e; ++i) {
if (getSourceVectorType().getDimSize(i) % getResultType().getDimSize(i) !=
0)
return emitOpError("source vector dimensions must be a multiple of "
"destination vector dimensions");
if (getSourceVectorType().getDimSize(i) != getResultType().getDimSize(i))
numId++;
}
if (numId != getIds().size())
return emitOpError("expected number of ids must match the number of "
"dimensions distributed");
return success();
}
OpFoldResult ExtractMapOp::fold(ArrayRef<Attribute> operands) {
auto insert = getVector().getDefiningOp<vector::InsertMapOp>();
if (insert == nullptr || getType() != insert.getVector().getType() ||
getIds() != insert.getIds())
return {};
return insert.getVector();
}
void ExtractMapOp::getMultiplicity(SmallVectorImpl<int64_t> &multiplicity) {
assert(multiplicity.empty());
for (unsigned i = 0, e = getSourceVectorType().getRank(); i < e; i++) {
if (getSourceVectorType().getDimSize(i) != getResultType().getDimSize(i))
multiplicity.push_back(getSourceVectorType().getDimSize(i) /
getResultType().getDimSize(i));
}
}
template <typename MapOp>
AffineMap calculateImplicitMap(MapOp op) {
SmallVector<AffineExpr, 4> perm;
// Check which dimensions have a multiplicity greater than 1 and associate
// them with the IDs in order.
for (unsigned i = 0, e = op.getSourceVectorType().getRank(); i < e; i++) {
if (op.getSourceVectorType().getDimSize(i) !=
op.getResultType().getDimSize(i))
perm.push_back(getAffineDimExpr(i, op.getContext()));
}
auto map = AffineMap::get(op.getSourceVectorType().getRank(), 0, perm,
op.getContext());
return map;
}
AffineMap ExtractMapOp::map() { return calculateImplicitMap(*this); }
//===----------------------------------------------------------------------===//
// FmaOp
//===----------------------------------------------------------------------===//
@@ -2133,30 +2058,6 @@ OpFoldResult vector::InsertOp::fold(ArrayRef<Attribute> operands) {
return {};
}
//===----------------------------------------------------------------------===//
// InsertMapOp
//===----------------------------------------------------------------------===//
LogicalResult InsertMapOp::verify() {
if (getSourceVectorType().getRank() != getResultType().getRank())
return emitOpError("expected source and destination vectors of same rank");
unsigned numId = 0;
for (unsigned i = 0, e = getResultType().getRank(); i < e; i++) {
if (getResultType().getDimSize(i) % getSourceVectorType().getDimSize(i) !=
0)
return emitOpError(
"destination vector size must be a multiple of source vector size");
if (getResultType().getDimSize(i) != getSourceVectorType().getDimSize(i))
numId++;
}
if (numId != getIds().size())
return emitOpError("expected number of ids must match the number of "
"dimensions distributed");
return success();
}
AffineMap InsertMapOp::map() { return calculateImplicitMap(*this); }
//===----------------------------------------------------------------------===//
// InsertStridedSliceOp
//===----------------------------------------------------------------------===//


@@ -9,7 +9,7 @@ add_mlir_dialect_library(MLIRVectorTransforms
VectorTransferSplitRewritePatterns.cpp
VectorTransferPermutationMapRewritePatterns.cpp
VectorTransforms.cpp
VectorUnrollDistribute.cpp
VectorUnroll.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Vector/Transforms


@@ -1998,37 +1998,6 @@ ContractionOpLowering::lowerReduction(vector::ContractionOp op,
} // namespace mlir
Optional<mlir::vector::DistributeOps> mlir::vector::distributPointwiseVectorOp(
OpBuilder &builder, Operation *op, ArrayRef<Value> ids,
ArrayRef<int64_t> multiplicity, const AffineMap &map) {
OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPointAfter(op);
Location loc = op->getLoc();
if (op->getNumResults() != 1)
return {};
Value result = op->getResult(0);
VectorType type = op->getResult(0).getType().dyn_cast<VectorType>();
if (!type || map.getNumResults() != multiplicity.size())
return {};
// For each dimension being distributed check that the size is a multiple of
// the multiplicity. To handle more sizes we would need to support masking.
unsigned multiplictyCount = 0;
for (auto exp : map.getResults()) {
auto affinExp = exp.dyn_cast<AffineDimExpr>();
if (!affinExp || affinExp.getPosition() >= type.getRank() ||
type.getDimSize(affinExp.getPosition()) %
multiplicity[multiplictyCount++] !=
0)
return {};
}
DistributeOps ops;
ops.extract =
builder.create<vector::ExtractMapOp>(loc, result, ids, multiplicity, map);
ops.insert =
builder.create<vector::InsertMapOp>(loc, ops.extract, result, ids);
return ops;
}
/// Progressive lowering of transfer_read. This pattern supports lowering of
/// `vector.transfer_read` to a combination of `vector.load` and
/// `vector.broadcast` if all of the following hold:


@@ -538,202 +538,6 @@ private:
vector::UnrollVectorOptions options;
};
/// Canonicalize an extract_map using the result of a pointwise operation.
/// Transforms:
/// %v = arith.addf %a, %b : vector<32xf32>
/// %dv = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
/// to:
/// %da = vector.extract_map %a[%id] : vector<32xf32> to vector<1xf32>
/// %db = vector.extract_map %b[%id] : vector<32xf32> to vector<1xf32>
/// %dv = arith.addf %da, %db : vector<1xf32>
struct PointwiseExtractPattern : public OpRewritePattern<vector::ExtractMapOp> {
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(vector::ExtractMapOp extract,
PatternRewriter &rewriter) const override {
Operation *definedOp = extract.getVector().getDefiningOp();
if (!definedOp || !OpTrait::hasElementwiseMappableTraits(definedOp) ||
definedOp->getNumResults() != 1)
return failure();
Location loc = extract.getLoc();
SmallVector<Value, 4> extractOperands;
for (OpOperand &operand : definedOp->getOpOperands()) {
auto vecType = operand.get().getType().template dyn_cast<VectorType>();
if (!vecType) {
extractOperands.push_back(operand.get());
continue;
}
extractOperands.push_back(rewriter.create<vector::ExtractMapOp>(
loc,
VectorType::get(extract.getResultType().getShape(),
vecType.getElementType()),
operand.get(), extract.getIds()));
}
Operation *newOp = cloneOpWithOperandsAndTypes(
rewriter, loc, definedOp, extractOperands, extract.getResultType());
rewriter.replaceOp(extract, newOp->getResult(0));
return success();
}
};
/// Canonicalize an extract_map using the result of a contract operation.
/// This propagates the extract_map to the operands.
struct ContractExtractPattern : public OpRewritePattern<vector::ExtractMapOp> {
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(vector::ExtractMapOp extract,
PatternRewriter &rewriter) const override {
Operation *definedOp = extract.getVector().getDefiningOp();
auto contract = dyn_cast_or_null<vector::ContractionOp>(definedOp);
if (!contract)
return failure();
Location loc = contract.getLoc();
unsigned accIndex = vector::ContractionOp::getAccOperandIndex();
AffineMap affineMap = contract.getIndexingMapsArray()[accIndex];
// Create a map of the dimensions distributed based on the acc affine map.
// Only parallel dimensions are being distributed, reduction dimensions are
// untouched.
DenseMap<int64_t, int64_t> map;
for (unsigned i : llvm::seq(unsigned(0), affineMap.getNumResults()))
map[affineMap.getDimPosition(i)] = extract.getResultType().getDimSize(i);
SmallVector<Value, 4> extractOperands;
for (const auto &it : llvm::enumerate(contract.getIndexingMapsArray())) {
// For each operand, calculate the new vector type after distribution.
Value operand = contract->getOperand(it.index());
auto vecType = operand.getType().cast<VectorType>();
SmallVector<int64_t> operandShape(vecType.getShape().begin(),
vecType.getShape().end());
for (unsigned i : llvm::seq(unsigned(0), it.value().getNumResults())) {
unsigned dim = it.value().getDimPosition(i);
auto distributedDim = map.find(dim);
// If the dimension is not in the map it means it is a reduction and
// doesn't get distributed.
if (distributedDim == map.end())
continue;
operandShape[i] = distributedDim->second;
}
VectorType newVecType =
VectorType::get(operandShape, vecType.getElementType());
extractOperands.push_back(rewriter.create<vector::ExtractMapOp>(
loc, newVecType, operand, extract.getIds()));
}
Operation *newOp =
cloneOpWithOperandsAndTypes(rewriter, loc, definedOp, extractOperands,
extract.getResult().getType());
rewriter.replaceOp(extract, newOp->getResult(0));
return success();
}
};
/// Converts a TransferRead op used by an ExtractMap op into a TransferRead of
/// a smaller vector.
/// Example:
/// ```
/// %a = vector.transfer_read %A[%c0, %c0, %c0], %cf0:
/// memref<64x64x64xf32>, vector<64x4x32xf32>
/// %e = vector.extract_map %a[%id] : vector<64x4x32xf32> to vector<2x4x1xf32>
/// ```
/// to:
/// ```
/// %id1 = affine.apply affine_map<()[s0] -> (s0 * 2)> (%id)
/// %e = vector.transfer_read %A[%id1, %c0, %id1], %cf0 :
/// memref<64x64x64xf32>, vector<2x4x1xf32>
/// ```
struct TransferReadExtractPattern
: public OpRewritePattern<vector::TransferReadOp> {
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(vector::TransferReadOp read,
PatternRewriter &rewriter) const override {
// TODO: support 0-d corner case.
if (read.getTransferRank() == 0)
return failure();
if (!read.getResult().hasOneUse())
return failure();
auto extract =
dyn_cast<vector::ExtractMapOp>(*read.getResult().getUsers().begin());
if (!extract)
return failure();
if (read.getMask())
return failure();
SmallVector<Value, 4> indices(read.getIndices().begin(),
read.getIndices().end());
AffineMap indexMap = extract.map().compose(read.getPermutationMap());
unsigned idCount = 0;
ImplicitLocOpBuilder lb(read.getLoc(), rewriter);
for (auto it :
llvm::zip(indexMap.getResults(), extract.map().getResults())) {
AffineExpr d0, d1;
bindDims(read.getContext(), d0, d1);
auto indexExpr = std::get<0>(it).dyn_cast<AffineDimExpr>();
if (!indexExpr)
continue;
unsigned indexPos = indexExpr.getPosition();
unsigned vectorPos = std::get<1>(it).cast<AffineDimExpr>().getPosition();
auto scale = getAffineConstantExpr(
extract.getResultType().getDimSize(vectorPos), read.getContext());
indices[indexPos] = makeComposedAffineApply(
rewriter, read.getLoc(), d0 + scale * d1,
{indices[indexPos], extract.getIds()[idCount++]});
}
Value newRead = lb.create<vector::TransferReadOp>(
extract.getType(), read.getSource(), indices,
read.getPermutationMapAttr(), read.getPadding(), read.getMask(),
read.getInBoundsAttr());
Value dest = lb.create<arith::ConstantOp>(
read.getType(), rewriter.getZeroAttr(read.getType()));
newRead = lb.create<vector::InsertMapOp>(newRead, dest, extract.getIds());
rewriter.replaceOp(read, newRead);
return success();
}
};
struct TransferWriteInsertPattern
: public OpRewritePattern<vector::TransferWriteOp> {
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(vector::TransferWriteOp write,
PatternRewriter &rewriter) const override {
// TODO: support 0-d corner case.
if (write.getTransferRank() == 0)
return failure();
auto insert = write.getVector().getDefiningOp<vector::InsertMapOp>();
if (!insert)
return failure();
if (write.getMask())
return failure();
SmallVector<Value, 4> indices(write.getIndices().begin(),
write.getIndices().end());
AffineMap indexMap = insert.map().compose(write.getPermutationMap());
unsigned idCount = 0;
Location loc = write.getLoc();
for (auto it :
llvm::zip(indexMap.getResults(), insert.map().getResults())) {
AffineExpr d0, d1;
bindDims(write.getContext(), d0, d1);
auto indexExpr = std::get<0>(it).dyn_cast<AffineDimExpr>();
if (!indexExpr)
continue;
unsigned indexPos = indexExpr.getPosition();
unsigned vectorPos = std::get<1>(it).cast<AffineDimExpr>().getPosition();
auto scale = getAffineConstantExpr(
insert.getSourceVectorType().getDimSize(vectorPos),
write.getContext());
indices[indexPos] = makeComposedAffineApply(
rewriter, loc, d0 + scale * d1,
{indices[indexPos], insert.getIds()[idCount++]});
}
rewriter.create<vector::TransferWriteOp>(
loc, insert.getVector(), write.getSource(), indices,
write.getPermutationMapAttr(), write.getInBoundsAttr());
rewriter.eraseOp(write);
return success();
}
};
struct UnrollReductionPattern : public OpRewritePattern<vector::ReductionOp> {
UnrollReductionPattern(MLIRContext *context,
const vector::UnrollVectorOptions &options,
@@ -841,10 +645,3 @@ void mlir::vector::populateVectorUnrollPatterns(
UnrollReductionPattern, UnrollMultiReductionPattern,
UnrollTranposePattern>(patterns.getContext(), options, benefit);
}
void mlir::vector::populatePropagateVectorDistributionPatterns(
RewritePatternSet &patterns, PatternBenefit benefit) {
patterns.add<PointwiseExtractPattern, ContractExtractPattern,
TransferReadExtractPattern, TransferWriteInsertPattern>(
patterns.getContext(), benefit);
}


@@ -1470,48 +1470,6 @@ func.func @compress_memref_mismatch(%base: memref<?x?xf32>, %mask: vector<16xi1>
// -----
func.func @extract_map_rank(%v: vector<32xf32>, %id : index) {
// expected-error@+1 {{'vector.extract_map' op expected source and destination vectors of same rank}}
%0 = vector.extract_map %v[%id] : vector<32xf32> to vector<2x1xf32>
}
// -----
func.func @extract_map_size(%v: vector<63xf32>, %id : index) {
// expected-error@+1 {{'vector.extract_map' op source vector dimensions must be a multiple of destination vector dimensions}}
%0 = vector.extract_map %v[%id] : vector<63xf32> to vector<2xf32>
}
// -----
func.func @extract_map_id(%v: vector<2x32xf32>, %id : index) {
// expected-error@+1 {{'vector.extract_map' op expected number of ids must match the number of dimensions distributed}}
%0 = vector.extract_map %v[%id] : vector<2x32xf32> to vector<1x1xf32>
}
// -----
func.func @insert_map_rank(%v: vector<2x1xf32>, %v1: vector<32xf32>, %id : index) {
// expected-error@+1 {{'vector.insert_map' op expected source and destination vectors of same rank}}
%0 = vector.insert_map %v, %v1[%id] : vector<2x1xf32> into vector<32xf32>
}
// -----
func.func @insert_map_size(%v: vector<3xf32>, %v1: vector<64xf32>, %id : index) {
// expected-error@+1 {{'vector.insert_map' op destination vector size must be a multiple of source vector size}}
%0 = vector.insert_map %v, %v1[%id] : vector<3xf32> into vector<64xf32>
}
// -----
func.func @insert_map_id(%v: vector<2x1xf32>, %v1: vector<4x32xf32>, %id : index) {
// expected-error@+1 {{'vector.insert_map' op expected number of ids must match the number of dimensions distributed}}
%0 = vector.insert_map %v, %v1[%id] : vector<2x1xf32> into vector<4x32xf32>
}
// -----
func.func @scan_reduction_dim_constraint(%arg0: vector<2x3xi32>, %arg1: vector<3xi32>) -> vector<3xi32> {
// expected-error@+1 {{'vector.scan' op reduction dimension 5 has to be less than 2}}
%0:2 = vector.scan <add>, %arg0, %arg1 {inclusive = true, reduction_dim = 5} :


@@ -754,21 +754,6 @@ func.func @expand_and_compress2d(%base: memref<?x?xf32>, %mask: vector<16xi1>, %
return
}
// CHECK-LABEL: @extract_insert_map
func.func @extract_insert_map(%v: vector<32xf32>, %v2: vector<16x32xf32>,
%id0 : index, %id1 : index) -> (vector<32xf32>, vector<16x32xf32>) {
// CHECK: %[[V:.*]] = vector.extract_map %{{.*}}[%{{.*}}] : vector<32xf32> to vector<2xf32>
%vd = vector.extract_map %v[%id0] : vector<32xf32> to vector<2xf32>
// CHECK: %[[V1:.*]] = vector.extract_map %{{.*}}[%{{.*}}, %{{.*}}] : vector<16x32xf32> to vector<4x2xf32>
%vd2 = vector.extract_map %v2[%id0, %id1] : vector<16x32xf32> to vector<4x2xf32>
// CHECK: %[[R:.*]] = vector.insert_map %[[V]], %{{.*}}[%{{.*}}] : vector<2xf32> into vector<32xf32>
%r = vector.insert_map %vd, %v[%id0] : vector<2xf32> into vector<32xf32>
// CHECK: %[[R1:.*]] = vector.insert_map %[[V1]], %{{.*}}[%{{.*}}, %{{.*}}] : vector<4x2xf32> into vector<16x32xf32>
%r2 = vector.insert_map %vd2, %v2[%id0, %id1] : vector<4x2xf32> into vector<16x32xf32>
// CHECK: return %[[R]], %[[R1]] : vector<32xf32>, vector<16x32xf32>
return %r, %r2 : vector<32xf32>, vector<16x32xf32>
}
// CHECK-LABEL: @multi_reduction
func.func @multi_reduction(%0: vector<4x8x16x32xf32>, %acc0: vector<4x16xf32>,
%acc1: f32) -> f32 {


@@ -1,204 +0,0 @@
// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32,1,32 -split-input-file | FileCheck %s
// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32,4 -split-input-file | FileCheck %s --check-prefix=CHECK2D
// CHECK-LABEL: func @distribute_vector_add
// CHECK-SAME: (%[[ID:.*]]: index
// CHECK-NEXT: %[[ADDV:.*]] = arith.addf %{{.*}}, %{{.*}} : vector<32xf32>
// CHECK-NEXT: %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
// CHECK-NEXT: %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<1xf32>
// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID]]] : vector<1xf32> into vector<32xf32>
// CHECK-NEXT: return %[[INS]] : vector<32xf32>
func.func @distribute_vector_add(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> {
%0 = arith.addf %A, %B : vector<32xf32>
return %0: vector<32xf32>
}
// -----
// CHECK-LABEL: func @distribute_vector_add_exp
// CHECK-SAME: (%[[ID:.*]]: index
// CHECK-NEXT: %[[EXPV:.*]] = math.exp %{{.*}} : vector<32xf32>
// CHECK-NEXT: %[[ADDV:.*]] = arith.addf %[[EXPV]], %{{.*}} : vector<32xf32>
// CHECK-NEXT: %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
// CHECK-NEXT: %[[EXC:.*]] = math.exp %[[EXA]] : vector<1xf32>
// CHECK-NEXT: %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXC]], %[[EXB]] : vector<1xf32>
// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID]]] : vector<1xf32> into vector<32xf32>
// CHECK-NEXT: return %[[INS]] : vector<32xf32>
func.func @distribute_vector_add_exp(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> {
%C = math.exp %A : vector<32xf32>
%0 = arith.addf %C, %B : vector<32xf32>
return %0: vector<32xf32>
}
// -----
// CHECK-LABEL: func @vector_add_read_write
// CHECK-SAME: (%[[ID:.*]]: index
// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<1xf32>
// CHECK-NEXT: %[[EXC:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[ADD1]], %[[EXC]] : vector<1xf32>
// CHECK-NEXT: vector.transfer_write %[[ADD2]], %{{.*}}[%[[ID]]] {{.*}} : vector<1xf32>, memref<32xf32>
// CHECK-NEXT: return
func.func @vector_add_read_write(%id : index, %A: memref<32xf32>, %B: memref<32xf32>, %C: memref<32xf32>, %D: memref<32xf32>) {
%c0 = arith.constant 0 : index
%cf0 = arith.constant 0.0 : f32
%a = vector.transfer_read %A[%c0], %cf0: memref<32xf32>, vector<32xf32>
%b = vector.transfer_read %B[%c0], %cf0: memref<32xf32>, vector<32xf32>
%acc = arith.addf %a, %b: vector<32xf32>
%c = vector.transfer_read %C[%c0], %cf0: memref<32xf32>, vector<32xf32>
%d = arith.addf %acc, %c: vector<32xf32>
vector.transfer_write %d, %D[%c0]: vector<32xf32>, memref<32xf32>
return
}
// -----
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
// CHECK: func @vector_add_cycle
// CHECK-SAME: (%[[ID:.*]]: index
// CHECK: %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]]], %{{.*}} : memref<64xf32>, vector<2xf32>
// CHECK-NEXT: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]]], %{{.*}} : memref<64xf32>, vector<2xf32>
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2xf32>
// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]]] {{.*}} : vector<2xf32>, memref<64xf32>
// CHECK-NEXT: return
func.func @vector_add_cycle(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {
%c0 = arith.constant 0 : index
%cf0 = arith.constant 0.0 : f32
%a = vector.transfer_read %A[%c0], %cf0: memref<64xf32>, vector<64xf32>
%b = vector.transfer_read %B[%c0], %cf0: memref<64xf32>, vector<64xf32>
%acc = arith.addf %a, %b: vector<64xf32>
vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref<64xf32>
return
}
// -----
// Negative test to make sure nothing is done in case the vector size is not a
// multiple of multiplicity.
// CHECK-LABEL: func @vector_negative_test
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<16xf32>
// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[C0]]] {{.*}} : vector<16xf32>, memref<64xf32>
// CHECK-NEXT: return
func.func @vector_negative_test(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {
%c0 = arith.constant 0 : index
%cf0 = arith.constant 0.0 : f32
%a = vector.transfer_read %A[%c0], %cf0: memref<64xf32>, vector<16xf32>
%b = vector.transfer_read %B[%c0], %cf0: memref<64xf32>, vector<16xf32>
%acc = arith.addf %a, %b: vector<16xf32>
vector.transfer_write %acc, %C[%c0]: vector<16xf32>, memref<64xf32>
return
}
// -----
// CHECK-LABEL: func @distribute_vector_add_3d
// CHECK-SAME: (%[[ID0:.*]]: index, %[[ID1:.*]]: index
// CHECK-NEXT: %[[ADDV:.*]] = arith.addf %{{.*}}, %{{.*}} : vector<64x4x32xf32>
// CHECK-NEXT: %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID0]], %[[ID1]]] : vector<64x4x32xf32> to vector<2x4x1xf32>
// CHECK-NEXT: %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID0]], %[[ID1]]] : vector<64x4x32xf32> to vector<2x4x1xf32>
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID0]], %[[ID1]]] : vector<2x4x1xf32> into vector<64x4x32xf32>
// CHECK-NEXT: return %[[INS]] : vector<64x4x32xf32>
func.func @distribute_vector_add_3d(%id0 : index, %id1 : index,
%A: vector<64x4x32xf32>, %B: vector<64x4x32xf32>) -> vector<64x4x32xf32> {
%0 = arith.addf %A, %B : vector<64x4x32xf32>
return %0: vector<64x4x32xf32>
}
// -----
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
// CHECK: func @vector_add_transfer_3d
// CHECK-SAME: (%[[ID_0:.*]]: index, %[[ID_1:.*]]: index
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]], %[[C0]], %[[ID_1]]], %{{.*}} : memref<64x64x64xf32>, vector<2x4x1xf32>
// CHECK-NEXT: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]], %[[C0]], %[[ID_1]]], %{{.*}} : memref<64x64x64xf32>, vector<2x4x1xf32>
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]], %[[C0]], %[[ID_1]]] {{.*}} : vector<2x4x1xf32>, memref<64x64x64xf32>
// CHECK-NEXT: return
func.func @vector_add_transfer_3d(%id0 : index, %id1 : index, %A: memref<64x64x64xf32>,
%B: memref<64x64x64xf32>, %C: memref<64x64x64xf32>) {
%c0 = arith.constant 0 : index
%cf0 = arith.constant 0.0 : f32
%a = vector.transfer_read %A[%c0, %c0, %c0], %cf0: memref<64x64x64xf32>, vector<64x4x32xf32>
%b = vector.transfer_read %B[%c0, %c0, %c0], %cf0: memref<64x64x64xf32>, vector<64x4x32xf32>
%acc = arith.addf %a, %b: vector<64x4x32xf32>
vector.transfer_write %acc, %C[%c0, %c0, %c0]: vector<64x4x32xf32>, memref<64x64x64xf32>
return
}
// -----
#map0 = affine_map<(d0, d1, d2, d3) -> (d3, 0, 0)>
#map1 = affine_map<(d0, d1, d2, d3) -> (0, d3, d0)>
#map2 = affine_map<(d0, d1, d2, d3) -> (d3, d2, d1)>
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, 0, 0)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2, d3) -> (0, d3, d0)>
// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, d2, d1)>
// CHECK: func @vector_add_transfer_permutation
// CHECK-SAME: (%[[ID_0:.*]]: index, %[[ID_1:.*]]: index
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[ID2]]], %{{.*}} {permutation_map = #[[MAP1]]} : memref<?x?x?x?xf32>, vector<2x4x1xf32>
// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID_0]], %[[C0]], %[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP2]]} : memref<?x?x?x?xf32>, vector<2x4x1xf32>
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[C0]], %[[ID_1]], %[[C0]], %[[ID3]]] {permutation_map = #[[MAP3]]} : vector<2x4x1xf32>, memref<?x?x?x?xf32>
// CHECK-NEXT: return
func.func @vector_add_transfer_permutation(%id0 : index, %id1 : index, %A: memref<?x?x?x?xf32>,
%B: memref<?x?x?x?xf32>, %C: memref<?x?x?x?xf32>) {
%c0 = arith.constant 0 : index
%cf0 = arith.constant 0.0 : f32
%a = vector.transfer_read %A[%c0, %c0, %c0, %c0], %cf0 {permutation_map = #map0} : memref<?x?x?x?xf32>, vector<64x4x32xf32>
%b = vector.transfer_read %B[%c0, %c0, %c0, %c0], %cf0 {permutation_map = #map1}: memref<?x?x?x?xf32>, vector<64x4x32xf32>
%acc = arith.addf %a, %b: vector<64x4x32xf32>
vector.transfer_write %acc, %C[%c0, %c0, %c0, %c0] {permutation_map = #map2}: vector<64x4x32xf32>, memref<?x?x?x?xf32>
return
}
// -----
// CHECK2D-LABEL: vector_add_contract
// CHECK2D: %[[A:.+]] = vector.transfer_read %arg2[%0, %c0], %cst : memref<?x?xf32>, vector<2x4xf32>
// CHECK2D: %[[B:.+]] = vector.transfer_read %arg3[%2, %c0], %cst : memref<?x?xf32>, vector<16x4xf32>
// CHECK2D: %[[C:.+]] = vector.transfer_read %arg4[%4, %5], %cst : memref<?x?xf32>, vector<2x16xf32>
// CHECK2D: %[[E:.+]] = vector.transfer_read %arg5[%7, %8], %cst : memref<?x?xf32>, vector<2x16xf32>
// CHECK2D: %[[D:.+]] = vector.contract {{.*}} %[[A]], %[[B]], %[[C]] : vector<2x4xf32>, vector<16x4xf32> into vector<2x16xf32>
// CHECK2D: %[[R:.+]] = arith.addf %[[D]], %[[E]] : vector<2x16xf32>
// CHECK2D: vector.transfer_write %[[R]], {{.*}} : vector<2x16xf32>, memref<?x?xf32>
func.func @vector_add_contract(%id0 : index, %id1 : index, %A: memref<?x?xf32>,
%B: memref<?x?xf32>, %C: memref<?x?xf32>, %D: memref<?x?xf32>) {
%c0 = arith.constant 0 : index
%cf0 = arith.constant 0.0 : f32
%a = vector.transfer_read %A[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x4xf32>
%b = vector.transfer_read %B[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x4xf32>
%c = vector.transfer_read %C[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x64xf32>
%d = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
affine_map<(d0, d1, d2) -> (d1, d2)>,
affine_map<(d0, d1, d2) -> (d0, d1)>],
iterator_types = ["parallel", "parallel", "reduction"],
kind = #vector.kind<add>}
%a, %b, %c : vector<64x4xf32>, vector<64x4xf32> into vector<64x64xf32>
%e = vector.transfer_read %D[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x64xf32>
%r = arith.addf %d, %e : vector<64x64xf32>
vector.transfer_write %r, %C[%c0, %c0] : vector<64x64xf32>, memref<?x?xf32>
return
}


@@ -1,66 +0,0 @@
// RUN: mlir-opt %s -pass-pipeline="func.func(test-vector-to-forloop,convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext | \
// RUN: FileCheck %s
// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e main \
// RUN: -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext | \
// RUN: FileCheck %s
// RUN: mlir-opt %s -pass-pipeline="func.func(test-vector-to-forloop)" | FileCheck %s -check-prefix=TRANSFORM
func.func private @printMemrefF32(memref<*xf32>)
func.func @alloc_1d_filled_inc_f32(%arg0: index, %arg1: f32) -> memref<?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = memref.alloc(%arg0) : memref<?xf32>
scf.for %arg2 = %c0 to %arg0 step %c1 {
%tmp = arith.index_cast %arg2 : index to i32
%tmp1 = arith.sitofp %tmp : i32 to f32
%tmp2 = arith.addf %tmp1, %arg1 : f32
memref.store %tmp2, %0[%arg2] : memref<?xf32>
}
return %0 : memref<?xf32>
}
// Large vector addf that can be broken down into a loop of smaller vector addf.
func.func @main() {
%cf0 = arith.constant 0.0 : f32
%cf1 = arith.constant 1.0 : f32
%cf2 = arith.constant 2.0 : f32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c32 = arith.constant 32 : index
%c64 = arith.constant 64 : index
%out = memref.alloc(%c64) : memref<?xf32>
%in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref<?xf32>
%in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref<?xf32>
// Check that the transformation happened correctly.
// TRANSFORM: scf.for
// TRANSFORM: vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
// TRANSFORM: vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
// TRANSFORM: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : vector<2xf32>
// TRANSFORM: vector.transfer_write {{.*}} : vector<2xf32>, memref<?xf32>
// TRANSFORM: }
%a = vector.transfer_read %in1[%c0], %cf0: memref<?xf32>, vector<64xf32>
%b = vector.transfer_read %in2[%c0], %cf0: memref<?xf32>, vector<64xf32>
%acc = arith.addf %a, %b: vector<64xf32>
vector.transfer_write %acc, %out[%c0]: vector<64xf32>, memref<?xf32>
%converted = memref.cast %out : memref<?xf32> to memref<*xf32>
call @printMemrefF32(%converted): (memref<*xf32>) -> ()
// CHECK: Unranked{{.*}}data =
// CHECK: [
// CHECK-SAME: 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27,
// CHECK-SAME: 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51,
// CHECK-SAME: 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75,
// CHECK-SAME: 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,
// CHECK-SAME: 101, 103, 105, 107, 109, 111, 113, 115, 117, 119,
// CHECK-SAME: 121, 123, 125, 127, 129]
memref.dealloc %out : memref<?xf32>
memref.dealloc %in1 : memref<?xf32>
memref.dealloc %in2 : memref<?xf32>
return
}


@@ -364,126 +364,6 @@ struct TestVectorUnrollingPatterns
llvm::cl::init(false)};
};
struct TestVectorDistributePatterns
: public PassWrapper<TestVectorDistributePatterns,
OperationPass<func::FuncOp>> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorDistributePatterns)
StringRef getArgument() const final {
return "test-vector-distribute-patterns";
}
StringRef getDescription() const final {
return "Test lowering patterns to distribute vector ops in the vector "
"dialect";
}
TestVectorDistributePatterns() = default;
TestVectorDistributePatterns(const TestVectorDistributePatterns &pass)
: PassWrapper(pass) {}
void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<VectorDialect>();
registry.insert<AffineDialect>();
}
ListOption<int32_t> multiplicity{
*this, "distribution-multiplicity",
llvm::cl::desc("Set the multiplicity used for distributing vector")};
void runOnOperation() override {
MLIRContext *ctx = &getContext();
RewritePatternSet patterns(ctx);
func::FuncOp func = getOperation();
func.walk([&](arith::AddFOp op) {
OpBuilder builder(op);
if (auto vecType = op.getType().dyn_cast<VectorType>()) {
SmallVector<int64_t, 2> mul;
SmallVector<AffineExpr, 2> perm;
SmallVector<Value, 2> ids;
unsigned count = 0;
// Remove the multiplicity of 1 and calculate the affine map based on
// the multiplicity.
SmallVector<int32_t, 4> m(multiplicity.begin(), multiplicity.end());
for (unsigned i = 0, e = vecType.getRank(); i < e; i++) {
if (i < m.size() && m[i] != 1 && vecType.getDimSize(i) % m[i] == 0) {
mul.push_back(m[i]);
ids.push_back(func.getArgument(count++));
perm.push_back(getAffineDimExpr(i, ctx));
}
}
auto map = AffineMap::get(op.getType().cast<VectorType>().getRank(), 0,
perm, ctx);
Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
builder, op.getOperation(), ids, mul, map);
if (ops) {
SmallPtrSet<Operation *, 1> extractOp({ops->extract, ops->insert});
op.getResult().replaceAllUsesExcept(ops->insert.getResult(),
extractOp);
}
}
});
populatePropagateVectorDistributionPatterns(patterns);
(void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
}
};
struct TestVectorToLoopPatterns
: public PassWrapper<TestVectorToLoopPatterns,
OperationPass<func::FuncOp>> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorToLoopPatterns)
StringRef getArgument() const final { return "test-vector-to-forloop"; }
StringRef getDescription() const final {
return "Test lowering patterns to break up a vector op into a for loop";
}
TestVectorToLoopPatterns() = default;
TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass)
: PassWrapper(pass) {}
void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<VectorDialect>();
registry.insert<AffineDialect>();
}
Option<int32_t> multiplicity{
*this, "distribution-multiplicity",
llvm::cl::desc("Set the multiplicity used for distributing vector"),
llvm::cl::init(32)};
void runOnOperation() override {
MLIRContext *ctx = &getContext();
RewritePatternSet patterns(ctx);
func::FuncOp func = getOperation();
func.walk([&](arith::AddFOp op) {
// Check that the operation type can be broken down into a loop.
VectorType type = op.getType().dyn_cast<VectorType>();
if (!type || type.getRank() != 1 ||
type.getNumElements() % multiplicity != 0)
return mlir::WalkResult::advance();
auto filterAlloc = [](Operation *op) {
return !isa<arith::ConstantOp, memref::AllocOp, func::CallOp>(op);
};
auto dependentOps = getSlice(op, filterAlloc);
// Create a loop and move instructions from the Op slice into the loop.
OpBuilder builder(op);
auto zero = builder.create<arith::ConstantIndexOp>(op.getLoc(), 0);
auto one = builder.create<arith::ConstantIndexOp>(op.getLoc(), 1);
auto numIter =
builder.create<arith::ConstantIndexOp>(op.getLoc(), multiplicity);
auto forOp = builder.create<scf::ForOp>(op.getLoc(), zero, numIter, one);
for (Operation *it : dependentOps) {
it->moveBefore(forOp.getBody()->getTerminator());
}
auto map = AffineMap::getMultiDimIdentityMap(1, ctx);
// break up the original op and let the patterns propagate.
Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
builder, op.getOperation(), {forOp.getInductionVar()}, {multiplicity},
map);
if (ops) {
SmallPtrSet<Operation *, 1> extractOp({ops->extract, ops->insert});
op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp);
}
return mlir::WalkResult::interrupt();
});
populatePropagateVectorDistributionPatterns(patterns);
(void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
}
};
struct TestVectorTransferUnrollingPatterns
: public PassWrapper<TestVectorTransferUnrollingPatterns,
OperationPass<func::FuncOp>> {
@@ -918,10 +798,6 @@ void registerTestVectorLowerings() {
PassRegistration<TestVectorTransferFullPartialSplitPatterns>();
PassRegistration<TestVectorDistributePatterns>();
PassRegistration<TestVectorToLoopPatterns>();
PassRegistration<TestVectorTransferOpt>();
PassRegistration<TestVectorTransferLoweringPatterns>();