[mlir][vector] Remove ExtractMap/InsertMap operations

As discussed on Discourse (https://discourse.llvm.org/t/vector-vector-distribution-large-vector-to-small-vector/1983/22), remove the insert_map/extract_map ops, since vector distribution now uses the warp_execute_on_lane_0 op instead. Differential Revision: https://reviews.llvm.org/D134000
2026-02-08 17:28:30 +08:00 · 2022-09-16 00:39:15 +00:00
parent 587729c3ad
commit 54db8cc7b1
12 changed files with 1 additions and 947 deletions
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
@@ -119,11 +119,6 @@ void populateVectorMaskMaterializationPatterns(RewritePatternSet &patterns,
                                               bool force32BitVectorIndices,
                                               PatternBenefit benefit = 1);

-/// Collect a set of patterns to propagate insert_map/extract_map in the ssa
-/// chain.
-void populatePropagateVectorDistributionPatterns(RewritePatternSet &patterns,
-                                                 PatternBenefit benefit = 1);
-
 /// Collects patterns to progressively lower vector.broadcast ops on high-D
 /// vectors to low-D vector ops.
 void populateVectorBroadcastLoweringPatterns(RewritePatternSet &patterns,
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -599,75 +599,6 @@ def Vector_ExtractOp :
  let hasVerifier = 1;
 }

-def Vector_ExtractMapOp :
-  Vector_Op<"extract_map", [NoSideEffect]>,
-    Arguments<(ins AnyVector:$vector, Variadic<Index>:$ids)>,
-    Results<(outs AnyVector)> {
-  let summary = "vector extract map operation";
-  let description = [{
-    Takes an N-D vector and extracts a sub-part of the vector starting at id
-    along each dimension.
-
-    The dimension associated to each element of `ids` used to extract are
-    implicitly deduced from the destination type. For each dimension the
-    multiplicity is the destination dimension size divided by the source
-    dimension size, each dimension with a multiplicity greater than 1 is
-    associated to the next id, following ids order.
-    For example if the source type is `vector<64x4x32xf32>` and the destination
-    type is `vector<4x4x2xf32>`, the first id maps to dimension 0 and the second
-    id to dimension 2.
-
-    Similarly to vector.tuple_get, this operation is used for progressive
-    lowering and should be folded away before converting to LLVM.
-
-    It is different than `vector.extract_slice` and
-    `vector.extract_strided_slice` as it takes a Value as index instead of an
-    attribute. Also in the future it is meant to support extracting along any
-    dimensions and not only the most major ones.
-
-    For instance:
-    ```
-    // dynamic computation producing the value 0 of index type
-    %idx0 = ... : index
-    // dynamic computation producing the value 1 of index type
-    %idx1 = ... : index
-    %0 = arith.constant dense<0, 1, 2, 3>: vector<4xi32>
-    // extracts values [0, 1]
-    %1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
-    // extracts values [1, 2]
-    %2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
-    ```
-
-    Example:
-
-    ```mlir
-    %ev = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
-    %ev1 = vector.extract_map %v1[%id1, %id2] : vector<64x4x32xf32>
-      to vector<4x4x2xf32>
-    ```
-  }];
-  let builders = [
-    OpBuilder<(ins "Value":$vector, "ValueRange":$ids,
-                  "ArrayRef<int64_t>":$multiplicity,
-                  "AffineMap":$map)>];
-  let extraClassDeclaration = [{
-    VectorType getSourceVectorType() {
-      return getVector().getType().cast<VectorType>();
-    }
-    VectorType getResultType() {
-      return getResult().getType().cast<VectorType>();
-    }
-    void getMultiplicity(SmallVectorImpl<int64_t> &multiplicity);
-    AffineMap map();
-  }];
-  let assemblyFormat = [{
-    $vector `[` $ids `]` attr-dict `:` type($vector) `to` type(results)
-  }];
-
-  let hasFolder = 1;
-  let hasVerifier = 1;
-}
-
 def Vector_FMAOp :
  Op<Vector_Dialect, "fma", [
       NoSideEffect, AllTypesMatch<["lhs", "rhs", "acc", "result"]>,
@@ -790,72 +721,6 @@ def Vector_InsertOp :
  let hasVerifier = 1;
 }

-def Vector_InsertMapOp :
-  Vector_Op<"insert_map", [NoSideEffect, AllTypesMatch<["dest", "result"]>]>,
-    Arguments<(ins AnyVector:$vector, AnyVector:$dest, Variadic<Index>:$ids)>,
-    Results<(outs AnyVector:$result)> {
-  let summary = "vector insert map operation";
-  let description = [{
-    Inserts a N-D vector and within a larger vector starting at id. The new
-    vector created will have the same size as the destination operand vector.
-
-    The dimension associated to each element of `ids` used to insert is
-    implicitly deduced from the source type (see `ExtractMapOp` for details).
-    For example if source type is `vector<4x4x2xf32>` and the destination type
-    is `vector<64x4x32xf32>`, the first id maps to dimension 0 and the second id
-    to dimension 2.
-
-    Similarly to vector.tuple_get, this operation is used for progressive
-    lowering and should be folded away before converting to LLVM.
-
-    It is different than `vector.insert` and `vector.insert_strided_slice` as it
-    takes a Value as index instead of an attribute. Also in the future it is
-    meant to support inserting along any dimensions and not only the most major
-    ones.
-
-    This operations is meant to be used in combination with vector.extract_map.
-
-    For instance:
-    ```
-    // dynamic computation producing the value 0 of index type
-    %idx0 = ... : index
-    // dynamic computation producing the value 1 of index type
-    %idx1 = ... : index /
-    %0 = arith.constant dense<0, 1, 2, 3>: vector<4xi32>
-    // extracts values [0, 1]
-    %1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
-    // extracts values [1, 2]
-    %2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
-    // insert [0, 1] into [x, x, x, x] and produce [0, 1, x, x]
-    %3 = vector.insert_map %1, %0[%idx0] : vector<2xi32> into vector<4xi32>
-    // insert [1, 2] into [x, x, x, x] and produce [x, 1, 2, x]
-    %4 = vector.insert_map %2, %0[%idx1] : vector<2xi32> into vector<4xi32>
-    ```
-    Example:
-
-    ```mlir
-    %v = vector.insert_map %ev %v[%id] : vector<1xf32> into vector<32xf32>
-    %v1 = vector.insert_map %ev1, %v1[%arg0, %arg1] : vector<2x4x1xf32>
-      into vector<64x4x32xf32>
-    ```
-  }];
-  let extraClassDeclaration = [{
-    VectorType getSourceVectorType() {
-      return getVector().getType().cast<VectorType>();
-    }
-    VectorType getResultType() {
-      return getResult().getType().cast<VectorType>();
-    }
-    // Return a map indicating the dimension mapping to the given ids.
-    AffineMap map();
-  }];
-  let assemblyFormat = [{
-    $vector `,` $dest `[` $ids `]` attr-dict
-      `:` type($vector) `into` type($result)
-  }];
-  let hasVerifier = 1;
-}
-
 def Vector_InsertStridedSliceOp :
  Vector_Op<"insert_strided_slice", [NoSideEffect,
    PredOpTrait<"operand #0 and result have same element type",
--- a/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h
+++ b/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h
@@ -65,28 +65,6 @@ LogicalResult splitFullAndPartialTransfer(
    VectorTransformsOptions options = VectorTransformsOptions(),
    scf::IfOp *ifOp = nullptr);

-struct DistributeOps {
-  ExtractMapOp extract;
-  InsertMapOp insert;
-};
-
-/// Distribute a N-D vector pointwise operation over a range of given ids taking
-/// *all* values in [0 .. multiplicity - 1] (e.g. loop induction variable or
-/// SPMD id). This transformation only inserts
-/// vector.extract_map/vector.insert_map. It is meant to be used with
-/// canonicalizations pattern to propagate and fold the vector
-/// insert_map/extract_map operations.
-/// Transforms:
-//  %v = arith.addf %a, %b : vector<32xf32>
-/// to:
-/// %v = arith.addf %a, %b : vector<32xf32>
-/// %ev = vector.extract_map %v, %id, 32 : vector<32xf32> into vector<1xf32>
-/// %nv = vector.insert_map %ev, %id, 32 : vector<1xf32> into vector<32xf32>
-Optional<DistributeOps>
-distributPointwiseVectorOp(OpBuilder &builder, Operation *op,
-                           ArrayRef<Value> id, ArrayRef<int64_t> multiplicity,
-                           const AffineMap &map);
-
 /// Implements transfer op write to read forwarding and dead transfer write
 /// optimizations.
 void transferOpflowOpt(Operation *rootOp);
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -1630,81 +1630,6 @@ static void populateFromInt64AttrArray(ArrayAttr arrayAttr,
    results.push_back(attr.cast<IntegerAttr>().getInt());
 }

-//===----------------------------------------------------------------------===//
-// ExtractMapOp
-//===----------------------------------------------------------------------===//
-
-void ExtractMapOp::build(OpBuilder &builder, OperationState &result,
-                         Value vector, ValueRange ids,
-                         ArrayRef<int64_t> multiplicity,
-                         AffineMap permutationMap) {
-  assert(ids.size() == multiplicity.size() &&
-         ids.size() == permutationMap.getNumResults());
-  assert(permutationMap.isProjectedPermutation());
-  VectorType type = vector.getType().cast<VectorType>();
-  SmallVector<int64_t, 4> newShape(type.getShape().begin(),
-                                   type.getShape().end());
-  for (unsigned i = 0, e = permutationMap.getNumResults(); i < e; i++) {
-    AffineExpr expr = permutationMap.getResult(i);
-    auto dim = expr.cast<AffineDimExpr>();
-    newShape[dim.getPosition()] = newShape[dim.getPosition()] / multiplicity[i];
-  }
-  VectorType resultType = VectorType::get(newShape, type.getElementType());
-  ExtractMapOp::build(builder, result, resultType, vector, ids);
-}
-
-LogicalResult ExtractMapOp::verify() {
-  if (getSourceVectorType().getRank() != getResultType().getRank())
-    return emitOpError("expected source and destination vectors of same rank");
-  unsigned numId = 0;
-  for (unsigned i = 0, e = getSourceVectorType().getRank(); i < e; ++i) {
-    if (getSourceVectorType().getDimSize(i) % getResultType().getDimSize(i) !=
-        0)
-      return emitOpError("source vector dimensions must be a multiple of "
-                         "destination vector dimensions");
-    if (getSourceVectorType().getDimSize(i) != getResultType().getDimSize(i))
-      numId++;
-  }
-  if (numId != getIds().size())
-    return emitOpError("expected number of ids must match the number of "
-                       "dimensions distributed");
-  return success();
-}
-
-OpFoldResult ExtractMapOp::fold(ArrayRef<Attribute> operands) {
-  auto insert = getVector().getDefiningOp<vector::InsertMapOp>();
-  if (insert == nullptr || getType() != insert.getVector().getType() ||
-      getIds() != insert.getIds())
-    return {};
-  return insert.getVector();
-}
-
-void ExtractMapOp::getMultiplicity(SmallVectorImpl<int64_t> &multiplicity) {
-  assert(multiplicity.empty());
-  for (unsigned i = 0, e = getSourceVectorType().getRank(); i < e; i++) {
-    if (getSourceVectorType().getDimSize(i) != getResultType().getDimSize(i))
-      multiplicity.push_back(getSourceVectorType().getDimSize(i) /
-                             getResultType().getDimSize(i));
-  }
-}
-
-template <typename MapOp>
-AffineMap calculateImplicitMap(MapOp op) {
-  SmallVector<AffineExpr, 4> perm;
-  // Check which dimension have a multiplicity greater than 1 and associated
-  // them to the IDs in order.
-  for (unsigned i = 0, e = op.getSourceVectorType().getRank(); i < e; i++) {
-    if (op.getSourceVectorType().getDimSize(i) !=
-        op.getResultType().getDimSize(i))
-      perm.push_back(getAffineDimExpr(i, op.getContext()));
-  }
-  auto map = AffineMap::get(op.getSourceVectorType().getRank(), 0, perm,
-                            op.getContext());
-  return map;
-}
-
-AffineMap ExtractMapOp::map() { return calculateImplicitMap(*this); }
-
 //===----------------------------------------------------------------------===//
 // FmaOp
 //===----------------------------------------------------------------------===//
@@ -2133,30 +2058,6 @@ OpFoldResult vector::InsertOp::fold(ArrayRef<Attribute> operands) {
  return {};
 }

-//===----------------------------------------------------------------------===//
-// InsertMapOp
-//===----------------------------------------------------------------------===//
-
-LogicalResult InsertMapOp::verify() {
-  if (getSourceVectorType().getRank() != getResultType().getRank())
-    return emitOpError("expected source and destination vectors of same rank");
-  unsigned numId = 0;
-  for (unsigned i = 0, e = getResultType().getRank(); i < e; i++) {
-    if (getResultType().getDimSize(i) % getSourceVectorType().getDimSize(i) !=
-        0)
-      return emitOpError(
-          "destination vector size must be a multiple of source vector size");
-    if (getResultType().getDimSize(i) != getSourceVectorType().getDimSize(i))
-      numId++;
-  }
-  if (numId != getIds().size())
-    return emitOpError("expected number of ids must match the number of "
-                       "dimensions distributed");
-  return success();
-}
-
-AffineMap InsertMapOp::map() { return calculateImplicitMap(*this); }
-
 //===----------------------------------------------------------------------===//
 // InsertStridedSliceOp
 //===----------------------------------------------------------------------===//
--- a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt
@@ -9,7 +9,7 @@ add_mlir_dialect_library(MLIRVectorTransforms
  VectorTransferSplitRewritePatterns.cpp
  VectorTransferPermutationMapRewritePatterns.cpp
  VectorTransforms.cpp
-  VectorUnrollDistribute.cpp
+  VectorUnroll.cpp

  ADDITIONAL_HEADER_DIRS
  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Vector/Transforms
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1998,37 +1998,6 @@ ContractionOpLowering::lowerReduction(vector::ContractionOp op,

 } // namespace mlir

-Optional<mlir::vector::DistributeOps> mlir::vector::distributPointwiseVectorOp(
-    OpBuilder &builder, Operation *op, ArrayRef<Value> ids,
-    ArrayRef<int64_t> multiplicity, const AffineMap &map) {
-  OpBuilder::InsertionGuard guard(builder);
-  builder.setInsertionPointAfter(op);
-  Location loc = op->getLoc();
-  if (op->getNumResults() != 1)
-    return {};
-  Value result = op->getResult(0);
-  VectorType type = op->getResult(0).getType().dyn_cast<VectorType>();
-  if (!type || map.getNumResults() != multiplicity.size())
-    return {};
-  // For each dimension being distributed check that the size is a multiple of
-  // the multiplicity. To handle more sizes we would need to support masking.
-  unsigned multiplictyCount = 0;
-  for (auto exp : map.getResults()) {
-    auto affinExp = exp.dyn_cast<AffineDimExpr>();
-    if (!affinExp || affinExp.getPosition() >= type.getRank() ||
-        type.getDimSize(affinExp.getPosition()) %
-                multiplicity[multiplictyCount++] !=
-            0)
-      return {};
-  }
-  DistributeOps ops;
-  ops.extract =
-      builder.create<vector::ExtractMapOp>(loc, result, ids, multiplicity, map);
-  ops.insert =
-      builder.create<vector::InsertMapOp>(loc, ops.extract, result, ids);
-  return ops;
-}
-
 /// Progressive lowering of transfer_read. This pattern supports lowering of
 /// `vector.transfer_read` to a combination of `vector.load` and
 /// `vector.broadcast` if all of the following hold:
--- a/mlir/lib/Dialect/Vector/Transforms/VectorUnrollDistribute.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorUnrollDistribute.cpp
@@ -538,202 +538,6 @@ private:
  vector::UnrollVectorOptions options;
 };

-/// Canonicalize an extract_map using the result of a pointwise operation.
-/// Transforms:
-/// %v = arith.addf %a, %b : vector32xf32>
-/// %dv = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
-/// to:
-/// %da = vector.extract_map %a[%id] : vector<32xf32> to vector<1xf32>
-/// %db = vector.extract_map %a[%id] : vector<32xf32> to vector<1xf32>
-/// %dv = arith.addf %da, %db : vector<1xf32>
-struct PointwiseExtractPattern : public OpRewritePattern<vector::ExtractMapOp> {
-  using OpRewritePattern::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(vector::ExtractMapOp extract,
-                                PatternRewriter &rewriter) const override {
-    Operation *definedOp = extract.getVector().getDefiningOp();
-    if (!definedOp || !OpTrait::hasElementwiseMappableTraits(definedOp) ||
-        definedOp->getNumResults() != 1)
-      return failure();
-    Location loc = extract.getLoc();
-    SmallVector<Value, 4> extractOperands;
-    for (OpOperand &operand : definedOp->getOpOperands()) {
-      auto vecType = operand.get().getType().template dyn_cast<VectorType>();
-      if (!vecType) {
-        extractOperands.push_back(operand.get());
-        continue;
-      }
-      extractOperands.push_back(rewriter.create<vector::ExtractMapOp>(
-          loc,
-          VectorType::get(extract.getResultType().getShape(),
-                          vecType.getElementType()),
-          operand.get(), extract.getIds()));
-    }
-    Operation *newOp = cloneOpWithOperandsAndTypes(
-        rewriter, loc, definedOp, extractOperands, extract.getResultType());
-    rewriter.replaceOp(extract, newOp->getResult(0));
-    return success();
-  }
-};
-
-/// Canonicalize an extract_map using the result of a contract operation.
-/// This propagate the extract_map to operands.
-struct ContractExtractPattern : public OpRewritePattern<vector::ExtractMapOp> {
-  using OpRewritePattern::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(vector::ExtractMapOp extract,
-                                PatternRewriter &rewriter) const override {
-    Operation *definedOp = extract.getVector().getDefiningOp();
-    auto contract = dyn_cast_or_null<vector::ContractionOp>(definedOp);
-    if (!contract)
-      return failure();
-    Location loc = contract.getLoc();
-    unsigned accIndex = vector::ContractionOp::getAccOperandIndex();
-    AffineMap affineMap = contract.getIndexingMapsArray()[accIndex];
-    // Create a map of the dimensions distributed based on the acc affine map.
-    // Only parallel dimensions are being distributed, reduction dimensions are
-    // untouched.
-    DenseMap<int64_t, int64_t> map;
-    for (unsigned i : llvm::seq(unsigned(0), affineMap.getNumResults()))
-      map[affineMap.getDimPosition(i)] = extract.getResultType().getDimSize(i);
-    SmallVector<Value, 4> extractOperands;
-    for (const auto &it : llvm::enumerate(contract.getIndexingMapsArray())) {
-      // For each operands calculate the new vector type after distribution.
-      Value operand = contract->getOperand(it.index());
-      auto vecType = operand.getType().cast<VectorType>();
-      SmallVector<int64_t> operandShape(vecType.getShape().begin(),
-                                        vecType.getShape().end());
-      for (unsigned i : llvm::seq(unsigned(0), it.value().getNumResults())) {
-        unsigned dim = it.value().getDimPosition(i);
-        auto distributedDim = map.find(dim);
-        // If the dimension is not in the map it means it is a reduction and
-        // doesn't get distributed.
-        if (distributedDim == map.end())
-          continue;
-        operandShape[i] = distributedDim->second;
-      }
-      VectorType newVecType =
-          VectorType::get(operandShape, vecType.getElementType());
-      extractOperands.push_back(rewriter.create<vector::ExtractMapOp>(
-          loc, newVecType, operand, extract.getIds()));
-    }
-    Operation *newOp =
-        cloneOpWithOperandsAndTypes(rewriter, loc, definedOp, extractOperands,
-                                    extract.getResult().getType());
-    rewriter.replaceOp(extract, newOp->getResult(0));
-    return success();
-  }
-};
-
-/// Converts TransferRead op used by ExtractMap op into a smaller dimension
-/// TransferRead.
-/// Example:
-/// ```
-/// %a = vector.transfer_read %A[%c0, %c0, %c0], %cf0:
-///   memref<64x64x64xf32>, vector<64x4x32xf32>
-/// %e = vector.extract_map %a[%id] : vector<64x4x32xf32> to vector<2x4x1xf32>
-/// ```
-/// to:
-/// ```
-/// %id1 = affine.apply affine_map<()[s0] -> (s0 * 2)> (%id)
-/// %e = vector.transfer_read %A[%id1, %c0, %id1], %cf0 :
-///   memref<64x64x64xf32>, vector<2x4x1xf32>
-/// ```
-struct TransferReadExtractPattern
-    : public OpRewritePattern<vector::TransferReadOp> {
-  using OpRewritePattern::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(vector::TransferReadOp read,
-                                PatternRewriter &rewriter) const override {
-    // TODO: support 0-d corner case.
-    if (read.getTransferRank() == 0)
-      return failure();
-
-    if (!read.getResult().hasOneUse())
-      return failure();
-    auto extract =
-        dyn_cast<vector::ExtractMapOp>(*read.getResult().getUsers().begin());
-    if (!extract)
-      return failure();
-    if (read.getMask())
-      return failure();
-
-    SmallVector<Value, 4> indices(read.getIndices().begin(),
-                                  read.getIndices().end());
-    AffineMap indexMap = extract.map().compose(read.getPermutationMap());
-    unsigned idCount = 0;
-    ImplicitLocOpBuilder lb(read.getLoc(), rewriter);
-    for (auto it :
-         llvm::zip(indexMap.getResults(), extract.map().getResults())) {
-      AffineExpr d0, d1;
-      bindDims(read.getContext(), d0, d1);
-      auto indexExpr = std::get<0>(it).dyn_cast<AffineDimExpr>();
-      if (!indexExpr)
-        continue;
-      unsigned indexPos = indexExpr.getPosition();
-      unsigned vectorPos = std::get<1>(it).cast<AffineDimExpr>().getPosition();
-      auto scale = getAffineConstantExpr(
-          extract.getResultType().getDimSize(vectorPos), read.getContext());
-      indices[indexPos] = makeComposedAffineApply(
-          rewriter, read.getLoc(), d0 + scale * d1,
-          {indices[indexPos], extract.getIds()[idCount++]});
-    }
-    Value newRead = lb.create<vector::TransferReadOp>(
-        extract.getType(), read.getSource(), indices,
-        read.getPermutationMapAttr(), read.getPadding(), read.getMask(),
-        read.getInBoundsAttr());
-    Value dest = lb.create<arith::ConstantOp>(
-        read.getType(), rewriter.getZeroAttr(read.getType()));
-    newRead = lb.create<vector::InsertMapOp>(newRead, dest, extract.getIds());
-    rewriter.replaceOp(read, newRead);
-    return success();
-  }
-};
-
-struct TransferWriteInsertPattern
-    : public OpRewritePattern<vector::TransferWriteOp> {
-  using OpRewritePattern::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(vector::TransferWriteOp write,
-                                PatternRewriter &rewriter) const override {
-    // TODO: support 0-d corner case.
-    if (write.getTransferRank() == 0)
-      return failure();
-
-    auto insert = write.getVector().getDefiningOp<vector::InsertMapOp>();
-    if (!insert)
-      return failure();
-    if (write.getMask())
-      return failure();
-    SmallVector<Value, 4> indices(write.getIndices().begin(),
-                                  write.getIndices().end());
-    AffineMap indexMap = insert.map().compose(write.getPermutationMap());
-    unsigned idCount = 0;
-    Location loc = write.getLoc();
-    for (auto it :
-         llvm::zip(indexMap.getResults(), insert.map().getResults())) {
-      AffineExpr d0, d1;
-      bindDims(write.getContext(), d0, d1);
-      auto indexExpr = std::get<0>(it).dyn_cast<AffineDimExpr>();
-      if (!indexExpr)
-        continue;
-      unsigned indexPos = indexExpr.getPosition();
-      unsigned vectorPos = std::get<1>(it).cast<AffineDimExpr>().getPosition();
-      auto scale = getAffineConstantExpr(
-          insert.getSourceVectorType().getDimSize(vectorPos),
-          write.getContext());
-      indices[indexPos] = makeComposedAffineApply(
-          rewriter, loc, d0 + scale * d1,
-          {indices[indexPos], insert.getIds()[idCount++]});
-    }
-    rewriter.create<vector::TransferWriteOp>(
-        loc, insert.getVector(), write.getSource(), indices,
-        write.getPermutationMapAttr(), write.getInBoundsAttr());
-    rewriter.eraseOp(write);
-    return success();
-  }
-};
-
 struct UnrollReductionPattern : public OpRewritePattern<vector::ReductionOp> {
  UnrollReductionPattern(MLIRContext *context,
                         const vector::UnrollVectorOptions &options,
@@ -841,10 +645,3 @@ void mlir::vector::populateVectorUnrollPatterns(
               UnrollReductionPattern, UnrollMultiReductionPattern,
               UnrollTranposePattern>(patterns.getContext(), options, benefit);
 }
-
-void mlir::vector::populatePropagateVectorDistributionPatterns(
-    RewritePatternSet &patterns, PatternBenefit benefit) {
-  patterns.add<PointwiseExtractPattern, ContractExtractPattern,
-               TransferReadExtractPattern, TransferWriteInsertPattern>(
-      patterns.getContext(), benefit);
-}
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@@ -1470,48 +1470,6 @@ func.func @compress_memref_mismatch(%base: memref<?x?xf32>, %mask: vector<16xi1>

 // -----

-func.func @extract_map_rank(%v: vector<32xf32>, %id : index) {
-  // expected-error@+1 {{'vector.extract_map' op expected source and destination vectors of same rank}}
-  %0 = vector.extract_map %v[%id] : vector<32xf32> to vector<2x1xf32>
-}
-
-// -----
-
-func.func @extract_map_size(%v: vector<63xf32>, %id : index) {
-  // expected-error@+1 {{'vector.extract_map' op source vector dimensions must be a multiple of destination vector dimensions}}
-  %0 = vector.extract_map %v[%id] : vector<63xf32> to vector<2xf32>
-}
-
-// -----
-
-func.func @extract_map_id(%v: vector<2x32xf32>, %id : index) {
-  // expected-error@+1 {{'vector.extract_map' op expected number of ids must match the number of dimensions distributed}}
-  %0 = vector.extract_map %v[%id] : vector<2x32xf32> to vector<1x1xf32>
-}
-
-// -----
-
-func.func @insert_map_rank(%v: vector<2x1xf32>, %v1: vector<32xf32>, %id : index) {
-  // expected-error@+1 {{'vector.insert_map' op expected source and destination vectors of same rank}}
-  %0 = vector.insert_map %v, %v1[%id] : vector<2x1xf32> into vector<32xf32>
-}
-
-// -----
-
-func.func @insert_map_size(%v: vector<3xf32>, %v1: vector<64xf32>, %id : index) {
-  // expected-error@+1 {{'vector.insert_map' op destination vector size must be a multiple of source vector size}}
-  %0 = vector.insert_map %v, %v1[%id] : vector<3xf32> into vector<64xf32>
-}
-
-// -----
-
-func.func @insert_map_id(%v: vector<2x1xf32>, %v1: vector<4x32xf32>, %id : index) {
-  // expected-error@+1 {{'vector.insert_map' op expected number of ids must match the number of dimensions distributed}}
-  %0 = vector.insert_map %v, %v1[%id] : vector<2x1xf32> into vector<4x32xf32>
-}
-
-// -----
-
 func.func @scan_reduction_dim_constraint(%arg0: vector<2x3xi32>, %arg1: vector<3xi32>) -> vector<3xi32> {
  // expected-error@+1 {{'vector.scan' op reduction dimension 5 has to be less than 2}}
  %0:2 = vector.scan <add>, %arg0, %arg1 {inclusive = true, reduction_dim = 5} :
--- a/mlir/test/Dialect/Vector/ops.mlir
+++ b/mlir/test/Dialect/Vector/ops.mlir
@@ -754,21 +754,6 @@ func.func @expand_and_compress2d(%base: memref<?x?xf32>, %mask: vector<16xi1>, %
  return
 }

-// CHECK-LABEL: @extract_insert_map
-func.func @extract_insert_map(%v: vector<32xf32>, %v2: vector<16x32xf32>,
-  %id0 : index, %id1 : index) -> (vector<32xf32>, vector<16x32xf32>) {
-  // CHECK: %[[V:.*]] = vector.extract_map %{{.*}}[%{{.*}}] : vector<32xf32> to vector<2xf32>
-  %vd = vector.extract_map %v[%id0] : vector<32xf32> to vector<2xf32>
-  // CHECK: %[[V1:.*]] = vector.extract_map %{{.*}}[%{{.*}}, %{{.*}}] : vector<16x32xf32> to vector<4x2xf32>
-  %vd2 = vector.extract_map %v2[%id0, %id1] : vector<16x32xf32> to vector<4x2xf32>
-  // CHECK: %[[R:.*]] = vector.insert_map %[[V]], %{{.*}}[%{{.*}}] : vector<2xf32> into vector<32xf32>
-  %r = vector.insert_map %vd, %v[%id0] : vector<2xf32> into vector<32xf32>
-  // CHECK: %[[R1:.*]] = vector.insert_map %[[V1]], %{{.*}}[%{{.*}}, %{{.*}}] : vector<4x2xf32> into vector<16x32xf32>
-  %r2 = vector.insert_map %vd2, %v2[%id0, %id1] : vector<4x2xf32> into vector<16x32xf32>
-  // CHECK: return %[[R]], %[[R1]] : vector<32xf32>, vector<16x32xf32>
-  return %r, %r2 : vector<32xf32>, vector<16x32xf32>
-}
-
 // CHECK-LABEL: @multi_reduction
 func.func @multi_reduction(%0: vector<4x8x16x32xf32>, %acc0: vector<4x16xf32>,
                           %acc1: f32) -> f32 {
--- a/mlir/test/Dialect/Vector/vector-distribution.mlir
+++ b/mlir/test/Dialect/Vector/vector-distribution.mlir
@@ -1,204 +0,0 @@
-// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32,1,32 -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32,4 -split-input-file | FileCheck %s --check-prefix=CHECK2D
-
-// CHECK-LABEL: func @distribute_vector_add
-//  CHECK-SAME: (%[[ID:.*]]: index
-//  CHECK-NEXT:    %[[ADDV:.*]] = arith.addf %{{.*}}, %{{.*}} : vector<32xf32>
-//  CHECK-NEXT:    %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
-//  CHECK-NEXT:    %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
-//  CHECK-NEXT:    %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<1xf32>
-//  CHECK-NEXT:    %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID]]] : vector<1xf32> into vector<32xf32>
-//  CHECK-NEXT:    return %[[INS]] : vector<32xf32>
-func.func @distribute_vector_add(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> {
-  %0 = arith.addf %A, %B : vector<32xf32>
-  return %0: vector<32xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @distribute_vector_add_exp
-//  CHECK-SAME: (%[[ID:.*]]: index
-//  CHECK-NEXT:    %[[EXPV:.*]] = math.exp %{{.*}} : vector<32xf32>
-//  CHECK-NEXT:    %[[ADDV:.*]] = arith.addf %[[EXPV]], %{{.*}} : vector<32xf32>
-//  CHECK-NEXT:    %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
-//  CHECK-NEXT:    %[[EXC:.*]] = math.exp %[[EXA]] : vector<1xf32>
-//  CHECK-NEXT:    %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
-//  CHECK-NEXT:    %[[ADD:.*]] = arith.addf %[[EXC]], %[[EXB]] : vector<1xf32>
-//  CHECK-NEXT:    %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID]]] : vector<1xf32> into vector<32xf32>
-//  CHECK-NEXT:    return %[[INS]] : vector<32xf32>
-func.func @distribute_vector_add_exp(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> {
-  %C = math.exp %A : vector<32xf32>
-  %0 = arith.addf %C, %B : vector<32xf32>
-  return %0: vector<32xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @vector_add_read_write
-//  CHECK-SAME: (%[[ID:.*]]: index
-//       CHECK:    %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
-//  CHECK-NEXT:    %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
-//  CHECK-NEXT:    %[[ADD1:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<1xf32>
-//  CHECK-NEXT:    %[[EXC:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
-//  CHECK-NEXT:    %[[ADD2:.*]] = arith.addf %[[ADD1]], %[[EXC]] : vector<1xf32>
-//  CHECK-NEXT:    vector.transfer_write %[[ADD2]], %{{.*}}[%[[ID]]] {{.*}} : vector<1xf32>, memref<32xf32>
-//  CHECK-NEXT:    return
-func.func @vector_add_read_write(%id : index, %A: memref<32xf32>, %B: memref<32xf32>, %C: memref<32xf32>, %D: memref<32xf32>) {
-  %c0 = arith.constant 0 : index
-  %cf0 = arith.constant 0.0 : f32
-  %a = vector.transfer_read %A[%c0], %cf0: memref<32xf32>, vector<32xf32>
-  %b = vector.transfer_read %B[%c0], %cf0: memref<32xf32>, vector<32xf32>
-  %acc = arith.addf %a, %b: vector<32xf32>
-  %c = vector.transfer_read %C[%c0], %cf0: memref<32xf32>, vector<32xf32>
-  %d = arith.addf %acc, %c: vector<32xf32>
-  vector.transfer_write %d, %D[%c0]: vector<32xf32>, memref<32xf32>
-  return
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
-
-//       CHECK: func @vector_add_cycle
-//  CHECK-SAME: (%[[ID:.*]]: index
-//       CHECK:    %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
-//  CHECK-NEXT:    %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]]], %{{.*}} : memref<64xf32>, vector<2xf32>
-//  CHECK-NEXT:    %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
-//  CHECK-NEXT:    %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]]], %{{.*}} : memref<64xf32>, vector<2xf32>
-//  CHECK-NEXT:    %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2xf32>
-//  CHECK-NEXT:    %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
-//  CHECK-NEXT:    vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]]] {{.*}} : vector<2xf32>, memref<64xf32>
-//  CHECK-NEXT:    return
-func.func @vector_add_cycle(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {
-  %c0 = arith.constant 0 : index
-  %cf0 = arith.constant 0.0 : f32
-  %a = vector.transfer_read %A[%c0], %cf0: memref<64xf32>, vector<64xf32>
-  %b = vector.transfer_read %B[%c0], %cf0: memref<64xf32>, vector<64xf32>
-  %acc = arith.addf %a, %b: vector<64xf32>
-  vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref<64xf32>
-  return
-}
-
-// -----
-
-// Negative test to make sure nothing is done in case the vector size is not a
-// multiple of multiplicity.
-// CHECK-LABEL: func @vector_negative_test
-//       CHECK:    %[[C0:.*]] = arith.constant 0 : index
-//       CHECK:    %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>
-//  CHECK-NEXT:    %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>
-//  CHECK-NEXT:    %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<16xf32>
-//  CHECK-NEXT:    vector.transfer_write %[[ADD]], %{{.*}}[%[[C0]]] {{.*}} : vector<16xf32>, memref<64xf32>
-//  CHECK-NEXT:    return
-func.func @vector_negative_test(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {
-  %c0 = arith.constant 0 : index
-  %cf0 = arith.constant 0.0 : f32
-  %a = vector.transfer_read %A[%c0], %cf0: memref<64xf32>, vector<16xf32>
-  %b = vector.transfer_read %B[%c0], %cf0: memref<64xf32>, vector<16xf32>
-  %acc = arith.addf %a, %b: vector<16xf32>
-  vector.transfer_write %acc, %C[%c0]: vector<16xf32>, memref<64xf32>
-  return
-}
-
-// -----
-
-// CHECK-LABEL: func @distribute_vector_add_3d
-//  CHECK-SAME: (%[[ID0:.*]]: index, %[[ID1:.*]]: index
-//  CHECK-NEXT:    %[[ADDV:.*]] = arith.addf %{{.*}}, %{{.*}} : vector<64x4x32xf32>
-//  CHECK-NEXT:    %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID0]], %[[ID1]]] : vector<64x4x32xf32> to vector<2x4x1xf32>
-//  CHECK-NEXT:    %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID0]], %[[ID1]]] : vector<64x4x32xf32> to vector<2x4x1xf32>
-//  CHECK-NEXT:    %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
-//  CHECK-NEXT:    %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID0]], %[[ID1]]] : vector<2x4x1xf32> into vector<64x4x32xf32>
-//  CHECK-NEXT:    return %[[INS]] : vector<64x4x32xf32>
-func.func @distribute_vector_add_3d(%id0 : index, %id1 : index,
-  %A: vector<64x4x32xf32>, %B: vector<64x4x32xf32>) -> vector<64x4x32xf32> {
-  %0 = arith.addf %A, %B : vector<64x4x32xf32>
-  return %0: vector<64x4x32xf32>
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
-
-//       CHECK: func @vector_add_transfer_3d
-//  CHECK-SAME: (%[[ID_0:.*]]: index, %[[ID_1:.*]]: index
-//       CHECK:    %[[C0:.*]] = arith.constant 0 : index
-//       CHECK:    %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
-//  CHECK-NEXT:    %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]], %[[C0]], %[[ID_1]]], %{{.*}} : memref<64x64x64xf32>, vector<2x4x1xf32>
-//  CHECK-NEXT:    %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
-//  CHECK-NEXT:    %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]], %[[C0]], %[[ID_1]]], %{{.*}} : memref<64x64x64xf32>, vector<2x4x1xf32>
-//  CHECK-NEXT:    %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
-//  CHECK-NEXT:    %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
-//  CHECK-NEXT:    vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]], %[[C0]], %[[ID_1]]] {{.*}} : vector<2x4x1xf32>, memref<64x64x64xf32>
-//  CHECK-NEXT:    return
-func.func @vector_add_transfer_3d(%id0 : index, %id1 : index, %A: memref<64x64x64xf32>,
-  %B: memref<64x64x64xf32>, %C: memref<64x64x64xf32>) {
-  %c0 = arith.constant 0 : index
-  %cf0 = arith.constant 0.0 : f32
-  %a = vector.transfer_read %A[%c0, %c0, %c0], %cf0: memref<64x64x64xf32>, vector<64x4x32xf32>
-  %b = vector.transfer_read %B[%c0, %c0, %c0], %cf0: memref<64x64x64xf32>, vector<64x4x32xf32>
-  %acc = arith.addf %a, %b: vector<64x4x32xf32>
-  vector.transfer_write %acc, %C[%c0, %c0, %c0]: vector<64x4x32xf32>, memref<64x64x64xf32>
-  return
-}
-
-// -----
-
-#map0 = affine_map<(d0, d1, d2, d3) -> (d3, 0, 0)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (0, d3, d0)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d3, d2, d1)>
-
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, 0, 0)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2, d3) -> (0, d3, d0)>
-// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, d2, d1)>
-
-//       CHECK: func @vector_add_transfer_permutation
-//  CHECK-SAME: (%[[ID_0:.*]]: index, %[[ID_1:.*]]: index
-//       CHECK:    %[[C0:.*]] = arith.constant 0 : index
-//       CHECK:    %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
-//  CHECK-NEXT:    %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[ID2]]], %{{.*}} {permutation_map = #[[MAP1]]} : memref<?x?x?x?xf32>, vector<2x4x1xf32>
-//  CHECK-NEXT:    %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID_0]], %[[C0]], %[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP2]]} : memref<?x?x?x?xf32>, vector<2x4x1xf32>
-//  CHECK-NEXT:    %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
-//  CHECK-NEXT:    %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
-//  CHECK-NEXT:    vector.transfer_write %[[ADD]], %{{.*}}[%[[C0]], %[[ID_1]], %[[C0]], %[[ID3]]] {permutation_map = #[[MAP3]]} : vector<2x4x1xf32>, memref<?x?x?x?xf32>
-//  CHECK-NEXT:    return
-func.func @vector_add_transfer_permutation(%id0 : index, %id1 : index, %A: memref<?x?x?x?xf32>,
-  %B: memref<?x?x?x?xf32>, %C: memref<?x?x?x?xf32>) {
-  %c0 = arith.constant 0 : index
-  %cf0 = arith.constant 0.0 : f32
-  %a = vector.transfer_read %A[%c0, %c0, %c0, %c0], %cf0 {permutation_map = #map0} : memref<?x?x?x?xf32>, vector<64x4x32xf32>
-  %b = vector.transfer_read %B[%c0, %c0, %c0, %c0], %cf0 {permutation_map = #map1}: memref<?x?x?x?xf32>, vector<64x4x32xf32>
-  %acc = arith.addf %a, %b: vector<64x4x32xf32>
-  vector.transfer_write %acc, %C[%c0, %c0, %c0, %c0] {permutation_map = #map2}: vector<64x4x32xf32>, memref<?x?x?x?xf32>
-  return
-}
-
-// -----
-
-// CHECK2D-LABEL: vector_add_contract
-//       CHECK2D:   %[[A:.+]] = vector.transfer_read %arg2[%0, %c0], %cst : memref<?x?xf32>, vector<2x4xf32>
-//       CHECK2D:   %[[B:.+]] = vector.transfer_read %arg3[%2, %c0], %cst : memref<?x?xf32>, vector<16x4xf32>
-//       CHECK2D:   %[[C:.+]] = vector.transfer_read %arg4[%4, %5], %cst : memref<?x?xf32>, vector<2x16xf32>
-//       CHECK2D:   %[[E:.+]] = vector.transfer_read %arg5[%7, %8], %cst : memref<?x?xf32>, vector<2x16xf32>
-//       CHECK2D:   %[[D:.+]] = vector.contract {{.*}} %[[A]], %[[B]], %[[C]] : vector<2x4xf32>, vector<16x4xf32> into vector<2x16xf32>
-//       CHECK2D:   %[[R:.+]] = arith.addf %[[D]], %[[E]] : vector<2x16xf32>
-//       CHECK2D:   vector.transfer_write %[[R]], {{.*}} : vector<2x16xf32>, memref<?x?xf32>
-func.func @vector_add_contract(%id0 : index, %id1 : index, %A: memref<?x?xf32>,
-  %B: memref<?x?xf32>, %C: memref<?x?xf32>, %D: memref<?x?xf32>) {
-  %c0 = arith.constant 0 : index
-  %cf0 = arith.constant 0.0 : f32
-  %a = vector.transfer_read %A[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x4xf32>
-  %b = vector.transfer_read %B[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x4xf32>
-  %c = vector.transfer_read %C[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x64xf32>
-  %d = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
-                                         affine_map<(d0, d1, d2) -> (d1, d2)>,
-                                         affine_map<(d0, d1, d2) -> (d0, d1)>],
-                        iterator_types = ["parallel", "parallel", "reduction"],
-                        kind = #vector.kind<add>}
-    %a, %b, %c : vector<64x4xf32>, vector<64x4xf32> into vector<64x64xf32>
-  %e = vector.transfer_read %D[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x64xf32>
-  %r = arith.addf %d, %e : vector<64x64xf32>
-  vector.transfer_write %r, %C[%c0, %c0] : vector<64x64xf32>, memref<?x?xf32>
-  return
-}
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
@@ -1,66 +0,0 @@
-// RUN: mlir-opt %s -pass-pipeline="func.func(test-vector-to-forloop,convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
-// RUN: mlir-cpu-runner -e main -entry-point-result=void  \
-// RUN:   -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext | \
-// RUN: FileCheck %s
-
-// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e main \
-// RUN: -entry-point-result=void \
-// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext | \
-// RUN: FileCheck %s
-
-// RUN: mlir-opt %s -pass-pipeline="func.func(test-vector-to-forloop)" | FileCheck %s -check-prefix=TRANSFORM
-
-
-func.func private @printMemrefF32(memref<*xf32>)
-
-func.func @alloc_1d_filled_inc_f32(%arg0: index, %arg1: f32) -> memref<?xf32> {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %0 = memref.alloc(%arg0) : memref<?xf32>
-  scf.for %arg2 = %c0 to %arg0 step %c1 {
-    %tmp = arith.index_cast %arg2 : index to i32
-    %tmp1 = arith.sitofp %tmp : i32 to f32
-    %tmp2 = arith.addf %tmp1, %arg1 : f32
-    memref.store %tmp2, %0[%arg2] : memref<?xf32>
-  }
-  return %0 : memref<?xf32>
-}
-
-// Large vector addf that can be broken down into a loop of smaller vector addf.
-func.func @main() {
-  %cf0 = arith.constant 0.0 : f32
-  %cf1 = arith.constant 1.0 : f32
-  %cf2 = arith.constant 2.0 : f32
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %c32 = arith.constant 32 : index
-  %c64 = arith.constant 64 : index
-  %out = memref.alloc(%c64) : memref<?xf32>
-  %in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref<?xf32>
-  %in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref<?xf32>
-  // Check that the tansformatio correctly happened.
-  // TRANSFORM: scf.for
-  // TRANSFORM:   vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
-  // TRANSFORM:   vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
-  // TRANSFORM:   %{{.*}} = arith.addf %{{.*}}, %{{.*}} : vector<2xf32>
-  // TRANSFORM:   vector.transfer_write {{.*}} : vector<2xf32>, memref<?xf32>
-  // TRANSFORM: }
-  %a = vector.transfer_read %in1[%c0], %cf0: memref<?xf32>, vector<64xf32>
-  %b = vector.transfer_read %in2[%c0], %cf0: memref<?xf32>, vector<64xf32>
-  %acc = arith.addf %a, %b: vector<64xf32>
-  vector.transfer_write %acc, %out[%c0]: vector<64xf32>, memref<?xf32>
-  %converted = memref.cast %out : memref<?xf32> to memref<*xf32>
-  call @printMemrefF32(%converted): (memref<*xf32>) -> ()
-  // CHECK:      Unranked{{.*}}data =
-  // CHECK:      [
-  // CHECK-SAME:  3,  5,  7,  9,  11,  13,  15,  17,  19,  21,  23,  25,  27,
-  // CHECK-SAME:  29,  31,  33,  35,  37,  39,  41,  43,  45,  47,  49,  51,
-  // CHECK-SAME:  53,  55,  57,  59,  61,  63,  65,  67,  69,  71,  73,  75,
-  // CHECK-SAME:  77,  79,  81,  83,  85,  87,  89,  91,  93,  95,  97,  99,
-  // CHECK-SAME:  101,  103,  105,  107,  109,  111,  113,  115,  117,  119,
-  // CHECK-SAME:  121,  123,  125,  127,  129]
-  memref.dealloc %out : memref<?xf32>
-  memref.dealloc %in1 : memref<?xf32>
-  memref.dealloc %in2 : memref<?xf32>
-  return
-}
--- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
+++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
@@ -364,126 +364,6 @@ struct TestVectorUnrollingPatterns
      llvm::cl::init(false)};
 };

-struct TestVectorDistributePatterns
-    : public PassWrapper<TestVectorDistributePatterns,
-                         OperationPass<func::FuncOp>> {
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorDistributePatterns)
-
-  StringRef getArgument() const final {
-    return "test-vector-distribute-patterns";
-  }
-  StringRef getDescription() const final {
-    return "Test lowering patterns to distribute vector ops in the vector "
-           "dialect";
-  }
-  TestVectorDistributePatterns() = default;
-  TestVectorDistributePatterns(const TestVectorDistributePatterns &pass)
-      : PassWrapper(pass) {}
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<VectorDialect>();
-    registry.insert<AffineDialect>();
-  }
-  ListOption<int32_t> multiplicity{
-      *this, "distribution-multiplicity",
-      llvm::cl::desc("Set the multiplicity used for distributing vector")};
-
-  void runOnOperation() override {
-    MLIRContext *ctx = &getContext();
-    RewritePatternSet patterns(ctx);
-    func::FuncOp func = getOperation();
-    func.walk([&](arith::AddFOp op) {
-      OpBuilder builder(op);
-      if (auto vecType = op.getType().dyn_cast<VectorType>()) {
-        SmallVector<int64_t, 2> mul;
-        SmallVector<AffineExpr, 2> perm;
-        SmallVector<Value, 2> ids;
-        unsigned count = 0;
-        // Remove the multiplicity of 1 and calculate the affine map based on
-        // the multiplicity.
-        SmallVector<int32_t, 4> m(multiplicity.begin(), multiplicity.end());
-        for (unsigned i = 0, e = vecType.getRank(); i < e; i++) {
-          if (i < m.size() && m[i] != 1 && vecType.getDimSize(i) % m[i] == 0) {
-            mul.push_back(m[i]);
-            ids.push_back(func.getArgument(count++));
-            perm.push_back(getAffineDimExpr(i, ctx));
-          }
-        }
-        auto map = AffineMap::get(op.getType().cast<VectorType>().getRank(), 0,
-                                  perm, ctx);
-        Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
-            builder, op.getOperation(), ids, mul, map);
-        if (ops) {
-          SmallPtrSet<Operation *, 1> extractOp({ops->extract, ops->insert});
-          op.getResult().replaceAllUsesExcept(ops->insert.getResult(),
-                                              extractOp);
-        }
-      }
-    });
-    populatePropagateVectorDistributionPatterns(patterns);
-    (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
-  }
-};
-
-struct TestVectorToLoopPatterns
-    : public PassWrapper<TestVectorToLoopPatterns,
-                         OperationPass<func::FuncOp>> {
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorToLoopPatterns)
-
-  StringRef getArgument() const final { return "test-vector-to-forloop"; }
-  StringRef getDescription() const final {
-    return "Test lowering patterns to break up a vector op into a for loop";
-  }
-  TestVectorToLoopPatterns() = default;
-  TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass)
-      : PassWrapper(pass) {}
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<VectorDialect>();
-    registry.insert<AffineDialect>();
-  }
-  Option<int32_t> multiplicity{
-      *this, "distribution-multiplicity",
-      llvm::cl::desc("Set the multiplicity used for distributing vector"),
-      llvm::cl::init(32)};
-  void runOnOperation() override {
-    MLIRContext *ctx = &getContext();
-    RewritePatternSet patterns(ctx);
-    func::FuncOp func = getOperation();
-    func.walk([&](arith::AddFOp op) {
-      // Check that the operation type can be broken down into a loop.
-      VectorType type = op.getType().dyn_cast<VectorType>();
-      if (!type || type.getRank() != 1 ||
-          type.getNumElements() % multiplicity != 0)
-        return mlir::WalkResult::advance();
-      auto filterAlloc = [](Operation *op) {
-        return !isa<arith::ConstantOp, memref::AllocOp, func::CallOp>(op);
-      };
-      auto dependentOps = getSlice(op, filterAlloc);
-      // Create a loop and move instructions from the Op slice into the loop.
-      OpBuilder builder(op);
-      auto zero = builder.create<arith::ConstantIndexOp>(op.getLoc(), 0);
-      auto one = builder.create<arith::ConstantIndexOp>(op.getLoc(), 1);
-      auto numIter =
-          builder.create<arith::ConstantIndexOp>(op.getLoc(), multiplicity);
-      auto forOp = builder.create<scf::ForOp>(op.getLoc(), zero, numIter, one);
-      for (Operation *it : dependentOps) {
-        it->moveBefore(forOp.getBody()->getTerminator());
-      }
-      auto map = AffineMap::getMultiDimIdentityMap(1, ctx);
-      // break up the original op and let the patterns propagate.
-      Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
-          builder, op.getOperation(), {forOp.getInductionVar()}, {multiplicity},
-          map);
-      if (ops) {
-        SmallPtrSet<Operation *, 1> extractOp({ops->extract, ops->insert});
-        op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp);
-      }
-      return mlir::WalkResult::interrupt();
-    });
-    populatePropagateVectorDistributionPatterns(patterns);
-    (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
-  }
-};
-
 struct TestVectorTransferUnrollingPatterns
    : public PassWrapper<TestVectorTransferUnrollingPatterns,
                         OperationPass<func::FuncOp>> {
@@ -918,10 +798,6 @@ void registerTestVectorLowerings() {

  PassRegistration<TestVectorTransferFullPartialSplitPatterns>();

-  PassRegistration<TestVectorDistributePatterns>();
-
-  PassRegistration<TestVectorToLoopPatterns>();
-
  PassRegistration<TestVectorTransferOpt>();

  PassRegistration<TestVectorTransferLoweringPatterns>();