Support composition of symbols in AffineApplyOp

This CL revisits the composition of AffineApplyOp for the special case where a symbol
itself comes from an AffineApplyOp.
This is achieved by rewriting such symbols into dims to allow composition to occur mathematically.
The implementation is also refactored to improve readability.

Rationale for locally rewriting symbols as dims:
================================================
The mathematical composition of AffineMap must always concatenate symbols
because it does not have enough information to do otherwise. For example,
composing `(d0)[s0] -> (d0 + s0)` with itself must produce
`(d0)[s0, s1] -> (d0 + s0 + s1)`.

The result is only equivalent to `(d0)[s0] -> (d0 + 2 * s0)` when
applied to the same mlir::Value* for both s0 and s1.
As a consequence mathematical composition of AffineMap always concatenates
symbols.

When AffineMaps are used in an AffineApplyOp, however, they may specify
composition via symbols, which is ambiguous mathematically. This corner case
is handled by locally rewriting such symbols that come from AffineApplyOp
into dims and composing through dims.
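
For a concrete illustration, the symbolic_composition_a test added in this CL
chains affine.apply operations whose results feed later applies as symbol
operands:

  %0 = affine.apply (d0) -> (d0 * 4)(%arg0)
  %1 = affine.apply ()[s0, s1] -> (8 * s0)()[%0, %arg0]
  %2 = affine.apply ()[s0, s1] -> (16 * s1)()[%arg1, %1]

Because %0 and %1 are produced by affine.apply, they are locally rewritten as
dims during composition, and the whole chain folds to the equivalent of
`()[s0] -> (s0 * 512)` applied to `[%arg0]`.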

PiperOrigin-RevId: 239791597
Author: Nicolas Vasilache, 2019-03-22 07:31:00 -07:00
Committed by: jpienaar
Parent: 9e210f9884
Commit: 071ca8da91
6 changed files with 428 additions and 111 deletions


@@ -23,6 +23,7 @@
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/StandardOps/Ops.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/Debug.h"
using namespace mlir;
@@ -253,7 +254,7 @@ private:
/// Helper function to insert `v` into the coordinate system of the current
/// AffineApplyNormalizer. Returns the AffineDimExpr with the corresponding
/// renumbered position.
AffineDimExpr applyOneDim(Value *v);
AffineDimExpr renumberOneDim(Value *v);
/// Given an `other` normalizer, this rewrites `other.affineMap` in the
/// coordinate system of the current AffineApplyNormalizer.
@@ -261,12 +262,6 @@ private:
/// `this`.
AffineMap renumber(const AffineApplyNormalizer &other);
/// Given an `app`, rewrites `app.getAffineMap()` in the coordinate system of
/// the current AffineApplyNormalizer.
/// Returns the rewritten AffineMap and updates the dims and symbols of
/// `this`.
AffineMap renumber(const AffineApplyOp &app);
/// Maps of Value* to position in `affineMap`.
DenseMap<Value *, unsigned> dimValueToPosition;
@@ -278,10 +273,10 @@ private:
AffineMap affineMap;
/// Used with RAII to control the depth at which AffineApply are composed
/// recursively. Only accepts depth 1 for now.
/// Note that if one wishes to compose all AffineApply in the program and
/// follows program order, maxdepth 1 is sufficient. This is as much as this
/// abstraction is willing to support for now.
/// recursively. Only accepts depth 1 for now to allow a behavior where a
/// newly composed AffineApplyOp does not increase the length of the chain of
/// AffineApplyOps. Full composition is implemented iteratively on top of
/// this behavior.
static unsigned &affineApplyDepth() {
static thread_local unsigned depth = 0;
return depth;
@@ -307,7 +302,7 @@ struct SimplifyAffineApplyState : public PatternState {
} // end anonymous namespace.
AffineDimExpr AffineApplyNormalizer::applyOneDim(Value *v) {
AffineDimExpr AffineApplyNormalizer::renumberOneDim(Value *v) {
DenseMap<Value *, unsigned>::iterator iterPos;
bool inserted = false;
std::tie(iterPos, inserted) =
@@ -325,7 +320,7 @@ AffineMap AffineApplyNormalizer::renumber(const AffineApplyNormalizer &other) {
auto kvp = other.dimValueToPosition.find(v);
if (dimRemapping.size() <= kvp->second)
dimRemapping.resize(kvp->second + 1);
dimRemapping[kvp->second] = applyOneDim(kvp->first);
dimRemapping[kvp->second] = renumberOneDim(kvp->first);
}
unsigned numSymbols = concatenatedSymbols.size();
unsigned numOtherSymbols = other.concatenatedSymbols.size();
@@ -342,63 +337,209 @@ AffineMap AffineApplyNormalizer::renumber(const AffineApplyNormalizer &other) {
dimRemapping.size(), symRemapping.size());
}
AffineMap AffineApplyNormalizer::renumber(const AffineApplyOp &app) {
assert(app.getAffineMap().getRangeSizes().empty() && "Non-empty range sizes");
// Create the AffineApplyNormalizer for the operands of this
// AffineApplyOp and combine it with the current AffineApplyNormalizer.
SmallVector<Value *, 8> operands(
const_cast<AffineApplyOp &>(app).getOperands().begin(),
const_cast<AffineApplyOp &>(app).getOperands().end());
AffineApplyNormalizer normalizer(app.getAffineMap(), operands);
return renumber(normalizer);
// Gather the positions of the operands that are produced by an AffineApplyOp.
static llvm::SetVector<unsigned>
indicesFromAffineApplyOp(ArrayRef<Value *> operands) {
llvm::SetVector<unsigned> res;
for (auto en : llvm::enumerate(operands)) {
auto *t = en.value();
if (t->getDefiningInst() && t->getDefiningInst()->isa<AffineApplyOp>()) {
res.insert(en.index());
}
}
return res;
}
// Support the special case of a symbol coming from an AffineApplyOp that needs
// to be composed into the current AffineApplyOp.
// This case is handled by rewriting all such symbols into dims for the purpose
// of allowing mathematical AffineMap composition.
// Returns an AffineMap where symbols that come from an AffineApplyOp have been
// rewritten as dims and are ordered after the original dims.
// TODO(andydavis,ntv): This promotion makes AffineMap lose track of which
// symbols are represented as dims. This loss is static but can still be
// recovered dynamically (with `isValidSymbol`). Still this is annoying for the
// semi-affine map case. A dynamic canonicalization of all dims that are valid
// symbols (a.k.a `canonicalizePromotedSymbols`) into symbols helps and even
// results in better simplifications and foldings. But we should evaluate
// whether this behavior is what we really want after more use.
static AffineMap promoteComposedSymbolsAsDims(AffineMap map,
ArrayRef<Value *> symbols) {
if (symbols.empty()) {
return map;
}
// Sanity check on symbols.
for (auto *sym : symbols) {
assert(isValidSymbol(sym) && "Expected only valid symbols");
(void)sym;
}
// Extract the symbol positions that come from an AffineApplyOp and
// need to be rewritten as dims.
auto symPositions = indicesFromAffineApplyOp(symbols);
if (symPositions.empty()) {
return map;
}
// Create the new map by replacing each symbol at pos by the next new dim.
unsigned numDims = map.getNumDims();
unsigned numSymbols = map.getNumSymbols();
unsigned numNewDims = 0;
unsigned numNewSymbols = 0;
SmallVector<AffineExpr, 8> symReplacements(numSymbols);
for (unsigned i = 0; i < numSymbols; ++i) {
symReplacements[i] =
symPositions.count(i) > 0
? getAffineDimExpr(numDims + numNewDims++, map.getContext())
: getAffineSymbolExpr(numNewSymbols++, map.getContext());
}
assert(numSymbols >= numNewDims);
AffineMap newMap = map.replaceDimsAndSymbols(
{}, symReplacements, numDims + numNewDims, numNewSymbols);
return newMap;
}
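As a worked example of this promotion (maps chosen for illustration): given a map
  (d0)[s0, s1] -> (d0 + s0 + s1)
where only the operand bound to s0 is produced by an affine.apply, the rewrite yields
  (d0, d1)[s0] -> (d0 + d1 + s0)
The promoted symbol becomes a new dim ordered after the original dims, the remaining
symbol is renumbered, and the operand order is left unchanged.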
/// The AffineNormalizer composes AffineApplyOp recursively. Its purpose is to
/// keep a correspondence between the mathematical `map` and the `operands` of
/// a given AffineApplyOp. This correspondence is maintained by iterating over
/// the operands and forming an `auxiliaryMap` that can be composed
/// mathematically with `map`. To keep this correspondence in cases where
/// symbols are produced by affine.apply operations, we perform a local rewrite
/// of symbols as dims.
///
/// Rationale for locally rewriting symbols as dims:
/// ================================================
/// The mathematical composition of AffineMap must always concatenate symbols
/// because it does not have enough information to do otherwise. For example,
/// composing `(d0)[s0] -> (d0 + s0)` with itself must produce
/// `(d0)[s0, s1] -> (d0 + s0 + s1)`.
///
/// The result is only equivalent to `(d0)[s0] -> (d0 + 2 * s0)` when
/// applied to the same mlir::Value* for both s0 and s1.
/// As a consequence mathematical composition of AffineMap always concatenates
/// symbols.
///
/// When AffineMaps are used in an AffineApplyOp, however, they may specify
/// composition via symbols, which is ambiguous mathematically. This corner case
/// is handled by locally rewriting such symbols that come from AffineApplyOp
/// into dims and composing through dims.
/// TODO(andydavis, ntv): Composition via symbols comes at a significant cost
/// in code complexity. Alternatively, we should investigate whether we want to
/// explicitly disallow symbols coming from affine.apply and instead force the
/// user to compose symbols beforehand. The annoyances may be small (i.e. 1 or 2
/// extra API calls for such uses, which haven't popped up until now) and the
/// benefit potentially big: simpler and more maintainable code for a
/// non-trivial, recursive, procedure.
AffineApplyNormalizer::AffineApplyNormalizer(AffineMap map,
ArrayRef<Value *> operands)
: AffineApplyNormalizer() {
static_assert(kMaxAffineApplyDepth > 0, "kMaxAffineApplyDepth must be > 0");
assert(map.getRangeSizes().empty() && "Unbounded map expected");
assert(map.getNumInputs() == operands.size() &&
"number of operands does not match the number of map inputs");
SmallVector<AffineExpr, 8> exprs;
for (auto en : llvm::enumerate(operands)) {
auto *t = en.value();
assert(t->getType().isIndex());
bool operandNotFromAffineApply =
!t->getDefiningInst() || !t->getDefiningInst()->isa<AffineApplyOp>();
if (operandNotFromAffineApply ||
affineApplyDepth() > kMaxAffineApplyDepth) {
if (en.index() < map.getNumDims()) {
exprs.push_back(applyOneDim(t));
LLVM_DEBUG(map.print(dbgs() << "\nInput map: "));
// Promote symbols that come from an AffineApplyOp to dims by rewriting the
// map to always refer to:
// (dims, symbols coming from AffineApplyOp, other symbols).
// The order of operands can remain unchanged.
// This is a simplification that relies on 2 ordering properties:
// 1. rewritten symbols always appear after the original dims in the map;
// 2. operands are traversed in order and either dispatched to:
// a. auxiliaryExprs (dims and symbols rewritten as dims);
// b. concatenatedSymbols (all other symbols)
// This allows operand order to remain unchanged.
unsigned numDimsBeforeRewrite = map.getNumDims();
map = promoteComposedSymbolsAsDims(map,
operands.take_back(map.getNumSymbols()));
LLVM_DEBUG(map.print(dbgs() << "\nRewritten map: "));
SmallVector<AffineExpr, 8> auxiliaryExprs;
bool furtherCompose = (affineApplyDepth() <= kMaxAffineApplyDepth);
// We fully spell out the 2 cases below. In this particular instance a little
// code duplication greatly improves readability.
// Note that the first branch would disappear if we only supported full
// composition (i.e. infinite kMaxAffineApplyDepth).
if (!furtherCompose) {
// 1. Only dispatch dims or symbols.
for (auto en : llvm::enumerate(operands)) {
auto *t = en.value();
assert(t->getType().isIndex());
bool isDim = (en.index() < map.getNumDims());
if (isDim) {
// a. The mathematical composition of AffineMap composes dims.
auxiliaryExprs.push_back(renumberOneDim(t));
} else {
// Composition of mathematical symbols must occur by concatenation.
// A subsequent canonicalization will drop duplicates. Duplicates are
// not dropped here because it would just amount to code duplication.
// b. The mathematical composition of AffineMap concatenates symbols.
// We do the same for symbol operands.
concatenatedSymbols.push_back(t);
}
} else {
auto *inst = t->getDefiningInst();
auto app = inst->dyn_cast<AffineApplyOp>();
auto tmpMap = renumber(*app);
exprs.push_back(tmpMap.getResult(0));
}
} else {
assert(numDimsBeforeRewrite <= operands.size());
// 2. Compose AffineApplyOps and dispatch dims or symbols.
for (unsigned i = 0, e = operands.size(); i < e; ++i) {
auto *t = operands[i];
auto affineApply = t->getDefiningInst()
? t->getDefiningInst()->dyn_cast<AffineApplyOp>()
: OpPointer<AffineApplyOp>();
if (affineApply) {
// a. Compose affine.apply instructions.
LLVM_DEBUG(affineApply->getInstruction()->print(
dbgs() << "\nCompose AffineApplyOp recursively: "));
AffineMap affineApplyMap = affineApply->getAffineMap();
SmallVector<Value *, 8> affineApplyOperands(
affineApply->getOperands().begin(),
affineApply->getOperands().end());
AffineApplyNormalizer normalizer(affineApplyMap, affineApplyOperands);
LLVM_DEBUG(normalizer.affineMap.print(
dbgs() << "\nRenumber into current normalizer: "));
auto renumberedMap = renumber(normalizer);
LLVM_DEBUG(
renumberedMap.print(dbgs() << "\nRecursive composition yields: "));
auxiliaryExprs.push_back(renumberedMap.getResult(0));
} else {
if (i < numDimsBeforeRewrite) {
// b. The mathematical composition of AffineMap composes dims.
auxiliaryExprs.push_back(renumberOneDim(t));
} else {
// c. The mathematical composition of AffineMap concatenates symbols.
// We do the same for symbol operands.
concatenatedSymbols.push_back(t);
}
}
}
}
// Map is already composed.
if (exprs.empty()) {
// Early exit if `map` is already composed.
if (auxiliaryExprs.empty()) {
affineMap = map;
return;
}
assert(concatenatedSymbols.size() >= map.getNumSymbols() &&
"Unexpected number of concatenated symbols");
auto numDims = dimValueToPosition.size();
auto numSymbols = concatenatedSymbols.size() - map.getNumSymbols();
auto exprsMap = AffineMap::get(numDims, numSymbols, exprs, {});
LLVM_DEBUG(map.print(dbgs() << "\nCompose map: "));
LLVM_DEBUG(exprsMap.print(dbgs() << "\nWith map: "));
LLVM_DEBUG(map.compose(exprsMap).print(dbgs() << "\nResult: "));
auto auxiliaryMap = AffineMap::get(numDims, numSymbols, auxiliaryExprs, {});
LLVM_DEBUG(map.print(dbgs() << "\nCompose map: "));
LLVM_DEBUG(auxiliaryMap.print(dbgs() << "\nWith map: "));
LLVM_DEBUG(map.compose(auxiliaryMap).print(dbgs() << "\nResult: "));
// TODO(andydavis,ntv): Disabling simplification results in major speed gains.
// Another option is to cache the results as it is expected a lot of redundant
// work is performed in practice.
affineMap = simplifyAffineMap(map.compose(auxiliaryMap));
affineMap = simplifyAffineMap(map.compose(exprsMap));
LLVM_DEBUG(affineMap.print(dbgs() << "\nSimplified result: "));
LLVM_DEBUG(dbgs() << "\n");
}
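Composed end to end, this collapses chains of affine.apply into a single map. For
instance, the mix_dims_and_symbols_b test added in this CL:
  %a = affine.apply (d0)[s0] -> (d0 - 1 + 42 * s0) (%arg0)[%arg1]
  %b = affine.apply (d0) -> (d0 + 7) (%a)
is rewritten, once the dim operand %arg0 is also canonicalized into a symbol, to a single
  affine.apply ()[s0, s1] -> (s1 + s0 * 42 + 6) ()[%arg1, %arg0]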
@@ -437,6 +578,50 @@ mlir::makeComposedAffineApply(FuncBuilder *b, Location loc, AffineMap map,
return b->create<AffineApplyOp>(loc, normalizedMap, normalizedOperands);
}
// A symbol may appear as a dim in affine.apply operations. This function
// canonicalizes dims that are valid symbols into actual symbols.
static void
canonicalizePromotedSymbols(AffineMap *map,
llvm::SmallVectorImpl<Value *> *operands) {
if (!map || operands->empty())
return;
assert(map->getNumInputs() == operands->size() &&
"map inputs must match number of operands");
auto *context = map->getContext();
SmallVector<Value *, 8> resultOperands;
resultOperands.reserve(operands->size());
SmallVector<Value *, 8> remappedSymbols;
remappedSymbols.reserve(operands->size());
unsigned nextDim = 0;
unsigned nextSym = 0;
unsigned oldNumSyms = map->getNumSymbols();
SmallVector<AffineExpr, 8> dimRemapping(map->getNumDims());
for (unsigned i = 0, e = map->getNumInputs(); i != e; ++i) {
if (i < map->getNumDims()) {
if (isValidSymbol((*operands)[i])) {
// This is a valid symbol that appears as a dim; canonicalize it.
dimRemapping[i] = getAffineSymbolExpr(oldNumSyms + nextSym++, context);
remappedSymbols.push_back((*operands)[i]);
} else {
dimRemapping[i] = getAffineDimExpr(nextDim++, context);
resultOperands.push_back((*operands)[i]);
}
} else {
resultOperands.push_back((*operands)[i]);
}
}
resultOperands.append(remappedSymbols.begin(), remappedSymbols.end());
*operands = resultOperands;
*map = map->replaceDimsAndSymbols(dimRemapping, {}, nextDim,
oldNumSyms + nextSym);
assert(map->getNumInputs() == operands->size() &&
"map inputs must match number of operands");
}
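For example (mirroring the partial_fold_map test below), an affine.apply whose dim
operands are both valid symbols, here a function argument and a constant:
  %2 = affine.apply (d0, d1) -> (d0 - d1) (%arg1, %c42)
is canonicalized into the purely symbolic form
  affine.apply ()[s0, s1] -> (s0 - s1) ()[%arg1, %c42]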
void mlir::canonicalizeMapAndOperands(
AffineMap *map, llvm::SmallVectorImpl<Value *> *operands) {
if (!map || operands->empty())
@@ -445,6 +630,8 @@ void mlir::canonicalizeMapAndOperands(
assert(map->getNumInputs() == operands->size() &&
"map inputs must match number of operands");
canonicalizePromotedSymbols(map, operands);
// Check to see what dims are used.
llvm::SmallBitVector usedDims(map->getNumDims());
llvm::SmallBitVector usedSyms(map->getNumSymbols());
@@ -1024,7 +1211,6 @@ void mlir::extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts,
ivs->push_back(forInst->getInductionVar());
}
//===----------------------------------------------------------------------===//
// AffineIfOp
//===----------------------------------------------------------------------===//


@@ -23,10 +23,25 @@
// CHECK-DAG: [[MAP13B:#map[0-9]+]] = (d0) -> ((d0 * 4 - 4) floordiv 3)
// Affine maps for test case: arg_used_as_dim_and_symbol
// CHECK-DAG: [[MAP14:#map[0-9]+]] = (d0, d1, d2)[s0, s1] -> (-d0 - d1 + d2 + s0 + s1)
// CHECK-DAG: [[MAP14:#map[0-9]+]] = (d0) -> (d0)
// Affine maps for test case: partial_fold_map
// CHECK-DAG: [[MAP15:#map[0-9]+]] = (d0, d1) -> (d0 - d1)
// CHECK-DAG: [[MAP15:#map[0-9]+]] = ()[s0, s1] -> (s0 - s1)
// Affine maps for test cases: symbolic_composition_*
// CHECK-DAG: [[map_symbolic_composition_a:#map[0-9]+]] = ()[s0] -> (s0 * 512)
// CHECK-DAG: [[map_symbolic_composition_b:#map[0-9]+]] = ()[s0] -> (s0 * 4)
// CHECK-DAG: [[map_symbolic_composition_c:#map[0-9]+]] = ()[s0, s1] -> (s0 * 3 + s1)
// CHECK-DAG: [[map_symbolic_composition_d:#map[0-9]+]] = ()[s0, s1] -> (s1 * 3 + s0)
// Affine maps for test cases: map_mix_dims_and_symbols_*
// CHECK-DAG: [[map_mix_dims_and_symbols_b:#map[0-9]+]] = ()[s0, s1] -> (s1 + s0 * 42 + 6)
// CHECK-DAG: [[map_mix_dims_and_symbols_c:#map[0-9]+]] = ()[s0, s1] -> (s1 * 4 + s0 * 168 - 4)
// CHECK-DAG: [[map_mix_dims_and_symbols_d:#map[0-9]+]] = ()[s0, s1] -> ((s1 + s0 * 42 + 6) ceildiv 8)
// CHECK-DAG: [[map_mix_dims_and_symbols_e:#map[0-9]+]] = ()[s0, s1] -> ((s1 * 4 + s0 * 168 - 4) floordiv 3)
// Affine maps for test case: symbolic_semi_affine
// CHECK-DAG: [[symbolic_semi_affine:#map[0-9]+]] = (d0)[s0] -> (d0 floordiv (s0 + 1))
// CHECK-LABEL: func @compose_affine_maps_1dto2d_no_symbols() {
func @compose_affine_maps_1dto2d_no_symbols() {
@@ -223,7 +238,7 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
(%i0, %i1)[%arg1, %c9]
%4 = affine.apply (d0, d1, d3) -> (d3 - (d0 + d1))
(%arg1, %c9, %3)
// CHECK: [[I0:%[0-9]+]] = affine.apply [[MAP14]](%arg1, %c9, %i1)[%arg1, %c9]
// CHECK: [[I0:%[0-9]+]] = affine.apply [[MAP14]](%i1)
// CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I0]], %arg1{{\]}}
%5 = load %1[%4, %arg1] : memref<100x100xf32, 1>
}
@@ -256,12 +271,124 @@ func @partial_fold_map(%arg0: memref<index>, %arg1: index, %arg2: index) {
%c42 = constant 42 : index
%2 = affine.apply (d0, d1) -> (d0 - d1) (%arg1, %c42)
store %2, %arg0[] : memref<index>
// CHECK: [[X:%[0-9]+]] = affine.apply [[MAP15]](%arg1, %c42)
// CHECK: [[X:%[0-9]+]] = affine.apply [[MAP15]]()[%arg1, %c42]
// CHECK-NEXT: store [[X]], %arg0
return
}
// CHECK-LABEL: func @symbolic_composition_a(%arg0: index, %arg1: index) -> index {
func @symbolic_composition_a(%arg0: index, %arg1: index) -> index {
%0 = affine.apply (d0) -> (d0 * 4)(%arg0)
%1 = affine.apply ()[s0, s1] -> (8 * s0)()[%0, %arg0]
%2 = affine.apply ()[s0, s1] -> (16 * s1)()[%arg1, %1]
// CHECK: %{{.*}} = affine.apply [[map_symbolic_composition_a]]()[%arg0]
return %2 : index
}
// CHECK-LABEL: func @symbolic_composition_b(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> index {
func @symbolic_composition_b(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> index {
%0 = affine.apply (d0) -> (d0)(%arg0)
%1 = affine.apply ()[s0, s1, s2, s3] -> (s0 + s1 + s2 + s3)()[%0, %0, %0, %0]
// CHECK: %{{.*}} = affine.apply [[map_symbolic_composition_b]]()[%arg0]
return %1 : index
}
// CHECK-LABEL: func @symbolic_composition_c(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> index {
func @symbolic_composition_c(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> index {
%0 = affine.apply (d0) -> (d0)(%arg0)
%1 = affine.apply (d0) -> (d0)(%arg1)
%2 = affine.apply ()[s0, s1, s2, s3] -> (s0 + s1 + s2 + s3)()[%0, %0, %0, %1]
// CHECK: %{{.*}} = affine.apply [[map_symbolic_composition_c]]()[%arg0, %arg1]
return %2 : index
}
// CHECK-LABEL: func @symbolic_composition_d(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> index {
func @symbolic_composition_d(%arg0: index, %arg1: index, %arg2: index, %arg3: index) -> index {
%0 = affine.apply (d0) -> (d0)(%arg0)
%1 = affine.apply ()[s0] -> (s0)()[%arg1]
%2 = affine.apply ()[s0, s1, s2, s3] -> (s0 + s1 + s2 + s3)()[%0, %0, %0, %1]
// CHECK: %{{.*}} = affine.apply [[map_symbolic_composition_d]]()[%arg1, %arg0]
return %2 : index
}
// CHECK-LABEL: func @mix_dims_and_symbols_b(%arg0: index, %arg1: index) -> index {
func @mix_dims_and_symbols_b(%arg0: index, %arg1: index) -> index {
%a = affine.apply (d0)[s0] -> (d0 - 1 + 42 * s0) (%arg0)[%arg1]
%b = affine.apply (d0) -> (d0 + 7) (%a)
// CHECK: {{.*}} = affine.apply [[map_mix_dims_and_symbols_b]]()[%arg1, %arg0]
return %b : index
}
// CHECK-LABEL: func @mix_dims_and_symbols_c(%arg0: index, %arg1: index) -> index {
func @mix_dims_and_symbols_c(%arg0: index, %arg1: index) -> index {
%a = affine.apply (d0)[s0] -> (d0 - 1 + 42 * s0) (%arg0)[%arg1]
%b = affine.apply (d0) -> (d0 + 7) (%a)
%c = affine.apply (d0) -> (d0 * 4) (%a)
// CHECK: {{.*}} = affine.apply [[map_mix_dims_and_symbols_c]]()[%arg1, %arg0]
return %c : index
}
// CHECK-LABEL: func @mix_dims_and_symbols_d(%arg0: index, %arg1: index) -> index {
func @mix_dims_and_symbols_d(%arg0: index, %arg1: index) -> index {
%a = affine.apply (d0)[s0] -> (d0 - 1 + 42 * s0) (%arg0)[%arg1]
%b = affine.apply (d0) -> (d0 + 7) (%a)
%c = affine.apply (d0) -> (d0 * 4) (%a)
%d = affine.apply ()[s0] -> (s0 ceildiv 8) ()[%b]
// CHECK: {{.*}} = affine.apply [[map_mix_dims_and_symbols_d]]()[%arg1, %arg0]
return %d : index
}
// CHECK-LABEL: func @mix_dims_and_symbols_e(%arg0: index, %arg1: index) -> index {
func @mix_dims_and_symbols_e(%arg0: index, %arg1: index) -> index {
%a = affine.apply (d0)[s0] -> (d0 - 1 + 42 * s0) (%arg0)[%arg1]
%b = affine.apply (d0) -> (d0 + 7) (%a)
%c = affine.apply (d0) -> (d0 * 4) (%a)
%d = affine.apply ()[s0] -> (s0 ceildiv 8) ()[%b]
%e = affine.apply (d0) -> (d0 floordiv 3) (%c)
// CHECK: {{.*}} = affine.apply [[map_mix_dims_and_symbols_e]]()[%arg1, %arg0]
return %e : index
}
// CHECK-LABEL: func @mix_dims_and_symbols_f(%arg0: index, %arg1: index) -> index {
func @mix_dims_and_symbols_f(%arg0: index, %arg1: index) -> index {
%a = affine.apply (d0)[s0] -> (d0 - 1 + 42 * s0) (%arg0)[%arg1]
%b = affine.apply (d0) -> (d0 + 7) (%a)
%c = affine.apply (d0) -> (d0 * 4) (%a)
%d = affine.apply ()[s0] -> (s0 ceildiv 8) ()[%b]
%e = affine.apply (d0) -> (d0 floordiv 3) (%c)
%f = affine.apply (d0, d1)[s0, s1] -> (d0 - s1 + d1 - s0) (%d, %e)[%e, %d]
// CHECK: {{.*}} = constant 0 : index
return %f : index
}
// CHECK-LABEL: func @mix_dims_and_symbols_g(%arg0: index, %arg1: index) -> (index, index, index) {
func @mix_dims_and_symbols_g(%M: index, %N: index) -> (index, index, index) {
%K = affine.apply (d0) -> (4*d0) (%M)
%res1 = affine.apply ()[s0, s1] -> (4 * s0)()[%N, %K]
%res2 = affine.apply ()[s0, s1] -> (s1)()[%N, %K]
%res3 = affine.apply ()[s0, s1] -> (1024)()[%N, %K]
// CHECK-DAG: {{.*}} = constant 1024 : index
// CHECK-DAG: {{.*}} = affine.apply [[map_symbolic_composition_b]]()[%arg1]
// CHECK-DAG: {{.*}} = affine.apply [[map_symbolic_composition_b]]()[%arg0]
return %res1, %res2, %res3 : index, index, index
}
// CHECK-LABEL: func @symbolic_semi_affine(%arg0: index, %arg1: index, %arg2: memref<?xf32>) {
func @symbolic_semi_affine(%M: index, %N: index, %A: memref<?xf32>) {
%f1 = constant 1.0 : f32
for %i0 = 1 to 100 {
%1 = affine.apply ()[s0] -> (s0 + 1) ()[%M]
%2 = affine.apply (d0)[s0] -> (d0 floordiv s0) (%i0)[%1]
// CHECK-DAG: {{.*}} = affine.apply [[symbolic_semi_affine]](%i0)[%arg0]
store %f1, %A[%2] : memref<?xf32>
}
return
}
// -----
// CHECK: [[MAP0:#map[0-9]+]] = ()[s0] -> (0, s0)
@@ -294,5 +421,4 @@ func @constant_fold_bounds(%N : index) {
"foo"(%k, %c3) : (index, index) -> ()
}
return
}
}


@@ -165,9 +165,9 @@ TEST_FUNC(dynamic_for_func_args) {
// clang-format off
// CHECK-LABEL: func @dynamic_for_func_args(%arg0: index, %arg1: index) {
// CHECK: for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
// CHECK: {{.*}} = affine.apply (d0) -> (d0 * 3)(%arg0)
// CHECK: {{.*}} = affine.apply (d0, d1) -> (d0 * 3 + d1)(%arg0, %arg1)
// CHECK: {{.*}} = affine.apply (d0) -> (d0 + 3)(%arg0)
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 * 3)()[%arg0]
// CHECK: {{.*}} = affine.apply ()[s0, s1] -> (s1 + s0 * 3)()[%arg0, %arg1]
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 + 3)()[%arg0]
// clang-format on
f->print(llvm::outs());
}
@@ -198,8 +198,8 @@ TEST_FUNC(dynamic_for) {
// clang-format off
// CHECK-LABEL: func @dynamic_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
// CHECK: %0 = affine.apply (d0, d1) -> (d0 - d1)(%arg0, %arg1)
// CHECK-NEXT: %1 = affine.apply (d0, d1) -> (d0 + d1)(%arg2, %arg3)
// CHECK: %0 = affine.apply ()[s0, s1] -> (s0 - s1)()[%arg0, %arg1]
// CHECK-NEXT: %1 = affine.apply ()[s0, s1] -> (s0 + s1)()[%arg2, %arg3]
// CHECK-NEXT: for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
// clang-format on
f->print(llvm::outs());
@@ -401,33 +401,33 @@ TEST_FUNC(tile_2d) {
// clang-format off
// CHECK-LABEL: func @tile_2d
// CHECK: [[ZERO:%.*]] = constant 0 : index
// CHECK: [[M:%[0-9]+]] = dim %arg0, 0 : memref<?x?x?xf32>
// CHECK: [[N:%[0-9]+]] = dim %arg0, 1 : memref<?x?x?xf32>
// CHECK: [[P:%[0-9]+]] = dim %arg0, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = (d0) -> (d0)([[ZERO]]) to (d0) -> (d0)([[M]]) step 512 {
// CHECK-NEXT: for %i1 = (d0) -> (d0)([[ZERO]]) to (d0) -> (d0)([[N]]) step 1024 {
// CHECK-NEXT: for %i2 = (d0) -> (d0)([[ZERO]]) to (d0) -> (d0)([[P]]) {
// CHECK-NEXT: for %i3 = max (d0, d1) -> (d0, d1)([[ZERO]], %i0) to min (d0, d1) -> (d0, d1 + 512)(%0, %i0) step 16 {
// CHECK-NEXT: for %i4 = max (d0, d1) -> (d0, d1)([[ZERO]], %i1) to min (d0, d1) -> (d0, d1 + 1024)(%1, %i1) step 32 {
// CHECK-NEXT: for %i5 = max (d0, d1, d2) -> (d0, d1, d2)([[ZERO]], %i1, %i4) to min (d0, d1, d2) -> (d0, d1 + 1024, d2 + 32)(%1, %i1, %i4) {
// CHECK-NEXT: for %i6 = max (d0, d1, d2) -> (d0, d1, d2)([[ZERO]], %i0, %i3) to min (d0, d1, d2) -> (d0, d1 + 512, d2 + 16)(%0, %i0, %i3) {
// CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK-NEXT: store {{.*}}, {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
// CHECK: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i7 = (d0) -> (d0)([[ZERO]]) to (d0) -> (d0)(%2) {
// CHECK-NEXT: for %i8 = max (d0, d1) -> (d0, d1)([[ZERO]], %i0) to min (d0, d1) -> (d0, d1 + 512)(%0, %i0) {
// CHECK-NEXT: for %i9 = max (d0, d1) -> (d0, d1)([[ZERO]], %i1) to min (d0, d1) -> (d0, d1 + 1024)(%1, %i1) {
// CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}}= addf {{.*}}, {{.*}} : f32
// CHECK-NEXT: store {{.*}}, {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
// CHECK: %[[ZERO:.*]] = constant 0 : index
// CHECK: %[[M:[0-9]+]] = dim %arg0, 0 : memref<?x?x?xf32>
// CHECK-NEXT: %[[N:[0-9]+]] = dim %arg0, 1 : memref<?x?x?xf32>
// CHECK-NEXT: %[[P:[0-9]+]] = dim %arg0, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[M]]) step 512 {
// CHECK-NEXT: for %i1 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[N]]) step 1024 {
// CHECK-NEXT: for %i2 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
// CHECK-NEXT: for %i3 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] step 16 {
// CHECK-NEXT: for %i4 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] step 32 {
// CHECK-NEXT: for %i5 = max (d0, d1)[s0] -> (s0, d0, d1)(%i1, %i4)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 1024, d1 + 32)(%i1, %i4)[%[[N]]] {
// CHECK-NEXT: for %i6 = max (d0, d1)[s0] -> (s0, d0, d1)(%i0, %i3)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 512, d1 + 16)(%i0, %i3)[%[[M]]] {
// CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}}, {{.*}} : f32
// CHECK-NEXT: store {{.*}}, {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
// CHECK: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i7 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
// CHECK-NEXT: for %i8 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] {
// CHECK-NEXT: for %i9 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] {
// CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}}= addf {{.*}}, {{.*}} : f32
// CHECK-NEXT: store {{.*}}, {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
// clang-format on
f->print(llvm::outs());
}


@@ -81,9 +81,9 @@ TEST_FUNC(builder_dynamic_for_func_args) {
// clang-format off
// CHECK-LABEL: func @builder_dynamic_for_func_args(%arg0: index, %arg1: index) {
// CHECK: for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
// CHECK: {{.*}} = affine.apply (d0) -> (d0 * 3)(%arg0)
// CHECK: {{.*}} = affine.apply (d0, d1) -> (d0 * 3 + d1)(%arg0, %arg1)
// CHECK: {{.*}} = affine.apply (d0) -> (d0 + 3)(%arg0)
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 * 3)()[%arg0]
// CHECK: {{.*}} = affine.apply ()[s0, s1] -> (s1 + s0 * 3)()[%arg0, %arg1]
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 + 3)()[%arg0]
// CHECK: for %i1 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 2 {
// CHECK: {{.*}} = affine.apply (d0, d1) -> ((d0 + d1 * 3) floordiv 32)(%i0, %i1)
// CHECK: {{.*}} = affine.apply (d0, d1) -> (((d0 + d1 * 3) floordiv 32) * 31)(%i0, %i1)
@@ -117,8 +117,8 @@ TEST_FUNC(builder_dynamic_for) {
// clang-format off
// CHECK-LABEL: func @builder_dynamic_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
// CHECK: %0 = affine.apply (d0, d1) -> (d0 - d1)(%arg0, %arg1)
// CHECK-NEXT: %1 = affine.apply (d0, d1) -> (d0 + d1)(%arg2, %arg3)
// CHECK: %0 = affine.apply ()[s0, s1] -> (s0 - s1)()[%arg0, %arg1]
// CHECK-NEXT: %1 = affine.apply ()[s0, s1] -> (s0 + s1)()[%arg2, %arg3]
// CHECK-NEXT: for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
// clang-format on
f->print(llvm::outs());


@@ -1,7 +1,7 @@
// RUN: mlir-opt %s -lower-vector-transfers | FileCheck %s
// CHECK: #[[ADD:map[0-9]+]] = (d0, d1) -> (d0 + d1)
// CHECK: #[[SUB:map[0-9]+]] = (d0) -> (d0 - 1)
// CHECK: #[[SUB:map[0-9]+]] = ()[s0] -> (s0 - 1)
// CHECK-LABEL: func @materialize_read_1d() {
func @materialize_read_1d() {
@@ -74,19 +74,19 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]](%[[D0]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%[[D0]]]
// CHECK-NEXT: {{.*}} = select
// CHECK-NEXT: %[[L0:.*]] = select
//
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]](%[[D1]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%[[D1]]]
// CHECK-NEXT: {{.*}} = select
// CHECK-NEXT: %[[L1:.*]] = select
//
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]](%[[D2]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%[[D2]]]
// CHECK-NEXT: {{.*}} = select
// CHECK-NEXT: %[[L2:.*]] = select
//
@@ -95,7 +95,7 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I3]], %[[I6]])
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I3]], %[[I6]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]](%[[D3]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%[[D3]]]
// CHECK-NEXT: {{.*}} = select
// CHECK-NEXT: %[[L3:.*]] = select
//
@@ -154,7 +154,7 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]](%[[D0]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%[[D0]]]
// CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
// CHECK-NEXT: %[[S0:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
//
@@ -163,13 +163,13 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I1]], %[[I5]])
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I1]], %[[I5]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]](%[[D1]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%[[D1]]]
// CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
// CHECK-NEXT: %[[S1:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
//
// CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %[[C0]] : index
// CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %3 : index
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]](%[[D2]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%[[D2]]]
// CHECK-NEXT: {{.*}} = select {{.*}}, %[[I2]], {{.*}} : index
// CHECK-NEXT: %[[S2:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
//
@@ -178,7 +178,7 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I3]], %[[I6]])
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I3]], %[[I6]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]](%[[D3]])
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%[[D3]]]
// CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index
// CHECK-NEXT: %[[S3:.*]] = select {{.*}}, %[[C0]], {{.*}} : index
//


@@ -542,16 +542,21 @@ func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
// UNROLL-BY-4-NOT: for
// UNROLL-BY-4: return
// Commented due to b/128340045
// xUNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor
// func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
// %K = affine.apply (d0) -> (4*d0) (%M)
// for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
// "foo"() : () -> ()
// }
// return
//}
// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor_2
func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) {
%K = affine.apply (d0) -> (4*d0) (%M)
for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
"foo"() : () -> ()
}
// UNROLL-BY-4: for %i0 = 0 to min
// UNROLL-BY-4-NEXT: "foo"
// UNROLL-BY-4-NEXT: "foo"
// UNROLL-BY-4-NEXT: "foo"
// UNROLL-BY-4-NEXT: "foo"
// UNROLL-BY-4-NOT: for
// UNROLL-BY-4: return
return
}
// UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop()
func @unroll_by_one_should_promote_single_iteration_loop() {