[MLIR] Propagate input side effect information
Summary: Previously, operations like std.load generated methods for querying their effects but did not inherit from the side effect interfaces when their parameters were decorated with that information. As a result, passes had no information about the side effects of std.load/std.store and had to treat them conservatively. This change adds the interface inheritance when generating those methods. As a side effect, many tests are modified: they used std.load for testing, and that operation is now folded away as part of pattern rewriting. The tests are updated to store or to return the result of the std.load.

Reviewers: mravishankar, antiagainst, nicolasvasilache, herhut, aartbik, ftynse!

Subscribers: mehdi_amini, rriddle, jpienaar, shauheen, antiagainst, nicolasvasilache, csigg, arpith-jacob, mgester, lucyrfox, liufengdb, Joonsoo, bader, grosul1, frgossen, Kayjukh, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78802
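For context, the ODS pattern this patch is about looks roughly like the following sketch (a hypothetical op definition, not taken from this patch; the dialect base class and description string are illustrative, and the exact trait name is an assumption based on MLIR's side effect interfaces of this era):

  // TableGen (ODS): decorating an argument with an effect such as MemRead
  // generates effect-query methods on the op. With this patch, the generated
  // op class additionally inherits the effect's interface trait (e.g.
  // MemoryEffectOpInterface::Trait), so generic passes can query the effects.
  def MyLoadOp : MyDialect_Op<"my_load"> {
    let arguments = (ins
        Arg<AnyMemRef, "the reference to load from", [MemRead]>:$memref,
        Variadic<Index>:$indices);
    let results = (outs AnyType:$result);
  }

One visible consequence, which drives the test churn below: a std.load whose result is unused is now recognized as removable and is folded away, so the tests keep their loads alive by storing or returning the loaded value.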
@@ -118,6 +118,9 @@ class SideEffect<EffectOpInterfaceBase interface, string effectName,
   /// The name of the base effects class.
   string baseEffectName = interface.baseEffectName;

+  /// The parent interface that the effect belongs to.
+  string interfaceTrait = interface.trait;
+
   /// The derived effect that is being applied.
   string effect = effectName;

@@ -49,6 +49,9 @@ class DenseMap;
 template <typename Fn> class function_ref;
 template <typename IteratorT> class iterator_range;
 template <typename T, typename ResultT> class TypeSwitch;
+class MallocAllocator;
+template <typename AllocatorTy>
+class StringSet;

 // Other common classes.
 class raw_ostream;
@@ -74,6 +77,8 @@ template <typename KeyT, typename ValueT,
 using DenseMap = llvm::DenseMap<KeyT, ValueT, KeyInfoT, BucketT>;
 template <typename ValueT, typename ValueInfoT = DenseMapInfo<ValueT>>
 using DenseSet = llvm::DenseSet<ValueT, ValueInfoT>;
+template <typename AllocatorTy = llvm::MallocAllocator>
+using StringSet = llvm::StringSet<AllocatorTy>;
 template <typename Fn> using function_ref = llvm::function_ref<Fn>;
 using llvm::iterator_range;
 using llvm::MutableArrayRef;

@@ -26,6 +26,7 @@
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"

 #include <string>

@@ -157,7 +158,8 @@ public:

 private:
   StringRef extraClassDeclaration;
-  SmallVector<std::string, 4> traits;
+  SmallVector<std::string, 4> traitsVec;
+  StringSet<> traitsSet;
   bool hasOperandAdaptor;
 };

@@ -29,6 +29,9 @@ public:
   // Return the name of the base C++ effect.
   StringRef getBaseEffectName() const;

+  // Return the name of the Interface that the effect belongs to.
+  StringRef getInterfaceTrait() const;
+
   // Return the name of the resource class.
   StringRef getResource() const;

@@ -195,12 +195,15 @@ void tblgen::OpClass::setHasOperandAdaptorClass(bool has) {
   hasOperandAdaptor = has;
 }

 // Adds the given trait to this op.
-void tblgen::OpClass::addTrait(Twine trait) { traits.push_back(trait.str()); }
+void tblgen::OpClass::addTrait(Twine trait) {
+  auto traitStr = trait.str();
+  if (traitsSet.insert(traitStr).second)
+    traitsVec.push_back(std::move(traitStr));
+}

 void tblgen::OpClass::writeDeclTo(raw_ostream &os) const {
   os << "class " << className << " : public Op<" << className;
-  for (const auto &trait : traits)
+  for (const auto &trait : traitsVec)
     os << ", " << trait;
   os << "> {\npublic:\n";
   os << "  using Op::Op;\n";
@@ -24,6 +24,10 @@ StringRef SideEffect::getBaseEffectName() const {
   return def->getValueAsString("baseEffectName");
 }

+StringRef SideEffect::getInterfaceTrait() const {
+  return def->getValueAsString("interfaceTrait");
+}
+
 StringRef SideEffect::getResource() const {
   auto value = def->getValueAsString("resource");
   return value.empty() ? "::mlir::SideEffects::DefaultResource" : value;
@@ -2,7 +2,7 @@

 // CHECK-LABEL: @fold_static_stride_subview_with_load
 // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index
-func @fold_static_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) {
+func @fold_static_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) -> f32 {
   // CHECK-NOT: subview
   // CHECK: [[C2:%.*]] = constant 2 : index
   // CHECK: [[C3:%.*]] = constant 3 : index
@@ -13,12 +13,12 @@ func @fold_static_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : in
   // CHECK: load [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}
   %0 = subview %arg0[%arg1, %arg2][][] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]>
   %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]>
-  return
+  return %1 : f32
 }

 // CHECK-LABEL: @fold_dynamic_stride_subview_with_load
 // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index, [[ARG5:%.*]]: index, [[ARG6:%.*]]: index
-func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) {
+func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) -> f32 {
   // CHECK-NOT: subview
   // CHECK: [[STRIDE1:%.*]] = muli [[ARG3]], [[ARG5]] : index
   // CHECK: [[INDEX1:%.*]] = addi [[ARG1]], [[STRIDE1]] : index
@@ -27,7 +27,7 @@ func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : i
   // CHECK: load [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}
   %0 = subview %arg0[%arg1, %arg2][][%arg5, %arg6] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [?, ?]>
   %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [?, ?]>
-  return
+  return %1 : f32
 }

 // CHECK-LABEL: @fold_static_stride_subview_with_store
@@ -64,7 +64,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
 // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
 // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
 // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
-// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast %[[ALLOC]] : memref<5x4x3xf32>
 // CHECK-NEXT: loop.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
 // CHECK-NEXT: loop.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
 // CHECK-NEXT: loop.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
@@ -99,7 +98,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
 // CHECK-NEXT: }
 // CHECK-NEXT: }
 // CHECK-NEXT: }
-// CHECK: {{.*}} = load %[[VECTOR_VIEW]][] : memref<vector<5x4x3xf32>>
 // CHECK-NEXT: dealloc %[[ALLOC]] : memref<5x4x3xf32>
 // CHECK-NEXT: }
 // CHECK-NEXT: }
@@ -54,30 +54,30 @@ func @compose_affine_maps_1dto2d_no_symbols() {
     %x1_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%x0, %x0)

     // CHECK: [[I0A:%[0-9]+]] = affine.apply [[MAP0]](%{{.*}})
-    // CHECK-NEXT: load %0{{\[}}[[I0A]], [[I0A]]{{\]}}
+    // CHECK-NEXT: [[V0:%[0-9]+]] = load %0{{\[}}[[I0A]], [[I0A]]{{\]}}
     %v0 = load %0[%x1_0, %x1_1] : memref<4x4xf32>

-    // Test load[%y, %y]
+    // Test store[%y, %y]
     %y0 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i0)
     %y1_0 = affine.apply affine_map<(d0, d1) -> (d0)> (%y0, %y0)
     %y1_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%y0, %y0)

     // CHECK-NEXT: [[I1A:%[0-9]+]] = affine.apply [[MAP1]](%{{.*}})
-    // CHECK-NEXT: load %0{{\[}}[[I1A]], [[I1A]]{{\]}}
-    %v1 = load %0[%y1_0, %y1_1] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %0{{\[}}[[I1A]], [[I1A]]{{\]}}
+    store %v0, %0[%y1_0, %y1_1] : memref<4x4xf32>

-    // Test load[%x, %y]
+    // Test store[%x, %y]
     %xy_0 = affine.apply affine_map<(d0, d1) -> (d0)> (%x0, %y0)
     %xy_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%x0, %y0)

-    // CHECK-NEXT: load %0{{\[}}[[I0A]], [[I1A]]{{\]}}
-    %v2 = load %0[%xy_0, %xy_1] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %0{{\[}}[[I0A]], [[I1A]]{{\]}}
+    store %v0, %0[%xy_0, %xy_1] : memref<4x4xf32>

-    // Test load[%y, %x]
+    // Test store[%y, %x]
     %yx_0 = affine.apply affine_map<(d0, d1) -> (d0)> (%y0, %x0)
     %yx_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%y0, %x0)
-    // CHECK-NEXT: load %0{{\[}}[[I1A]], [[I0A]]{{\]}}
-    %v3 = load %0[%yx_0, %yx_1] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %0{{\[}}[[I1A]], [[I0A]]{{\]}}
+    store %v0, %0[%yx_0, %yx_1] : memref<4x4xf32>
   }
   return
 }
@@ -92,29 +92,29 @@ func @compose_affine_maps_1dto2d_with_symbols() {
     %x0 = affine.apply affine_map<(d0)[s0] -> (d0 - s0)> (%i0)[%c4]

     // CHECK: [[I0:%[0-9]+]] = affine.apply [[MAP4]](%{{.*}})
-    // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I0]], [[I0]]{{\]}}
+    // CHECK-NEXT: [[V0:%[0-9]+]] = load %{{[0-9]+}}{{\[}}[[I0]], [[I0]]{{\]}}
     %v0 = load %0[%x0, %x0] : memref<4x4xf32>

     // Test load[%x0, %x1] with symbol %c4 captured by '%x0' map.
     %x1 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i0)
     %y1 = affine.apply affine_map<(d0, d1) -> (d0+d1)> (%x0, %x1)
     // CHECK-NEXT: [[I1:%[0-9]+]] = affine.apply [[MAP7]](%{{.*}})
-    // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I1]], [[I1]]{{\]}}
-    %v1 = load %0[%y1, %y1] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I1]], [[I1]]{{\]}}
+    store %v0, %0[%y1, %y1] : memref<4x4xf32>

-    // Test load[%x1, %x0] with symbol %c4 captured by '%x0' map.
+    // Test store[%x1, %x0] with symbol %c4 captured by '%x0' map.
     %y2 = affine.apply affine_map<(d0, d1) -> (d0 + d1)> (%x1, %x0)
     // CHECK-NEXT: [[I2:%[0-9]+]] = affine.apply [[MAP7]](%{{.*}})
-    // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I2]], [[I2]]{{\]}}
-    %v2 = load %0[%y2, %y2] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I2]], [[I2]]{{\]}}
+    store %v0, %0[%y2, %y2] : memref<4x4xf32>

-    // Test load[%x2, %x0] with symbol %c4 from '%x0' and %c5 from '%x2'
+    // Test store[%x2, %x0] with symbol %c4 from '%x0' and %c5 from '%x2'
     %c5 = constant 5 : index
     %x2 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)> (%i0)[%c5]
     %y3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)> (%x2, %x0)
     // CHECK: [[I3:%[0-9]+]] = affine.apply [[MAP7a]](%{{.*}})
-    // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I3]], [[I3]]{{\]}}
-    %v3 = load %0[%y3, %y3] : memref<4x4xf32>
+    // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I3]], [[I3]]{{\]}}
+    store %v0, %0[%y3, %y3] : memref<4x4xf32>
   }
   return
 }
@@ -175,15 +175,15 @@ func @compose_affine_maps_dependent_loads() {
        // CHECK: [[I0:%[0-9]+]] = affine.apply [[MAP9]](%{{.*}})
        // CHECK: [[I1:%[0-9]+]] = affine.apply [[MAP4b]](%{{.*}})
        // CHECK: [[I2:%[0-9]+]] = affine.apply [[MAP10]](%{{.*}})
-       // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I0]], [[I1]]{{\]}}
+       // CHECK-NEXT: [[V0:%[0-9]+]] = load %{{[0-9]+}}{{\[}}[[I0]], [[I1]]{{\]}}
        %v0 = load %0[%x00, %x01] : memref<16x32xf32>

-       // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I0]], [[I2]]{{\]}}
-       %v1 = load %0[%x00, %x02] : memref<16x32xf32>
+       // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I0]], [[I2]]{{\]}}
+       store %v0, %0[%x00, %x02] : memref<16x32xf32>

        // Swizzle %i0, %i1
-       // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I1]], [[I0]]{{\]}}
-       %v2 = load %0[%x01, %x00] : memref<16x32xf32>
+       // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I1]], [[I0]]{{\]}}
+       store %v0, %0[%x01, %x00] : memref<16x32xf32>

        // Swizzle %x00, %x01 and %c3, %c7
        %x10 = affine.apply affine_map<(d0, d1)[s0, s1] -> (d0 * s1)>
@@ -193,18 +193,16 @@ func @compose_affine_maps_dependent_loads() {

        // CHECK-NEXT: [[I2A:%[0-9]+]] = affine.apply [[MAP12]](%{{.*}})
        // CHECK-NEXT: [[I2B:%[0-9]+]] = affine.apply [[MAP11]](%{{.*}})
-       // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I2A]], [[I2B]]{{\]}}
-       %v3 = load %0[%x10, %x11] : memref<16x32xf32>
+       // CHECK-NEXT: store [[V0]], %{{[0-9]+}}{{\[}}[[I2A]], [[I2B]]{{\]}}
+       store %v0, %0[%x10, %x11] : memref<16x32xf32>
       }
     }
   }
   return
 }

-// CHECK-LABEL: func @compose_affine_maps_diamond_dependency() {
-func @compose_affine_maps_diamond_dependency() {
-  %0 = alloc() : memref<4x4xf32>
-
+// CHECK-LABEL: func @compose_affine_maps_diamond_dependency
+func @compose_affine_maps_diamond_dependency(%arg0: f32, %arg1: memref<4x4xf32>) {
   affine.for %i0 = 0 to 15 {
     %a = affine.apply affine_map<(d0) -> (d0 - 1)> (%i0)
     %b = affine.apply affine_map<(d0) -> (d0 + 7)> (%a)
@@ -213,15 +211,15 @@ func @compose_affine_maps_diamond_dependency() {
     %d1 = affine.apply affine_map<(d0, d1) -> (d1 floordiv 3)> (%b, %c)
     // CHECK: [[I0:%[0-9]+]] = affine.apply [[MAP13A]](%{{.*}})
     // CHECK: [[I1:%[0-9]+]] = affine.apply [[MAP13B]](%{{.*}})
-    // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I0]], [[I1]]{{\]}}
-    %v = load %0[%d0, %d1] : memref<4x4xf32>
+    // CHECK-NEXT: store %arg0, %arg1{{\[}}[[I0]], [[I1]]{{\]}}
+    store %arg0, %arg1[%d0, %d1] : memref<4x4xf32>
   }

   return
 }

 // CHECK-LABEL: func @arg_used_as_dim_and_symbol
-func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
+func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index, %arg2: f32) {
   %c9 = constant 9 : index
   %1 = alloc() : memref<100x100xf32, 1>
   %2 = alloc() : memref<1xi32>
@@ -231,8 +229,8 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
                                     (%i0, %i1)[%arg1, %c9]
       %4 = affine.apply affine_map<(d0, d1, d3) -> (d3 - (d0 + d1))>
                                     (%arg1, %c9, %3)
-      // CHECK: load %{{[0-9]+}}{{\[}}%{{.*}}, %{{.*}}{{\]}}
-      %5 = load %1[%4, %arg1] : memref<100x100xf32, 1>
+      // CHECK: store %arg2, %{{[0-9]+}}{{\[}}%{{.*}}, %{{.*}}{{\]}}
+      store %arg2, %1[%4, %arg1] : memref<100x100xf32, 1>
     }
   }
   return
@@ -252,7 +250,7 @@ func @trivial_maps() {

     %3 = affine.apply affine_map<()[] -> (0)>()[]
     store %cst, %0[%3] : memref<10xf32>
-    %4 = load %0[%c0] : memref<10xf32>
+    store %2, %0[%c0] : memref<10xf32>
   }
   return
 }

@@ -195,7 +195,6 @@ gpu.module @kernels {
     // CHECK: br ^bb42
     // CHECK: ^bb42:
    // CHECK: gpu.barrier
-    // CHECK: [[VAL_134:%.*]] = load [[VAL_1]]{{\[}}[[VAL_4]]] : memref<32xf32, 3>
     %sum = "gpu.all_reduce"(%arg0) ({}) {op = "max"} : (f32) -> (f32)
     gpu.return
   }

@@ -175,7 +175,6 @@ gpu.module @kernels {
     // CHECK: br ^bb42
     // CHECK: ^bb42:
     // CHECK: gpu.barrier
-    // CHECK: [[VAL_114:%.*]] = load [[VAL_1]]{{\[}}[[VAL_4]]] : memref<32xf32, 3>
     %sum = "gpu.all_reduce"(%arg0) ({}) {op = "add"} : (f32) -> (f32)
     gpu.return
   }

@@ -856,7 +856,6 @@ func @scalar_code(%arg0: memref<f32>, %arg1 : memref<f32>, %arg2 : memref<f32>)
 // CHECKLOOP-NOT: loop.for
 // CHECKLOOP-DAG: load %[[ARG0]][]
 // CHECKLOOP-DAG: load %[[ARG1]][]
-// CHECKLOOP-DAG: load %[[ARG2]][]
 // CHECKLOOP: addf
 // CHECKLOOP: store %{{.*}}, %[[ARG2]][]

@@ -867,6 +866,5 @@ func @scalar_code(%arg0: memref<f32>, %arg1 : memref<f32>, %arg2 : memref<f32>)
 // CHECKPARALLEL-NOT: loop.for
 // CHECKPARALLEL-DAG: load %[[ARG0]][]
 // CHECKPARALLEL-DAG: load %[[ARG1]][]
-// CHECKPARALLEL-DAG: load %[[ARG2]][]
 // CHECKPARALLEL: addf
 // CHECKPARALLEL: store %{{.*}}, %[[ARG2]][]

@@ -24,7 +24,6 @@ func @linalg_generic_sum(%lhs: memref<2x2xf32>,
 // CHECK: loop.parallel (%[[I:.*]], %[[J:.*]]) = {{.*}}
 // CHECK:   %[[LHS_ELEM:.*]] = load %[[LHS]][%[[I]], %[[J]]]
 // CHECK:   %[[RHS_ELEM:.*]] = load %[[RHS]][%[[I]], %[[J]]]
-// CHECK:   %[[SUM_ELEM:.*]] = load %[[SUM]][%[[I]], %[[J]]]
 // CHECK:   %[[SUM:.*]] = addf %[[LHS_ELEM]], %[[RHS_ELEM]] : f32
 // CHECK:   store %[[SUM]], %{{.*}}[%[[I]], %[[J]]]
 // CHECK:   loop.yield
@@ -60,4 +59,4 @@ func @lower_outer_parallel(%A: memref<?x?x?x?xf32>, %B: memref<?x?x?xf32>) {
 // CHECK:   loop.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
 // CHECK:     loop.for %[[IV3:.*]] = %[[C0]] to %[[D3]] step %[[C1]]
 // CHECK:       load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 // CHECK:       store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV3]]]

@@ -56,6 +56,16 @@ func @trivial_dce(%arg0: tensor<8x4xf32>) {
   return
 }

+// CHECK-LABEL: func @load_dce
+func @load_dce(%arg0: index) {
+  %c4 = constant 4 : index
+  %a = alloc(%c4) : memref<?xf32>
+  %2 = load %a[%arg0] : memref<?xf32>
+  dealloc %a: memref<?xf32>
+  // CHECK-NEXT: return
+  return
+}
+
 // CHECK-LABEL: func @addi_zero
 func @addi_zero(%arg0: i32) -> i32 {
   // CHECK-NEXT: return %arg0
@@ -648,7 +658,7 @@ func @cast_values(%arg0: tensor<*xi32>, %arg1: memref<?xi32>) -> (tensor<2xi32>,
 // CHECK-DAG: #[[VIEW_MAP5:map[0-9]+]] = affine_map<(d0, d1) -> (d0 * 7 + d1)>

 // CHECK-LABEL: func @view
-func @view(%arg0 : index) {
+func @view(%arg0 : index) -> (f32, f32, f32, f32, f32, f32) {
   // CHECK: %[[ALLOC_MEM:.*]] = alloc() : memref<2048xi8>
   %0 = alloc() : memref<2048xi8>
   %c0 = constant 0 : index
@@ -660,41 +670,41 @@ func @view(%arg0 : index) {
   // CHECK: std.view %[[ALLOC_MEM]][][] : memref<2048xi8> to memref<7x11xf32, #[[VIEW_MAP0]]>
   %1 = view %0[%c15][%c7, %c11]
     : memref<2048xi8> to memref<?x?xf32, #TEST_VIEW_MAP0>
-  load %1[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>
+  %r0 = load %1[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>

   // Test: fold constant sizes but not offset, update map with static stride.
   // Test that we do not fold a dynamic dim which is not produced by a constant.
   // CHECK: std.view %[[ALLOC_MEM]][%arg0][] : memref<2048xi8> to memref<7x11xf32, #[[VIEW_MAP1]]>
   %2 = view %0[%arg0][%c7, %c11]
     : memref<2048xi8> to memref<?x?xf32, #TEST_VIEW_MAP0>
-  load %2[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>
+  %r1 = load %2[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>

   // Test: fold constant offset but not sizes, update map with constant offset.
   // Test that we fold constant offset but not dynamic dims.
   // CHECK: std.view %[[ALLOC_MEM]][][%arg0, %arg0] : memref<2048xi8> to memref<?x?xf32, #[[VIEW_MAP2]]>
   %3 = view %0[%c15][%arg0, %arg0]
     : memref<2048xi8> to memref<?x?xf32, #TEST_VIEW_MAP0>
-  load %3[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>
+  %r2 = load %3[%c0, %c0] : memref<?x?xf32, #TEST_VIEW_MAP0>

   // Test: fold one constant dim, no offset, should update with constant
   // stride on dim 1, but leave dynamic stride on dim 0.
   // CHECK: std.view %[[ALLOC_MEM]][][%arg0, %arg0] : memref<2048xi8> to memref<?x?x7xf32, #[[VIEW_MAP3]]>
   %4 = view %0[][%arg0, %arg0, %c7]
     : memref<2048xi8> to memref<?x?x?xf32, #TEST_VIEW_MAP1>
-  load %4[%c0, %c0, %c0] : memref<?x?x?xf32, #TEST_VIEW_MAP1>
+  %r3 = load %4[%c0, %c0, %c0] : memref<?x?x?xf32, #TEST_VIEW_MAP1>

   // Test: preserve an existing static dim size while folding a dynamic
   // dimension and offset.
   // CHECK: std.view %[[ALLOC_MEM]][][] : memref<2048xi8> to memref<7x4xf32, #[[VIEW_MAP4]]>
   %5 = view %0[%c15][%c7] : memref<2048xi8> to memref<?x4xf32, #TEST_VIEW_MAP2>
-  load %5[%c0, %c0] : memref<?x4xf32, #TEST_VIEW_MAP2>
+  %r4 = load %5[%c0, %c0] : memref<?x4xf32, #TEST_VIEW_MAP2>

   // Test: folding static alloc and memref_cast into a view.
   // CHECK: std.view %[[ALLOC_MEM]][][] : memref<2048xi8> to memref<15x7xf32, #[[VIEW_MAP5]]>
   %6 = memref_cast %0 : memref<2048xi8> to memref<?xi8>
   %7 = view %6[%c15][%c7] : memref<?xi8> to memref<?x?xf32>
-  load %7[%c0, %c0] : memref<?x?xf32>
-  return
+  %r5 = load %7[%c0, %c0] : memref<?x?xf32>
+  return %r0, %r1, %r2, %r3, %r4, %r5 : f32, f32, f32, f32, f32, f32
 }

 // -----
@@ -735,7 +745,7 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
     : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to
       memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
-  load %1[%c0, %c0, %c0] : memref<?x?x?xf32,
+  %v0 = load %1[%c0, %c0, %c0] : memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>

   // Test: subview with one dynamic operand should not be folded.
@@ -744,7 +754,7 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
     : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to
       memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
-  load %2[%c0, %c0, %c0] : memref<?x?x?xf32,
+  store %v0, %2[%c0, %c0, %c0] : memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>

   // CHECK: %[[ALLOC1:.*]] = alloc(%[[ARG0]])
@@ -755,7 +765,7 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
     : memref<?x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to
       memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
-  load %4[%c0, %c0, %c0] : memref<?x?x?xf32,
+  store %v0, %4[%c0, %c0, %c0] : memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>

   // Test: subview offset operands are folded correctly w.r.t. base strides.
@@ -764,7 +774,7 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
     : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to
       memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
-  load %5[%c0, %c0, %c0] : memref<?x?x?xf32,
+  store %v0, %5[%c0, %c0, %c0] : memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>

   // Test: subview stride operands are folded correctly w.r.t. base strides.
@@ -773,40 +783,40 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
     : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to
       memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>
-  load %6[%c0, %c0, %c0] : memref<?x?x?xf32,
+  store %v0, %6[%c0, %c0, %c0] : memref<?x?x?xf32,
        affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)>>

   // Test: subview shape are folded, but offsets and strides are not even if base memref is static
   // CHECK: subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : memref<8x16x4xf32, #[[BASE_MAP0]]> to memref<7x11x2xf32, #[[SUBVIEW_MAP3]]>
   %10 = subview %0[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] : memref<8x16x4xf32, offset:0, strides:[64, 4, 1]> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %10[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %10[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>

   // Test: subview strides are folded, but offsets and shape are not even if base memref is static
   // CHECK: subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [] : memref<8x16x4xf32, #[[BASE_MAP0]]> to memref<?x?x?xf32, #[[SUBVIEW_MAP4]]
   %11 = subview %0[%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] [%c2, %c7, %c11] : memref<8x16x4xf32, offset:0, strides:[64, 4, 1]> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %11[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %11[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>

   // Test: subview offsets are folded, but strides and shape are not even if base memref is static
   // CHECK: subview %[[ALLOC0]][] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] : memref<8x16x4xf32, #[[BASE_MAP0]]> to memref<?x?x?xf32, #[[SUBVIEW_MAP5]]
   %13 = subview %0[%c1, %c2, %c7] [%arg1, %arg1, %arg1] [%arg0, %arg0, %arg0] : memref<8x16x4xf32, offset:0, strides:[64, 4, 1]> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %13[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %13[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>

   // CHECK: %[[ALLOC2:.*]] = alloc(%[[ARG0]], %[[ARG0]], %[[ARG1]])
   %14 = alloc(%arg0, %arg0, %arg1) : memref<?x?x?xf32>
   // Test: subview shape are folded, even if base memref is not static
   // CHECK: subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : memref<?x?x?xf32> to memref<7x11x2xf32, #[[SUBVIEW_MAP3]]>
   %15 = subview %14[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] : memref<?x?x?xf32> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %15[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %15[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>

   // TEST: subview strides are not folded when the base memref is not static
   // CHECK: subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [%[[C2]], %[[C2]], %[[C2]]] : memref<?x?x?xf32> to memref<?x?x?xf32, #[[SUBVIEW_MAP3]]
   %16 = subview %14[%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] [%c2, %c2, %c2] : memref<?x?x?xf32> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %16[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %16[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>

   // TEST: subview offsets are not folded when the base memref is not static
   // CHECK: subview %[[ALLOC2]][%[[C1]], %[[C1]], %[[C1]]] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : memref<?x?x?xf32> to memref<?x?x?xf32, #[[SUBVIEW_MAP3]]
   %17 = subview %14[%c1, %c1, %c1] [%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] : memref<?x?x?xf32> to memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
-  load %17[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
+  store %v0, %17[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>

   // CHECK: %[[ALLOC3:.*]] = alloc() : memref<12x4xf32>
   %18 = alloc() : memref<12x4xf32>
@@ -815,12 +825,12 @@ func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
   // TEST: subview strides are maintained when sizes are folded
   // CHECK: subview %[[ALLOC3]][%arg1, %arg1] [] [] : memref<12x4xf32> to memref<2x4xf32, #[[SUBVIEW_MAP6]]>
   %19 = subview %18[%arg1, %arg1] [%c2, %c4] [] : memref<12x4xf32> to memref<?x?xf32, offset: ?, strides:[4, 1]>
-  load %19[%arg1, %arg1] : memref<?x?xf32, offset: ?, strides:[4, 1]>
+  store %v0, %19[%arg1, %arg1] : memref<?x?xf32, offset: ?, strides:[4, 1]>

   // TEST: subview strides and sizes are maintained when offsets are folded
   // CHECK: subview %[[ALLOC3]][] [] [] : memref<12x4xf32> to memref<12x4xf32, #[[SUBVIEW_MAP7]]>
   %20 = subview %18[%c2, %c4] [] [] : memref<12x4xf32> to memref<12x4xf32, offset: ?, strides:[4, 1]>
-  load %20[%arg1, %arg1] : memref<12x4xf32, offset: ?, strides:[4, 1]>
+  store %v0, %20[%arg1, %arg1] : memref<12x4xf32, offset: ?, strides:[4, 1]>

   // Test: dim on subview is rewritten to size operand.
   %7 = dim %4, 0 : memref<?x?x?xf32,
@@ -1286,9 +1286,11 @@ void OpEmitter::genSideEffectInterfaceMethods() {
   auto resolveDecorators = [&](Operator::var_decorator_range decorators,
                                unsigned index, unsigned kind) {
     for (auto decorator : decorators)
-      if (SideEffect *effect = dyn_cast<SideEffect>(&decorator))
+      if (SideEffect *effect = dyn_cast<SideEffect>(&decorator)) {
+        opClass.addTrait(effect->getInterfaceTrait());
         interfaceEffects[effect->getBaseEffectName()].push_back(
             EffectLocation{*effect, index, kind});
+      }
   };

   // Collect effects that were specified via: