mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 12:26:52 +08:00
[MLIR][Affine] Generalize/fix mdg init for region-holding ops with known control flow (#71754)
Generalize/fix MDG (MemRefDependenceGraph) init for region-holding ops with well-defined control flow. Use the memory effect interface to determine when to create a node. While at it, remove the special treatment there for call ops. This allows fusion of affine nests even in the presence of scf region ops elsewhere in the Block. Previously, even a single scf.for/if/while op in a block would have made fusion bail out on all affine fusion that was otherwise possible; this is now addressed.
This commit is contained in:
@@ -152,28 +152,28 @@ bool MemRefDependenceGraph::init() {
|
||||
auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
|
||||
memrefAccesses[memref].insert(node.id);
|
||||
nodes.insert({node.id, node});
|
||||
} else if (op.getNumRegions() != 0) {
|
||||
// Return false if another region is found (not currently supported).
|
||||
return false;
|
||||
} else if (op.getNumResults() > 0 && !op.use_empty()) {
|
||||
// Create graph node for top-level producer of SSA values, which
|
||||
// could be used by loop nest nodes.
|
||||
Node node(nextNodeId++, &op);
|
||||
nodes.insert({node.id, node});
|
||||
} else if (isa<CallOpInterface>(op)) {
|
||||
// Create graph node for top-level Call Op that takes any argument of
|
||||
// memref type. Call Op that returns one or more memref type results
|
||||
// is already taken care of, by the previous conditions.
|
||||
if (llvm::any_of(op.getOperandTypes(),
|
||||
[&](Type t) { return isa<MemRefType>(t); })) {
|
||||
Node node(nextNodeId++, &op);
|
||||
nodes.insert({node.id, node});
|
||||
}
|
||||
} else if (hasEffect<MemoryEffects::Write, MemoryEffects::Free>(&op)) {
|
||||
// Create graph node for top-level op, which could have a memory write
|
||||
// side effect.
|
||||
} else if (!isMemoryEffectFree(&op) &&
|
||||
(op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) {
|
||||
// Create graph node for top-level op unless it is known to be
|
||||
// memory-effect free. This covers all unknown/unregistered ops,
|
||||
// non-affine ops with memory effects, and region-holding ops with a
|
||||
// well-defined control flow. During the fusion validity checks, we look
|
||||
// for non-affine ops on the path from source to destination, at which
|
||||
// point we check which memrefs if any are used in the region.
|
||||
Node node(nextNodeId++, &op);
|
||||
nodes.insert({node.id, node});
|
||||
} else if (op.getNumRegions() != 0) {
|
||||
// Return false if non-handled/unknown region-holding ops are found. We
|
||||
// won't know what such ops do or what their regions mean; e.g., it may
|
||||
// not be an imperative op.
|
||||
LLVM_DEBUG(llvm::dbgs()
|
||||
<< "MDG init failed; unknown region-holding op found!\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
51
mlir/test/Dialect/Affine/loop-fusion-scf-mixed.mlir
Normal file
51
mlir/test/Dialect/Affine/loop-fusion-scf-mixed.mlir
Normal file
@@ -0,0 +1,51 @@
|
||||
// RUN: mlir-opt -pass-pipeline='builtin.module(func.func(affine-loop-fusion))' %s | FileCheck %s
|
||||
|
||||
// Test fusion of affine nests in the presence of other region-holding ops
|
||||
// (scf.for in the test case below) in the block.
|
||||
|
||||
// CHECK-LABEL: func @scf_and_affine
|
||||
func.func @scf_and_affine(%A : memref<10xf32>) {
// Test body: two producer/consumer pairs of affine nests that communicate
// through %B, each accompanied by an scf.for that stores to %B. Only the
// position of the scf.for relative to the pair differs between the two cases.
|
||||
%c0 = arith.constant 0 : index
|
||||
%c1 = arith.constant 1 : index
|
||||
%c10 = arith.constant 10 : index
|
||||
%cst = arith.constant 0.0 : f32
|
||||
|
||||
// %B is the intermediate buffer written by the producer nests and read by
// the consumer nests; %C receives the final copy.
%B = memref.alloc() : memref<10xf32>
|
||||
%C = memref.alloc() : memref<10xf32>
|
||||
|
||||
// Producer nest: copies %A into %B.
affine.for %j = 0 to 10 {
|
||||
%v = affine.load %A[%j] : memref<10xf32>
|
||||
affine.store %v, %B[%j] : memref<10xf32>
|
||||
}
|
||||
|
||||
// Consumer nest: copies %B into %C.
affine.for %j = 0 to 10 {
|
||||
%v = affine.load %B[%j] : memref<10xf32>
|
||||
affine.store %v, %C[%j] : memref<10xf32>
|
||||
}
|
||||
// Nests are fused.
|
||||
// CHECK: affine.for %{{.*}} = 0 to 10
|
||||
// CHECK-NOT: affine.for
|
||||
// CHECK: scf.for
|
||||
|
||||
// Non-affine region-holding op that stores to %B. It follows both nests
// above, so it does not sit between that producer and its consumer.
scf.for %i = %c0 to %c10 step %c1 {
|
||||
memref.store %cst, %B[%i] : memref<10xf32>
|
||||
}
|
||||
|
||||
// The nests below shouldn't be fused.
// Here the scf.for that stores to %B sits between the producer and the
// consumer of %B; presumably that intervening write is what must block
// fusion.
|
||||
affine.for %j = 0 to 10 {
|
||||
%v = affine.load %A[%j] : memref<10xf32>
|
||||
affine.store %v, %B[%j] : memref<10xf32>
|
||||
}
|
||||
scf.for %i = %c0 to %c10 step %c1 {
|
||||
memref.store %cst, %B[%i] : memref<10xf32>
|
||||
}
|
||||
affine.for %j = 0 to 10 {
|
||||
%v = affine.load %B[%j] : memref<10xf32>
|
||||
affine.store %v, %C[%j] : memref<10xf32>
|
||||
}
|
||||
// Expected output order: both affine nests remain, with the scf.for still
// between them.
// CHECK: affine.for
|
||||
// CHECK: scf.for
|
||||
// CHECK: affine.for
|
||||
|
||||
return
|
||||
}
|
||||
Reference in New Issue
Block a user