[mlir] cleanup of structured.tile* transform ops (#67320)
Rename and restructure tiling-related transform ops from the structured extension to be more homogeneous. In particular, all ops now follow a consistent naming scheme:

- `transform.structured.tile_using_for`;
- `transform.structured.tile_using_forall`;
- `transform.structured.tile_reduction_using_for`;
- `transform.structured.tile_reduction_using_forall`.

This drops the "_op" naming artifact from `tile_to_forall_op` that shouldn't have been included in the first place, consistently specifies the name of the control flow op to be produced for loops (instead of `tile_reduction_using_scf` since `scf.forall` also belongs to `scf`), and opts for the `using` connector to avoid ambiguity.

The loops produced by tiling are now systematically placed as *trailing* results of the transform op. While this required changing 3 out of 4 ops (except for `tile_using_for`), this is the only choice that makes sense when producing multiple `scf.for` ops that can be associated with a variadic number of handles. This choice is also most consistent with *other* transform ops from the structured extension, in particular with fusion ops, that produce the structured op as the leading result and the loop as the trailing result.
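To make the new spellings concrete, here is a minimal sketch of the renamed ops in a schedule, assuming `%matmul_a` and `%matmul_b` are hypothetical handles obtained from `transform.structured.match`; note that the tiled op now comes first and the loop handle(s) last:

```mlir
// Tile to an scf.forall: the tiled op leads, the forall loop handle trails.
%tiled_a, %forall = transform.structured.tile_using_forall %matmul_a tile_sizes [4, 32]
  : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

// Tile to nested scf.for loops (formerly transform.structured.tile):
// tiled op first, then one handle per generated loop.
%tiled_b, %loops:3 = transform.structured.tile_using_for %matmul_b [16, 16, 16]
  : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
```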
Committed by GitHub
Parent: 9f276d4ddd
Commit: 96ff0255f2
@@ -119,13 +119,13 @@ transform.sequence failures(propagate) {
 %arg1: !transform.op<"linalg.matmul">,
 %arg2: !transform.op<"linalg.elemwise_binary">):
 // The actual tiling transformation takes tile sizes as attributes.
-%loop, %tiled = transform.structured.tile_to_forall_op %arg1 tile_sizes [4, 32]
+%loop, %tiled = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
 : (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op)
 transform.yield
 }
 ```

-The transformation returns two handles, as indicated in its [documentation](https://mlir.llvm.org/docs/Dialects/Transform/#transformstructuredtile_to_forall_op-transformtiletoforallop):
+The transformation returns two handles, as indicated in its [documentation](https://mlir.llvm.org/docs/Dialects/Transform/#transformstructuredtile_using_forall-transformtiletoforallop):

 * A handle to the `scf.forall` “multi-for” loop around tensors.
 * A handle to `linalg.generic` operating on the subset of the original data.

@@ -176,7 +176,7 @@ transform.sequence failures(propagate) {
 %arg1: !transform.op<"linalg.matmul">,
 %arg2: !transform.op<"linalg.elemwise_binary">):
 // The actual tiling transformation takes tile sizes as attributes.
-%loop, %tiled = transform.structured.tile_to_forall_op %arg1 tile_sizes [4, 32]
+%loop, %tiled = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
 : (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op)

 // This is trying to use an invalidated handle leading to undefined behavior.

@@ -203,7 +203,7 @@ matmul.mlir:26:9: note: handle to invalidated ops
 %mm = transform.cast %matmul : !transform.op<"linalg.matmul"> to !transform.any_op
 ^
 matmul.mlir:27:19: note: invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities associated with this operand and entities nested in them
-%loop, %tiled = transform.structured.tile_to_forall_op %mm tile_sizes [4, 32]
+%loop, %tiled = transform.structured.tile_using_forall %mm tile_sizes [4, 32]
 ```

 One may observe that some operations such as `transform.cast` do not consume the operand (because they don’t erase the corresponding operation). So what would happen if we tried to use that operand instead?

@@ -219,7 +219,7 @@ transform.sequence failures(propagate) {
 to !transform.any_op

 // The actual tiling transformation takes tile sizes as attributes.
-%loop, %tiled = transform.structured.tile_to_forall_op %arg1 tile_sizes [4, 32]
+%loop, %tiled = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
 : (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op)

 // Consuming an operand invalidates the consumed handle and any other handle that is

@@ -240,7 +240,7 @@ matmul.mlir:21:29: note: handle to invalidated ops
 ^bb0(%root: !transform.any_op, %matmul: !transform.op<"linalg.matmul">, %elemwise: !transform.op<"linalg.elemwise_binary">):
 ^
 matmul.mlir:27:19: note: invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities associated with this operand and entities nested in them
-%loop, %tiled = transform.structured.tile_to_forall_op %mm tile_sizes [4, 32]
+%loop, %tiled = transform.structured.tile_using_forall %mm tile_sizes [4, 32]
 ```

 ## Chaining Transformations with Handles

@@ -262,7 +262,7 @@ transform.sequence failures(propagate) {
 // The actual tiling transformation takes tile sizes as attributes. It
 // produces a handle to the loop generated during tiling.
 %loop, %tiled_max =
-transform.structured.tile_to_forall_op %max tile_sizes [8, 32]
+transform.structured.tile_using_forall %max tile_sizes [8, 32]
 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

 // We can now fuse the other operations into the loop. Here, we fuse

@@ -304,7 +304,7 @@ transform.sequence failures(propagate) {

 // The actual tiling transformation takes tile sizes as attributes. It
 // produces a handle to the loop generated during tiling.
-%loop, %tiled = transform.structured.tile_to_forall_op %max tile_sizes [8, 32]
+%loop, %tiled = transform.structured.tile_using_forall %max tile_sizes [8, 32]
 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

 // We can now fuse the other operations into the loop. Here, we fuse

@@ -328,7 +328,7 @@ transform.sequence failures(propagate) {
 // dialect. Otherwise, it is difficult to differentiate "add" and "max", both
 // of which having the same kind.
 %loop_2, %tiled_2 =
-transform.structured.tile_to_forall_op %add_fused tile_sizes [4, 4]
+transform.structured.tile_using_forall %add_fused tile_sizes [4, 4]
 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 %matmul_fused_2, %loop_3 =
 transform.structured.fuse_into_containing_op %matmul_fused into %loop_2

@@ -339,7 +339,7 @@ transform.sequence failures(propagate) {
 // such as loops, use tiling to size 1 to materialize the outer loop that is
 // going to be outlined.
 %outline_target, %_ =
-transform.structured.tile_to_forall_op %tiled_2 tile_sizes [1]
+transform.structured.tile_using_forall %tiled_2 tile_sizes [1]
 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 transform.structured.fuse_into_containing_op %matmul_fused_2
 into %outline_target

@@ -361,7 +361,7 @@ test/Examples/transform/Ch1/invalidation-2.mlir:109:3: error: op uses a handle i
 transform.test_print_remark_at_operand %outline_target, "outlined loop" : !transform.any_op
 ^
 test/Examples/transform/Ch1/invalidation-2.mlir:102:25: note: handle to invalidated ops
-%outline_target, %_ = transform.structured.tile_to_forall_op %tiled_2 tile_sizes [1]
+%outline_target, %_ = transform.structured.tile_using_forall %tiled_2 tile_sizes [1]
 ^
 test/Examples/transform/Ch1/invalidation-2.mlir:106:18: note: invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities associated with this operand and entities nested in them
 %func, %call = transform.loop.outline %outline_target {func_name = "outlined"}

@@ -292,7 +292,7 @@ transform.sequence failures(propagate) {

 // The actual tiling transformation takes tile sizes as attributes. It produces a
 // handle to the loop generated during tiling.
-%loop, %tiled = transform.structured.tile_to_forall_op %max tile_sizes [8, 32]
+%loop, %tiled = transform.structured.tile_using_forall %max tile_sizes [8, 32]
 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

 // We can now fuse the other operations into the loop. Here, we fuse

@@ -311,7 +311,7 @@ transform.sequence failures(propagate) {
 // "max" operation. This illustrates the precise targeting with the transform
 // dialect. Otherwise, it is difficult to differentiate "add" and "max", both
 // of which having the same kind.
-%loop_2, %tiled_2 = transform.structured.tile_to_forall_op %add_fused tile_sizes [4, 4]
+%loop_2, %tiled_2 = transform.structured.tile_using_forall %add_fused tile_sizes [4, 4]
 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 %matmul_fused_2 = transform.structured.fuse_into_containing_op %matmul_fused into %loop_2
 : (!transform.any_op, !transform.any_op) -> !transform.any_op

@@ -319,7 +319,7 @@ transform.sequence failures(propagate) {
 // Since outlining is currently only implemented for region-holding operations
 // such as loops, use tiling to size 1 to materialize the outer loop that is
 // going to be outlined.
-%outline_target, %_ = transform.structured.tile_to_forall_op %tiled_2 tile_sizes [1]
+%outline_target, %_ = transform.structured.tile_using_forall %tiled_2 tile_sizes [1]
 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 transform.structured.fuse_into_containing_op %matmul_fused_2 into %outline_target
 : (!transform.any_op, !transform.any_op) -> !transform.any_op
@@ -218,7 +218,7 @@ Linalg as described below.
 the inner loop having at most the given number of iterations. This can be
 understood as loop _strip-mining_ or a degenerate case of tiling a single
 dimension using any of `linalg.tile_` transform ops. We will be using
-`transform.structured.tile_to_forall_op` as this kind of loop is best
+`transform.structured.tile_using_forall` as this kind of loop is best
 supported by bufferization and can also be turned into a parallel loop later
 on. Unlike Halide, this doesn’t add new dimensions to the original
 operation, but rather creates a loop around it and rewrites the operation

@@ -275,9 +275,9 @@ The remaining dimensions can be materialized as loops in one transformation.

 ```mlir
 // [n y x c]
-%co, %relu2 = transform.structured.tile_to_forall_op %relu
+%co, %relu2 = transform.structured.tile_using_forall %relu
 tile_sizes [0, 0, 0, 64]
-%n_y_xo, %relu3 = transform.structured.tile_to_forall_op %relu2
+%n_y_xo, %relu3 = transform.structured.tile_using_forall %relu2
 tile_sizes [1, 1, 5, 0]
 ```

@@ -355,7 +355,7 @@ more than one dimension at the moment of writing.)

 ```mlir
 %rz_ry_rx, %red_fill, %conv4, %comb
-= transform.structured.tile_reduction_using_scf %conv3
+= transform.structured.tile_reduction_using_for %conv3
 // n y x c rz ry rx
 by tile_sizes=[0, 0, 0, 0, 1, 1, 1]
 ```

@@ -386,10 +386,10 @@ dimension:

 ```mlir
 // n y xi ci
-%1, %c5 = transform.structured.tile_to_forall_op %conv4 tile_sizes [0, 0, 1, 16]
-%2, %b4 = transform.structured.tile_to_forall_op %bias3 tile_sizes [0, 0, 1, 16]
-%3, %r4 = transform.structured.tile_to_forall_op %relu3 tile_sizes [0, 0, 1, 16]
-%4, %c2 = transform.structured.tile_to_forall_op %comb tile_sizes [0, 0, 1, 16]
+%1, %c5 = transform.structured.tile_using_forall %conv4 tile_sizes [0, 0, 1, 16]
+%2, %b4 = transform.structured.tile_using_forall %bias3 tile_sizes [0, 0, 1, 16]
+%3, %r4 = transform.structured.tile_using_forall %relu3 tile_sizes [0, 0, 1, 16]
+%4, %c2 = transform.structured.tile_using_forall %comb tile_sizes [0, 0, 1, 16]
 ```

 Note that the combiner operation produced by reduction tiling is also tiled here.

@@ -638,7 +638,7 @@ bufferization invalidates all loop handles including to loops that we are
 willing to unroll. This hurdle can be overcome by matching the payload IR
 operations after bufferization to produce new handles. We will first change the
 kind of loops produced in the schedule from `scf.for` to `scf.forall` to have
-less operations to match by using `transform.structured.tile_to_forall_op`
+less operations to match by using `transform.structured.tile_using_forall`
 instead of `transform.structured.tile` when tiling with sizes `[0, 0, 1, 16]`.
 Then we can match all `scf.forall` operations in the payload IR and transform
 them into single-iterator `scf.for` loops _after bufferization_.
@@ -618,10 +618,10 @@ def MultiTileSizesOp : Op<Transform_Dialect, "structured.multitile_sizes",
 !transform.param<i64>, !transform.param<i64>
 %low, %high = structured.split %target after %split { dimension = 1 }
 : !transform.any_op, !transform.param<i64>
-%tiled_low, %loop1 = structured.tile %low [0, %sz1]
+%tiled_low, %loop1 = structured.tile_using_for %low [0, %sz1]
 : (!transform.any_op, !transform.param<i64>)
 -> (!transform.any_op, !transform.any_op)
-%tiled_high, %loop2 = structured.tile %high [0, %sz2]
+%tiled_high, %loop2 = structured.tile_using_for %high [0, %sz2]
 : (!transform.any_op, !transform.param<i64>)
 -> (!transform.any_op, !transform.any_op)
 %common = merge_handles %tiled_low, %tiled_high : !transform.any_op

@@ -1514,10 +1514,10 @@ def SplitReductionOp : Op<Transform_Dialect, "structured.split_reduction",
 }

 //===----------------------------------------------------------------------===//
-// TileReductionUsingScfOp
+// TileReductionUsingForOp
 //===----------------------------------------------------------------------===//

-def TileReductionUsingScfOp : Op<Transform_Dialect, "structured.tile_reduction_using_scf",
+def TileReductionUsingForOp : Op<Transform_Dialect, "structured.tile_reduction_using_for",
 [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
 TransformEachOpTrait, TransformOpInterface,
 ReportTrackingListenerFailuresOpTrait]> {

@@ -1536,11 +1536,11 @@ def TileReductionUsingScfOp : Op<Transform_Dialect, "structured.tile_reduction_u

 #### Return modes

-This 4 returned handles point to:
-- the parent for op,
+Returns 4 handles associated with (in order):
 - the fill op used to initialize the neutral element,
 - the parallel tiled op and
-- the result-combining op.
+- the result-combining op,
+- the parent `for` op.

 #### Example:

@@ -1590,13 +1590,13 @@ def TileReductionUsingScfOp : Op<Transform_Dialect, "structured.tile_reduction_u
 ```
 }];

-// TODO: support mixed static-dynamic (see TileToForallOp).
+// TODO: support mixed static-dynamic (see TileUsingForallOp).
 let arguments = (ins TransformHandleTypeInterface:$target,
 DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$tile_sizes);
-let results = (outs TransformHandleTypeInterface:$for_op,
-TransformHandleTypeInterface:$fill_op,
+let results = (outs TransformHandleTypeInterface:$fill_op,
 TransformHandleTypeInterface:$split_linalg_op,
-TransformHandleTypeInterface:$combining_linalg_op);
+TransformHandleTypeInterface:$combining_linalg_op,
+TransformHandleTypeInterface:$for_op);

 let builders = [
 OpBuilder<(ins "Value":$target,

@@ -1644,11 +1644,11 @@ def TileReductionUsingForallOp :

 #### Return modes

-This 4 returned handles point to:
-- the parent forall op,
+Returns 4 handles associated with (in order):
 - the fill op used to initialize the neutral element,
 - the parallel tiled op and
-- the result-combining op.
+- the result-combining op,
+- the parent `forall` op.

 #### Example:

@@ -1694,15 +1694,15 @@ def TileReductionUsingForallOp :
 ```
 }];

-// TODO: support mixed static-dynamic (see TileToForallOp).
+// TODO: support mixed static-dynamic (see TileUsingForallOp).
 let arguments = (ins TransformHandleTypeInterface:$target,
 DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$num_threads,
 DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$tile_sizes,
 OptionalAttr<DeviceMappingArrayAttr>:$mapping);
-let results = (outs TransformHandleTypeInterface:$forall_op,
-TransformHandleTypeInterface:$fill_op,
+let results = (outs TransformHandleTypeInterface:$fill_op,
 TransformHandleTypeInterface:$split_linalg_op,
-TransformHandleTypeInterface:$combining_linalg_op);
+TransformHandleTypeInterface:$combining_linalg_op,
+TransformHandleTypeInterface:$forall_op);

 let builders = [
 OpBuilder<(ins "Value":$target,

@@ -1732,10 +1732,10 @@ def TileReductionUsingForallOp :
 }

 //===----------------------------------------------------------------------===//
-// TileOp
+// TileUsingForOp
 //===----------------------------------------------------------------------===//

-def TileOp : Op<Transform_Dialect, "structured.tile",
+def TileUsingForOp : Op<Transform_Dialect, "structured.tile_using_for",
 [DeclareOpInterfaceMethods<TransformOpInterface>,
 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
 ReportTrackingListenerFailuresOpTrait]> {

@@ -1820,11 +1820,11 @@ def TileOp : Op<Transform_Dialect, "structured.tile",
 }

 //===----------------------------------------------------------------------===//
-// TileToForallOp
+// TileUsingForallOp
 //===----------------------------------------------------------------------===//

-def TileToForallOp :
-Op<Transform_Dialect, "structured.tile_to_forall_op",
+def TileUsingForallOp :
+Op<Transform_Dialect, "structured.tile_using_forall",
 [AttrSizedOperandSegments,
 DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
 TransformOpInterface, ReportTrackingListenerFailuresOpTrait]> {

@@ -1834,9 +1834,9 @@ def TileToForallOp :
 Tiling is applied by either specifying `num_threads` or `tile_size`. If
 `num_threads` is specified, then the tile size for each dimension `i` is
 calculated dynamically via `ceilDiv(dimSize[i], num_threads[i])`.
-`num_threads` and `tile_size` can be either static index attributes or SSA
-values of PDL operation handle type (or a mix thereof). Operation handles
-must be mapped to exactly one op that has exactly one result of index type.
+`num_threads` and `tile_size` can be either static index attributes or
+operation handles (or a mix thereof). Operation handles must be mapped to
+exactly one op that has exactly one result of index type.

 Static zero tile sizes indicate that the dimension is not tiled and can be
 thought of as tiling by the full size of data.

@@ -1872,7 +1872,7 @@ def TileToForallOp :

 ```
 %0 = pdl_match @match_matmul in %arg1
-%3:2 = transform.structured.tile_to_forall_op %0 num_threads [10, 20]
+%3:2 = transform.structured.tile_using_forall %0 num_threads [10, 20]
 ```

 #### Example using `tile_sizes`

@@ -1880,7 +1880,7 @@ def TileToForallOp :
 ```
 %0 = pdl_match @match_matmul in %arg1
 %sz = pdl_match @match_size_op in %arg1
-%3:2 = transform.structured.tile_to_forall_op %0 tile_sizes [0, %sz, 20]
+%3:2 = transform.structured.tile_using_forall %0 tile_sizes [0, %sz, 20]
 ```
 }];

@@ -1892,8 +1892,8 @@ def TileToForallOp :
 DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_num_threads,
 DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_tile_sizes,
 OptionalAttr<DeviceMappingArrayAttr>:$mapping);
-let results = (outs TransformHandleTypeInterface:$forall_op,
-TransformHandleTypeInterface:$tiled_op);
+let results = (outs TransformHandleTypeInterface:$tiled_op,
+TransformHandleTypeInterface:$forall_op);

 let builders = [
 OpBuilder<(ins "Value":$target,
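As a quick reference for the result reordering documented above, a hedged sketch of the reduction-tiling form follows; `%reduction` is an illustrative handle and the tile sizes are arbitrary, but the result order (fill, parallel-tiled op, combiner, parent loop last) matches the updated op definition:

```mlir
// Results in order: fill op, parallel (split) op, combining op, parent scf.for.
%fill, %parallel, %combiner, %for =
  transform.structured.tile_reduction_using_for %reduction by tile_sizes=[0, 8]
  : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
```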
@@ -2302,17 +2302,17 @@ DiagnosedSilenceableFailure transform::SplitReductionOp::applyToOne(
 }

 //===----------------------------------------------------------------------===//
-// TileReductionUsingScfOp
+// TileReductionUsingForOp
 //===----------------------------------------------------------------------===//

-void transform::TileReductionUsingScfOp::build(
+void transform::TileReductionUsingForOp::build(
 OpBuilder &builder, OperationState &result, Value target,
 ArrayRef<int64_t> staticTileSizes) {
 // Call the default builder.
 // This is future-proof re mixed static-dynamic and setting up the proper
 // operands segment sizes attributes for multiple variadic operands.
 // In the absence of this, horrible bugs ensue.
-// TODO: support mixed static-dynamic (see TileToForallOp).
+// TODO: support mixed static-dynamic (see TileUsingForallOp).
 MLIRContext *ctx = builder.getContext();
 auto opTy = transform::AnyOpType::get(ctx);
 auto staticTileSizesAttr = builder.getDenseI64ArrayAttr(staticTileSizes);

@@ -2322,7 +2322,7 @@ void transform::TileReductionUsingScfOp::build(
 /*tile_sizes=*/staticTileSizesAttr);
 }

-DiagnosedSilenceableFailure transform::TileReductionUsingScfOp::applyToOne(
+DiagnosedSilenceableFailure transform::TileReductionUsingForOp::applyToOne(
 transform::TransformRewriter &rewriter, LinalgOp target,
 transform::ApplyToEachResultList &results,
 transform::TransformState &state) {

@@ -2333,10 +2333,10 @@ DiagnosedSilenceableFailure transform::TileReductionUsingScfOp::applyToOne(

 if (failed(result))
 return emitDefaultSilenceableFailure(target);
-results.push_back(result->loops.front());
 results.push_back(result->initialOp);
 results.push_back(result->parallelTiledOp);
 results.push_back(result->mergeOp);
+results.push_back(result->loops.front());
 return DiagnosedSilenceableFailure::success();
 }

@@ -2352,7 +2352,7 @@ void transform::TileReductionUsingForallOp::build(
 // This is future-proof re mixed static-dynamic and setting up the proper
 // operands segment sizes attributes for multiple variadic operands.
 // In the absence of this, horrible bugs ensue.
-// TODO: support mixed static-dynamic (see TileToForallOp).
+// TODO: support mixed static-dynamic (see TileUsingForallOp).
 MLIRContext *ctx = builder.getContext();
 auto opTy = transform::AnyOpType::get(ctx);
 auto staticNumThreadsAttr = builder.getDenseI64ArrayAttr(staticNumThreads);

@@ -2384,22 +2384,22 @@ DiagnosedSilenceableFailure transform::TileReductionUsingForallOp::applyToOne(
 diag.attachNote(target.getLoc()) << "target operation";
 return diag;
 }
-results.push_back(result->loops);
 results.push_back(result->initialOp);
 results.push_back(result->parallelTiledOp);
 results.push_back(result->mergeOp);
+results.push_back(result->loops);
 return DiagnosedSilenceableFailure::success();
 }

 //===----------------------------------------------------------------------===//
-// TileOp
+// TileUsingForOp
 //===----------------------------------------------------------------------===//

-void transform::TileOp::build(OpBuilder &builder, OperationState &result,
-TypeRange loopTypes, Value target,
-ArrayRef<int64_t> staticTileSizes,
-ArrayRef<int64_t> interchange,
-std::optional<ArrayRef<bool>> scalableSizes) {
+void transform::TileUsingForOp::build(
+OpBuilder &builder, OperationState &result, TypeRange loopTypes,
+Value target, ArrayRef<int64_t> staticTileSizes,
+ArrayRef<int64_t> interchange,
+std::optional<ArrayRef<bool>> scalableSizes) {
 return build(builder, result, loopTypes,
 /*target=*/target,
 /*mixedTileSizes=*/

@@ -2407,20 +2407,19 @@ void transform::TileOp::build(OpBuilder &builder, OperationState &result,
 interchange, scalableSizes);
 }

-void transform::TileOp::build(OpBuilder &builder, OperationState &result,
-Value target, ArrayRef<int64_t> staticTileSizes,
-ArrayRef<int64_t> interchange,
-std::optional<ArrayRef<bool>> scalableSizes) {
+void transform::TileUsingForOp::build(
+OpBuilder &builder, OperationState &result, Value target,
+ArrayRef<int64_t> staticTileSizes, ArrayRef<int64_t> interchange,
+std::optional<ArrayRef<bool>> scalableSizes) {
 build(builder, result, target,
 getAsOpFoldResult(builder.getI64ArrayAttr(staticTileSizes)),
 interchange, scalableSizes);
 }

-void transform::TileOp::build(OpBuilder &builder, OperationState &result,
-Value target,
-ArrayRef<OpFoldResult> mixedTileSizes,
-ArrayRef<int64_t> interchange,
-std::optional<ArrayRef<bool>> scalableSizes) {
+void transform::TileUsingForOp::build(
+OpBuilder &builder, OperationState &result, Value target,
+ArrayRef<OpFoldResult> mixedTileSizes, ArrayRef<int64_t> interchange,
+std::optional<ArrayRef<bool>> scalableSizes) {
 // Loop types are automaticaly splat by the callee, setting up one is
 // enough.
 SmallVector<Type> loopTypes(1, builder.getType<transform::AnyOpType>());

@@ -2428,11 +2427,11 @@ void transform::TileOp::build(OpBuilder &builder, OperationState &result,
 scalableSizes);
 }

-void transform::TileOp::build(OpBuilder &builder, OperationState &result,
-TypeRange loopTypes, Value target,
-ArrayRef<OpFoldResult> mixedTileSizes,
-ArrayRef<int64_t> interchange,
-std::optional<ArrayRef<bool>> scalableSizes) {
+void transform::TileUsingForOp::build(
+OpBuilder &builder, OperationState &result, TypeRange loopTypes,
+Value target, ArrayRef<OpFoldResult> mixedTileSizes,
+ArrayRef<int64_t> interchange,
+std::optional<ArrayRef<bool>> scalableSizes) {
 SmallVector<int64_t> staticTileSizes;
 SmallVector<Value> dynamicTileSizes;
 dispatchIndexOpFoldResults(mixedTileSizes, dynamicTileSizes, staticTileSizes);

@@ -2462,7 +2461,7 @@ void transform::TileOp::build(OpBuilder &builder, OperationState &result,
 /*scalable_sizes=*/expandedScalableSizes);
 }

-LogicalResult transform::TileOp::verify() {
+LogicalResult transform::TileUsingForOp::verify() {
 if (getMixedSizes().size() != getScalableSizes().size())
 return emitOpError("expected same number of sizes (")
 << getMixedSizes().size() << ") and scalable sizes ()"

@@ -2471,9 +2470,9 @@ LogicalResult transform::TileOp::verify() {
 }

 DiagnosedSilenceableFailure
-transform::TileOp::apply(transform::TransformRewriter &rewriter,
-TransformResults &transformResults,
-TransformState &state) {
+transform::TileUsingForOp::apply(transform::TransformRewriter &rewriter,
+TransformResults &transformResults,
+TransformState &state) {
 ArrayRef<int64_t> tileSizes = getStaticSizes();

 SmallVector<Operation *> targets =

@@ -2612,7 +2611,7 @@ transform::TileOp::apply(transform::TransformRewriter &rewriter,
 return DiagnosedSilenceableFailure::success();
 }

-SmallVector<OpFoldResult> transform::TileOp::getMixedSizes() {
+SmallVector<OpFoldResult> transform::TileUsingForOp::getMixedSizes() {
 ValueRange dynamic = getDynamicSizes();
 ArrayRef<int64_t> tileSizes = getStaticSizes();
 SmallVector<OpFoldResult> results;

@@ -2656,8 +2655,8 @@ void printOptionalInterchange(OpAsmPrinter &p,
 }
 }

-ParseResult transform::TileOp::parse(OpAsmParser &parser,
-OperationState &result) {
+ParseResult transform::TileUsingForOp::parse(OpAsmParser &parser,
+OperationState &result) {
 OpAsmParser::UnresolvedOperand target;
 SmallVector<OpAsmParser::UnresolvedOperand> dynamicSizes;
 DenseI64ArrayAttr staticSizes;

@@ -2696,7 +2695,7 @@ ParseResult transform::TileOp::parse(OpAsmParser &parser,
 return success();
 }

-void TileOp::print(OpAsmPrinter &p) {
+void TileUsingForOp::print(OpAsmPrinter &p) {
 p << ' ' << getTarget();
 printDynamicIndexList(p, getOperation(), getDynamicSizes(), getStaticSizes(),
 /*valueTypes=*/{}, getScalableSizesAttr(),

@@ -2706,7 +2705,7 @@ void TileOp::print(OpAsmPrinter &p) {
 p.printFunctionalType(getOperands().getTypes(), getResults().getTypes());
 }

-void transform::TileOp::getEffects(
+void transform::TileUsingForOp::getEffects(
 SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
 consumesHandle(getTarget(), effects);
 onlyReadsHandle(getDynamicSizes(), effects);

@@ -2716,14 +2715,14 @@ void transform::TileOp::getEffects(
 }

 //===----------------------------------------------------------------------===//
-// TileToForallOp
+// TileUsingForallOp
 //===----------------------------------------------------------------------===//

-void transform::TileToForallOp::build(OpBuilder &builder,
-OperationState &result, Value target,
-ArrayRef<int64_t> staticTileSizes,
-transform::TileSizesSpec,
-ArrayAttr mapping) {
+void transform::TileUsingForallOp::build(OpBuilder &builder,
+OperationState &result, Value target,
+ArrayRef<int64_t> staticTileSizes,
+transform::TileSizesSpec,
+ArrayAttr mapping) {
 return build(builder, result,
 /*target=*/target,
 /*mixedTileSizes=*/

@@ -2732,11 +2731,11 @@ void transform::TileToForallOp::build(OpBuilder &builder,
 /*mapping=*/mapping);
 }

-void transform::TileToForallOp::build(OpBuilder &builder,
-OperationState &result, Value target,
-ArrayRef<OpFoldResult> mixedTileSizes,
-transform::TileSizesSpec,
-ArrayAttr mapping) {
+void transform::TileUsingForallOp::build(OpBuilder &builder,
+OperationState &result, Value target,
+ArrayRef<OpFoldResult> mixedTileSizes,
+transform::TileSizesSpec,
+ArrayAttr mapping) {
 SmallVector<int64_t> staticTileSizes;
 SmallVector<Value> dynamicTileSizes;
 dispatchIndexOpFoldResults(mixedTileSizes, dynamicTileSizes, staticTileSizes);

@@ -2758,21 +2757,21 @@ void transform::TileToForallOp::build(OpBuilder &builder,
 /*mapping=*/mapping);
 }

-void transform::TileToForallOp::build(OpBuilder &builder,
-OperationState &result, Value target,
-ArrayRef<int64_t> staticNumThreads,
-transform::NumThreadsSpec,
-ArrayAttr mapping) {
+void transform::TileUsingForallOp::build(OpBuilder &builder,
+OperationState &result, Value target,
+ArrayRef<int64_t> staticNumThreads,
+transform::NumThreadsSpec,
+ArrayAttr mapping) {
 return build(builder, result, target,
 getAsOpFoldResult(builder.getI64ArrayAttr(staticNumThreads)),
 NumThreadsSpec(), mapping);
 }

-void transform::TileToForallOp::build(OpBuilder &builder,
-OperationState &result, Value target,
-ArrayRef<OpFoldResult> mixedNumThreads,
-transform::NumThreadsSpec,
-ArrayAttr mapping) {
+void transform::TileUsingForallOp::build(OpBuilder &builder,
+OperationState &result, Value target,
+ArrayRef<OpFoldResult> mixedNumThreads,
+transform::NumThreadsSpec,
+ArrayAttr mapping) {
 SmallVector<int64_t> staticNumThreads;
 SmallVector<Value> dynamicNumThreads;
 dispatchIndexOpFoldResults(mixedNumThreads, dynamicNumThreads,

@@ -2828,10 +2827,10 @@ DiagnosedSilenceableFailure transform::tileToForallOpImpl(
 return DiagnosedSilenceableFailure::success();
 }

-DiagnosedSilenceableFailure
-transform::TileToForallOp::apply(transform::TransformRewriter &rewriter,
-transform::TransformResults &transformResults,
-transform::TransformState &state) {
+DiagnosedSilenceableFailure transform::TileUsingForallOp::apply(
+transform::TransformRewriter &rewriter,
+transform::TransformResults &transformResults,
+transform::TransformState &state) {
 auto transformOp = cast<TransformOpInterface>(getOperation());

 // Result payload ops.

@@ -2874,7 +2873,7 @@ transform::TileToForallOp::apply(transform::TransformRewriter &rewriter,
 return DiagnosedSilenceableFailure::success();
 }

-void transform::TileToForallOp::getEffects(
+void transform::TileUsingForallOp::getEffects(
 SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
 consumesHandle(getTarget(), effects);
 onlyReadsHandle(getTileSizes(), effects);

@@ -2885,17 +2884,17 @@ void transform::TileToForallOp::getEffects(
 modifiesPayload(effects);
 }

-SmallVector<OpFoldResult> TileToForallOp::getMixedNumThreads() {
+SmallVector<OpFoldResult> TileUsingForallOp::getMixedNumThreads() {
 Builder b(getContext());
 return getMixedValues(getStaticNumThreads(), getNumThreads(), b);
 }

-SmallVector<OpFoldResult> TileToForallOp::getMixedTileSizes() {
+SmallVector<OpFoldResult> TileUsingForallOp::getMixedTileSizes() {
 Builder b(getContext());
 return getMixedValues(getStaticTileSizes(), getTileSizes(), b);
 }

-LogicalResult TileToForallOp::verify() {
+LogicalResult TileUsingForallOp::verify() {
 int numThreadsSpec = static_cast<int>(!getMixedNumThreads().empty()) +
 static_cast<int>(getPackedNumThreads() != Value());
 if (numThreadsSpec > 1)
@@ -567,8 +567,8 @@ class SplitOp:
 )


-class TileOp:
-"""Specialization for TileOp class."""
+class TileUsingForOp:
+"""Specialization for TileUsingForOp class."""

 @overload
 def __init__(

@@ -616,7 +616,9 @@ class TileOp:
 if isinstance(loop_types_or_target, (Operation, Value, OpView)):
 loop_types = [transform.AnyOpType.get()] * num_loops
 target = loop_types_or_target
-assert target_or_none is None, "Cannot construct TileOp with two targets."
+assert (
+target_or_none is None
+), "Cannot construct TileUsingForOp with two targets."
 else:
 loop_types = (
 ([loop_types_or_target] * num_loops)

@@ -638,8 +640,8 @@ class TileOp:
 )


-class TileToForallOp:
-"""Specialization for TileToForallOp class."""
+class TileUsingForallOp:
+"""Specialization for TileUsingForallOp class."""

 @overload
 def __init__(
@@ -175,7 +175,7 @@ func.func @simple_matmul(%t6 : index, %t7 : index, %t8 : index, %arg0: memref<25
 // CHECK-DAG: [[UBI0:#map[0-9]*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s1, s0)>
 // CHECK-DAG: [[UBO0:#map[0-9]*]] = affine_map<()[s0, s1] -> (s0 ceildiv s1)>

-// CHECK: func @tile_with_symbolic_loop_upper_bounds([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index{{.*}}){{.*}}
+// CHECK: func @tile_using_symbolic_loop_upper_bounds([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index{{.*}}){{.*}}
 // CHECK: affine.for [[ARG2:%arg[0-9]+]] = 0 to [[UBO0]](){{.*}}[[ARG0]]{{.*}}
 // CHECK-NEXT: affine.for [[ARG3:%arg[0-9]+]] = 0 to [[UBO0]](){{.*}}[[ARG1]]{{.*}}
 // CHECK-NEXT: affine.for %[[I0:.*]] = [[LBI0]]{{.*}}[[ARG2]]{{.*}}[[ARG0]]{{.*}} to min [[UBI0]]{{.*}}[[ARG2]]{{.*}}[[ARG0]]{{.*}}

@@ -188,7 +188,7 @@ func.func @simple_matmul(%t6 : index, %t7 : index, %t8 : index, %arg0: memref<25
 // CHECK-NEXT: affine.load %{{.*}}%[[I0]], %[[I1]]
 // CHECK-NEXT: arith.addf
 // CHECK-NEXT: affine.store %{{.*}}%[[I0]], %[[I1]]
-func.func @tile_with_symbolic_loop_upper_bounds(%t9 : index, %t10: index, %arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
+func.func @tile_using_symbolic_loop_upper_bounds(%t9 : index, %t10: index, %arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
 %cst = arith.constant 0.000000e+00 : f32
 %c0 = arith.constant 0 : index
 %0 = memref.dim %arg0, %c0 : memref<?x?xf32>

@@ -214,8 +214,8 @@ func.func @tile_with_symbolic_loop_upper_bounds(%t9 : index, %t10: index, %arg0:
 // CHECK-DAG: [[UBI0:#map[0-9]*]] = affine_map<(d0)[s0, s1, s2] -> (d0 * s2 + s2, s0 + s1)>
 // CHECK-DAG: [[UBO0:#map[0-9]*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1) ceildiv s2)>

-// CHECK: func @tile_with_loop_upper_bounds_in_two_symbols([[ARG0:%arg[0-9]+]]: index{{.*}}){{.*}}
-func.func @tile_with_loop_upper_bounds_in_two_symbols(%t11 : index, %arg0: memref<?xf32>, %limit: index) {
+// CHECK: func @tile_using_loop_upper_bounds_in_two_symbols([[ARG0:%arg[0-9]+]]: index{{.*}}){{.*}}
+func.func @tile_using_loop_upper_bounds_in_two_symbols(%t11 : index, %arg0: memref<?xf32>, %limit: index) {
 %c0 = arith.constant 0 : index
 %dim0 = memref.dim %arg0, %c0 : memref<?xf32>
 affine.for %i0 = 0 to affine_map<()[s0, s1] -> (s0 + s1)> ()[%dim0, %limit] {

@@ -235,12 +235,12 @@ func.func @tile_with_loop_upper_bounds_in_two_symbols(%t11 : index, %arg0: memre
 // CHECK-DAG: [[UBO1:#map[0-9]*]] = affine_map<(d0)[s0, s1] -> ((d0 + s0 + 4) ceildiv s1)>
 // CHECK-DAG: [[UBO0:#map[0-9]*]] = affine_map<(d0)[s0, s1] -> ((d0 + s0 + 2) ceildiv s1)>

-// CHECK: func @tile_with_upper_bounds_in_dimensions_and_symbols([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index, [[ARG2:%arg[0-9]+]]: index, [[ARG3:%arg[0-9]+]]: index{{.*}}){{.*}}
+// CHECK: func @tile_using_upper_bounds_in_dimensions_and_symbols([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index, [[ARG2:%arg[0-9]+]]: index, [[ARG3:%arg[0-9]+]]: index{{.*}}){{.*}}
 // CHECK-NEXT: affine.for [[ARG4:%arg[0-9]+]] = 0 to [[UBO0]]({{.*}}){{.*}}[[ARG0]]
 // CHECK-NEXT: affine.for [[ARG5:%arg[0-9]+]] = 0 to [[UBO1]]({{.*}}){{.*}}[[ARG1]]
 // CHECK-NEXT: affine.for {{.*}} = [[LBI0]]([[ARG4]]){{.*}}[[ARG0]]{{.*}} to min [[UBI0]]({{.*}}, [[ARG4]]){{.*}}[[ARG0]]{{.*}}
 // CHECK-NEXT: affine.for {{.*}} = [[LBI0]]([[ARG5]]){{.*}}[[ARG1]]{{.*}} to min [[UBI1]]({{.*}}, [[ARG5]]){{.*}}[[ARG1]]{{.*}}
-func.func @tile_with_upper_bounds_in_dimensions_and_symbols(%t12 : index, %t13 :index, %M: index, %N: index, %K: index) {
+func.func @tile_using_upper_bounds_in_dimensions_and_symbols(%t12 : index, %t13 :index, %M: index, %N: index, %K: index) {
 affine.for %i = 0 to affine_map<(d0)[s0] -> (d0 + s0 + 2)>(%M)[%K] {
 affine.for %j = 0 to affine_map<(d0)[s0] -> (d0 + s0 + 4)>(%N)[%K] {
 "test.foo" () : () -> ()

@@ -257,13 +257,13 @@ func.func @tile_with_upper_bounds_in_dimensions_and_symbols(%t12 : index, %t13 :
 // CHECK-DAG: [[UBO1:#map[0-9]*]] = affine_map<(d0)[s0, s1] -> ((d0 + s0 + 4) ceildiv s1)>
 // CHECK-DAG: [[UBO0:#map[0-9]*]] = affine_map<(d0)[s0, s1] -> ((d0 + s0 + 2) ceildiv s1)>

-// CHECK: func @tile_with_upper_bounds_in_dimensions_and_symbols_non_unit_steps
+// CHECK: func @tile_using_upper_bounds_in_dimensions_and_symbols_non_unit_steps
 // CHECK-SAME: ([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index, [[ARG2:%arg[0-9]+]]: index, [[ARG3:%arg[0-9]+]]: index{{.*}}){{.*}}
 // CHECK-NEXT: affine.for [[ARG4:%arg[0-9]+]] = 0 to [[UBO0]]({{.*}}){{.*}}[[ARG0]]{{.*}} step 2{{.*}}
 // CHECK-NEXT: affine.for [[ARG5:%arg[0-9]+]] = 0 to [[UBO1]]({{.*}}){{.*}}[[ARG1]]{{.*}} step 4{{.*}}
 // CHECK-NEXT: affine.for {{.*}} = [[LBI0]]([[ARG4]]){{.*}}[[ARG0]]{{.*}} to min [[UBI0]]({{.*}}, [[ARG4]]){{.*}}[[ARG0]]{{.*}} step 2{{.*}}
 // CHECK-NEXT: affine.for {{.*}} = [[LBI0]]([[ARG5]]){{.*}}[[ARG1]]{{.*}} to min [[UBI1]]({{.*}}, [[ARG5]]){{.*}}[[ARG1]]{{.*}} step 4{{.*}}
-func.func @tile_with_upper_bounds_in_dimensions_and_symbols_non_unit_steps(%t12 : index, %t13 :index, %M: index, %N : index, %K: index) {
+func.func @tile_using_upper_bounds_in_dimensions_and_symbols_non_unit_steps(%t12 : index, %t13 :index, %M: index, %N : index, %K: index) {
 affine.for %i = 0 to affine_map<(d0)[s0] -> (d0 + s0 + 2)>(%M)[%K] step 2 {
 affine.for %j = 0 to affine_map<(d0)[s0] -> (d0 + s0 + 4)>(%N)[%K] step 4 {
 "test.foo" () : () -> ()

@@ -110,7 +110,7 @@ func.func @simple_matmul(%arg0: memref<256x256xvector<64xf32>>, %arg1: memref<25

 // CHECK-DAG: [[$UBMAP:#map[0-9]*]] = affine_map<(d0)[s0] -> (d0 + 32, s0)>

-func.func @tile_with_symbolic_loop_upper_bounds(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
+func.func @tile_using_symbolic_loop_upper_bounds(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
 %cst = arith.constant 0.000000e+00 : f32
 %c0 = arith.constant 0 : index
 %0 = memref.dim %arg0, %c0 : memref<?x?xf32>

@@ -156,7 +156,7 @@ func.func @tile_with_symbolic_loop_upper_bounds(%arg0: memref<?x?xf32>, %arg1: m
 // CHECK-DAG: [[MAP1:#map[0-9]*]] = affine_map<()[s0, s1] -> (s0 + s1)>
 // CHECK-DAG: [[$UBMAP:#map[0-9]*]] = affine_map<(d0)[s0, s1] -> (d0 + 32, s0 + s1)>

-func.func @tile_with_loop_upper_bounds_in_two_symbols(%arg0: memref<?xf32>, %limit: index) {
+func.func @tile_using_loop_upper_bounds_in_two_symbols(%arg0: memref<?xf32>, %limit: index) {
 %c0 = arith.constant 0 : index
 %dim0 = memref.dim %arg0, %c0 : memref<?xf32>
 affine.for %i0 = 0 to affine_map<()[s0, s1] -> (s0 + s1)> ()[%dim0, %limit] {
@@ -135,7 +135,7 @@ func.func @map_nested_forall_to_threads_not_buffer(%x: tensor<32x32xf32>, %y: te
 transform.sequence failures(propagate) {
 ^bb1(%arg0: !transform.any_op):
 %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-%forall, %tiled = transform.structured.tile_to_forall_op %matmul num_threads [2, 3, 1] (mapping = [ #gpu.thread<y>, #gpu.thread<x>, #gpu.thread<z> ] )
+%forall, %tiled = transform.structured.tile_using_forall %matmul num_threads [2, 3, 1] (mapping = [ #gpu.thread<y>, #gpu.thread<x>, #gpu.thread<z> ] )
 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!transform.any_op) -> !transform.any_op
 // expected-error @below {{only bufferized scf.forall can be mapped}}
@@ -15,7 +15,7 @@ func.func @matmul_tensors(
 transform.sequence failures(propagate) {
 ^bb1(%module_op: !transform.any_op):
 %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!transform.any_op) -> !transform.any_op
-%1, %loops:3 = transform.structured.tile %0 [2, 2, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
+%1, %loops:3 = transform.structured.tile_using_for %0 [2, 2, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
 %2 = get_parent_op %1 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
 transform.structured.vectorize_children_and_apply_patterns %2 : (!transform.any_op) -> !transform.any_op
 %b = transform.bufferization.one_shot_bufferize layout{IdentityLayoutMap}
@@ -27,7 +27,7 @@ func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8
 transform.sequence failures(propagate) {
 ^bb0(%arg1: !transform.any_op):
 %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-%1, %loops:4 = transform.structured.tile %0 [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
+%1, %loops:4 = transform.structured.tile_using_for %0 [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
 }

 // -----

@@ -56,7 +56,7 @@ func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %a
 transform.sequence failures(propagate) {
 ^bb0(%arg1: !transform.any_op):
 %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-%1, %loops:2 = transform.structured.tile %0 [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+%1, %loops:2 = transform.structured.tile_using_for %0 [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
 }

 // -----

@@ -89,5 +89,5 @@ func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>)
 transform.sequence failures(propagate) {
 ^bb0(%arg1: !transform.any_op):
 %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-%1, %loops:2 = transform.structured.tile %0 [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+%1, %loops:2 = transform.structured.tile_using_for %0 [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
 }
@@ -8,7 +8,7 @@ func.func @KCRSsr_to_KCRS(%arg0: tensor<1x1x4x8x8x32xf32>, %arg1: tensor<1x1x128
 transform.sequence failures(propagate) {
 ^bb0(%arg1: !transform.any_op):
 %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-%1, %loops:4 = transform.structured.tile %0 [1, 1, 32, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
+%1, %loops:4 = transform.structured.tile_using_for %0 [1, 1, 32, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
 }
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)>
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 floordiv 8)>

@@ -70,7 +70,7 @@ func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13
 transform.sequence failures(propagate) {
 ^bb0(%arg1: !transform.any_op):
 %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-%1, %loops:2 = transform.structured.tile %0 [8, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+%1, %loops:2 = transform.structured.tile_using_for %0 [8, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
 }

 // -----

@@ -104,5 +104,5 @@ func.func @CKkc_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>)
 transform.sequence failures(propagate) {
 ^bb0(%arg1: !transform.any_op):
 %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-%1, %loops:2 = transform.structured.tile %0 [32, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+%1, %loops:2 = transform.structured.tile_using_for %0 [32, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
 }
@@ -46,13 +46,13 @@ transform.sequence failures(propagate) {
 : (!transform.any_op) -> !transform.any_op
 %fill_op = transform.structured.match ops{["linalg.fill"]} in %arg1
 : (!transform.any_op) -> !transform.any_op
-%forall_op, %tiled_matmul_op = transform.structured.tile_to_forall_op %matmul_op num_threads [] tile_sizes [128, 128](mapping = [#gpu.block<y>, #gpu.block<x>])
+%tiled_matmul_op, %forall_op = transform.structured.tile_using_forall %matmul_op num_threads [] tile_sizes [128, 128](mapping = [#gpu.block<y>, #gpu.block<x>])
 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 %fused_op, %new_containing_op = transform.structured.fuse_into_containing_op %fill_op into %forall_op
 : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)

 // Tile linalg.matmul a second time.
-%tiled_linalg_op, %loops = transform.structured.tile %tiled_matmul_op[0, 0, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+%tiled_linalg_op, %loops = transform.structured.tile_using_for %tiled_matmul_op[0, 0, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

 // Pad linalg.matmul.
 %padded, %pad, %copy_back = transform.structured.pad %tiled_linalg_op

@@ -163,13 +163,13 @@ transform.sequence failures(propagate) {
 : (!transform.any_op) -> !transform.any_op
 %fill_op = transform.structured.match ops{["linalg.fill"]} in %arg1
 : (!transform.any_op) -> !transform.any_op
-%forall_op, %tiled_matmul_op = transform.structured.tile_to_forall_op %matmul_op num_threads [] tile_sizes [128, 128](mapping = [#gpu.block<y>, #gpu.block<x>])
+%tiled_matmul_op, %forall_op = transform.structured.tile_using_forall %matmul_op num_threads [] tile_sizes [128, 128](mapping = [#gpu.block<y>, #gpu.block<x>])
 : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 %fused_op, %new_containing_op = transform.structured.fuse_into_containing_op %fill_op into %forall_op
 : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)

 // Tile linalg.matmul a second time.
-%tiled_linalg_op, %loops = transform.structured.tile %tiled_matmul_op[0, 0, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+%tiled_linalg_op, %loops = transform.structured.tile_using_for %tiled_matmul_op[0, 0, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

 // Pad linalg.matmul.
 %padded, %pad, %copy_back = transform.structured.pad %tiled_linalg_op
@@ -8,13 +8,13 @@ transform.sequence failures(propagate) {
 %1:3 = transform.structured.multitile_sizes %0 { dimension = 0, target_size = 3} : (!transform.any_op) -> !transform.any_op
 %t:3 = transform.structured.multitile_sizes %0 { dimension = 1, target_size = 10} : (!transform.any_op) -> !transform.any_op
 %2:2 = transform.structured.split %0 after %1#2 { dimension = 0 } : !transform.any_op, !transform.any_op
-%3:2 = transform.structured.tile %2#0 [%1#0] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-%4:2 = transform.structured.tile %2#1 [%1#1] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
+%3:2 = transform.structured.tile_using_for %2#0 [%1#0] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
+%4:2 = transform.structured.tile_using_for %2#1 [%1#1] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
 %5 = merge_handles %3#0, %4#0 : !transform.any_op
 %tt:3 = replicate num(%5) %t#0, %t#1, %t#2 : !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op
 %6:2 = transform.structured.split %5 after %tt#2 { dimension = 1 } : !transform.any_op, !transform.any_op
-transform.structured.tile %6#0 [0, %tt#0] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-transform.structured.tile %6#1 [0, %tt#1] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
+transform.structured.tile_using_for %6#0 [0, %tt#0] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
+transform.structured.tile_using_for %6#1 [0, %tt#1] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
 }

 func.func private @elem(%arg0: f32, %arg1: index, %arg2: index) -> f32

@@ -108,13 +108,13 @@ transform.sequence failures(propagate) {
 %1:3 = transform.structured.multitile_sizes %0 { dimension = 0, target_size = 3} : (!transform.any_op) -> !transform.param<i64>
 %t:3 = transform.structured.multitile_sizes %0 { dimension = 1, target_size = 10} : (!transform.any_op) -> !transform.param<i64>
 %2:2 = transform.structured.split %0 after %1#2 { dimension = 0 } : !transform.any_op, !transform.param<i64>
-%3:2 = transform.structured.tile %2#0 [%1#0] : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op)
-%4:2 = transform.structured.tile %2#1 [%1#1] : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op)
+%3:2 = transform.structured.tile_using_for %2#0 [%1#0] : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op)
+%4:2 = transform.structured.tile_using_for %2#1 [%1#1] : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op)
 %5 = merge_handles %3#0, %4#0 : !transform.any_op
 %tt:3 = replicate num(%5) %t#0, %t#1, %t#2 : !transform.any_op, !transform.param<i64>, !transform.param<i64>, !transform.param<i64>
 %6:2 = transform.structured.split %5 after %tt#2 { dimension = 1 } : !transform.any_op, !transform.param<i64>
-transform.structured.tile %6#0 [0, %tt#0] : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op)
-transform.structured.tile %6#1 [0, %tt#1] : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op)
+transform.structured.tile_using_for %6#0 [0, %tt#0] : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op)
+transform.structured.tile_using_for %6#1 [0, %tt#1] : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op)
 }

 func.func private @elem(%arg0: f32, %arg1: index, %arg2: index) -> f32
@@ -183,7 +183,7 @@ func.func @gemm_shared(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?
 transform.sequence failures(propagate) {
 ^bb0(%arg1: !transform.any_op):
 %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-%1, %loops:3 = transform.structured.tile %0 [16, 16, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
+%1, %loops:3 = transform.structured.tile_using_for %0 [16, 16, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
 %2 = transform.structured.promote %1 { operands_to_promote = [0, 1], mapping = [#gpu.memory_space<workgroup>] } : (!transform.any_op) -> !transform.any_op
 }

@@ -225,7 +225,7 @@ func.func @gemm_private(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<
 transform.sequence failures(propagate) {
 ^bb0(%arg1: !transform.any_op):
 %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-%1, %loops:3 = transform.structured.tile %0 [16, 16, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
+%1, %loops:3 = transform.structured.tile_using_for %0 [16, 16, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
 %2 = transform.structured.promote %1 { operands_to_promote = [0, 1], mapping = [#gpu.memory_space<private>] } : (!transform.any_op) -> !transform.any_op
 }

@@ -37,6 +37,6 @@ func.func @gemm(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>
 transform.sequence failures(propagate) {
 ^bb0(%arg1: !transform.any_op):
 %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-%1, %loops:3 = transform.structured.tile %0 [16, 16, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
+%1, %loops:3 = transform.structured.tile_using_for %0 [16, 16, 16] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
 %2 = transform.structured.promote %1 { operands_to_promote = [0, 2], force_full_tiles = [false, false], use_full_tiles_by_default } : (!transform.any_op) -> !transform.any_op
 }
@@ -12,7 +12,7 @@ func.func @conv(%arg0 : memref<?x?xf32>, %arg1 : memref<?x?xf32>, %arg2 : memref
|
||||
transform.sequence failures(propagate) {
|
||||
^bb0(%arg1: !transform.any_op):
|
||||
%0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1 : (!transform.any_op) -> !transform.any_op
|
||||
%1, %loop:2 = transform.structured.tile %0 [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
|
||||
%1, %loop:2 = transform.structured.tile_using_for %0 [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
|
||||
}
|
||||
|
||||
// CHECK: func @conv
|
||||
|
||||
@@ -14,7 +14,7 @@ func.func @indexed_vector(%arg0: memref<50xindex>) {
|
||||
transform.sequence failures(propagate) {
|
||||
^bb0(%arg1: !transform.any_op):
|
||||
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
|
||||
%1, %loop = transform.structured.tile %0 [10] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
%1, %loop = transform.structured.tile_using_for %0 [10] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
}
|
||||
|
||||
// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)>
|
||||
@@ -44,7 +44,7 @@ func.func @indexed_matrix(%arg0: memref<50x50xindex>) {
|
||||
transform.sequence failures(propagate) {
|
||||
^bb0(%arg1: !transform.any_op):
|
||||
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
|
||||
%1, %loop:2 = transform.structured.tile %0 [10, 25] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
|
||||
%1, %loop:2 = transform.structured.tile_using_for %0 [10, 25] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
|
||||
}
|
||||
|
||||
// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)>
|
||||
|
||||
@@ -39,12 +39,12 @@ func.func @softmax(%arg0: tensor<16x64x256xf32>) -> tensor<16x64x256xf32> {
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.softmax"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loop:2 = transform.structured.tile %0 [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loop:2 = transform.structured.tile_using_for %0 [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----

// Test the softmax tiling interface with the tile_to_forall_op transform and
// Test the softmax tiling interface with the tile_using_forall transform and
// check that it composes properly with the fuse transform.
// This should sink the linalg.generic inside the scf.forall and run that
// generic on 2x4x256 tensors (2==16/8, 4==64/16).
@@ -105,7 +105,7 @@ transform.sequence failures(propagate) {
%0 = transform.structured.match ops{["linalg.softmax"]} in %arg1 : (!transform.any_op) -> !transform.any_op

// Tile the root.
%forall_op, %tiled_op = transform.structured.tile_to_forall_op %0 num_threads [8, 16]
%tiled_op, %forall_op = transform.structured.tile_using_forall %0 num_threads [8, 16]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)

// Fuse all producers.
@@ -145,5 +145,5 @@ func.func @softmax_memref(%arg0: memref<16x64x256xf32>, %arg1: memref<16x64x256x
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.softmax"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loop:2 = transform.structured.tile %0 [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loop:2 = transform.structured.tile_using_for %0 [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

@@ -30,7 +30,7 @@ func.func @matmul_tensors(
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [2, 3, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:3 = transform.structured.tile_using_for %0 [2, 3, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -61,7 +61,7 @@ func.func @generic_op_tensors(
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [2, 3, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:3 = transform.structured.tile_using_for %0 [2, 3, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

// CHECK-LABEL: func @generic_op_tensors
@@ -132,5 +132,5 @@ func.func @fold_extract_slice(
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [2, 3, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:3 = transform.structured.tile_using_for %0 [2, 3, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

@@ -33,7 +33,7 @@ module {
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1:2 = transform.structured.tile_to_forall_op %0 num_threads [10, 20] (mapping = [ #gpu.thread<y>, #gpu.thread<x> ] )
%1:2 = transform.structured.tile_using_forall %0 num_threads [10, 20] (mapping = [ #gpu.thread<y>, #gpu.thread<x> ] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}
}
@@ -57,7 +57,7 @@ module {
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1:2 = transform.structured.tile_to_forall_op %0 num_threads [10, 20] (mapping = [ #gpu.thread<y>, #gpu.thread<x> ] )
%1:2 = transform.structured.tile_using_forall %0 num_threads [10, 20] (mapping = [ #gpu.thread<y>, #gpu.thread<x> ] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}
}
@@ -80,7 +80,7 @@ module {
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1:2 = transform.structured.tile_to_forall_op %0 num_threads [10, 20] (mapping = [ #gpu.thread<y>, #gpu.thread<x> ] )
%1:2 = transform.structured.tile_using_forall %0 num_threads [10, 20] (mapping = [ #gpu.thread<y>, #gpu.thread<x> ] )
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}
}
@@ -124,7 +124,7 @@ transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%sz = transform.structured.match ops{["test.dummy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1:2 = transform.structured.tile_to_forall_op %0 tile_sizes *(%sz : !transform.any_op)
%1:2 = transform.structured.tile_using_forall %0 tile_sizes *(%sz : !transform.any_op)
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

@@ -163,7 +163,7 @@ func.func @matmul_static(%A: tensor<100x200xf32>, %B: tensor<200x300xf32>, %C: t
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1:2 = transform.structured.tile_to_forall_op %0 num_threads [10, 21]
%1:2 = transform.structured.tile_using_forall %0 num_threads [10, 21]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

@@ -205,7 +205,7 @@ func.func @matmul_tile_size_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1:2 = transform.structured.tile_to_forall_op %0 tile_sizes [10, 20]
%1:2 = transform.structured.tile_using_forall %0 tile_sizes [10, 20]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

@@ -242,7 +242,7 @@ func.func @matmul_tile_size_static(%A: tensor<100x200xf32>, %B: tensor<200x300xf
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1:2 = transform.structured.tile_to_forall_op %0 tile_sizes [10, 21]
%1:2 = transform.structured.tile_using_forall %0 tile_sizes [10, 21]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

@@ -265,7 +265,7 @@ module {
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1:2 = transform.structured.tile_to_forall_op %0 num_threads [2] ( mapping = [#gpu.thread<x>])
%1:2 = transform.structured.tile_using_forall %0 num_threads [2] ( mapping = [#gpu.thread<x>])
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}
}
@@ -317,7 +317,7 @@ transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%sz = transform.structured.match ops{["test.dummy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1:2 = transform.structured.tile_to_forall_op %0 tile_sizes [%sz : !transform.any_op, 20]
%1:2 = transform.structured.tile_using_forall %0 tile_sizes [%sz : !transform.any_op, 20]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

@@ -371,7 +371,7 @@ transform.sequence failures(propagate) {
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%forall, %tiled_generic = transform.structured.tile_to_forall_op %0 num_threads [7]
%forall, %tiled_generic = transform.structured.tile_using_forall %0 num_threads [7]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

@@ -425,7 +425,7 @@ transform.sequence failures(propagate) {
transform.sequence failures(propagate) {
^bb1(%IN_MAT2: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %IN_MAT2 : (!transform.any_op) -> !transform.any_op
%forall, %tiled_generic = transform.structured.tile_to_forall_op %0 num_threads [4]
%forall, %tiled_generic = transform.structured.tile_using_forall %0 num_threads [4]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

@@ -22,9 +22,9 @@ transform.sequence failures(propagate) {
^bb0(%module: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %module
: (!transform.any_op) -> !transform.any_op
%tiled_linalg_op, %loops:3 = transform.structured.tile %0[64, 128, 256]
%tiled_linalg_op, %loops:3 = transform.structured.tile_using_for %0[64, 128, 256]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%tiled_linalg_op_0, %loops_1:3 = transform.structured.tile %tiled_linalg_op[8, 8, 8]
%tiled_linalg_op_0, %loops_1:3 = transform.structured.tile_using_for %tiled_linalg_op[8, 8, 8]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
transform.structured.vectorize %tiled_linalg_op_0 vector_sizes [8, 8, 8]
: !transform.any_op

@@ -651,7 +651,7 @@ transform.sequence failures(propagate) {
: (!transform.any_op) -> !transform.any_op
%1 = transform.structured.match ops{["linalg.matmul"]} in %arg1
: (!transform.any_op) -> !transform.any_op
%forall_op, %tiled_op = transform.structured.tile_to_forall_op %1
%tiled_op, %forall_op = transform.structured.tile_using_forall %1
num_threads [] tile_sizes [50, 16]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
// Note that we pass in %tiled_op, which isn't a container op.

@@ -91,7 +91,7 @@ transform.sequence failures(propagate) {
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [5, 0, 7], tile_interchange = [0, 2, 1]}
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%2, %loops_2 = transform.structured.tile %1 [0, 4]
%2, %loops_2 = transform.structured.tile_using_for %1 [0, 4]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

@@ -15,7 +15,7 @@ transform.sequence failures(propagate) {
%matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1
: (!transform.any_op) -> !transform.any_op

%matmul_l1, %loops_l1 = transform.structured.tile %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 {
padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
@@ -47,7 +47,7 @@ transform.sequence failures(propagate) {
%matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1
: (!transform.any_op) -> !transform.any_op

%matmul_l1, %loops_l1 = transform.structured.tile %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 {
padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
@@ -85,7 +85,7 @@ transform.sequence failures(propagate) {
%matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1
: (!transform.any_op) -> !transform.any_op

%matmul_l1, %loops_l1 = transform.structured.tile %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 {
padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
@@ -123,7 +123,7 @@ transform.sequence failures(propagate) {
%matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1
: (!transform.any_op) -> !transform.any_op

%matmul_l1, %loops_l1 = transform.structured.tile %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 {
padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
@@ -159,7 +159,7 @@ transform.sequence failures(propagate) {
%matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1
: (!transform.any_op) -> !transform.any_op

%matmul_l1, %loops_l1:2 = transform.structured.tile %matmul [5, 0, 7] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%matmul_l1, %loops_l1:2 = transform.structured.tile_using_for %matmul [5, 0, 7] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)

%matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 {
padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],

@@ -15,7 +15,7 @@ transform.sequence failures(propagate) {
: (!transform.any_op) -> !transform.any_op


%matmul_l1, %loops_l1 = transform.structured.tile %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 {
padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
@@ -51,7 +51,7 @@ transform.sequence failures(propagate) {
: (!transform.any_op) -> !transform.any_op


%matmul_l1, %loops_l1 = transform.structured.tile %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 {
padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
@@ -94,7 +94,7 @@ transform.sequence failures(propagate) {
: (!transform.any_op) -> !transform.any_op


%matmul_l1, %loops_l1 = transform.structured.tile %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 {
padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
@@ -139,7 +139,7 @@ transform.sequence failures(propagate) {
: (!transform.any_op) -> !transform.any_op


%matmul_l1, %loops_l1 = transform.structured.tile %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 {
padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
@@ -183,7 +183,7 @@ transform.sequence failures(propagate) {
: (!transform.any_op) -> !transform.any_op


%matmul_l1, %loops_l1:2 = transform.structured.tile %matmul [5, 0, 7] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%matmul_l1, %loops_l1:2 = transform.structured.tile_using_for %matmul [5, 0, 7] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)

%matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 {
padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],

@@ -21,6 +21,6 @@ func.func @scalarize(%arg0: tensor<24x12xf32>,
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops = transform.structured.tile %0 [10, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%1, %loops = transform.structured.tile_using_for %0 [10, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%2 = transform.structured.scalarize %1 : (!transform.any_op) -> !transform.any_op
}

@@ -3,7 +3,7 @@
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [4, 4, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:3 = transform.structured.tile_using_for %0 [4, 4, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

// CHECK-LABEL: func @tile_linalg_matmul(
@@ -40,7 +40,7 @@ transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1 = transform.structured.match ops{["func.call"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%2, %loops:3 = transform.structured.tile %0 [%1, %1, 4] : (!transform.any_op, !transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%2, %loops:3 = transform.structured.tile_using_for %0 [%1, %1, 4] : (!transform.any_op, !transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

func.func private @get_dynamic_tile_size() -> index
@@ -82,7 +82,7 @@ transform.sequence failures(propagate) {
// expected-note @below {{for this parameter}}
%1 = transform.test_produce_param (0 : i64) : !transform.param<i64>
// expected-error @below {{expected as many parameter values (0) as target ops (2)}}
transform.structured.tile %0 [%1, %1, %1]
transform.structured.tile_using_for %0 [%1, %1, %1]
: (!transform.any_op, !transform.param<i64>, !transform.param<i64>, !transform.param<i64>)
-> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}
@@ -107,7 +107,7 @@ transform.sequence failures(propagate) {
// expected-note @below {{for this handle}}
%1 = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error @below {{expected as many dynamic size-producing operations (0) as target ops (2)}}
transform.structured.tile %0 [%1, %1, 1]
transform.structured.tile_using_for %0 [%1, %1, 1]
: (!transform.any_op, !transform.any_op, !transform.any_op)
-> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}
@@ -146,7 +146,7 @@ func.func @tile_tensor_pad(
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
transform.structured.tile_to_forall_op %0 tile_sizes[1, 1]
transform.structured.tile_using_forall %0 tile_sizes[1, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

@@ -184,7 +184,7 @@ module {
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loop = transform.structured.tile %0 [[4]] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%1, %loop = transform.structured.tile_using_for %0 [[4]] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

// -----
@@ -218,7 +218,7 @@ func.func @scalable_and_fixed_length_tile(
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [4, 4, [4]] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:3 = transform.structured.tile_using_for %0 [4, 4, [4]] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -235,5 +235,5 @@ transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error @below {{too many tiles provided, expected at most 3 found 4}}
%1, %loops = transform.structured.tile %0 [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%1, %loops = transform.structured.tile_using_for %0 [1, 0, 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

@@ -3,7 +3,7 @@
transform.sequence failures(propagate) {
^bb1(%arg0: !transform.any_op):
// CHECK %{{.*}}, %{{.*}}:2 = transform.structured.tile
%0, %1:2 = transform.structured.tile %arg0 [2, 0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%0, %1:2 = transform.structured.tile_using_for %arg0 [2, 0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

transform.sequence failures(propagate) {

@@ -12,7 +12,7 @@ func.func @dot(%x: memref<?xf32, strided<[1], offset: ?>>,
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.dot"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loop = transform.structured.tile %0 [8000] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%1, %loop = transform.structured.tile_using_for %0 [8000] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

// CHECK-LABEL: func @dot
@@ -36,7 +36,7 @@ func.func @matvec(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matvec"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [5, 6] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [5, 6] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// CHECK-LABEL: func @matvec
@@ -63,10 +63,10 @@ func.func @matmul(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [2000, 3000, 4000] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%2, %loops_2:3 = transform.structured.tile %1 [200, 300, 400] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%3, %loops_3:3 = transform.structured.tile %2 [20, 30, 40] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%4, %loops_4:3 = transform.structured.tile %3 [2, 3, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:3 = transform.structured.tile_using_for %0 [2000, 3000, 4000] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%2, %loops_2:3 = transform.structured.tile_using_for %1 [200, 300, 400] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%3, %loops_3:3 = transform.structured.tile_using_for %2 [20, 30, 40] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%4, %loops_4:3 = transform.structured.tile_using_for %3 [2, 3, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

// CHECK-LABEL: func @matmul
@@ -162,7 +162,7 @@ func.func @matvec_perm(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matvec"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [5, 6] {interchange = [1, 0]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [5, 6] {interchange = [1, 0]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// CHECK-LABEL: func @matvec_perm
@@ -189,9 +189,9 @@ func.func @matmul_perm(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [2000, 3000, 4000] {interchange = [1, 2, 0]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%2, %loops_2:3 = transform.structured.tile %1 [200, 300, 400] {interchange = [1, 0, 2]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%3, %loops_3:3 = transform.structured.tile %2 [20, 30, 40] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:3 = transform.structured.tile_using_for %0 [2000, 3000, 4000] {interchange = [1, 2, 0]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%2, %loops_2:3 = transform.structured.tile_using_for %1 [200, 300, 400] {interchange = [1, 0, 2]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%3, %loops_3:3 = transform.structured.tile_using_for %2 [20, 30, 40] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

// CHECK-LABEL: func @matmul_perm

@@ -47,7 +47,7 @@ module {
%producers = transform.structured.match attributes{"__producer__"} in %arg1 : (!transform.any_op) -> !transform.any_op

// Tile the root.
%forall_op, %tiled_op = transform.structured.tile_to_forall_op %root num_threads [10, 20]
%tiled_op, %forall_op = transform.structured.tile_using_forall %root num_threads [10, 20]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)

// Fuse all producers.
@@ -58,7 +58,7 @@ module {

// -----

// Inverse the order of the payload ops passed to the tile_to_forall_op
// Inverse the order of the payload ops passed to the tile_using_forall
// op. Fusion should still work.

module {
@@ -107,7 +107,7 @@ module {
%reversed_producers = transform.test_reverse_payload_ops %producers : (!transform.any_op) -> !transform.any_op

// Tile the root.
%forall_op, %tiled_op = transform.structured.tile_to_forall_op %root num_threads [10, 20]
%tiled_op, %forall_op = transform.structured.tile_using_forall %root num_threads [10, 20]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)

// Fuse all producers.

@@ -17,7 +17,7 @@ func.func @reduction_tile(%arg0: tensor<?x?xf32>, %out: tensor<?xf32>) -> tensor
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%loop, %1, %2, %3 = transform.structured.tile_reduction_using_scf %0
%1, %2, %3, %loop = transform.structured.tile_reduction_using_for %0
by tile_sizes = [0, 5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

@@ -70,7 +70,7 @@ func.func @reduction_tile_transpose(%arg0: tensor<?x?xf32>, %out: tensor<?xf32>)
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%loop, %1, %2, %3 = transform.structured.tile_reduction_using_scf %0
%1, %2, %3, %loop = transform.structured.tile_reduction_using_for %0
by tile_sizes = [5, 0] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

@@ -109,7 +109,7 @@ func.func @reduction_tile_parallel(
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%loop, %1, %2, %3 = transform.structured.tile_reduction_using_forall %0
%1, %2, %3, %loop = transform.structured.tile_reduction_using_forall %0
by num_threads = [0, 5], tile_sizes = [] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

@@ -161,7 +161,7 @@ func.func @matmul_tile_parallel(
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%loop, %1, %2, %3 = transform.structured.tile_reduction_using_forall %0
%1, %2, %3, %loop = transform.structured.tile_reduction_using_forall %0
by num_threads = [0, 0, 5], tile_sizes = [] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

@@ -220,7 +220,7 @@ func.func @reduction_tile_parallel_cyclic_dist(
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%loop, %1, %2, %3 = transform.structured.tile_reduction_using_forall %0
%1, %2, %3, %loop = transform.structured.tile_reduction_using_forall %0
by num_threads = [0, 5], tile_sizes = [0, 3], mapping = [#gpu.thread<x>] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

@@ -285,7 +285,7 @@ func.func @reduction_tile_parallel_cyclic_dist(
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%loop, %1, %2, %3 = transform.structured.tile_reduction_using_forall %0
%1, %2, %3, %loop = transform.structured.tile_reduction_using_forall %0
by num_threads = [0, 5], tile_sizes = [0, 3], mapping = [#gpu.thread<x>] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)

// CHECK: expecting fill
@@ -323,7 +323,7 @@ transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error @below {{could not tile reduction}}
%loop, %1, %2, %3 = transform.structured.tile_reduction_using_forall %0
%1, %2, %3, %loop = transform.structured.tile_reduction_using_forall %0
by num_threads = [5], tile_sizes = [3], mapping = [#gpu.thread<x>] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)

}
@@ -349,8 +349,8 @@ module {
transform.sequence failures(propagate) {
^bb0(%arg0: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op
// expected-error @below {{transform.structured.tile_reduction_using_scf failed to apply}}
%for_op, %fill_op, %split_linalg_op, %combining_linalg_op = transform.structured.tile_reduction_using_scf %0 by tile_sizes = [0, 5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
// expected-error @below {{transform.structured.tile_reduction_using_for failed to apply}}
%fill_op, %split_linalg_op, %combining_linalg_op, %for_op = transform.structured.tile_reduction_using_for %0 by tile_sizes = [0, 5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}
}

@@ -372,7 +372,7 @@ module {
transform.sequence failures(propagate) {
^bb0(%arg0: !transform.any_op):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op
%for_op, %fill_op, %split_linalg_op, %combining_linalg_op = transform.structured.tile_reduction_using_scf %0 by tile_sizes = [0, 2, 64] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%fill_op, %split_linalg_op, %combining_linalg_op, %for_op = transform.structured.tile_reduction_using_for %0 by tile_sizes = [0, 2, 64] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}
}

@@ -28,8 +28,8 @@ transform.sequence failures(propagate) {

%mm1, %mm2 = transform.split_handle %matched : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%loop1, %tiled_mm1 = transform.structured.tile_to_forall_op %mm1 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%loop2, %tiled_mm2 = transform.structured.tile_to_forall_op %mm2 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%tiled_mm1, %loop1 = transform.structured.tile_using_forall %mm1 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%tiled_mm2, %loop2 = transform.structured.tile_using_forall %mm2 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%fused_loop = transform.loop.fuse_sibling %loop1 into %loop2 : (!transform.any_op, !transform.any_op) -> !transform.any_op
}
@@ -54,8 +54,8 @@ transform.sequence failures(propagate) {

%mm1, %mm2 = transform.split_handle %matched : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%loop1, %tiled_mm1 = transform.structured.tile_to_forall_op %mm1 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%loop2, %tiled_mm2 = transform.structured.tile_to_forall_op %mm2 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%tiled_mm1, %loop1 = transform.structured.tile_using_forall %mm1 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%tiled_mm2, %loop2 = transform.structured.tile_using_forall %mm2 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%fused_loop = transform.loop.fuse_sibling %loop1 into %loop2 : (!transform.any_op, !transform.any_op) -> !transform.any_op
}
@@ -80,8 +80,8 @@ transform.sequence failures(propagate) {

%mm1, %mm2 = transform.split_handle %matched : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%loop1, %tiled_mm1 = transform.structured.tile_to_forall_op %mm1 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%loop2, %tiled_mm2 = transform.structured.tile_to_forall_op %mm2 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%tiled_mm1, %loop1 = transform.structured.tile_using_forall %mm1 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%tiled_mm2, %loop2 = transform.structured.tile_using_forall %mm2 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%fused_loop = transform.loop.fuse_sibling %loop2 into %loop1 : (!transform.any_op, !transform.any_op) -> !transform.any_op
}
@@ -106,8 +106,8 @@ transform.sequence failures(propagate) {

%mm1, %mm2 = transform.split_handle %matched : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%loop1, %tiled_mm1 = transform.structured.tile_to_forall_op %mm1 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%loop2, %tiled_mm2 = transform.structured.tile_to_forall_op %mm2 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%tiled_mm1, %loop1 = transform.structured.tile_using_forall %mm1 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%tiled_mm2, %loop2 = transform.structured.tile_using_forall %mm2 tile_sizes [32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%fused_loop = transform.loop.fuse_sibling %loop2 into %loop1 : (!transform.any_op, !transform.any_op) -> !transform.any_op
}

@@ -64,7 +64,7 @@ transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1
: (!transform.any_op) -> !transform.any_op
transform.structured.tile_to_forall_op %0 tile_sizes[1, 1]
transform.structured.tile_using_forall %0 tile_sizes[1, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)

%if = transform.structured.match ops{["scf.if"]} in %arg1

@@ -34,7 +34,7 @@ func.func @dynamic_pad_tensor_3_4(%input_tensor: tensor<?x?xf32>,
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -71,7 +71,7 @@ func.func @dynamic_pad_tensor_0_3(%input_tensor: tensor<?x?xf32>,
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loop = transform.structured.tile %0 [0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%1, %loop = transform.structured.tile_using_for %0 [0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

// -----
@@ -105,7 +105,7 @@ func.func @static_pad_tensor_3_4(%input_tensor: tensor<7x9xf32>,
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -137,7 +137,7 @@ func.func @static_pad_tensor_0_3(%input_tensor: tensor<7x9xf32>,
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loop = transform.structured.tile %0 [0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%1, %loop = transform.structured.tile_using_for %0 [0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

// -----
@@ -175,7 +175,7 @@ func.func @static_pad_tile_evenly_0_3(%input_tensor: tensor<7x9xf32>,
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loop = transform.structured.tile %0 [0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%1, %loop = transform.structured.tile_using_for %0 [0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

// -----
@@ -211,7 +211,7 @@ func.func @NC_to_NCnc(%arg0: tensor<128x256xf32>, %arg1: tensor<4x8x32x32xf32>)
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -239,7 +239,7 @@ func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>)
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -274,7 +274,7 @@ func.func @pad_and_pack_static(%input: tensor<13x15xf32>, %output: tensor<2x8x8x
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -323,7 +323,7 @@ func.func @pad_and_pack_partially_dynamic(%input: tensor<?x?xf32>, %output: tens
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -377,7 +377,7 @@ func.func @pad_and_pack_fully_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -424,7 +424,7 @@ func.func @NCnc_to_NC(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -470,7 +470,7 @@ func.func @CKkc_to_KC(%source: tensor<32x4x32x8xf32>, %dest: tensor<128x256xf32>
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -506,7 +506,7 @@ func.func @perfect_CKkc_to_KC(%source: tensor<32x4x2x4xf32>, %dest: tensor<8x128
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -548,7 +548,7 @@ func.func @dynamic_perfect_CKkc_to_KC(%source: tensor<?x?x2x2xf32>, %dest: tenso
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -583,7 +583,7 @@ func.func @perfect_NKPQk_to_NPQK(%source: tensor<1x4x6x6x2xf32>, %dest: tensor<1
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:4 = transform.structured.tile %0 [1, 1, 1, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:4 = transform.structured.tile_using_for %0 [1, 1, 1, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -609,7 +609,7 @@ func.func @fully_dynamic_unpack(%source: tensor<?x?x?x?xf32>, %dest: tensor<?x?x
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

// -----
@@ -643,5 +643,5 @@ func.func @perfect_NPQK_to_NKPQk(%source: tensor<1x6x6x8xf32>, %dest: tensor<1x4
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:4 = transform.structured.tile %0 [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:4 = transform.structured.tile_using_for %0 [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

@@ -99,19 +99,19 @@ transform.sequence failures(propagate) {
}

// CHECK: transform.sequence
// CHECK: transform.structured.tile %0[4, 4, [4]]
// CHECK: transform.structured.tile_using_for %0[4, 4, [4]]
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
transform.structured.tile %0 [4, 4, [4]] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
transform.structured.tile_using_for %0 [4, 4, [4]] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

// CHECK: transform.sequence
// CHECK: transform.structured.tile %0{{\[}}[2], 4, 8]
// CHECK: transform.structured.tile_using_for %0{{\[}}[2], 4, 8]
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
transform.structured.tile %0 [[2], 4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
transform.structured.tile_using_for %0 [[2], 4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

// CHECK: transform.sequence

@@ -77,7 +77,7 @@ transform.with_pdl_patterns {
transform.sequence %arg0 : !transform.any_op failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = pdl_match @pdl_target_attrA in %arg1 : (!transform.any_op) -> !transform.any_op
transform.structured.tile %0 [4, 4, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
transform.structured.tile_using_for %0 [4, 4, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1 = pdl_match @pdl_target_attrC in %arg1 : (!transform.any_op) -> !transform.any_op
%2 = get_parent_op %1 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize_children_and_apply_patterns %2 : (!transform.any_op) -> !transform.any_op

@@ -16,7 +16,7 @@ func.func @matmul_tensors(
transform.sequence failures(propagate) {
^bb1(%module_op: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [8, 4, 2]
%1, %loops:3 = transform.structured.tile_using_for %0 [8, 4, 2]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%2 = get_parent_op %1 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
transform.structured.vectorize_children_and_apply_patterns %2 : (!transform.any_op) -> !transform.any_op

@@ -19,7 +19,7 @@ transform.sequence failures(propagate) {
|
||||
%arg2: !transform.op<"linalg.elemwise_binary">):
|
||||
// The actual tiling transformation takes tile sizes as attributes.
|
||||
// expected-note @below {{invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities associated with this operand and entities nested in them}}
|
||||
%loop, %tiled = transform.structured.tile_to_forall_op %arg1 tile_sizes [4, 32]
|
||||
%loop, %tiled = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
|
||||
: (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op)
|
||||
|
||||
// This is trying to use an invalidated handle leading to undefined behavior.
|
||||
@@ -64,7 +64,7 @@ transform.sequence failures(propagate) {
|
||||
|
||||
// The actual tiling transformation takes tile sizes as attributes.
|
||||
// expected-note @below {{invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities associated with this operand and entities nested in them}}
|
||||
%loop, %tiled = transform.structured.tile_to_forall_op %arg1 tile_sizes [4, 32]
|
||||
%loop, %tiled = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
|
||||
: (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op)
|
||||
|
||||
// Consuming an operand invalidates the consumed handle and any other handle that is
|
||||
|
||||
@@ -57,7 +57,7 @@ transform.sequence failures(propagate) {
|
||||
|
||||
// The actual tiling transformation takes tile sizes as attributes. It produces a
|
||||
// handle to the loop generated during tiling.
|
||||
%loop, %tiled = transform.structured.tile_to_forall_op %max tile_sizes [8, 32]
|
||||
%tiled, %loop = transform.structured.tile_using_forall %max tile_sizes [8, 32]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
|
||||
// We can now fuse the other operations into the loop. Here, we fuse
|
||||
@@ -76,7 +76,7 @@ transform.sequence failures(propagate) {
|
||||
// "max" operation. This illustrates the precise targeting with the transform
|
||||
// dialect. Otherwise, it is difficult to differentiate "add" and "max", both
|
||||
// of which having the same kind.
|
||||
%loop_second, %tiled_second = transform.structured.tile_to_forall_op %add_fused tile_sizes [4, 4]
|
||||
%tiled_second, %loop_second = transform.structured.tile_using_forall %add_fused tile_sizes [4, 4]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
%matmul_fused_2, %loop_second_2 =
|
||||
transform.structured.fuse_into_containing_op %matmul_fused into %loop_second
|
||||
@@ -85,7 +85,7 @@ transform.sequence failures(propagate) {
|
||||
// Since outlining is currently only implemented for region-holding operations
|
||||
// such as loops, use tiling to size 1 to materialize the outer loop that is
|
||||
// going to be outlined.
|
||||
%loop_third, %_0 = transform.structured.tile_to_forall_op %tiled_second tile_sizes [1]
|
||||
%_0, %loop_third = transform.structured.tile_using_forall %tiled_second tile_sizes [1]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
// expected-note @below {{handle to invalidated ops}}
|
||||
%f, %outline_target = transform.structured.fuse_into_containing_op %matmul_fused_2 into %loop_third
|
||||
|
||||
@@ -72,7 +72,7 @@ transform.sequence failures(propagate) {
|
||||
|
||||
// The actual tiling transformation takes tile sizes as attributes. It produces a
|
||||
// handle to the loop generated during tiling.
|
||||
%loop, %tiled = transform.structured.tile_to_forall_op %max tile_sizes [8, 32]
|
||||
%tiled, %loop = transform.structured.tile_using_forall %max tile_sizes [8, 32]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
|
||||
// We can now fuse the other operations into the loop. Here, we fuse
|
||||
@@ -91,7 +91,7 @@ transform.sequence failures(propagate) {
|
||||
// "max" operation. This illustrates the precise targeting with the transform
|
||||
// dialect. Otherwise, it is difficult to differentiate "add" and "max", both
|
||||
// of which having the same kind.
|
||||
%loop_second, %tiled_second = transform.structured.tile_to_forall_op %add_fused tile_sizes [4, 4]
|
||||
%tiled_second, %loop_second = transform.structured.tile_using_forall %add_fused tile_sizes [4, 4]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
%matmul_fused_2, %loop_second_2 =
|
||||
transform.structured.fuse_into_containing_op %matmul_fused into %loop_second
|
||||
@@ -100,7 +100,7 @@ transform.sequence failures(propagate) {
|
||||
// Since outlining is currently only implemented for region-holding operations
|
||||
// such as loops, use tiling to size 1 to materialize the outer loop that is
|
||||
// going to be outlined.
|
||||
%loop_third, %_0 = transform.structured.tile_to_forall_op %tiled_second tile_sizes [1]
|
||||
%_0, %loop_third = transform.structured.tile_using_forall %tiled_second tile_sizes [1]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
%_1, %outline_target = transform.structured.fuse_into_containing_op %matmul_fused_2 into %loop_third
|
||||
: (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
|
||||
@@ -68,7 +68,7 @@ transform.sequence failures(propagate) {
|
||||
|
||||
// The actual tiling transformation takes tile sizes as attributes. It produces a
|
||||
// handle to the loop generated during tiling.
|
||||
%loop, %tiled = transform.structured.tile_to_forall_op %max tile_sizes [8, 32]
|
||||
%tiled, %loop = transform.structured.tile_using_forall %max tile_sizes [8, 32]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
|
||||
// We can now fuse the other operations into the loop. Here, we fuse
|
||||
@@ -87,7 +87,7 @@ transform.sequence failures(propagate) {
|
||||
// "max" operation. This illustrates the precise targeting with the transform
|
||||
// dialect. Otherwise, it is difficult to differentiate "add" and "max", both
|
||||
// of which having the same kind.
|
||||
%loop_second, %tiled_second = transform.structured.tile_to_forall_op %add_fused tile_sizes [4, 4]
|
||||
%tiled_second, %loop_second = transform.structured.tile_using_forall %add_fused tile_sizes [4, 4]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
%matmul_fused_2, %loop_second_2 =
|
||||
transform.structured.fuse_into_containing_op %matmul_fused into %loop_second
|
||||
@@ -96,7 +96,7 @@ transform.sequence failures(propagate) {
|
||||
// Since outlining is currently only implemented for region-holding operations
|
||||
// such as loops, use tiling to size 1 to materialize the outer loop that is
|
||||
// going to be outlined.
|
||||
%loop_third, %_0 = transform.structured.tile_to_forall_op %tiled_second tile_sizes [1]
|
||||
%_0, %loop_third = transform.structured.tile_using_forall %tiled_second tile_sizes [1]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
%_1, %outline_target = transform.structured.fuse_into_containing_op %matmul_fused_2 into %loop_third
|
||||
: (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
|
||||
@@ -68,7 +68,7 @@ transform.sequence failures(propagate) {
|
||||
|
||||
// The actual tiling transformation takes tile sizes as attributes. It produces a
|
||||
// handle to the loop generated during tiling.
|
||||
%loop, %tiled = transform.structured.tile_to_forall_op %max tile_sizes [8, 32]
|
||||
%tiled, %loop = transform.structured.tile_using_forall %max tile_sizes [8, 32]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
|
||||
// We can now fuse the other operations into the loop. Here, we fuse
|
||||
@@ -87,7 +87,7 @@ transform.sequence failures(propagate) {
|
||||
// "max" operation. This illustrates the precise targeting with the transform
|
||||
// dialect. Otherwise, it is difficult to differentiate "add" and "max", both
|
||||
// of which having the same kind.
|
||||
%loop_second, %tiled_second = transform.structured.tile_to_forall_op %add_fused tile_sizes [4, 4]
|
||||
%tiled_second, %loop_second = transform.structured.tile_using_forall %add_fused tile_sizes [4, 4]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
%matmul_fused_2, %loop_second_2 =
|
||||
transform.structured.fuse_into_containing_op %matmul_fused into %loop_second
|
||||
@@ -96,7 +96,7 @@ transform.sequence failures(propagate) {
|
||||
// Since outlining is currently only implemented for region-holding operations
|
||||
// such as loops, use tiling to size 1 to materialize the outer loop that is
|
||||
// going to be outlined.
|
||||
%loop_third, %_0 = transform.structured.tile_to_forall_op %tiled_second tile_sizes [1]
|
||||
%_0, %loop_third = transform.structured.tile_using_forall %tiled_second tile_sizes [1]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
%_1, %outline_target = transform.structured.fuse_into_containing_op %matmul_fused_2 into %loop_third
|
||||
: (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
|
||||
@@ -143,10 +143,10 @@ module attributes { transform.with_named_sequence } {
|
||||
// inside the loops produced by tiling.
|
||||
//
|
||||
// [n y x c]
|
||||
%co, %relu2 = transform.structured.tile_to_forall_op %relu
|
||||
%relu2, %co = transform.structured.tile_using_forall %relu
|
||||
tile_sizes [0, 0, 0, 64]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
%n_y_xo, %relu3 = transform.structured.tile_to_forall_op %relu2
|
||||
%relu3, %n_y_xo = transform.structured.tile_using_forall %relu2
|
||||
tile_sizes [1, 1, 5, 0]
|
||||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
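
Besides the rename from `tile_to_forall_op` to `tile_using_forall`, the hunks above also swap the order of the returned handles: the tiled op is now the leading result and the `scf.forall` loop handle comes last. A minimal sketch of the new form (the handle and operand names are illustrative):

```mlir
%tiled, %forall = transform.structured.tile_using_forall %op tile_sizes [8, 32]
    : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
```
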
@@ -207,8 +207,8 @@ module attributes { transform.with_named_sequence } {
// tile_size = 1 along all dimensions, so the reduction is entirely
// performed by the generated loops. The combiner structured operation is
// still produced and adds up the reduction result with the initial value.
%rz_ry_rx, %red_fill, %conv4, %combining
= transform.structured.tile_reduction_using_scf %conv3 by
%red_fill, %conv4, %combining, %rz_ry_rx
= transform.structured.tile_reduction_using_for %conv3 by
// n y x c rz ry rx
tile_sizes=[0, 0, 0, 0, 1, 1, 1]
: (!transform.any_op)
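
Reduction tiling follows the same scheme: `tile_reduction_using_scf` becomes `tile_reduction_using_for`, and the loop handle moves from the front of the result list to the back, after the fill, the partial-reduction op, and the combiner. A rough sketch of the new ordering (handle names, tile sizes, and the spelled-out result types are illustrative, since the hunk above elides them):

```mlir
%fill, %partial, %combiner, %loop =
    transform.structured.tile_reduction_using_for %reduction by tile_sizes=[0, 4]
    : (!transform.any_op)
    -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
```
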
@@ -27,7 +27,7 @@ func.func @conv_1d(%arg0: memref<?xf32>, %arg1: memref<?xf32>, %arg2: memref<?xf
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.conv_1d"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loop = transform.structured.tile %0 [4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%1, %loop = transform.structured.tile_using_for %0 [4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
}

func.func @main() {

@@ -29,7 +29,7 @@ func.func @conv_1d_nwc_wcf(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.conv_1d_nwc_wcf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

func.func @main() {

@@ -27,7 +27,7 @@ func.func @conv_2d(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:2 = transform.structured.tile %0 [2, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
}

func.func @main() {

@@ -29,7 +29,7 @@ func.func @conv_2d_nhwc_hwcf(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?x?xf
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.conv_2d_nhwc_hwcf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:4 = transform.structured.tile %0 [2, 3, 3, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:4 = transform.structured.tile_using_for %0 [2, 3, 3, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

func.func @main() {

@@ -27,7 +27,7 @@ func.func @conv_3d(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: me
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.conv_3d"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [2, 2, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:3 = transform.structured.tile_using_for %0 [2, 2, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

func.func @main() {

@@ -29,7 +29,7 @@ func.func @conv_3d_ndhwc_dhwcf(%arg0: memref<?x?x?x?x?xf32>, %arg1: memref<?x?x?
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.conv_3d_ndhwc_dhwcf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [0, 5, 5, 5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:3 = transform.structured.tile_using_for %0 [0, 5, 5, 5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

func.func @main() {

@@ -39,7 +39,7 @@ func.func @main() {
transform.sequence failures(propagate) {
^bb0(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%1, %loops:3 = transform.structured.tile %0 [1, 2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
%1, %loops:3 = transform.structured.tile_using_for %0 [1, 2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
}

func.func private @printMemrefF32(%ptr : tensor<*xf32>)

@@ -344,10 +344,10 @@ def testSplit(target):
@run
@create_sequence
def testTileCompact(target):
structured.TileOp(target, sizes=[4, 8], interchange=[0, 1])
structured.TileUsingForOp(target, sizes=[4, 8], interchange=[0, 1])
# CHECK-LABEL: TEST: testTileCompact
# CHECK: transform.sequence
# CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.tile %{{.*}}[4, 8]
# CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.tile_using_for %{{.*}}[4, 8]
# CHECK: interchange = [0, 1]


@@ -356,20 +356,20 @@ def testTileCompact(target):
def testTileAttributes(target):
attr = DenseI64ArrayAttr.get([4, 8])
ichange = DenseI64ArrayAttr.get([0, 1])
structured.TileOp(target, sizes=attr, interchange=ichange)
structured.TileUsingForOp(target, sizes=attr, interchange=ichange)
# CHECK-LABEL: TEST: testTileAttributes
# CHECK: transform.sequence
# CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.tile %{{.*}}[4, 8]
# CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.tile_using_for %{{.*}}[4, 8]
# CHECK: interchange = [0, 1]


@run
@create_sequence
def testTileZero(target):
structured.TileOp(target, sizes=[4, 0, 2, 0], interchange=[0, 1, 2, 3])
structured.TileUsingForOp(target, sizes=[4, 0, 2, 0], interchange=[0, 1, 2, 3])
# CHECK-LABEL: TEST: testTileZero
# CHECK: transform.sequence
# CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.tile %{{.*}}[4, 0, 2, 0]
# CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.tile_using_for %{{.*}}[4, 0, 2, 0]
# CHECK: interchange = [0, 1, 2, 3]


@@ -387,20 +387,22 @@ def testTileDynamic():
m2 = transform_pdl.PDLMatchOp(
pdl.OperationType.get(), sequence.bodyTarget, "second"
)
structured.TileOp(sequence.bodyTarget, sizes=[m1, 3, m2, 0])
structured.TileUsingForOp(sequence.bodyTarget, sizes=[m1, 3, m2, 0])
transform.YieldOp()
# CHECK-LABEL: TEST: testTileDynamic
# CHECK: %[[FIRST:.+]] = pdl_match
# CHECK: %[[SECOND:.+]] = pdl_match
# CHECK: %{{.+}}, %{{.+}}:3 = transform.structured.tile %{{.*}}[%[[FIRST]], 3, %[[SECOND]], 0]
# CHECK: %{{.+}}, %{{.+}}:3 = transform.structured.tile_using_for %{{.*}}[%[[FIRST]], 3, %[[SECOND]], 0]


@run
@create_sequence
def testTileExplicitLoopTypeSingle(target):
structured.TileOp(transform.OperationType.get("scf.for"), target, sizes=[2, 3, 4])
structured.TileUsingForOp(
transform.OperationType.get("scf.for"), target, sizes=[2, 3, 4]
)
# CHECK-LABEL: TEST: testTileExplicitLoopTypeSingle
# CHECK: = transform.structured.tile %{{.*}} : (!{{.*}}) ->
# CHECK: = transform.structured.tile_using_for %{{.*}} : (!{{.*}}) ->
# CHECK-COUNT-3: !transform.op<"scf.for">
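
With an explicit loop type and three non-zero tile sizes, the builder above is expected to print the op with three `!transform.op<"scf.for">` loop results trailing the tiled-op handle, roughly as follows (the handle names and the exact type of the leading result are illustrative):

```mlir
%tiled, %loops:3 = transform.structured.tile_using_for %target [2, 3, 4]
    : (!transform.any_op)
    -> (!transform.any_op, !transform.op<"scf.for">, !transform.op<"scf.for">, !transform.op<"scf.for">)
```
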
@@ -411,7 +413,7 @@ def testTileExplicitLoopTypeAll(target):
transform.OperationType.get(x)
for x in ["scf.for", "scf.parallel", "scf.forall"]
]
structured.TileOp(types, target, sizes=[2, 3, 4])
structured.TileUsingForOp(types, target, sizes=[2, 3, 4])
# CHECK-LABEL: TEST: testTileExplicitLoopTypeAll
# CHECK: = transform.structured.tile
# CHECK-SAME : (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">,
@@ -421,22 +423,22 @@ def testTileExplicitLoopTypeAll(target):
@run
@create_sequence
def testTileScalable(target):
structured.TileOp(
structured.TileUsingForOp(
target,
sizes=[4, [2]],
)
# CHECK-LABEL: TEST: testTileScalable
# CHECK: transform.sequence
# CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.tile %{{.*}}[4, [2]]
# CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.tile_using_for %{{.*}}[4, [2]]


@run
@create_sequence
def testTileToForallCompact(target):
matmul = transform.CastOp(transform.OperationType.get("linalg.matmul"), target)
structured.TileToForallOp(matmul, num_threads=[2, 3, 4])
structured.TileUsingForallOp(matmul, num_threads=[2, 3, 4])
# CHECK-LABEL: TEST: testTileToForallCompact
# CHECK: = transform.structured.tile_to_forall_op
# CHECK: = transform.structured.tile_using_forall
# CHECK-SAME: num_threads [2, 3, 4] tile_sizes []
# CHECK-SAME: (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op)

@@ -444,14 +446,14 @@ def testTileToForallCompact(target):
@run
@create_sequence
def testTileToForallLoopsAndTileOpTypes(target):
structured.TileToForallOp(
structured.TileUsingForallOp(
transform.OperationType.get("scf.forall"), # loops_type
transform.OperationType.get("linalg.matmul"), # tiled_op_type
target,
num_threads=[2, 3, 4],
)
# CHECK-LABEL: TEST: testTileToForallLoopsAndTileOpTypes
# CHECK: = transform.structured.tile_to_forall_op
# CHECK: = transform.structured.tile_using_forall
# CHECK-SAME: num_threads [2, 3, 4] tile_sizes []
# CHECK-SAME: (!transform.any_op) -> (!transform.op<"scf.forall">, !transform.op<"linalg.matmul">)

@@ -459,9 +461,9 @@ def testTileToForallLoopsAndTileOpTypes(target):
@run
@create_sequence
def testTileToForallTileSizes(target):
structured.TileToForallOp(target, tile_sizes=[2, 3, 4])
structured.TileUsingForallOp(target, tile_sizes=[2, 3, 4])
# CHECK-LABEL: TEST: testTileToForallTileSizes
# CHECK: = transform.structured.tile_to_forall_op
# CHECK: = transform.structured.tile_using_forall
# CHECK-SAME: num_threads [] tile_sizes [2, 3, 4]


@@ -469,9 +471,9 @@ def testTileToForallTileSizes(target):
@create_sequence
def testTileToForallMixedDynamic(target):
n = structured.MatchOp.match_op_names(target, ["test.dummy"])
structured.TileToForallOp(target, num_threads=[n, 3, 4])
structured.TileUsingForallOp(target, num_threads=[n, 3, 4])
# CHECK-LABEL: TEST: testTileToForallMixedDynamic
# CHECK: = transform.structured.tile_to_forall_op
# CHECK: = transform.structured.tile_using_forall
# CHECK-SAME: num_threads [%{{.*}} : !transform.any_op, 3, 4]


@@ -479,9 +481,9 @@ def testTileToForallMixedDynamic(target):
@create_sequence
def testTileToForallPackedDynamic(target):
n = structured.MatchOp.match_op_names(target, ["test.dummy"])
structured.TileToForallOp(target, num_threads=n)
structured.TileUsingForallOp(target, num_threads=n)
# CHECK-LABEL: TEST: testTileToForallPackedDynamic
# CHECK: = transform.structured.tile_to_forall_op
# CHECK: = transform.structured.tile_using_forall
# CHECK-SAME: num_threads *(%0 : !transform.any_op)


@@ -489,9 +491,9 @@ def testTileToForallPackedDynamic(target):
@create_sequence
def testTileToForallMapping(target):
mapping = Attribute.parse("[ #gpu.thread<y>, #gpu.thread<x> ]")
structured.TileToForallOp(target, num_threads=[2, 3], mapping=mapping)
structured.TileUsingForallOp(target, num_threads=[2, 3], mapping=mapping)
# CHECK-LABEL: TEST: testTileToForallMapping
# CHECK: = transform.structured.tile_to_forall_op
# CHECK: = transform.structured.tile_using_forall
# CHECK-SAME: mapping = [#gpu.thread<y>, #gpu.thread<x>]