mirror of
https://github.com/intel/llvm.git
synced 2026-02-05 22:17:23 +08:00
[mlir][Vector] Update the lowering of vector.transfer_write to SCF
This change updates the lowering of `vector.transfer_write` to SCF when
scalable vectors are used. Specifically, when lowering
`vector.transfer_write` to a loop of `vector.extractelement` ops, make
sure that the upper bound of the generated loop is scaled by
`vector.vscale`:
```
%10 = vector.vscale
%11 = arith.muli %10, %c16 : index
scf.for %arg2 = %c0 to %11 step %c1
```
For reference, this is the current version (i.e. before this change):
```
scf.for %arg2 = %c0 to %c16 step %c1
```
Note that this is only valid for fixed-width vectors.
Differential Revision: https://reviews.llvm.org/D154226
This commit is contained in:
committed by
Andrzej Warzynski
parent
7a3ebba9cb
commit
5cebffc276
@@ -1247,8 +1247,13 @@ struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {
|
||||
Location loc = xferOp.getLoc();
|
||||
auto vecType = xferOp.getVectorType();
|
||||
auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
|
||||
auto ub =
|
||||
Value ub =
|
||||
rewriter.create<arith::ConstantIndexOp>(loc, vecType.getDimSize(0));
|
||||
if (vecType.isScalable()) {
|
||||
Value vscale =
|
||||
rewriter.create<vector::VectorScaleOp>(loc, rewriter.getIndexType());
|
||||
ub = rewriter.create<arith::MulIOp>(loc, ub, vscale);
|
||||
}
|
||||
auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
|
||||
auto loopState = Strategy1d<OpTy>::initialLoopState(rewriter, xferOp);
|
||||
|
||||
|
||||
@@ -511,3 +511,39 @@ func.func @transfer_read_with_tensor(%arg: tensor<f32>) -> vector<1xf32> {
|
||||
tensor<f32>, vector<1xf32>
|
||||
return %0: vector<1xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: transfer_write_scalable
|
||||
// Test input: a masked 1-D vector.transfer_write of a scalable vector<[16]xf32>
// into a dynamically sized, strided memref. The mask and the stored value are
// both built by hand from LLVM-dialect ops.
func.func @transfer_write_scalable(%arg0: memref<?xf32, strided<[?], offset: ?>>, %arg1: f32) {
|
||||
%0 = llvm.mlir.constant(0 : i32) : i32
|
||||
%c0 = arith.constant 0 : index
|
||||
%dim = memref.dim %arg0, %c0 : memref<?xf32, strided<[?], offset: ?>>
|
||||
// Mask construction: compare the per-lane step vector against the size of
// dimension 0 of %arg0 (cast to i32, inserted at lane 0 and shuffled with an
// all-zero mask), so a lane is enabled when its step value is less than %dim.
%1 = llvm.intr.experimental.stepvector : vector<[16]xi32>
|
||||
%2 = arith.index_cast %dim : index to i32
|
||||
%3 = llvm.mlir.undef : vector<[16]xi32>
|
||||
%4 = llvm.insertelement %2, %3[%0 : i32] : vector<[16]xi32>
|
||||
%5 = llvm.shufflevector %4, %3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<[16]xi32>
|
||||
%6 = arith.cmpi slt, %1, %5 : vector<[16]xi32>
|
||||
// Value to store: %arg1 inserted at lane 0 and shuffled with the same all-zero
// mask used for the mask operand above.
%7 = llvm.mlir.undef : vector<[16]xf32>
|
||||
%8 = llvm.insertelement %arg1, %7[%0 : i32] : vector<[16]xf32>
|
||||
%9 = llvm.shufflevector %8, %7 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<[16]xf32>
|
||||
// Masked write of %9 into %arg0 starting at index %c0, with %6 as the mask and
// the single vector dimension marked in-bounds.
vector.transfer_write %9, %arg0[%c0], %6 {in_bounds = [true]} : vector<[16]xf32>, memref<?xf32, strided<[?], offset: ?>>
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-SAME: %[[ARG_0:.*]]: memref<?xf32, strided<[?], offset: ?>>,
|
||||
// CHECK: %[[C_0:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[C_16:.*]] = arith.constant 16 : index
|
||||
// CHECK: %[[STEP:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[MASK_VEC:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : vector<[16]xi32>
|
||||
// CHECK: %[[VSCALE:.*]] = vector.vscale
|
||||
// CHECK: %[[UB:.*]] = arith.muli %[[VSCALE]], %[[C_16]] : index
|
||||
// CHECK: scf.for %[[IDX:.*]] = %[[C_0]] to %[[UB]] step %[[STEP]] {
|
||||
// CHECK: %[[MASK_VAL:.*]] = vector.extractelement %[[MASK_VEC]][%[[IDX]] : index] : vector<[16]xi1>
|
||||
// CHECK: scf.if %[[MASK_VAL]] {
|
||||
// CHECK: %[[VAL_TO_STORE:.*]] = vector.extractelement %{{.*}}[%[[IDX]] : index] : vector<[16]xf32>
|
||||
// CHECK: memref.store %[[VAL_TO_STORE]], %[[ARG_0]][%[[IDX]]] : memref<?xf32, strided<[?], offset: ?>>
|
||||
// CHECK: } else {
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
|
||||
Reference in New Issue
Block a user