Support VectorTransfer splitting on writes also.
VectorTransfer splitting previously handled only read xfer ops. This adds the same logic to write ops. The resulting code involves 2 conditionals for write ops while read ops only needed 1, but the created ops are built upon the same patterns, so pattern matching/expectations stay consistent apart from the if/else ops.

Differential Revision: https://reviews.llvm.org/D102157
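For orientation, here is a minimal sketch of the split applied to a 2-D vector.transfer_write, paraphrased from the doc comments and FileCheck expectations added in this patch; the function and value names below are illustrative only, not actual pass output:

    // Input: a write whose 4x8 tile may run past %A's dynamic first dimension.
    func @sketch(%vec: vector<4x8xf32>, %A: memref<?x8xf32>, %i: index, %j: index) {
      vector.transfer_write %vec, %A[%i, %j] : vector<4x8xf32>, memref<?x8xf32>
      return
    }

    // After splitting (VectorTransfer strategy), roughly:
    //   %in_bounds = ...                  // %i + 4 and %j + 8 compared against the memref sizes
    //   %dest:3 = scf.if %in_bounds -> (memref<?x8xf32>, index, index) {
    //     scf.yield %A, %i, %j : ...      // fast path: write directly into %A
    //   } else {
    //     scf.yield %cast_alloca, %c0, %c0 : ...   // slow path: write into a scratch alloca
    //   }
    //   vector.transfer_write %vec, %dest#0[%dest#1, %dest#2] {in_bounds = [true, true]} : ...
    //   scf.if %not_in_bounds {           // second conditional: copy the scratch buffer back
    //     ...                             // into %A via vector.transfer_write or linalg.copy
    //   }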
@@ -2379,6 +2379,7 @@ static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp,
       xferOp.indices().take_front(xferOp.getLeadingShapedRank());
   SmallVector<OpFoldResult, 4> sizes;
   sizes.append(leadingIndices.begin(), leadingIndices.end());
+  auto isaWrite = isa<vector::TransferWriteOp>(xferOp);
   xferOp.zipResultAndIndexing([&](int64_t resultIdx, int64_t indicesIdx) {
     using MapList = ArrayRef<ArrayRef<AffineExpr>>;
     Value dimMemRef = memref_dim(xferOp.source(), indicesIdx);
@@ -2397,7 +2398,7 @@ static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp,
   SmallVector<OpFoldResult, 4> indices = llvm::to_vector<4>(llvm::map_range(
       xferOp.indices(), [](Value idx) -> OpFoldResult { return idx; }));
   return memref_sub_view(
-      xferOp.source(), indices, sizes,
+      isaWrite ? alloc : xferOp.source(), indices, sizes,
       SmallVector<OpFoldResult>(memrefRank, OpBuilder(xferOp).getIndexAttr(1)));
 }
@@ -2509,14 +2510,119 @@ static scf::IfOp createScopedFullPartialVectorTransferRead(
   return fullPartialIfOp;
 }
 
+/// Given an `xferOp` for which:
+///   1. `inBoundsCond` and a `compatibleMemRefType` have been computed.
+///   2. a memref of single vector `alloc` has been allocated.
+/// Produce IR resembling:
+/// ```
+///    %1:3 = scf.if (%inBounds) {
+///      memref.cast %A: memref<A...> to compatibleMemRefType
+///      scf.yield %view, ... : compatibleMemRefType, index, index
+///    } else {
+///      %3 = vector.type_cast %extra_alloc :
+///        memref<...> to memref<vector<...>>
+///      %4 = memref.cast %alloc: memref<B...> to compatibleMemRefType
+///      scf.yield %4, ... : compatibleMemRefType, index, index
+///   }
+/// ```
+static ValueRange getLocationToWriteFullVec(vector::TransferWriteOp xferOp,
+                                            TypeRange returnTypes,
+                                            Value inBoundsCond,
+                                            MemRefType compatibleMemRefType,
+                                            Value alloc) {
+  using namespace edsc;
+  using namespace edsc::intrinsics;
+  Value zero = std_constant_index(0);
+  Value memref = xferOp.source();
+  return conditionBuilder(
+      returnTypes, inBoundsCond,
+      [&]() -> scf::ValueVector {
+        Value res = memref;
+        if (compatibleMemRefType != xferOp.getShapedType())
+          res = memref_cast(memref, compatibleMemRefType);
+        scf::ValueVector viewAndIndices{res};
+        viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(),
+                              xferOp.indices().end());
+        return viewAndIndices;
+      },
+      [&]() -> scf::ValueVector {
+        Value casted = memref_cast(alloc, compatibleMemRefType);
+        scf::ValueVector viewAndIndices{casted};
+        viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(),
+                              zero);
+        return viewAndIndices;
+      });
+}
+
+/// Given an `xferOp` for which:
+///   1. `inBoundsCond` has been computed.
+///   2. a memref of single vector `alloc` has been allocated.
+///   3. it originally wrote to %view
+/// Produce IR resembling:
+/// ```
+///    %notInBounds = xor %inBounds, %true
+///    scf.if (%notInBounds) {
+///      %3 = subview %alloc [...][...][...]
+///      linalg.copy(%3, %view)
+///    }
+/// ```
+static void createScopedFullPartialLinalgCopy(vector::TransferWriteOp xferOp,
+                                              Value inBoundsCond, Value alloc) {
+  using namespace edsc;
+  using namespace edsc::intrinsics;
+  auto &b = ScopedContext::getBuilderRef();
+  auto notInBounds = b.create<XOrOp>(
+      xferOp->getLoc(), inBoundsCond,
+      b.create<::mlir::ConstantIntOp>(xferOp.getLoc(), true, 1));
+
+  conditionBuilder(notInBounds, [&]() {
+    Value memRefSubView = createScopedSubViewIntersection(
+        cast<VectorTransferOpInterface>(xferOp.getOperation()), alloc);
+    linalg_copy(memRefSubView, xferOp.source());
+  });
+}
+
+/// Given an `xferOp` for which:
+///   1. `inBoundsCond` has been computed.
+///   2. a memref of single vector `alloc` has been allocated.
+///   3. it originally wrote to %view
+/// Produce IR resembling:
+/// ```
+///    %notInBounds = xor %inBounds, %true
+///    scf.if (%notInBounds) {
+///      %2 = load %alloc : memref<vector<...>>
+///      vector.transfer_write %2, %view[...] : memref<A...>, vector<...>
+///    }
+/// ```
+static void
+createScopedFullPartialVectorTransferWrite(vector::TransferWriteOp xferOp,
+                                           Value inBoundsCond, Value alloc) {
+  using namespace edsc;
+  using namespace edsc::intrinsics;
+  auto &b = ScopedContext::getBuilderRef();
+  auto notInBounds = b.create<XOrOp>(
+      xferOp->getLoc(), inBoundsCond,
+      b.create<::mlir::ConstantIntOp>(xferOp.getLoc(), true, 1));
+  conditionBuilder(notInBounds, [&]() {
+    BlockAndValueMapping mapping;
+
+    Value load = memref_load(vector_type_cast(
+        MemRefType::get({}, xferOp.vector().getType()), alloc));
+
+    mapping.map(xferOp.vector(), load);
+    b.clone(*xferOp.getOperation(), mapping);
+  });
+}
+
 /// Split a vector.transfer operation into an in-bounds (i.e., no out-of-bounds
 /// masking) fastpath and a slowpath.
+///
+/// For vector.transfer_read:
 /// If `ifOp` is not null and the result is `success, the `ifOp` points to the
 /// newly created conditional upon function return.
 /// To accomodate for the fact that the original vector.transfer indexing may be
 /// arbitrary and the slow path indexes @[0...0] in the temporary buffer, the
 /// scf.if op returns a view and values of type index.
-/// At this time, only vector.transfer_read case is implemented.
 ///
 /// Example (a 2-D vector.transfer_read):
 /// ```
@@ -2537,6 +2643,32 @@ static scf::IfOp createScopedFullPartialVectorTransferRead(
 /// ```
 /// where `alloc` is a top of the function alloca'ed buffer of one vector.
 ///
+/// For vector.transfer_write:
+/// There are 2 conditional blocks. First a block to decide which memref and
+/// indices to use for an unmasked, inbounds write. Then a conditional block to
+/// further copy a partial buffer into the final result in the slow path case.
+///
+/// Example (a 2-D vector.transfer_write):
+/// ```
+///    vector.transfer_write %arg, %0[...], %pad : memref<A...>, vector<...>
+/// ```
+/// is transformed into:
+/// ```
+///    %1:3 = scf.if (%inBounds) {
+///      memref.cast %A: memref<A...> to compatibleMemRefType
+///      scf.yield %view : compatibleMemRefType, index, index
+///    } else {
+///      memref.cast %alloc: memref<B...> to compatibleMemRefType
+///      scf.yield %4 : compatibleMemRefType, index, index
+///    }
+///    %0 = vector.transfer_write %arg, %1#0[%1#1, %1#2] {in_bounds = [true ...
+///                                                                    true]}
+///    scf.if (%notInBounds) {
+///      // slowpath: not in-bounds vector.transfer or linalg.copy.
+///    }
+/// ```
+/// where `alloc` is a top of the function alloca'ed buffer of one vector.
+///
 /// Preconditions:
 ///  1. `xferOp.permutation_map()` must be a minor identity map
 ///  2. the rank of the `xferOp.source()` and the rank of the `xferOp.vector()`
@@ -2554,27 +2686,29 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
   SmallVector<bool, 4> bools(xferOp.getTransferRank(), true);
   auto inBoundsAttr = b.getBoolArrayAttr(bools);
   if (options.vectorTransferSplit == VectorTransferSplit::ForceInBounds) {
-    xferOp->setAttr(vector::TransferReadOp::getInBoundsAttrName(),
-                    inBoundsAttr);
+    xferOp->setAttr(xferOp.getInBoundsAttrName(), inBoundsAttr);
     return success();
   }
 
-  assert(succeeded(splitFullAndPartialTransferPrecondition(xferOp)) &&
-         "Expected splitFullAndPartialTransferPrecondition to hold");
-  auto xferReadOp = dyn_cast<vector::TransferReadOp>(xferOp.getOperation());
-
-  // TODO: add support for write case.
-  if (!xferReadOp)
-    return failure();
-
-  if (xferReadOp.mask())
-    return failure();
+  // Assert preconditions. Additionally, keep the variables in an inner scope to
+  // ensure they aren't used in the wrong scopes further down.
+  {
+    assert(succeeded(splitFullAndPartialTransferPrecondition(xferOp)) &&
+           "Expected splitFullAndPartialTransferPrecondition to hold");
+
+    auto xferReadOp = dyn_cast<vector::TransferReadOp>(xferOp.getOperation());
+    auto xferWriteOp = dyn_cast<vector::TransferWriteOp>(xferOp.getOperation());
+
+    if (!(xferReadOp || xferWriteOp))
+      return failure();
+    if (xferWriteOp && xferWriteOp.mask())
+      return failure();
+    if (xferReadOp && xferReadOp.mask())
+      return failure();
+  }
 
   OpBuilder::InsertionGuard guard(b);
-  if (Operation *sourceOp = xferOp.source().getDefiningOp())
-    b.setInsertionPointAfter(sourceOp);
-  else
-    b.setInsertionPoint(xferOp);
+  b.setInsertionPoint(xferOp);
   ScopedContext scope(b, xferOp.getLoc());
   Value inBoundsCond = createScopedInBoundsCond(
       cast<VectorTransferOpInterface>(xferOp.getOperation()));
@@ -2596,26 +2730,57 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
   MemRefType compatibleMemRefType =
       getCastCompatibleMemRefType(xferOp.getShapedType().cast<MemRefType>(),
                                   alloc.getType().cast<MemRefType>());
 
-  // Read case: full fill + partial copy -> in-bounds vector.xfer_read.
   SmallVector<Type, 4> returnTypes(1 + xferOp.getTransferRank(),
                                    b.getIndexType());
   returnTypes[0] = compatibleMemRefType;
-  scf::IfOp fullPartialIfOp =
-      options.vectorTransferSplit == VectorTransferSplit::VectorTransfer
-          ? createScopedFullPartialVectorTransferRead(
-                xferReadOp, returnTypes, inBoundsCond, compatibleMemRefType,
-                alloc)
-          : createScopedFullPartialLinalgCopy(xferReadOp, returnTypes,
-                                              inBoundsCond,
-                                              compatibleMemRefType, alloc);
-  if (ifOp)
-    *ifOp = fullPartialIfOp;
-
-  // Set existing read op to in-bounds, it always reads from a full buffer.
-  for (unsigned i = 0, e = returnTypes.size(); i != e; ++i)
-    xferReadOp.setOperand(i, fullPartialIfOp.getResult(i));
-  xferOp->setAttr(vector::TransferReadOp::getInBoundsAttrName(), inBoundsAttr);
+
+  if (auto xferReadOp =
+          dyn_cast<vector::TransferReadOp>(xferOp.getOperation())) {
+    // Read case: full fill + partial copy -> in-bounds vector.xfer_read.
+    scf::IfOp fullPartialIfOp =
+        options.vectorTransferSplit == VectorTransferSplit::VectorTransfer
+            ? createScopedFullPartialVectorTransferRead(
+                  xferReadOp, returnTypes, inBoundsCond, compatibleMemRefType,
+                  alloc)
+            : createScopedFullPartialLinalgCopy(xferReadOp, returnTypes,
+                                                inBoundsCond,
+                                                compatibleMemRefType, alloc);
+    if (ifOp)
+      *ifOp = fullPartialIfOp;
+
+    // Set existing read op to in-bounds, it always reads from a full buffer.
+    for (unsigned i = 0, e = returnTypes.size(); i != e; ++i)
+      xferReadOp.setOperand(i, fullPartialIfOp.getResult(i));
+
+    xferOp->setAttr(xferOp.getInBoundsAttrName(), inBoundsAttr);
+
+    return success();
+  }
+
+  auto xferWriteOp = cast<vector::TransferWriteOp>(xferOp.getOperation());
+
+  // Decide which location to write the entire vector to.
+  auto memrefAndIndices = getLocationToWriteFullVec(
+      xferWriteOp, returnTypes, inBoundsCond, compatibleMemRefType, alloc);
+
+  // Do an in bounds write to either the output or the extra allocated buffer.
+  // The operation is cloned to prevent deleting information needed for the
+  // later IR creation.
+  BlockAndValueMapping mapping;
+  mapping.map(xferWriteOp.source(), memrefAndIndices.front());
+  mapping.map(xferWriteOp.indices(), memrefAndIndices.drop_front());
+  auto *clone = b.clone(*xferWriteOp, mapping);
+  clone->setAttr(xferWriteOp.getInBoundsAttrName(), inBoundsAttr);
+
+  // Create a potential copy from the allocated buffer to the final output in
+  // the slow path case.
+  if (options.vectorTransferSplit == VectorTransferSplit::VectorTransfer)
+    createScopedFullPartialVectorTransferWrite(xferWriteOp, inBoundsCond,
+                                               alloc);
+  else
+    createScopedFullPartialLinalgCopy(xferWriteOp, inBoundsCond, alloc);
+
+  xferOp->erase();
+
   return success();
 }
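The two slow-path flavors for writes, paraphrased from the CHECK and LINALG expectations in the test diff below (value names are illustrative, not actual pass output):

    // Default (VectorTransfer) strategy: reload the whole scratch buffer as one
    // vector and replay the original, possibly out-of-bounds, transfer_write.
    //   scf.if %not_in_bounds {
    //     %v = memref.load %scratch_vec[] : memref<vector<4x8xf32>>
    //     vector.transfer_write %v, %A[%i, %j] : vector<4x8xf32>, memref<?x8xf32>
    //   }

    // use-linalg-copy strategy: copy only the intersecting sub-rectangle of the
    // scratch buffer into the destination with memref.subview + linalg.copy.
    //   scf.if %not_in_bounds {
    //     %sv = memref.subview %scratch[%i, %j] [%rows, %cols] [1, 1]
    //         : memref<4x8xf32> to memref<?x?xf32, #strided>
    //     linalg.copy(%sv, %A) : memref<?x?xf32, #strided>, memref<?x8xf32>
    //   }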
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-vector-transfer-full-partial-split | FileCheck %s
-// RUN: mlir-opt %s -test-vector-transfer-full-partial-split=use-linalg-copy | FileCheck %s --check-prefix=LINALG
+// RUN: mlir-opt %s -test-vector-transfer-full-partial-split -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -test-vector-transfer-full-partial-split=use-linalg-copy -split-input-file | FileCheck %s --check-prefix=LINALG
 
 // CHECK-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)>
 // CHECK-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)>
@@ -186,3 +186,206 @@ func @split_vector_transfer_read_strided_2d(
 // CHECK: return %[[res]] : vector<4x8xf32>
   return %1 : vector<4x8xf32>
 }
+
+// -----
+
+func @split_vector_transfer_write_2d(%V: vector<4x8xf32>, %A: memref<?x8xf32>, %i: index, %j: index) {
+  vector.transfer_write %V, %A[%i, %j] :
+    vector<4x8xf32>, memref<?x8xf32>
+  return
+}
+
+// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 4)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
+// CHECK: func @split_vector_transfer_write_2d(
+// CHECK-SAME: %[[VEC:.*]]: vector<4x8xf32>,
+// CHECK-SAME: %[[DEST:.*]]: memref<?x8xf32>,
+// CHECK-SAME: %[[I:.*]]: index,
+// CHECK-SAME: %[[J:.*]]: index) {
+// CHECK-DAG: %[[C8:.*]] = constant 8 : index
+// CHECK-DAG: %[[C0:.*]] = constant 0 : index
+// CHECK-DAG: %[[CT:.*]] = constant true
+// CHECK: %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
+// CHECK: %[[VAL_8:.*]] = affine.apply #[[MAP0]]()[%[[I]]]
+// CHECK: %[[DIM0:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
+// CHECK: %[[DIM0_IN:.*]] = cmpi sle, %[[VAL_8]], %[[DIM0]] : index
+// CHECK: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[J]]]
+// CHECK: %[[DIM1_IN:.*]] = cmpi sle, %[[DIM1]], %[[C8]] : index
+// CHECK: %[[IN_BOUNDS:.*]] = and %[[DIM0_IN]], %[[DIM1_IN]] : i1
+// CHECK: %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]] ->
+// CHECK-SAME: (memref<?x8xf32>, index, index) {
+// CHECK: scf.yield %[[DEST]], %[[I]], %[[J]] : memref<?x8xf32>, index, index
+// CHECK: } else {
+// CHECK: %[[VAL_15:.*]] = memref.cast %[[TEMP]]
+// CHECK-SAME: : memref<4x8xf32> to memref<?x8xf32>
+// CHECK: scf.yield %[[VAL_15]], %[[C0]], %[[C0]]
+// CHECK-SAME: : memref<?x8xf32>, index, index
+// CHECK: }
+// CHECK: vector.transfer_write %[[VEC]],
+// CHECK-SAME: %[[IN_BOUND_DEST:.*]]#0[%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
+// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32>
+// CHECK: %[[OUT_BOUNDS:.*]] = xor %[[IN_BOUNDS]], %[[CT]] : i1
+// CHECK: scf.if %[[OUT_BOUNDS]] {
+// CHECK: %[[CASTED:.*]] = vector.type_cast %[[TEMP]]
+// CHECK-SAME: : memref<4x8xf32> to memref<vector<4x8xf32>>
+// CHECK: %[[RESULT_COPY:.*]] = memref.load %[[CASTED]][]
+// CHECK-SAME: : memref<vector<4x8xf32>>
+// CHECK: vector.transfer_write %[[RESULT_COPY]],
+// CHECK-SAME: %[[DEST]][%[[I]], %[[J]]]
+// CHECK-SAME: : vector<4x8xf32>, memref<?x8xf32>
+// CHECK: }
+// CHECK: return
+// CHECK: }
+
+// LINALG-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 4)>
+// LINALG-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
+// LINALG-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 4)>
+// LINALG-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 8)>
+// LINALG-DAG: #[[MAP4:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1)>
+// LINALG: func @split_vector_transfer_write_2d(
+// LINALG-SAME: %[[VEC:.*]]: vector<4x8xf32>,
+// LINALG-SAME: %[[DEST:.*]]: memref<?x8xf32>,
+// LINALG-SAME: %[[I:.*]]: index,
+// LINALG-SAME: %[[J:.*]]: index) {
+// LINALG-DAG: %[[CT:.*]] = constant true
+// LINALG-DAG: %[[C0:.*]] = constant 0 : index
+// LINALG-DAG: %[[C4:.*]] = constant 4 : index
+// LINALG-DAG: %[[C8:.*]] = constant 8 : index
+// LINALG: %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
+// LINALG: %[[IDX0:.*]] = affine.apply #[[MAP0]]()[%[[I]]]
+// LINALG: %[[DIM0:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
+// LINALG: %[[DIM0_IN:.*]] = cmpi sle, %[[IDX0]], %[[DIM0]] : index
+// LINALG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[J]]]
+// LINALG: %[[DIM1_IN:.*]] = cmpi sle, %[[DIM1]], %[[C8]] : index
+// LINALG: %[[IN_BOUNDS:.*]] = and %[[DIM0_IN]], %[[DIM1_IN]] : i1
+// LINALG: %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]]
+// LINALG-SAME: -> (memref<?x8xf32>, index, index) {
+// LINALG: scf.yield %[[DEST]], %[[I]], %[[J]] : memref<?x8xf32>, index, index
+// LINALG: } else {
+// LINALG: %[[VAL_16:.*]] = memref.cast %[[TEMP]] : memref<4x8xf32> to memref<?x8xf32>
+// LINALG: scf.yield %[[VAL_16]], %[[C0]], %[[C0]] : memref<?x8xf32>, index, index
+// LINALG: }
+// LINALG: vector.transfer_write %[[VEC]],
+// LINALG-SAME: %[[IN_BOUND_DEST:.*]]#0[%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
+// LINALG-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32>
+// LINALG: %[[OUT_BOUNDS:.*]] = xor %[[IN_BOUNDS]], %[[CT]] : i1
+// LINALG: scf.if %[[OUT_BOUNDS]] {
+// LINALG: %[[VAL_19:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
+// LINALG-DAG: %[[VAL_20:.*]] = affine.min #[[MAP2]](%[[VAL_19]], %[[I]], %[[C4]])
+// LINALG-DAG: %[[VAL_21:.*]] = affine.min #[[MAP3]](%[[C8]], %[[J]], %[[C8]])
+// LINALG: %[[VAL_22:.*]] = memref.subview %[[TEMP]]
+// LINALG-SAME: [%[[I]], %[[J]]] [%[[VAL_20]], %[[VAL_21]]]
+// LINALG-SAME: [1, 1] : memref<4x8xf32> to memref<?x?xf32, #[[MAP4]]>
+// LINALG: linalg.copy(%[[VAL_22]], %[[DEST]])
+// LINALG-SAME: : memref<?x?xf32, #[[MAP4]]>, memref<?x8xf32>
+// LINALG: }
+// LINALG: return
+// LINALG: }
+
+// -----
+
+func @split_vector_transfer_write_strided_2d(
+    %V: vector<4x8xf32>, %A: memref<7x8xf32, offset:?, strides:[?, 1]>,
+    %i: index, %j: index) {
+  vector.transfer_write %V, %A[%i, %j] :
+    vector<4x8xf32>, memref<7x8xf32, offset:?, strides:[?, 1]>
+  return
+}
+
+// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 4)>
+// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (s0 + 8)>
+// CHECK: func @split_vector_transfer_write_strided_2d(
+// CHECK-SAME: %[[VEC:.*]]: vector<4x8xf32>,
+// CHECK-SAME: %[[DEST:.*]]: memref<7x8xf32, #[[MAP0]]>,
+// CHECK-SAME: %[[I:.*]]: index,
+// CHECK-SAME: %[[J:.*]]: index) {
+// CHECK-DAG: %[[C7:.*]] = constant 7 : index
+// CHECK-DAG: %[[C8:.*]] = constant 8 : index
+// CHECK-DAG: %[[C0:.*]] = constant 0 : index
+// CHECK-DAG: %[[CT:.*]] = constant true
+// CHECK: %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
+// CHECK: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[I]]]
+// CHECK: %[[DIM0_IN:.*]] = cmpi sle, %[[DIM0]], %[[C7]] : index
+// CHECK: %[[DIM1:.*]] = affine.apply #[[MAP2]]()[%[[J]]]
+// CHECK: %[[DIM1_IN:.*]] = cmpi sle, %[[DIM1]], %[[C8]] : index
+// CHECK: %[[IN_BOUNDS:.*]] = and %[[DIM0_IN]], %[[DIM1_IN]] : i1
+// CHECK: %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]]
+// CHECK-SAME: -> (memref<?x8xf32, #[[MAP0]]>, index, index) {
+// CHECK: %[[VAL_15:.*]] = memref.cast %[[DEST]]
+// CHECK-SAME: : memref<7x8xf32, #[[MAP0]]> to memref<?x8xf32, #[[MAP0]]>
+// CHECK: scf.yield %[[VAL_15]], %[[I]], %[[J]]
+// CHECK-SAME: : memref<?x8xf32, #[[MAP0]]>, index, index
+// CHECK: } else {
+// CHECK: %[[VAL_16:.*]] = memref.cast %[[TEMP]]
+// CHECK-SAME: : memref<4x8xf32> to memref<?x8xf32, #[[MAP0]]>
+// CHECK: scf.yield %[[VAL_16]], %[[C0]], %[[C0]]
+// CHECK-SAME: : memref<?x8xf32, #[[MAP0]]>, index, index
+// CHECK: }
+// CHECK: vector.transfer_write %[[VEC]],
+// CHECK-SAME: %[[IN_BOUND_DEST:.*]]#0
+// CHECK-SAME: [%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
+// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32, #[[MAP0]]>
+// CHECK: %[[OUT_BOUNDS:.*]] = xor %[[IN_BOUNDS]], %[[CT]] : i1
+// CHECK: scf.if %[[OUT_BOUNDS]] {
+// CHECK: %[[VAL_19:.*]] = vector.type_cast %[[TEMP]]
+// CHECK-SAME: : memref<4x8xf32> to memref<vector<4x8xf32>>
+// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_19]][]
+// CHECK-SAME: : memref<vector<4x8xf32>>
+// CHECK: vector.transfer_write %[[VAL_20]], %[[DEST]][%[[I]], %[[J]]]
+// CHECK-SAME: : vector<4x8xf32>, memref<7x8xf32, #[[MAP0]]>
+// CHECK: }
+// CHECK: return
+// CHECK: }
+
+// LINALG-DAG: #[[MAP0:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
+// LINALG-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 4)>
+// LINALG-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (s0 + 8)>
+// LINALG-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 4)>
+// LINALG-DAG: #[[MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 8)>
+// LINALG-DAG: #[[MAP5:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1)>
+// LINALG: func @split_vector_transfer_write_strided_2d(
+// LINALG-SAME: %[[VEC:.*]]: vector<4x8xf32>,
+// LINALG-SAME: %[[DEST:.*]]: memref<7x8xf32, #[[MAP0]]>,
+// LINALG-SAME: %[[I:.*]]: index,
+// LINALG-SAME: %[[J:.*]]: index) {
+// LINALG-DAG: %[[C0:.*]] = constant 0 : index
+// LINALG-DAG: %[[CT:.*]] = constant true
+// LINALG-DAG: %[[C7:.*]] = constant 7 : index
+// LINALG-DAG: %[[C4:.*]] = constant 4 : index
+// LINALG-DAG: %[[C8:.*]] = constant 8 : index
+// LINALG: %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
+// LINALG: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[I]]]
+// LINALG: %[[DIM0_IN:.*]] = cmpi sle, %[[DIM0]], %[[C7]] : index
+// LINALG: %[[DIM1:.*]] = affine.apply #[[MAP2]]()[%[[J]]]
+// LINALG: %[[DIM1_IN:.*]] = cmpi sle, %[[DIM1]], %[[C8]] : index
+// LINALG: %[[IN_BOUNDS:.*]] = and %[[DIM0_IN]], %[[DIM1_IN]] : i1
+// LINALG: %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]]
+// LINALG-SAME: -> (memref<?x8xf32, #[[MAP0]]>, index, index) {
+// LINALG: %[[VAL_16:.*]] = memref.cast %[[DEST]]
+// LINALG-SAME: : memref<7x8xf32, #[[MAP0]]> to memref<?x8xf32, #[[MAP0]]>
+// LINALG: scf.yield %[[VAL_16]], %[[I]], %[[J]]
+// LINALG-SAME: : memref<?x8xf32, #[[MAP0]]>, index, index
+// LINALG: } else {
+// LINALG: %[[VAL_17:.*]] = memref.cast %[[TEMP]]
+// LINALG-SAME: : memref<4x8xf32> to memref<?x8xf32, #[[MAP0]]>
+// LINALG: scf.yield %[[VAL_17]], %[[C0]], %[[C0]]
+// LINALG-SAME: : memref<?x8xf32, #[[MAP0]]>, index, index
+// LINALG: }
+// LINALG: vector.transfer_write %[[VEC]],
+// LINALG-SAME: %[[IN_BOUND_DEST:.*]]#0
+// LINALG-SAME: [%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
+// LINALG-SAME: {in_bounds = [true, true]}
+// LINALG-SAME: : vector<4x8xf32>, memref<?x8xf32, #[[MAP0]]>
+// LINALG: %[[OUT_BOUNDS:.*]] = xor %[[IN_BOUNDS]], %[[CT]] : i1
+// LINALG: scf.if %[[OUT_BOUNDS]] {
+// LINALG-DAG: %[[VAL_20:.*]] = affine.min #[[MAP3]](%[[C7]], %[[I]], %[[C4]])
+// LINALG-DAG: %[[VAL_21:.*]] = affine.min #[[MAP4]](%[[C8]], %[[J]], %[[C8]])
+// LINALG: %[[VAL_22:.*]] = memref.subview %[[TEMP]]
+// LINALG-SAME: [%[[I]], %[[J]]] [%[[VAL_20]], %[[VAL_21]]]
+// LINALG-SAME: [1, 1] : memref<4x8xf32> to memref<?x?xf32, #[[MAP5]]>
+// LINALG: linalg.copy(%[[VAL_22]], %[[DEST]])
+// LINALG-SAME: : memref<?x?xf32, #[[MAP5]]>, memref<7x8xf32, #[[MAP0]]>
+// LINALG: }
+// LINALG: return
+// LINALG: }