mirror of
https://github.com/intel/llvm.git
synced 2026-01-25 19:44:38 +08:00
[mlir][xegpu] Allow out-of-bounds writes (#110811)
Relaxes the vector.transfer_write lowering to allow out-of-bounds writes. This aligns the lowering with the current hardware specification, which does not update bytes in out-of-bounds locations during block stores.
This commit is contained in:
@@ -218,18 +218,15 @@ struct TransferWriteLowering
|
||||
if (failed(transferPreconditions(rewriter, writeOp)))
|
||||
return failure();
|
||||
|
||||
if (writeOp.hasOutOfBoundsDim())
|
||||
return rewriter.notifyMatchFailure(writeOp,
|
||||
"Unsupported out-of-bounds write");
|
||||
AffineMap map = writeOp.getPermutationMap();
|
||||
if (!map.isMinorIdentity())
|
||||
return rewriter.notifyMatchFailure(writeOp, "Expects identity map");
|
||||
|
||||
VectorType vecTy = writeOp.getVectorType();
|
||||
auto descType =
|
||||
xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
|
||||
/*array_length=*/1, /*boundary_check=*/false,
|
||||
xegpu::MemorySpace::Global);
|
||||
auto descType = xegpu::TensorDescType::get(
|
||||
vecTy.getShape(), vecTy.getElementType(),
|
||||
/*array_length=*/1, /*boundary_check=*/writeOp.hasOutOfBoundsDim(),
|
||||
xegpu::MemorySpace::Global);
|
||||
xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
|
||||
rewriter, loc, descType,
|
||||
dyn_cast<TypedValue<MemRefType>>(writeOp.getSource()),
|
||||
|
||||
@@ -66,6 +66,26 @@ func.func @store_dynamic_source(%vec: vector<8x16xf32>,
|
||||
|
||||
// -----
|
||||
|
||||
func.func @store_out_of_bounds(%vec: vector<8x16xf32>,
|
||||
%source: memref<7x64xf32>, %offset: index) {
|
||||
vector.transfer_write %vec, %source[%offset, %offset]
|
||||
{in_bounds = [false, true]}
|
||||
: vector<8x16xf32>, memref<7x64xf32>
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @store_out_of_bounds(
|
||||
// CHECK-SAME: %[[VEC:.+]]: vector<8x16xf32>,
|
||||
// CHECK-SAME: %[[SRC:.+]]: memref<7x64xf32>,
|
||||
// CHECK-SAME: %[[OFFSET:.+]]: index
|
||||
// CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
|
||||
// CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]]]
|
||||
// CHECK-SAME: memref<7x64xf32> -> !xegpu.tensor_desc<8x16xf32,
|
||||
// CHECK-SAME: boundary_check = true
|
||||
// CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8x16xf32>
|
||||
|
||||
// -----
|
||||
|
||||
func.func @no_store_transposed(%vec: vector<8x16xf32>,
|
||||
%source: memref<32x64xf32>, %offset: index) {
|
||||
vector.transfer_write %vec, %source[%offset, %offset]
|
||||
@@ -80,19 +100,6 @@ func.func @no_store_transposed(%vec: vector<8x16xf32>,
|
||||
|
||||
// -----
|
||||
|
||||
func.func @no_store_out_of_bounds(%vec: vector<8x16xf32>,
|
||||
%source: memref<32x64xf32>, %offset: index) {
|
||||
vector.transfer_write %vec, %source[%offset, %offset]
|
||||
{in_bounds = [false, true]}
|
||||
: vector<8x16xf32>, memref<32x64xf32>
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @no_store_out_of_bounds(
|
||||
// CHECK: vector.transfer_write
|
||||
|
||||
// -----
|
||||
|
||||
func.func @no_store_masked(%vec: vector<4xf32>,
|
||||
%source: memref<4xf32>, %offset: index) {
|
||||
%mask = arith.constant dense<[0, 1, 0, 1]> : vector<4xi1>
|
||||
|
||||
Reference in New Issue
Block a user