mirror of
https://github.com/intel/llvm.git
synced 2026-01-24 08:30:34 +08:00
Lower vector transfer ops to loop.for operations.
This allows mixing linalg operations with vector transfer operations (with additional modifications to affine ops) and is a step towards solving tensorflow/mlir#189.
PiperOrigin-RevId: 275543361
This commit is contained in:
committed by
A. Unique TensorFlower
parent
2823b68580
commit
9e7e297da3
@@ -770,5 +770,5 @@ mlir::linalg::createLowerLinalgToLLVMPass() {
|
||||
}
|
||||
|
||||
static PassRegistration<LowerLinalgToLLVMPass>
|
||||
pass("linalg-convert-to-llvm",
|
||||
pass("convert-linalg-to-llvm",
|
||||
"Lower the operations from the linalg dialect into the LLVM dialect");
|
||||
|
||||
@@ -320,7 +320,6 @@ categorizeValueByAffineType(MLIRContext *context, Value *val, unsigned &numDims,
|
||||
d = getAffineSymbolExpr(numSymbols++, context);
|
||||
resultVal = val;
|
||||
} else {
|
||||
assert(isValidDim(val) && "Must be a valid Dim");
|
||||
d = getAffineDimExpr(numDims++, context);
|
||||
resultVal = val;
|
||||
}
|
||||
|
||||
@@ -24,11 +24,8 @@ using namespace mlir::edsc;
|
||||
|
||||
static SmallVector<ValueHandle, 8> getMemRefSizes(Value *memRef) {
|
||||
MemRefType memRefType = memRef->getType().cast<MemRefType>();
|
||||
assert(isStrided(memRefType) && "Expected strided MemRef type");
|
||||
|
||||
auto maps = memRefType.getAffineMaps();
|
||||
(void)maps;
|
||||
assert((maps.empty() || (maps.size() == 1 && maps[0].isIdentity())) &&
|
||||
"Layout maps not supported");
|
||||
SmallVector<ValueHandle, 8> res;
|
||||
res.reserve(memRefType.getShape().size());
|
||||
const auto &shape = memRefType.getShape();
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "mlir/Analysis/NestedMatcher.h"
|
||||
#include "mlir/Analysis/Utils.h"
|
||||
#include "mlir/Analysis/VectorAnalysis.h"
|
||||
#include "mlir/Dialect/LoopOps/LoopOps.h"
|
||||
#include "mlir/Dialect/StandardOps/Ops.h"
|
||||
#include "mlir/Dialect/VectorOps/VectorOps.h"
|
||||
#include "mlir/EDSC/Builders.h"
|
||||
@@ -54,9 +55,9 @@
|
||||
/// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
|
||||
/// // vector<32x256xf32> and pad with %f0 to handle the boundary case:
|
||||
/// %f0 = constant 0.0f : f32
|
||||
/// affine.for %i0 = 0 to %0 {
|
||||
/// affine.for %i1 = 0 to %1 step 256 {
|
||||
/// affine.for %i2 = 0 to %2 step 32 {
|
||||
/// loop.for %i0 = 0 to %0 {
|
||||
/// loop.for %i1 = 0 to %1 step %c256 {
|
||||
/// loop.for %i2 = 0 to %2 step %c32 {
|
||||
/// %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0)
|
||||
/// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
|
||||
/// memref<?x?x?xf32>, vector<32x256xf32>
|
||||
@@ -68,8 +69,8 @@
|
||||
/// abstraction):
|
||||
///
|
||||
/// ```mlir {.mlir}
|
||||
/// affine.for %d2 = 0 to 256 {
|
||||
/// affine.for %d1 = 0 to 32 {
|
||||
/// loop.for %d2 = 0 to %c256 {
|
||||
/// loop.for %d1 = 0 to %c32 {
|
||||
/// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
|
||||
/// %tmp[%d2, %d1] = %s
|
||||
/// }
|
||||
@@ -126,7 +127,7 @@ struct VectorTransferRewriter : public RewritePattern {
|
||||
/// Analyzes the `transfer` to find an access dimension along the fastest remote
|
||||
/// MemRef dimension. If such a dimension with coalescing properties is found,
|
||||
/// `pivs` and `vectorView` are swapped so that the invocation of
|
||||
/// AffineLoopNestBuilder captures it in the innermost loop.
|
||||
/// LoopNestBuilder captures it in the innermost loop.
|
||||
template <typename VectorTransferOpTy>
|
||||
void coalesceCopy(VectorTransferOpTy transfer,
|
||||
SmallVectorImpl<edsc::ValueHandle *> *pivs,
|
||||
@@ -282,13 +283,16 @@ VectorTransferRewriter<VectorTransferReadOp>::matchAndRewrite(
|
||||
|
||||
auto lbs = vectorView.getLbs();
|
||||
auto ubs = vectorView.getUbs();
|
||||
auto steps = vectorView.getSteps();
|
||||
SmallVector<ValueHandle, 8> steps;
|
||||
steps.reserve(vectorView.getSteps().size());
|
||||
for (auto step : vectorView.getSteps())
|
||||
steps.push_back(constant_index(step));
|
||||
|
||||
// 2. Emit alloc-copy-load-dealloc.
|
||||
ValueHandle tmp = alloc(tmpMemRefType(transfer));
|
||||
IndexedValue local(tmp);
|
||||
ValueHandle vec = vector_type_cast(tmp, vectorMemRefType(transfer));
|
||||
AffineLoopNestBuilder(pivs, lbs, ubs, steps)([&] {
|
||||
LoopNestBuilder(pivs, lbs, ubs, steps)([&] {
|
||||
// Computes clippedScalarAccessExprs in the loop nest scope (ivs exist).
|
||||
local(ivs) = remote(clip(transfer, view, ivs));
|
||||
});
|
||||
@@ -342,14 +346,17 @@ VectorTransferRewriter<VectorTransferWriteOp>::matchAndRewrite(
|
||||
|
||||
auto lbs = vectorView.getLbs();
|
||||
auto ubs = vectorView.getUbs();
|
||||
auto steps = vectorView.getSteps();
|
||||
SmallVector<ValueHandle, 8> steps;
|
||||
steps.reserve(vectorView.getSteps().size());
|
||||
for (auto step : vectorView.getSteps())
|
||||
steps.push_back(constant_index(step));
|
||||
|
||||
// 2. Emit alloc-store-copy-dealloc.
|
||||
ValueHandle tmp = alloc(tmpMemRefType(transfer));
|
||||
IndexedValue local(tmp);
|
||||
ValueHandle vec = vector_type_cast(tmp, vectorMemRefType(transfer));
|
||||
std_store(vectorValue, vec, {constant_index(0)});
|
||||
AffineLoopNestBuilder(pivs, lbs, ubs, steps)([&] {
|
||||
LoopNestBuilder(pivs, lbs, ubs, steps)([&] {
|
||||
// Computes clippedScalarAccessExprs in the loop nest scope (ivs exist).
|
||||
remote(clip(transfer, view, ivs)) = local(ivs);
|
||||
});
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s -linalg-convert-to-llvm | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -linalg-convert-to-llvm | FileCheck %s --check-prefix=LLVM-LOOPS
|
||||
// RUN: mlir-opt %s -convert-linalg-to-llvm | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -convert-linalg-to-llvm | FileCheck %s --check-prefix=LLVM-LOOPS
|
||||
|
||||
func @buffer_size(%arg0: !linalg.buffer<?xf32>) {
|
||||
%c1 = constant 1 : index
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops | FileCheck %s
|
||||
|
||||
// Test that we can lower all the way to LLVM without crashing, don't check results here.
|
||||
// RUN: mlir-opt %s --linalg-convert-to-llvm -o=/dev/null 2>&1
|
||||
// RUN: mlir-opt %s --convert-linalg-to-llvm -o=/dev/null 2>&1
|
||||
|
||||
// CHECK-DAG: #[[strided1D:.*]] = (d0)[s0] -> (d0 + s0)
|
||||
// CHECK-DAG: #[[strided2D:.*]] = (d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// RUN: mlir-opt %s | mlir-opt | FileCheck %s
|
||||
|
||||
// Test that we can lower all the way to LLVM without crashing, don't check results here.
|
||||
// RUN: mlir-opt %s --linalg-convert-to-llvm -o=/dev/null 2>&1
|
||||
// RUN: mlir-opt %s --convert-linalg-to-llvm -o=/dev/null 2>&1
|
||||
|
||||
// CHECK-DAG: #[[strided1D:.*]] = (d0)[s0] -> (d0 + s0)
|
||||
// CHECK-DAG: #[[strided2D:.*]] = (d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)
|
||||
|
||||
@@ -53,8 +53,12 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d
|
||||
|
||||
// CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
|
||||
func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
|
||||
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
|
||||
// CHECK-NEXT: %{{.*}} = alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
|
||||
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
|
||||
// CHECK-DAG: %[[C1:.*]] = constant 1 : index
|
||||
// CHECK-DAG: %[[C3:.*]] = constant 3 : index
|
||||
// CHECK-DAG: %[[C4:.*]] = constant 4 : index
|
||||
// CHECK-DAG: %[[C5:.*]] = constant 5 : index
|
||||
// CHECK: %{{.*}} = alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
|
||||
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} {
|
||||
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
|
||||
@@ -65,9 +69,9 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
|
||||
// CHECK-NEXT: %[[D3:.*]] = dim %{{.*}}, 3 : memref<?x?x?x?xf32>
|
||||
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
|
||||
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast %[[ALLOC]] : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
|
||||
// CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
|
||||
// CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
|
||||
// CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
|
||||
// CHECK-NEXT: loop.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
|
||||
// CHECK-NEXT: loop.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
|
||||
// CHECK-NEXT: loop.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
|
||||
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
|
||||
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%[[D0]]]
|
||||
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index
|
||||
@@ -126,9 +130,13 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
|
||||
|
||||
// CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
|
||||
func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
|
||||
// CHECK-NEXT: %{{.*}} = constant dense<1.000000e+00> : vector<5x4x3xf32>
|
||||
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
|
||||
// CHECK-NEXT: %{{.*}} = alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
|
||||
// CHECK-DAG: %{{.*}} = constant dense<1.000000e+00> : vector<5x4x3xf32>
|
||||
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
|
||||
// CHECK-DAG: %[[C1:.*]] = constant 1 : index
|
||||
// CHECK-DAG: %[[C3:.*]] = constant 3 : index
|
||||
// CHECK-DAG: %[[C4:.*]] = constant 4 : index
|
||||
// CHECK-DAG: %[[C5:.*]] = constant 5 : index
|
||||
// CHECK: %{{.*}} = alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
|
||||
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 {
|
||||
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
|
||||
@@ -140,9 +148,9 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
|
||||
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
|
||||
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
|
||||
// CHECK: store %{{.*}}, {{.*}} : memref<1xvector<5x4x3xf32>>
|
||||
// CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
|
||||
// CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
|
||||
// CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
|
||||
// CHECK-NEXT: loop.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
|
||||
// CHECK-NEXT: loop.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
|
||||
// CHECK-NEXT: loop.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
|
||||
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
|
||||
// CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%[[D0]]]
|
||||
// CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
// RUN: mlir-opt %s -linalg-convert-to-llvm | mlir-cpu-runner -e dot -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -linalg-convert-to-llvm | mlir-cpu-runner -e dot -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-convert-to-llvm | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -linalg-convert-to-llvm | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-tile -linalg-tile-sizes=2,3,4 -linalg-promote-subviews -linalg-lower-to-loops -linalg-convert-to-llvm | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-tile -linalg-tile-sizes=2,3,4 -linalg-promote-subviews -linalg-convert-to-llvm | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -convert-linalg-to-llvm | mlir-cpu-runner -e dot -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -convert-linalg-to-llvm | mlir-cpu-runner -e dot -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -convert-linalg-to-llvm | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -convert-linalg-to-llvm | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-tile -linalg-tile-sizes=2,3,4 -linalg-promote-subviews -linalg-lower-to-loops -convert-linalg-to-llvm | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
// RUN: mlir-opt %s -linalg-tile -linalg-tile-sizes=2,3,4 -linalg-promote-subviews -convert-linalg-to-llvm | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
|
||||
|
||||
#strided1D = (d0)[s0] -> (d0 + s0)
|
||||
#strided2D = (d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -linalg-convert-to-llvm -lower-to-llvm | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -linalg-convert-to-llvm -lower-to-llvm | mlir-cpu-runner -e print_1d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -linalg-convert-to-llvm -lower-to-llvm | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -linalg-convert-to-llvm -lower-to-llvm | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -convert-linalg-to-llvm -lower-to-llvm | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -convert-linalg-to-llvm -lower-to-llvm | mlir-cpu-runner -e print_1d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -convert-linalg-to-llvm -lower-to-llvm | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D
|
||||
// RUN: mlir-opt %s -linalg-lower-to-loops -convert-linalg-to-llvm -lower-to-llvm | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D
|
||||
|
||||
func @print_0d() {
|
||||
%f = constant 2.00000e+00 : f32
|
||||
|
||||
Reference in New Issue
Block a user