2019-08-12 04:08:26 -07:00
|
|
|
//===- LowerToLLVMDialect.cpp - conversion from Vector to LLVM dialect ----===//
|
|
|
|
|
//
|
|
|
|
|
// Copyright 2019 The MLIR Authors.
|
|
|
|
|
//
|
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
|
//
|
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
//
|
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
|
// limitations under the License.
|
|
|
|
|
// =============================================================================
|
|
|
|
|
|
|
|
|
|
#include "mlir/Conversion/VectorToLLVM/VectorToLLVM.h"
|
|
|
|
|
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
|
|
|
|
|
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
|
2019-08-19 11:00:47 -07:00
|
|
|
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
2019-08-19 17:11:12 -07:00
|
|
|
#include "mlir/Dialect/VectorOps/VectorOps.h"
|
2019-08-12 04:08:26 -07:00
|
|
|
#include "mlir/IR/Attributes.h"
|
|
|
|
|
#include "mlir/IR/Builders.h"
|
|
|
|
|
#include "mlir/IR/MLIRContext.h"
|
|
|
|
|
#include "mlir/IR/Module.h"
|
|
|
|
|
#include "mlir/IR/Operation.h"
|
|
|
|
|
#include "mlir/IR/PatternMatch.h"
|
|
|
|
|
#include "mlir/IR/StandardTypes.h"
|
|
|
|
|
#include "mlir/IR/Types.h"
|
|
|
|
|
#include "mlir/Pass/Pass.h"
|
|
|
|
|
#include "mlir/Pass/PassManager.h"
|
|
|
|
|
#include "mlir/Transforms/DialectConversion.h"
|
|
|
|
|
#include "mlir/Transforms/Passes.h"
|
|
|
|
|
|
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
|
#include "llvm/IR/Module.h"
|
|
|
|
|
#include "llvm/IR/Type.h"
|
|
|
|
|
#include "llvm/Support/Allocator.h"
|
|
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
|
|
|
|
|
|
using namespace mlir;
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
static LLVM::LLVMType getPtrToElementType(T containerType,
|
|
|
|
|
LLVMTypeConverter &lowering) {
|
|
|
|
|
return lowering.convertType(containerType.getElementType())
|
|
|
|
|
.template cast<LLVM::LLVMType>()
|
|
|
|
|
.getPointerTo();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
class ExtractElementOpConversion : public LLVMOpLowering {
|
|
|
|
|
public:
|
|
|
|
|
explicit ExtractElementOpConversion(MLIRContext *context,
|
|
|
|
|
LLVMTypeConverter &typeConverter)
|
|
|
|
|
: LLVMOpLowering(vector::ExtractElementOp::getOperationName(), context,
|
|
|
|
|
typeConverter) {}
|
|
|
|
|
|
|
|
|
|
PatternMatchResult
|
|
|
|
|
matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
|
|
|
|
|
ConversionPatternRewriter &rewriter) const override {
|
|
|
|
|
auto loc = op->getLoc();
|
|
|
|
|
auto adaptor = vector::ExtractElementOpOperandAdaptor(operands);
|
|
|
|
|
auto extractOp = cast<vector::ExtractElementOp>(op);
|
|
|
|
|
auto vectorType = extractOp.vector()->getType().cast<VectorType>();
|
|
|
|
|
auto resultType = extractOp.getResult()->getType();
|
|
|
|
|
auto llvmResultType = lowering.convertType(resultType);
|
|
|
|
|
|
|
|
|
|
auto positionArrayAttr = extractOp.position();
|
|
|
|
|
// One-shot extraction of vector from array (only requires extractvalue).
|
|
|
|
|
if (resultType.isa<VectorType>()) {
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
Value *extracted = rewriter.create<LLVM::ExtractValueOp>(
|
|
|
|
|
loc, llvmResultType, adaptor.vector(), positionArrayAttr);
|
2019-08-12 04:08:26 -07:00
|
|
|
rewriter.replaceOp(op, extracted);
|
|
|
|
|
return matchSuccess();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Potential extraction of 1-D vector from struct.
|
|
|
|
|
auto *context = op->getContext();
|
|
|
|
|
Value *extracted = adaptor.vector();
|
|
|
|
|
auto positionAttrs = positionArrayAttr.getValue();
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
auto i32Type = rewriter.getIntegerType(32);
|
2019-08-12 04:08:26 -07:00
|
|
|
if (positionAttrs.size() > 1) {
|
|
|
|
|
auto nDVectorType = vectorType;
|
|
|
|
|
auto oneDVectorType = VectorType::get(nDVectorType.getShape().take_back(),
|
|
|
|
|
nDVectorType.getElementType());
|
|
|
|
|
auto nMinusOnePositionAttrs =
|
|
|
|
|
ArrayAttr::get(positionAttrs.drop_back(), context);
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
extracted = rewriter.create<LLVM::ExtractValueOp>(
|
|
|
|
|
loc, lowering.convertType(oneDVectorType), extracted,
|
|
|
|
|
nMinusOnePositionAttrs);
|
2019-08-12 04:08:26 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Remaining extraction of element from 1-D LLVM vector
|
|
|
|
|
auto position = positionAttrs.back().cast<IntegerAttr>();
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
auto constant = rewriter.create<LLVM::ConstantOp>(
|
|
|
|
|
loc, lowering.convertType(i32Type), position);
|
2019-08-12 04:08:26 -07:00
|
|
|
extracted =
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
rewriter.create<LLVM::ExtractElementOp>(loc, extracted, constant);
|
2019-08-12 04:08:26 -07:00
|
|
|
rewriter.replaceOp(op, extracted);
|
|
|
|
|
|
|
|
|
|
return matchSuccess();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
class OuterProductOpConversion : public LLVMOpLowering {
|
|
|
|
|
public:
|
|
|
|
|
explicit OuterProductOpConversion(MLIRContext *context,
|
|
|
|
|
LLVMTypeConverter &typeConverter)
|
|
|
|
|
: LLVMOpLowering(vector::OuterProductOp::getOperationName(), context,
|
|
|
|
|
typeConverter) {}
|
|
|
|
|
|
|
|
|
|
PatternMatchResult
|
|
|
|
|
matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
|
|
|
|
|
ConversionPatternRewriter &rewriter) const override {
|
|
|
|
|
auto loc = op->getLoc();
|
|
|
|
|
auto adaptor = vector::OuterProductOpOperandAdaptor(operands);
|
|
|
|
|
auto *ctx = op->getContext();
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
auto vLHS = adaptor.lhs()->getType().cast<LLVM::LLVMType>();
|
|
|
|
|
auto vRHS = adaptor.rhs()->getType().cast<LLVM::LLVMType>();
|
|
|
|
|
auto rankLHS = vLHS.getUnderlyingType()->getVectorNumElements();
|
|
|
|
|
auto rankRHS = vRHS.getUnderlyingType()->getVectorNumElements();
|
2019-08-12 04:08:26 -07:00
|
|
|
auto llvmArrayOfVectType = lowering.convertType(
|
|
|
|
|
cast<vector::OuterProductOp>(op).getResult()->getType());
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
Value *desc = rewriter.create<LLVM::UndefOp>(loc, llvmArrayOfVectType);
|
|
|
|
|
Value *a = adaptor.lhs(), *b = adaptor.rhs();
|
|
|
|
|
Value *acc = adaptor.acc().empty() ? nullptr : adaptor.acc().front();
|
|
|
|
|
SmallVector<Value *, 8> lhs, accs;
|
|
|
|
|
lhs.reserve(rankLHS);
|
|
|
|
|
accs.reserve(rankLHS);
|
|
|
|
|
for (unsigned d = 0, e = rankLHS; d < e; ++d) {
|
|
|
|
|
// shufflevector explicitly requires i32.
|
|
|
|
|
auto attr = rewriter.getI32IntegerAttr(d);
|
|
|
|
|
SmallVector<Attribute, 4> bcastAttr(rankRHS, attr);
|
|
|
|
|
auto bcastArrayAttr = ArrayAttr::get(bcastAttr, ctx);
|
|
|
|
|
Value *aD = nullptr, *accD = nullptr;
|
|
|
|
|
// 1. Broadcast the element a[d] into vector aD.
|
|
|
|
|
aD = rewriter.create<LLVM::ShuffleVectorOp>(loc, a, a, bcastArrayAttr);
|
|
|
|
|
// 2. If acc is present, extract 1-d vector acc[d] into accD.
|
|
|
|
|
if (acc)
|
2019-09-16 03:30:33 -07:00
|
|
|
accD = rewriter.create<LLVM::ExtractValueOp>(
|
|
|
|
|
loc, vRHS, acc, rewriter.getI64ArrayAttr(d));
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
// 3. Compute aD outer b (plus accD, if relevant).
|
|
|
|
|
Value *aOuterbD =
|
|
|
|
|
accD ? rewriter.create<LLVM::fmuladd>(loc, vRHS, aD, b, accD)
|
|
|
|
|
.getResult()
|
|
|
|
|
: rewriter.create<LLVM::FMulOp>(loc, aD, b).getResult();
|
|
|
|
|
// 4. Insert as value `d` in the descriptor.
|
2019-09-16 03:30:33 -07:00
|
|
|
desc = rewriter.create<LLVM::InsertValueOp>(loc, llvmArrayOfVectType,
|
|
|
|
|
desc, aOuterbD,
|
|
|
|
|
rewriter.getI64ArrayAttr(d));
|
2019-08-12 04:08:26 -07:00
|
|
|
}
|
|
|
|
|
rewriter.replaceOp(op, desc);
|
|
|
|
|
return matchSuccess();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/// Populate the given list with patterns that convert from Vector to LLVM.
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
void mlir::populateVectorToLLVMConversionPatterns(
|
|
|
|
|
LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
|
2019-08-12 04:08:26 -07:00
|
|
|
patterns.insert<ExtractElementOpConversion, OuterProductOpConversion>(
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
converter.getDialect()->getContext(), converter);
|
2019-08-12 04:08:26 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
struct LowerVectorToLLVMPass : public ModulePass<LowerVectorToLLVMPass> {
|
|
|
|
|
void runOnModule();
|
|
|
|
|
};
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
void LowerVectorToLLVMPass::runOnModule() {
|
|
|
|
|
// Convert to the LLVM IR dialect using the converter defined above.
|
|
|
|
|
OwningRewritePatternList patterns;
|
|
|
|
|
LLVMTypeConverter converter(&getContext());
|
Extend vector.outerproduct with an optional 3rd argument
This CL adds an optional third argument to the vector.outerproduct instruction.
When such a third argument is specified, it is added to the result of the outerproduct and is lowered to FMA intrinsic when the lowering supports it.
In the future, we can add an attribute on the `vector.outerproduct` instruction to modify the operations for which to emit code (e.g. "+/*", "max/+", "min/+", "log/exp" ...).
This CL additionally performs minor cleanups in the vector lowering and adds tests to improve coverage.
This has been independently verified to result in proper fma instructions for haswell as follows.
Input:
```
func @outerproduct_add(%arg0: vector<17xf32>, %arg1: vector<8xf32>, %arg2: vector<17x8xf32>) -> vector<17x8xf32> {
%2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<17xf32>, vector<8xf32>
return %2 : vector<17x8xf32>
}
}
```
Command:
```
mlir-opt vector-to-llvm.mlir -vector-lower-to-llvm-dialect --disable-pass-threading | mlir-opt -lower-to-cfg -lower-to-llvm | mlir-translate --mlir-to-llvmir | opt -O3 | llc -O3 -march=x86-64 -mcpu=haswell -mattr=fma,avx2
```
Output:
```
outerproduct_add: # @outerproduct_add
# %bb.0:
...
vmovaps 112(%rbp), %ymm8
vbroadcastss %xmm0, %ymm0
...
vbroadcastss 64(%rbp), %ymm15
vfmadd213ps 144(%rbp), %ymm8, %ymm0 # ymm0 = (ymm8 * ymm0) + mem
...
vfmadd213ps 400(%rbp), %ymm8, %ymm9 # ymm9 = (ymm8 * ymm9) + mem
...
```
PiperOrigin-RevId: 263743359
2019-08-16 03:52:56 -07:00
|
|
|
populateVectorToLLVMConversionPatterns(converter, patterns);
|
2019-08-12 04:08:26 -07:00
|
|
|
populateStdToLLVMConversionPatterns(converter, patterns);
|
|
|
|
|
|
|
|
|
|
ConversionTarget target(getContext());
|
|
|
|
|
target.addLegalDialect<LLVM::LLVMDialect>();
|
|
|
|
|
target.addDynamicallyLegalOp<FuncOp>(
|
|
|
|
|
[&](FuncOp op) { return converter.isSignatureLegal(op.getType()); });
|
|
|
|
|
if (failed(
|
|
|
|
|
applyPartialConversion(getModule(), target, patterns, &converter))) {
|
|
|
|
|
signalPassFailure();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-13 13:33:46 -07:00
|
|
|
/// Allocates a new `LowerVectorToLLVMPass` with `new` and returns it as a raw
/// `OpPassBase<ModuleOp>` pointer.
OpPassBase<ModuleOp> *mlir::createLowerVectorToLLVMPass() {
  auto *vectorToLLVMPass = new LowerVectorToLLVMPass();
  return vectorToLLVMPass;
}
|
|
|
|
|
|
|
|
|
|
/// Static registration object associating `LowerVectorToLLVMPass` with the
/// argument string "vector-lower-to-llvm-dialect" and its description.
static PassRegistration<LowerVectorToLLVMPass> pass(
    "vector-lower-to-llvm-dialect",
    "Lower the operations from the vector dialect into the LLVM dialect");
|