[flang] Fixed simplification for FP maxval.

On x86, a simplified F128 maxval ends up calling fmaxl that does not
work properly for F128 arguments. It is probably an LLVM issue, but
we also should not use arith.maxf if NaN or -0.0 operands are possible.
The change is to use cmpf and select. Unfortunately, these arith ops
do not support FastMathFlags currently, so I will have to fix this
sooner or later (depending on how this affects performance).

Reviewed By: kiranchandramohan

Differential Revision: https://reviews.llvm.org/D158200
This commit is contained in:
Slava Zakharin
2023-08-21 17:21:48 -07:00
parent 6ee3b24444
commit 89b98c13e0
2 changed files with 14 additions and 3 deletions

View File

@@ -617,8 +617,18 @@ static void genRuntimeMaxvalBody(fir::FirOpBuilder &builder,
auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc,
mlir::Type elementType, mlir::Value elem1,
mlir::Value elem2) -> mlir::Value {
if (elementType.isa<mlir::FloatType>())
return builder.create<mlir::arith::MaxFOp>(loc, elem1, elem2);
if (elementType.isa<mlir::FloatType>()) {
// arith.maxf later converted to llvm.intr.maxnum does not work
// correctly for NaNs and -0.0 (see maxnum/minnum pattern matching
// in LLVM's InstCombine pass). Moreover, llvm.intr.maxnum
// for F128 operands is lowered into fmaxl call by LLVM.
// This libm function may not work properly for F128 arguments
// on targets where long double is not F128. It is an LLVM issue,
// but we just use normal select here to resolve all the cases.
auto compare = builder.create<mlir::arith::CmpFOp>(
loc, mlir::arith::CmpFPredicate::OGT, elem1, elem2);
return builder.create<mlir::arith::SelectOp>(loc, compare, elem1, elem2);
}
if (elementType.isa<mlir::IntegerType>())
return builder.create<mlir::arith::MaxSIOp>(loc, elem1, elem2);

View File

@@ -899,7 +899,8 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ
// CHECK: %[[RES:.*]] = fir.do_loop %[[ITER:.*]] = %[[CINDEX_0]] to %[[EXTENT]] step %[[CINDEX_1]] iter_args(%[[MAX]] = %[[NEG_DBL_MAX]]) -> (f64) {
// CHECK: %[[ITEM:.*]] = fir.coordinate_of %[[ARR_BOX_F64]], %[[ITER]] : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
// CHECK: %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref<f64>
// CHECK: %[[NEW_MAX:.*]] = arith.maxf %[[ITEM_VAL]], %[[MAX]] : f64
// CHECK: %[[CMP:.*]] = arith.cmpf ogt, %[[ITEM_VAL]], %[[MAX]] : f64
// CHECK: %[[NEW_MAX:.*]] = arith.select %[[CMP]], %[[ITEM_VAL]], %[[MAX]] : f64
// CHECK: fir.result %[[NEW_MAX]] : f64
// CHECK: }
// CHECK: return %[[RES]] : f64