[flang] Fixed simplification for FP maxval.

On x86, a simplified F128 maxval ends up calling fmaxl that does not work properly for F128 arguments. It is probably an LLVM issue, but we also should not use arith.maxf if NaN or -0.0 operands are possible. The change is to use cmpf and select. Unfortunately, these arith ops do not support FastMathFlags currently, so I will have to fix this sooner or later (depending on how this affects performance). Reviewed By: kiranchandramohan Differential Revision: https://reviews.llvm.org/D158200
2026-01-25 10:55:58 +08:00 · 2023-08-21 17:21:48 -07:00
parent 6ee3b24444
commit 89b98c13e0
2 changed files with 14 additions and 3 deletions
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -617,8 +617,18 @@ static void genRuntimeMaxvalBody(fir::FirOpBuilder &builder,
  auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc,
                      mlir::Type elementType, mlir::Value elem1,
                      mlir::Value elem2) -> mlir::Value {
-    if (elementType.isa<mlir::FloatType>())
-      return builder.create<mlir::arith::MaxFOp>(loc, elem1, elem2);
+    if (elementType.isa<mlir::FloatType>()) {
+      // arith.maxf later converted to llvm.intr.maxnum does not work
+      // correctly for NaNs and -0.0 (see maxnum/minnum pattern matching
+      // in LLVM's InstCombine pass). Moreover, llvm.intr.maxnum
+      // for F128 operands is lowered into fmaxl call by LLVM.
+      // This libm function may not work properly for F128 arguments
+      // on targets where long double is not F128. It is an LLVM issue,
+      // but we just use normal select here to resolve all the cases.
+      auto compare = builder.create<mlir::arith::CmpFOp>(
+          loc, mlir::arith::CmpFPredicate::OGT, elem1, elem2);
+      return builder.create<mlir::arith::SelectOp>(loc, compare, elem1, elem2);
+    }
    if (elementType.isa<mlir::IntegerType>())
      return builder.create<mlir::arith::MaxSIOp>(loc, elem1, elem2);

--- a/flang/test/Transforms/simplifyintrinsics.fir
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -899,7 +899,8 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ
 // CHECK:           %[[RES:.*]] = fir.do_loop %[[ITER:.*]] = %[[CINDEX_0]] to %[[EXTENT]] step %[[CINDEX_1]] iter_args(%[[MAX]] = %[[NEG_DBL_MAX]]) -> (f64) {
 // CHECK:             %[[ITEM:.*]] = fir.coordinate_of %[[ARR_BOX_F64]], %[[ITER]] : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
 // CHECK:             %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref<f64>
-// CHECK:             %[[NEW_MAX:.*]] = arith.maxf %[[ITEM_VAL]], %[[MAX]] : f64
+// CHECK:             %[[CMP:.*]] = arith.cmpf ogt, %[[ITEM_VAL]], %[[MAX]] : f64
+// CHECK:             %[[NEW_MAX:.*]] = arith.select %[[CMP]], %[[ITEM_VAL]], %[[MAX]] : f64
 // CHECK:             fir.result %[[NEW_MAX]] : f64
 // CHECK:           }
 // CHECK:           return %[[RES]] : f64