mirror of
https://github.com/intel/llvm.git
synced 2026-01-14 20:10:50 +08:00
[Flang] Add -ffast-real-mod and direct code for MOD on REAL types (#160660)
This patch adds direct code-gen support for a faster MOD intrinsic for REAL types. Flang has maintained, and continues to maintain, a high-precision implementation of the MOD intrinsic as part of the Fortran runtime. With the -ffast-real-mod flag, users can opt out of the runtime call and instead have the compiler generate faster inline code, at the risk of bit cancellation because the compiler then uses the MOD formula as specified by ISO Fortran.
This commit is contained in:
@@ -2750,6 +2750,9 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations">
|
||||
Group<f_Group>;
|
||||
def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
|
||||
def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
|
||||
def fno_fast_real_mod : Flag<["-"], "fno-fast-real-mod">,
|
||||
Group<f_Group>, Visibility<[FlangOption, FC1Option]>,
|
||||
HelpText<"Disable optimization of MOD for REAL types in presence of -ffast-math">;
|
||||
defm reciprocal_math : BoolFOption<"reciprocal-math",
|
||||
LangOpts<"AllowRecip">, DefaultFalse,
|
||||
PosFlag<SetTrue, [], [ClangOption, CC1Option, FC1Option, FlangOption],
|
||||
|
||||
@@ -822,6 +822,9 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args,
|
||||
complexRangeKindToStr(Range)));
|
||||
}
|
||||
|
||||
if (Args.hasArg(options::OPT_fno_fast_real_mod))
|
||||
CmdArgs.push_back("-fno-fast-real-mod");
|
||||
|
||||
if (!HonorINFs && !HonorNaNs && AssociativeMath && ReciprocalMath &&
|
||||
ApproxFunc && !SignedZeros &&
|
||||
(FPContract == "fast" || FPContract.empty())) {
|
||||
|
||||
@@ -60,7 +60,8 @@ LANGOPT(OpenMPNoThreadState, 1, 0)
|
||||
LANGOPT(OpenMPNoNestedParallelism, 1, 0)
|
||||
/// Use SIMD only OpenMP support.
|
||||
LANGOPT(OpenMPSimd, 1, false)
|
||||
|
||||
/// Disable fast (inline) MOD operations for REAL; set by -fno-fast-real-mod
|
||||
LANGOPT(NoFastRealMod, 1, false)
|
||||
LANGOPT(VScaleMin, 32, 0) ///< Minimum vscale range value
|
||||
LANGOPT(VScaleMax, 32, 0) ///< Maximum vscale range value
|
||||
|
||||
|
||||
@@ -1425,6 +1425,9 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc,
|
||||
opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast);
|
||||
}
|
||||
|
||||
if (args.hasArg(clang::driver::options::OPT_fno_fast_real_mod))
|
||||
opts.NoFastRealMod = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -277,6 +277,14 @@ bool CodeGenAction::beginSourceFileAction() {
|
||||
ci.getInvocation().getLangOpts().OpenMPVersion);
|
||||
}
|
||||
|
||||
if (ci.getInvocation().getLangOpts().NoFastRealMod) {
|
||||
mlir::ModuleOp mod = lb.getModule();
|
||||
mod.getOperation()->setAttr(
|
||||
mlir::StringAttr::get(mod.getContext(),
|
||||
llvm::Twine{"fir.no_fast_real_mod"}),
|
||||
mlir::BoolAttr::get(mod.getContext(), true));
|
||||
}
|
||||
|
||||
// Create a parse tree and lower it to FIR
|
||||
parseAndLowerTree(ci, lb);
|
||||
|
||||
|
||||
@@ -6989,8 +6989,33 @@ mlir::Value IntrinsicLibrary::genMergeBits(mlir::Type resultType,
|
||||
}
|
||||
|
||||
// MOD
|
||||
/// Emit inline code for MOD(a, p) on REAL operands instead of calling the
/// Fortran runtime, using the formula a - p * INT(a / p).
///
/// \param builder FIR builder used to create the operations.
/// \param loc     Source location attached to every generated operation.
/// \param a       Dividend; its type is also the type of the result.
/// \param p       Divisor; passed to the same arithmetic ops as \p a.
/// \return The value produced by the final subtraction.
///
/// NOTE(review): the quotient goes through a signed integer of the same bit
/// width as \p a, so quotients that do not fit in that integer are presumably
/// not handled accurately -- this is the precision trade-off of the fast path.
static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc,
                              mlir::Value a, mlir::Value p) {
  mlir::Type realTy = a.getType();
  // Signed integer type with the same bit width as the REAL operand.
  mlir::Type truncTy =
      builder.getIntegerType(realTy.getIntOrFloatBitWidth(), /*signed=*/true);
  // Every arithmetic op below is created with the `contract` fast-math flag.
  auto contractAttr = mlir::arith::FastMathFlagsAttr::get(
      builder.getContext(), mlir::arith::FastMathFlags::contract);

  // quotient = a / p
  mlir::Value quotient =
      mlir::arith::DivFOp::create(builder, loc, a, p, contractAttr);
  // whole = REAL(INT(quotient)): round-trip through the integer type.
  mlir::Value asInteger = builder.createConvert(loc, truncTy, quotient);
  mlir::Value whole = builder.createConvert(loc, realTy, asInteger);
  // product = whole * p
  mlir::Value product =
      mlir::arith::MulFOp::create(builder, loc, whole, p, contractAttr);
  // result = a - product
  return mlir::arith::SubFOp::create(builder, loc, a, product, contractAttr);
}
|
||||
|
||||
mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
|
||||
llvm::ArrayRef<mlir::Value> args) {
|
||||
auto mod = builder.getModule();
|
||||
bool dontUseFastRealMod = false;
|
||||
bool canUseApprox = mlir::arith::bitEnumContainsAny(
|
||||
builder.getFastMathFlags(), mlir::arith::FastMathFlags::afn);
|
||||
if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.no_fast_real_mod"))
|
||||
dontUseFastRealMod = attr.getValue();
|
||||
|
||||
assert(args.size() == 2);
|
||||
if (resultType.isUnsignedInteger()) {
|
||||
mlir::Type signlessType = mlir::IntegerType::get(
|
||||
@@ -7002,9 +7027,16 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
|
||||
if (mlir::isa<mlir::IntegerType>(resultType))
|
||||
return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]);
|
||||
|
||||
// Use runtime.
|
||||
return builder.createConvert(
|
||||
loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1]));
|
||||
if (resultType.isFloat() && canUseApprox && !dontUseFastRealMod) {
|
||||
// Treat MOD as an approximate function and code-gen inline code
|
||||
// instead of calling into the Fortran runtime library.
|
||||
return builder.createConvert(loc, resultType,
|
||||
genFastMod(builder, loc, args[0], args[1]));
|
||||
} else {
|
||||
// Use runtime.
|
||||
return builder.createConvert(
|
||||
loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1]));
|
||||
}
|
||||
}
|
||||
|
||||
// MODULO
|
||||
|
||||
7
flang/test/Driver/fast-real-mod.f90
Normal file
7
flang/test/Driver/fast-real-mod.f90
Normal file
@@ -0,0 +1,7 @@
|
||||
! RUN: %flang -fno-fast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-NO-FAST-REAL-MOD
|
||||
|
||||
! CHECK-NO-FAST-REAL-MOD: "-fno-fast-real-mod"
|
||||
|
||||
program test
|
||||
! nothing to be done in here
|
||||
end program test
|
||||
83
flang/test/Lower/Intrinsics/fast-real-mod.f90
Normal file
83
flang/test/Lower/Intrinsics/fast-real-mod.f90
Normal file
@@ -0,0 +1,83 @@
|
||||
! RUN: %flang_fc1 -ffast-math -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%}
|
||||
! RUN: %flang_fc1 -ffast-math -fno-fast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-NFRM%if target=x86_64{{.*}} %{,CHECK-NFRM-KIND10%}%if flang-supports-f128-math %{,CHECK-NFRM-KIND16%}
|
||||
|
||||
! TODO: add a check that the fir.no_fast_real_mod module attribute is absent when -fno-fast-real-mod is not given
|
||||
! CHECK-NFRM: module attributes {{{.*}}fir.no_fast_real_mod = true{{.*}}}
|
||||
|
||||
! CHECK-LABEL: @_QPmod_real4
|
||||
subroutine mod_real4(r, a, p)
|
||||
implicit none
|
||||
real(kind=4) :: r, a, p
|
||||
! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
|
||||
! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
|
||||
! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
|
||||
! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
|
||||
! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
|
||||
! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f32
|
||||
! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
|
||||
! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
|
||||
! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f32
|
||||
! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f32
|
||||
! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
|
||||
! CHECK-NFRM: fir.call @_FortranAModReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f32, f32, !fir.ref<i8>, i32) -> f32
|
||||
r = mod(a, p)
|
||||
end subroutine mod_real4
|
||||
|
||||
! CHECK-LABEL: @_QPmod_real8
|
||||
subroutine mod_real8(r, a, p)
|
||||
implicit none
|
||||
real(kind=8) :: r, a, p
|
||||
! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
|
||||
! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
|
||||
! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
|
||||
! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
|
||||
! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
|
||||
! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f64
|
||||
! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
|
||||
! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
|
||||
! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f64
|
||||
! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f64
|
||||
! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
|
||||
! CHECK-NFRM: fir.call @_FortranAModReal8(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f64, f64, !fir.ref<i8>, i32) -> f64
|
||||
r = mod(a, p)
|
||||
end subroutine mod_real8
|
||||
|
||||
! CHECK-LABEL: @_QPmod_real10
|
||||
subroutine mod_real10(r, a, p)
|
||||
implicit none
|
||||
integer, parameter :: kind10 = merge(10, 4, selected_real_kind(p=18).eq.10)
|
||||
real(kind=kind10) :: r, a, p
|
||||
! CHECK-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
|
||||
! CHECK-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
|
||||
! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
|
||||
! CHECK-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
|
||||
! CHECK-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
|
||||
! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f80
|
||||
! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
|
||||
! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
|
||||
! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f80
|
||||
! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f80
|
||||
! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
|
||||
! CHECK-NFRM-KIND10: fir.call @_FortranAModReal10(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f80, f80, !fir.ref<i8>, i32) -> f80
|
||||
r = mod(a, p)
|
||||
end subroutine mod_real10
|
||||
|
||||
! CHECK-LABEL: @_QPmod_real16
|
||||
subroutine mod_real16(r, a, p)
|
||||
implicit none
|
||||
integer, parameter :: kind16 = merge(16, 4, selected_real_kind(p=33).eq.16)
|
||||
real(kind=kind16) :: r, a, p
|
||||
! CHECK-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
|
||||
! CHECK-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
|
||||
! CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
|
||||
! CHECK-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
|
||||
! CHECK-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
|
||||
! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f128
|
||||
! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
|
||||
! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
|
||||
! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f128
|
||||
! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f128
|
||||
! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
|
||||
! CHECK-NFRM-KIND16: fir.call @_FortranAModReal16(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f128, f128, !fir.ref<i8>, i32) -> f128
|
||||
r = mod(a, p)
|
||||
end subroutine mod_real16
|
||||
Reference in New Issue
Block a user