[flang] Readability improvement in binary->decimal conversion

Tweak binary->decimal conversions to avoid an integer multiplication in a hot loop to improve readability and get a minor (~5%) speed-up. Use native integer division by constants for more readability, too, since current build compilers seem to optimize it correctly now. Delete the now needless temporary work-around facility in Common/unsigned-const-division.h. Differential revision: https://reviews.llvm.org/D88604
2026-01-16 05:32:28 +08:00 · 2020-09-30 12:26:25 -07:00
parent 5402d11b1d
commit e99d184d54
4 changed files with 31 additions and 103 deletions
--- a/flang/include/flang/Common/unsigned-const-division.h
+++ b/flang/include/flang/Common/unsigned-const-division.h
@@ -1,77 +0,0 @@
-//===-- include/flang/Common/unsigned-const-division.h ----------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef FORTRAN_COMMON_UNSIGNED_CONST_DIVISION_H_
-#define FORTRAN_COMMON_UNSIGNED_CONST_DIVISION_H_
-
-// Work around unoptimized implementations of unsigned integer division
-// by constant values in some compilers (looking at YOU, clang 7!) by
-// explicitly implementing integer division by constant divisors as
-// multiplication by a fixed-point reciprocal and a right shift.
-
-#include "bit-population-count.h"
-#include "leading-zero-bit-count.h"
-#include "uint128.h"
-#include <cinttypes>
-#include <type_traits>
-
-namespace Fortran::common {
-
-template <typename UINT> class FixedPointReciprocal {
-public:
-  using type = UINT;
-
-private:
-  static_assert(std::is_unsigned_v<type>);
-  static const int bits{static_cast<int>(8 * sizeof(type))};
-  static_assert(bits <= 64);
-  using Big = HostUnsignedIntType<bits * 2>;
-
-public:
-  static constexpr FixedPointReciprocal For(type n) {
-    if (n == 0) {
-      return {0, 0};
-    } else if ((n & (n - 1)) == 0) { // n is a power of two
-      return {TrailingZeroBitCount(n), 1};
-    } else {
-      int shift{bits - 1 + BitsNeededFor(n)};
-      return {shift, static_cast<type>(((Big{1} << shift) + n - 1) / n)};
-    }
-  }
-
-  constexpr type Divide(type n) const {
-    return static_cast<type>((static_cast<Big>(reciprocal_) * n) >> shift_);
-  }
-
-private:
-  constexpr FixedPointReciprocal(int s, type r) : shift_{s}, reciprocal_{r} {}
-
-  int shift_;
-  type reciprocal_;
-};
-
-static_assert(FixedPointReciprocal<std::uint32_t>::For(5).Divide(2000000000u) ==
-    400000000u);
-static_assert(FixedPointReciprocal<std::uint64_t>::For(10).Divide(
-                  10000000000000000u) == 1000000000000000u);
-
-template <typename UINT, std::uint64_t DENOM>
-inline constexpr UINT DivideUnsignedBy(UINT n) {
-  if constexpr (std::is_same_v<UINT, uint128_t>) {
-    return n / static_cast<UINT>(DENOM);
-  } else {
-    // G++ can recognize that the reciprocal is a compile-time
-    // constant when For() is called inline, but clang requires
-    // a constexpr variable definition to force compile-time
-    // evaluation of the reciprocal.
-    constexpr auto recip{FixedPointReciprocal<UINT>::For(DENOM)};
-    return recip.Divide(n);
-  }
-}
-} // namespace Fortran::common
-#endif
--- a/flang/lib/Decimal/big-radix-floating-point.h
+++ b/flang/lib/Decimal/big-radix-floating-point.h
@@ -24,7 +24,6 @@
 #include "flang/Common/bit-population-count.h"
 #include "flang/Common/leading-zero-bit-count.h"
 #include "flang/Common/uint128.h"
-#include "flang/Common/unsigned-const-division.h"
 #include "flang/Decimal/binary-floating-point.h"
 #include "flang/Decimal/decimal.h"
 #include <cinttypes>
@@ -147,7 +146,7 @@ private:
        std::is_same_v<UINT, common::uint128_t> || std::is_unsigned_v<UINT>);
    SetToZero();
    while (n != 0) {
-      auto q{common::DivideUnsignedBy<UINT, 10>(n)};
+      auto q{n / 10u};
      if (n != q * 10) {
        break;
      }
@@ -161,7 +160,7 @@ private:
      return 0;
    } else {
      while (n != 0 && digits_ < digitLimit_) {
-        auto q{common::DivideUnsignedBy<UINT, radix>(n)};
+        auto q{n / radix};
        digit_[digits_++] = static_cast<Digit>(n - q * radix);
        n = q;
      }
@@ -214,7 +213,7 @@ private:
  template <unsigned DIVISOR> int DivideBy() {
    Digit remainder{0};
    for (int j{digits_ - 1}; j >= 0; --j) {
-      Digit q{common::DivideUnsignedBy<Digit, DIVISOR>(digit_[j])};
+      Digit q{digit_[j] / DIVISOR};
      Digit nrem{digit_[j] - DIVISOR * q};
      digit_[j] = q + (radix / DIVISOR) * remainder;
      remainder = nrem;
@@ -295,7 +294,7 @@ private:
  template <int N> int MultiplyByHelper(int carry = 0) {
    for (int j{0}; j < digits_; ++j) {
      auto v{N * digit_[j] + carry};
-      carry = common::DivideUnsignedBy<Digit, radix>(v);
+      carry = v / radix;
      digit_[j] = v - carry * radix; // i.e., v % radix
    }
    return carry;
--- a/flang/lib/Decimal/binary-to-decimal.cpp
+++ b/flang/lib/Decimal/binary-to-decimal.cpp
@@ -100,28 +100,35 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToDecimal(char *buffer,
                            "4041424344454647484950515253545556575859"
                            "6061626364656667686970717273747576777879"
                            "8081828384858687888990919293949596979899";
-  static constexpr Digit hundredth{radix / 100};
  // Treat the MSD specially: don't emit leading zeroes.
  Digit dig{digit_[digits_ - 1]};
-  for (int k{0}; k < LOG10RADIX; k += 2) {
-    Digit d{common::DivideUnsignedBy<Digit, hundredth>(dig)};
-    dig = 100 * (dig - d * hundredth);
-    const char *q{lut + 2 * d};
-    if (q[0] != '0' || p > start) {
-      *p++ = q[0];
-      *p++ = q[1];
-    } else if (q[1] != '0') {
-      *p++ = q[1];
-    }
+  char stack[LOG10RADIX], *sp{stack};
+  for (int k{0}; k < log10Radix; k += 2) {
+    Digit newDig{dig / 100};
+    auto d{static_cast<std::uint32_t>(dig) -
+        std::uint32_t{100} * static_cast<std::uint32_t>(newDig)};
+    dig = newDig;
+    const char *q{lut + d + d};
+    *sp++ = q[1];
+    *sp++ = q[0];
+  }
+  while (sp > stack && sp[-1] == '0') {
+    --sp;
+  }
+  while (sp > stack) {
+    *p++ = *--sp;
  }
  for (int j{digits_ - 1}; j-- > 0;) {
    Digit dig{digit_[j]};
+    char *reverse{p += log10Radix};
    for (int k{0}; k < log10Radix; k += 2) {
-      Digit d{common::DivideUnsignedBy<Digit, hundredth>(dig)};
-      dig = 100 * (dig - d * hundredth);
-      const char *q{lut + 2 * d};
-      *p++ = q[0];
-      *p++ = q[1];
+      Digit newDig{dig / 100};
+      auto d{static_cast<std::uint32_t>(dig) -
+          std::uint32_t{100} * static_cast<std::uint32_t>(newDig)};
+      dig = newDig;
+      const char *q{lut + d + d};
+      *--reverse = q[1];
+      *--reverse = q[0];
    }
  }
  // Adjust exponent so the effective decimal point is to
@@ -251,9 +258,9 @@ void BigRadixFloatingPointNumber<PREC, LOG10RADIX>::Minimize(
  Digit least{less.digit_[offset]};
  Digit my{digit_[0]};
  while (true) {
-    Digit q{common::DivideUnsignedBy<Digit, 10>(my)};
+    Digit q{my / 10u};
    Digit r{my - 10 * q};
-    Digit lq{common::DivideUnsignedBy<Digit, 10>(least)};
+    Digit lq{least / 10u};
    Digit lr{least - 10 * lq};
    if (r != 0 && lq == q) {
      Digit sub{(r - lr) >> 1};
--- a/flang/runtime/edit-output.cpp
+++ b/flang/runtime/edit-output.cpp
@@ -8,7 +8,6 @@

 #include "edit-output.h"
 #include "flang/Common/uint128.h"
-#include "flang/Common/unsigned-const-division.h"
 #include <algorithm>

 namespace Fortran::runtime::io {
@@ -32,7 +31,7 @@ bool EditIntegerOutput(IoStatementState &io, const DataEdit &edit, INT n) {
      signChars = 1; // '-' or '+'
    }
    while (un > 0) {
-      auto quotient{common::DivideUnsignedBy<UINT, 10>(un)};
+      auto quotient{un / 10u};
      *--p = '0' + static_cast<int>(un - UINT{10} * quotient);
      un = quotient;
    }
@@ -99,7 +98,7 @@ const char *RealOutputEditingBase::FormatExponent(
  char *eEnd{&exponent_[sizeof exponent_]};
  char *exponent{eEnd};
  for (unsigned e{static_cast<unsigned>(std::abs(expo))}; e > 0;) {
-    unsigned quotient{common::DivideUnsignedBy<unsigned, 10>(e)};
+    unsigned quotient{e / 10u};
    *--exponent = '0' + e - 10 * quotient;
    e = quotient;
  }