[libc][math] Implement double precision log1p correctly rounded to all rounding modes.

Implement double precision log1p function correctly rounded to all
rounding modes.

**Performance**

  - For `0.5 <= x <= 2`, the fast pass hit rate is about 99.93%.
  - Benchmarks were run with the `./perf.sh` tool from the CORE-MATH project; the unit is CPU clocks per call.
  - Reciprocal throughput from CORE-MATH's perf tool on Ryzen 5900X:
```
$ ./perf.sh log1p
GNU libc version: 2.35
GNU libc release: stable

-- CORE-MATH reciprocal throughput -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 39.792 + 1.011 clc/call; Median-Min = 0.940 clc/call; Max = 41.373 clc/call;

-- CORE-MATH reciprocal throughput -- without FMA (-march=x86-64-v2)
[####################] 100 %
Ntrial = 20 ; Min = 87.285 + 1.135 clc/call; Median-Min = 1.299 clc/call; Max = 89.715 clc/call;

-- System LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 20.666 + 0.123 clc/call; Median-Min = 0.125 clc/call; Max = 20.828 clc/call;

-- LIBC reciprocal throughput -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 20.928 + 0.771 clc/call; Median-Min = 0.725 clc/call; Max = 22.767 clc/call;

-- LIBC reciprocal throughput -- without FMA
[####################] 100 %
Ntrial = 20 ; Min = 31.461 + 0.528 clc/call; Median-Min = 0.602 clc/call; Max = 36.809 clc/call;

```
  - Latency from CORE-MATH's perf tool on Ryzen 5900X:
```
$ ./perf.sh log1p --latency
GNU libc version: 2.35
GNU libc release: stable

-- CORE-MATH latency -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 77.875 + 0.062 clc/call; Median-Min = 0.051 clc/call; Max = 78.003 clc/call;

-- CORE-MATH latency -- without FMA (-march=x86-64-v2)
[####################] 100 %
Ntrial = 20 ; Min = 101.958 + 1.202 clc/call; Median-Min = 1.325 clc/call; Max = 104.452 clc/call;

-- System LIBC latency --
[####################] 100 %
Ntrial = 20 ; Min = 60.581 + 1.443 clc/call; Median-Min = 1.611 clc/call; Max = 62.285 clc/call;

-- LIBC latency -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 48.817 + 1.108 clc/call; Median-Min = 1.300 clc/call; Max = 50.282 clc/call;

-- LIBC latency -- without FMA
[####################] 100 %
Ntrial = 20 ; Min = 61.121 + 0.599 clc/call; Median-Min = 0.761 clc/call; Max = 62.020 clc/call;
```
  - Accurate pass latency:
```
$ ./perf.sh log1p --latency --simple_stat
GNU libc version: 2.35
GNU libc release: stable

-- CORE-MATH latency -- with FMA
760.444

-- CORE-MATH latency -- without FMA (-march=x86-64-v2)
827.880

-- LIBC latency -- with FMA
711.837

-- LIBC latency -- without FMA
764.317
```

Reviewed By: zimmermann6

Differential Revision: https://reviews.llvm.org/D151049
This commit is contained in:
Tue Ly
2023-05-21 01:27:38 -04:00
parent 18a5bd7a95
commit b91e78da37
11 changed files with 1262 additions and 0 deletions

View File

@@ -176,6 +176,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.ldexpl
libc.src.math.log10
libc.src.math.log10f
libc.src.math.log1p
libc.src.math.log1pf
libc.src.math.log2
libc.src.math.log2f

View File

@@ -287,6 +287,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.ldexpl
libc.src.math.log10
libc.src.math.log10f
libc.src.math.log1p
libc.src.math.log1pf
libc.src.math.log2
libc.src.math.log2f

View File

@@ -292,6 +292,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.llroundl
libc.src.math.log10
libc.src.math.log10f
libc.src.math.log1p
libc.src.math.log1pf
libc.src.math.log2
libc.src.math.log2f

View File

@@ -169,6 +169,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.llroundl
libc.src.math.log10
libc.src.math.log10f
libc.src.math.log1p
libc.src.math.log1pf
libc.src.math.log2
libc.src.math.log2f

View File

@@ -408,6 +408,7 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"log10", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
FunctionSpec<"log1p", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"log1pf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
FunctionSpec<"log2", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,

View File

@@ -114,6 +114,7 @@ add_math_entrypoint_object(ldexpl)
add_math_entrypoint_object(log10)
add_math_entrypoint_object(log10f)
add_math_entrypoint_object(log1p)
add_math_entrypoint_object(log1pf)
add_math_entrypoint_object(log2)

View File

@@ -814,6 +814,26 @@ add_entrypoint_object(
-O3
)
# Entrypoint object for the double-precision log1p: built from log1p.cpp, with
# its declaration in ../log1p.h, and compiled with -O3. The DEPENDS list names
# the shared constant / range-reduction targets in this directory and the
# FPUtil support targets this entrypoint is declared to depend on.
add_entrypoint_object(
log1p
SRCS
log1p.cpp
HDRS
../log1p.h
DEPENDS
.common_constants
.log_range_reduction
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.dyadic_float
libc.src.__support.macros.optimization
COMPILE_OPTIONS
-O3
)
add_entrypoint_object(
log1pf
SRCS

File diff suppressed because it is too large Load Diff

18
libc/src/math/log1p.h Normal file
View File

@@ -0,0 +1,18 @@
//===-- Implementation header for log1p -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_MATH_LOG1P_H
#define LLVM_LIBC_SRC_MATH_LOG1P_H
namespace __llvm_libc {
// Double-precision log1p entry point.
//
// Special cases exercised by the unit tests for this function:
//   log1p(NaN)  -> NaN
//   log1p(+inf) -> +inf
//   log1p(+0.0) -> +0.0 and log1p(-0.0) -> -0.0
//   log1p(-1.0) -> -inf, raising FE_DIVBYZERO
//   log1p(-2.0) and log1p(-inf) -> NaN, raising FE_INVALID
// The tests also compare results against MPFR's Log1p with a tolerance of 0.5
// under the Nearest, Downward, Upward and TowardZero rounding modes.
double log1p(double x);
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_MATH_LOG1P_H

View File

@@ -1351,6 +1351,20 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)
# Unit test target for the double-precision log1p, built from log1p_test.cpp
# and registered in the libc_math_unittests suite.
# NOTE(review): NEED_MPFR presumably restricts this test to builds where MPFR
# is available -- confirm against the add_fp_unittest definition.
add_fp_unittest(
log1p_test
NEED_MPFR
SUITE
libc_math_unittests
SRCS
log1p_test.cpp
DEPENDS
libc.src.errno.errno
libc.include.math
libc.src.math.log1p
libc.src.__support.FPUtil.fp_bits
)
add_fp_unittest(
log1pf_test
NEED_MPFR

View File

@@ -0,0 +1,166 @@
//===-- Unittests for log1p -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/__support/FPUtil/FPBits.h"
#include "src/errno/libc_errno.h"
#include "src/math/log1p.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
#include "utils/MPFRWrapper/MPFRUtils.h"
#include <math.h>
#include <errno.h>
#include <stdint.h>
namespace mpfr = __llvm_libc::testing::mpfr;
using __llvm_libc::testing::tlog;
DECLARE_SPECIAL_CONSTANTS(double)
// Checks log1p on special inputs (NaN, infinities, signed zeros, -1.0 and a
// value below -1.0), including the floating-point exception expected where
// one is specified.
TEST(LlvmLibcLog1pTest, SpecialNumbers) {
// NaN and +inf inputs are expected to yield NaN and +inf respectively.
EXPECT_FP_EQ(aNaN, __llvm_libc::log1p(aNaN));
EXPECT_FP_EQ(inf, __llvm_libc::log1p(inf));
// Inputs below -1.0 (here -inf and -2.0) must produce NaN and FE_INVALID.
EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log1p(neg_inf), FE_INVALID);
EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log1p(-2.0), FE_INVALID);
// Zero inputs are expected back with their sign preserved.
EXPECT_FP_EQ(zero, __llvm_libc::log1p(0.0));
EXPECT_FP_EQ(neg_zero, __llvm_libc::log1p(-0.0));
// x = -1.0 must produce -inf and FE_DIVBYZERO.
EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::log1p(-1.0), FE_DIVBYZERO);
}
// Runs log1p over a fixed table of inputs stored as raw 64-bit patterns and
// converted to double through FPBits: the powers of ten from 10^0 to 10^22,
// followed by a set of hand-picked bit patterns. Every input is checked with
// the all-rounding-modes MPFR matcher at a tolerance of 0.5.
TEST(LlvmLibcLog1pTest, TrickyInputs) {
  // The array bound is deduced from the initializer list, so the table and
  // the loop below cannot drift out of sync.
  constexpr uint64_t INPUTS[] = {
      0x3ff0000000000000, // x = 1.0
      0x4024000000000000, // x = 10.0
      0x4059000000000000, // x = 10^2
      0x408f400000000000, // x = 10^3
      0x40c3880000000000, // x = 10^4
      0x40f86a0000000000, // x = 10^5
      0x412e848000000000, // x = 10^6
      0x416312d000000000, // x = 10^7
      0x4197d78400000000, // x = 10^8
      0x41cdcd6500000000, // x = 10^9
      0x4202a05f20000000, // x = 10^10
      0x42374876e8000000, // x = 10^11
      0x426d1a94a2000000, // x = 10^12
      0x42a2309ce5400000, // x = 10^13
      0x42d6bcc41e900000, // x = 10^14
      0x430c6bf526340000, // x = 10^15
      0x4341c37937e08000, // x = 10^16
      0x4376345785d8a000, // x = 10^17
      0x43abc16d674ec800, // x = 10^18
      0x43e158e460913d00, // x = 10^19
      0x4415af1d78b58c40, // x = 10^20
      0x444b1ae4d6e2ef50, // x = 10^21
      0x4480f0cf064dd592, // x = 10^22
      0x3fefffffffef06ad, 0x3fefde0f22c7d0eb, 0x225e7812faadb32f,
      0x3fee1076964c2903, 0x3fdfe93fff7fceb0, 0x3ff012631ad8df10,
      0x3fefbfdaa448ed98, 0x3fd00a8cefe9a5f8, 0x3fd0b4d870eb22f8,
      0x3c90c40cef04efb5, 0x449d2ccad399848e, 0x4aa12ccdffd9d2ec,
      0x5656f070b92d36ce, 0x6db06dcb74f76bcc, 0x7f1954e72ffd4596,
      0x5671e2f1628093e4, 0x73dac56e2bf1a951, 0x8001bc6879ea14c5,
  };

  for (uint64_t bits : INPUTS) {
    double x = static_cast<double>(FPBits(bits));
    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log1p, x,
                                   __llvm_libc::log1p(x), 0.5);
  }
}
// Checks log1p at every power of two from 2^-1074 up to 2^1023, using the
// all-rounding-modes MPFR matcher with a tolerance of 0.5. The ASSERT form is
// used, so the first mismatch ends the test.
TEST(LlvmLibcLog1pTest, AllExponents) {
  constexpr int FIRST_EXPONENT = -1074;
  constexpr int LAST_EXPONENT = 1023;

  double power_of_two = 0x1.0p-1074;
  int exponent = FIRST_EXPONENT;
  while (exponent <= LAST_EXPONENT) {
    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log1p, power_of_two,
                                   __llvm_libc::log1p(power_of_two), 0.5);
    ++exponent;
    power_of_two *= 2.0;
  }
}
// Samples several ranges of non-negative double bit patterns and checks log1p
// against the MPFR matcher under the Nearest, Downward, Upward and TowardZero
// rounding modes. For every range and mode it logs the number of failures and
// an upper bound on the tolerance needed to make every sample pass.
TEST(LlvmLibcLog1pTest, InDoubleRange) {
  // Number of steps taken across each range; COUNT + 1 samples are visited.
  constexpr uint64_t COUNT = 1234561;

  // Walks the bit patterns start, start + step, ..., start + COUNT * step with
  // step = (stop - start) / COUNT, evaluating log1p under `rounding_mode`.
  auto test = [&](uint64_t start, uint64_t stop,
                  mpfr::RoundingMode rounding_mode) {
    // NOTE(review): presumably keeps `rounding_mode` in effect for the lifetime
    // of this object -- confirm against MPFRUtils.h.
    mpfr::ForceRoundingMode force_rounding(rounding_mode);
    uint64_t fails = 0; // samples that did not match at tolerance 0.5
    uint64_t count = 0; // samples with a finite, non-NaN result
    uint64_t cc = 0;    // samples on which log1p was actually called
    // Input/result pair that most recently forced `tol` to grow. Both are
    // initialized so neither is ever read in an indeterminate state.
    double mx = 0.0;
    double mr = 0.0;
    double tol = 0.5;
    uint64_t step = (stop - start) / COUNT;
    for (uint64_t i = 0, v = start; i <= COUNT; ++i, v += step) {
      double x = FPBits(v).get_val();
      // Only finite, non-negative inputs are exercised here.
      if (isnan(x) || isinf(x) || x < 0.0)
        continue;
      libc_errno = 0;
      double result = __llvm_libc::log1p(x);
      ++cc;
      if (isnan(result) || isinf(result))
        continue;

      ++count;
      if (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Log1p, x,
                                               result, 0.5, rounding_mode)) {
        ++fails;
        // Keep doubling the tolerance until this sample passes, remembering
        // the sample that required the increase.
        while (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(
            mpfr::Operation::Log1p, x, result, tol, rounding_mode)) {
          mx = x;
          mr = result;
          tol *= 2.0;
        }
      }
    }
    tlog << " Log1p failed: " << fails << "/" << count << "/" << cc
         << " tests.\n";
    tlog << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
    // Re-run the non-silent matcher on the recorded sample so that a failure
    // is actually reported for this range and mode.
    if (fails) {
      EXPECT_MPFR_MATCH(mpfr::Operation::Log1p, mx, mr, 0.5, rounding_mode);
    }
  };

  // Runs `test` over [start, stop] once per rounding mode, logging a header
  // that names the range.
  auto test_all_rounding = [&](uint64_t start, uint64_t stop,
                               const char *start_str, const char *stop_str) {
    tlog << "\n=== Test in range [" << start_str << ", " << stop_str
         << "] ===\n";

    tlog << "\n Test Rounding To Nearest...\n";
    test(start, stop, mpfr::RoundingMode::Nearest);

    tlog << "\n Test Rounding Downward...\n";
    test(start, stop, mpfr::RoundingMode::Downward);

    tlog << "\n Test Rounding Upward...\n";
    test(start, stop, mpfr::RoundingMode::Upward);

    tlog << "\n Test Rounding Toward Zero...\n";
    test(start, stop, mpfr::RoundingMode::TowardZero);
  };

  test_all_rounding(0x0000'0000'0000'0001ULL, 0x0010'0000'0000'0000ULL,
                    "2^-1074", "2^-1022");

  test_all_rounding(0x39B0'0000'0000'0000ULL, 0x3A50'0000'0000'0000ULL,
                    "2^-100", "2^-90");

  test_all_rounding(0x3CD0'0000'0000'0000ULL, 0x3D20'0000'0000'0000ULL, "2^-50",
                    "2^-45");

  test_all_rounding(0x3E10'0000'0000'0000ULL, 0x3E40'0000'0000'0000ULL, "2^-30",
                    "2^-27");

  test_all_rounding(0x3FD0'0000'0000'0000ULL, 0x4010'0000'0000'0000ULL, "0.25",
                    "4.0");

  test_all_rounding(0x4630'0000'0000'0000ULL, 0x4670'0000'0000'0000ULL, "2^100",
                    "2^104");

  test_all_rounding(0x7FD0'0000'0000'0000ULL, 0x7FF0'0000'0000'0000ULL,
                    "2^1022", "2^1024");
}