mirror of
https://github.com/intel/llvm.git
synced 2026-01-20 19:07:53 +08:00
[libc][math] Implement double precision log1p correctly rounded to all rounding modes.
Implement double precision log1p function correctly rounded to all rounding modes. **Performance** - For `0.5 <= x <= 2`, the fast pass hitting rate is about 99.93%. - Benchmarks with `./perf.sh` tool from the CORE-MATH project, unit is (CPU clocks / call). - Reciprocal throughput from CORE-MATH's perf tool on Ryzen 5900X: ``` $ ./perf.sh log1p GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH reciprocal throughput -- with FMA [####################] 100 % Ntrial = 20 ; Min = 39.792 + 1.011 clc/call; Median-Min = 0.940 clc/call; Max = 41.373 clc/call; -- CORE-MATH reciprocal throughput -- without FMA (-march=x86-64-v2) [####################] 100 % Ntrial = 20 ; Min = 87.285 + 1.135 clc/call; Median-Min = 1.299 clc/call; Max = 89.715 clc/call; -- System LIBC reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 20.666 + 0.123 clc/call; Median-Min = 0.125 clc/call; Max = 20.828 clc/call; -- LIBC reciprocal throughput -- with FMA [####################] 100 % Ntrial = 20 ; Min = 20.928 + 0.771 clc/call; Median-Min = 0.725 clc/call; Max = 22.767 clc/call; -- LIBC reciprocal throughput -- without FMA [####################] 100 % Ntrial = 20 ; Min = 31.461 + 0.528 clc/call; Median-Min = 0.602 clc/call; Max = 36.809 clc/call; ``` - Latency from CORE-MATH's perf tool on Ryzen 5900X: ``` $ ./perf.sh log1p --latency GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH latency -- with FMA [####################] 100 % Ntrial = 20 ; Min = 77.875 + 0.062 clc/call; Median-Min = 0.051 clc/call; Max = 78.003 clc/call; -- CORE-MATH latency -- without FMA (-march=x86-64-v2) [####################] 100 % Ntrial = 20 ; Min = 101.958 + 1.202 clc/call; Median-Min = 1.325 clc/call; Max = 104.452 clc/call; -- System LIBC latency -- [####################] 100 % Ntrial = 20 ; Min = 60.581 + 1.443 clc/call; Median-Min = 1.611 clc/call; Max = 62.285 clc/call; -- LIBC latency -- with FMA [####################] 100 % Ntrial = 20 ; Min = 48.817 + 1.108 
clc/call; Median-Min = 1.300 clc/call; Max = 50.282 clc/call; -- LIBC latency -- without FMA [####################] 100 % Ntrial = 20 ; Min = 61.121 + 0.599 clc/call; Median-Min = 0.761 clc/call; Max = 62.020 clc/call; ``` - Accurate pass latency: ``` $ ./perf.sh log1p --latency --simple_stat GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH latency -- with FMA 760.444 -- CORE-MATH latency -- without FMA (-march=x86-64-v2) 827.880 -- LIBC latency -- with FMA 711.837 -- LIBC latency -- without FMA 764.317 ``` Reviewed By: zimmermann6 Differential Revision: https://reviews.llvm.org/D151049
This commit is contained in:
@@ -176,6 +176,7 @@ set(TARGET_LIBM_ENTRYPOINTS
|
||||
libc.src.math.ldexpl
|
||||
libc.src.math.log10
|
||||
libc.src.math.log10f
|
||||
libc.src.math.log1p
|
||||
libc.src.math.log1pf
|
||||
libc.src.math.log2
|
||||
libc.src.math.log2f
|
||||
|
||||
@@ -287,6 +287,7 @@ set(TARGET_LIBM_ENTRYPOINTS
|
||||
libc.src.math.ldexpl
|
||||
libc.src.math.log10
|
||||
libc.src.math.log10f
|
||||
libc.src.math.log1p
|
||||
libc.src.math.log1pf
|
||||
libc.src.math.log2
|
||||
libc.src.math.log2f
|
||||
|
||||
@@ -292,6 +292,7 @@ set(TARGET_LIBM_ENTRYPOINTS
|
||||
libc.src.math.llroundl
|
||||
libc.src.math.log10
|
||||
libc.src.math.log10f
|
||||
libc.src.math.log1p
|
||||
libc.src.math.log1pf
|
||||
libc.src.math.log2
|
||||
libc.src.math.log2f
|
||||
|
||||
@@ -169,6 +169,7 @@ set(TARGET_LIBM_ENTRYPOINTS
|
||||
libc.src.math.llroundl
|
||||
libc.src.math.log10
|
||||
libc.src.math.log10f
|
||||
libc.src.math.log1p
|
||||
libc.src.math.log1pf
|
||||
libc.src.math.log2
|
||||
libc.src.math.log2f
|
||||
|
||||
@@ -408,6 +408,7 @@ def StdC : StandardSpec<"stdc"> {
|
||||
FunctionSpec<"log10", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
|
||||
FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
|
||||
|
||||
FunctionSpec<"log1p", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
|
||||
FunctionSpec<"log1pf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
|
||||
|
||||
FunctionSpec<"log2", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
|
||||
|
||||
@@ -114,6 +114,7 @@ add_math_entrypoint_object(ldexpl)
|
||||
add_math_entrypoint_object(log10)
|
||||
add_math_entrypoint_object(log10f)
|
||||
|
||||
add_math_entrypoint_object(log1p)
|
||||
add_math_entrypoint_object(log1pf)
|
||||
|
||||
add_math_entrypoint_object(log2)
|
||||
|
||||
@@ -814,6 +814,26 @@ add_entrypoint_object(
|
||||
-O3
|
||||
)
|
||||
|
||||
# Entrypoint object for the double precision log1p: compiled from log1p.cpp
# with -O3, exposing ../log1p.h, and depending on the shared log helpers
# (.common_constants, .log_range_reduction) plus the FPUtil support targets.
add_entrypoint_object(
  log1p
  SRCS
    log1p.cpp
  HDRS
    ../log1p.h
  DEPENDS
    .common_constants
    .log_range_reduction
    libc.src.__support.FPUtil.fenv_impl
    libc.src.__support.FPUtil.fp_bits
    libc.src.__support.FPUtil.multiply_add
    libc.src.__support.FPUtil.polyeval
    libc.src.__support.FPUtil.double_double
    libc.src.__support.FPUtil.dyadic_float
    libc.src.__support.macros.optimization
  COMPILE_OPTIONS
    -O3
)
|
||||
|
||||
add_entrypoint_object(
|
||||
log1pf
|
||||
SRCS
|
||||
|
||||
1038
libc/src/math/generic/log1p.cpp
Normal file
1038
libc/src/math/generic/log1p.cpp
Normal file
File diff suppressed because it is too large
Load Diff
18
libc/src/math/log1p.h
Normal file
18
libc/src/math/log1p.h
Normal file
@@ -0,0 +1,18 @@
|
||||
//===-- Implementation header for log1p -------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SRC_MATH_LOG1P_H
|
||||
#define LLVM_LIBC_SRC_MATH_LOG1P_H
|
||||
|
||||
namespace __llvm_libc {

// Double precision log1p, i.e. the C standard math function computing the
// natural logarithm of (1 + x).
//
// Behaviour exercised by the accompanying unit test:
//   - log1p(NaN) is NaN and log1p(+inf) is +inf;
//   - log1p(+0.0) is +0.0 and log1p(-0.0) is -0.0;
//   - log1p(-1.0) is -inf and raises FE_DIVBYZERO;
//   - log1p(-2.0) and log1p(-inf) are NaN and raise FE_INVALID.
double log1p(double x);

} // namespace __llvm_libc
|
||||
|
||||
#endif // LLVM_LIBC_SRC_MATH_LOG1P_H
|
||||
@@ -1351,6 +1351,20 @@ add_fp_unittest(
|
||||
libc.src.__support.FPUtil.fp_bits
|
||||
)
|
||||
|
||||
# Unit test for the double precision log1p. NEED_MPFR is set because the test
# source compares results against MPFR through the MPFRUtils matchers.
add_fp_unittest(
  log1p_test
  NEED_MPFR
  SUITE
    libc_math_unittests
  SRCS
    log1p_test.cpp
  DEPENDS
    libc.src.errno.errno
    libc.include.math
    libc.src.math.log1p
    libc.src.__support.FPUtil.fp_bits
)
|
||||
|
||||
add_fp_unittest(
|
||||
log1pf_test
|
||||
NEED_MPFR
|
||||
|
||||
166
libc/test/src/math/log1p_test.cpp
Normal file
166
libc/test/src/math/log1p_test.cpp
Normal file
@@ -0,0 +1,166 @@
|
||||
//===-- Unittests for log1p -----------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/__support/FPUtil/FPBits.h"
|
||||
#include "src/errno/libc_errno.h"
|
||||
#include "src/math/log1p.h"
|
||||
#include "test/UnitTest/FPMatcher.h"
|
||||
#include "test/UnitTest/Test.h"
|
||||
#include "utils/MPFRWrapper/MPFRUtils.h"
|
||||
#include <math.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace mpfr = __llvm_libc::testing::mpfr;
|
||||
using __llvm_libc::testing::tlog;
|
||||
|
||||
DECLARE_SPECIAL_CONSTANTS(double)
|
||||
|
||||
// Checks log1p on the special inputs: NaN, both infinities, both signed
// zeros, the pole at x = -1 and an argument below -1.
TEST(LlvmLibcLog1pTest, SpecialNumbers) {
  // NaN and +inf are passed straight through.
  EXPECT_FP_EQ(aNaN, __llvm_libc::log1p(aNaN));
  EXPECT_FP_EQ(inf, __llvm_libc::log1p(inf));

  // Arguments below -1 must produce NaN together with FE_INVALID.
  EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log1p(neg_inf), FE_INVALID);
  EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log1p(-2.0), FE_INVALID);

  // A zero argument keeps its sign in the result.
  EXPECT_FP_EQ(zero, __llvm_libc::log1p(0.0));
  EXPECT_FP_EQ(neg_zero, __llvm_libc::log1p(-0.0));

  // x = -1 is the pole: -inf is expected together with FE_DIVBYZERO.
  EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::log1p(-1.0), FE_DIVBYZERO);
}
|
||||
|
||||
// Runs log1p on a fixed list of inputs, given as raw 64-bit patterns that are
// converted to double through FPBits, and compares each result against MPFR
// in all rounding modes with a tolerance argument of 0.5.
TEST(LlvmLibcLog1pTest, TrickyInputs) {
  constexpr uint64_t INPUT_BITS[] = {
      // Powers of ten from 10^0 up to 10^22.
      0x3ff0000000000000, // x = 1.0
      0x4024000000000000, // x = 10.0
      0x4059000000000000, // x = 10^2
      0x408f400000000000, // x = 10^3
      0x40c3880000000000, // x = 10^4
      0x40f86a0000000000, // x = 10^5
      0x412e848000000000, // x = 10^6
      0x416312d000000000, // x = 10^7
      0x4197d78400000000, // x = 10^8
      0x41cdcd6500000000, // x = 10^9
      0x4202a05f20000000, // x = 10^10
      0x42374876e8000000, // x = 10^11
      0x426d1a94a2000000, // x = 10^12
      0x42a2309ce5400000, // x = 10^13
      0x42d6bcc41e900000, // x = 10^14
      0x430c6bf526340000, // x = 10^15
      0x4341c37937e08000, // x = 10^16
      0x4376345785d8a000, // x = 10^17
      0x43abc16d674ec800, // x = 10^18
      0x43e158e460913d00, // x = 10^19
      0x4415af1d78b58c40, // x = 10^20
      0x444b1ae4d6e2ef50, // x = 10^21
      0x4480f0cf064dd592, // x = 10^22
      // Further hard-coded bit patterns (no decimal value is recorded here).
      0x3fefffffffef06ad, 0x3fefde0f22c7d0eb, 0x225e7812faadb32f,
      0x3fee1076964c2903, 0x3fdfe93fff7fceb0, 0x3ff012631ad8df10,
      0x3fefbfdaa448ed98, 0x3fd00a8cefe9a5f8, 0x3fd0b4d870eb22f8,
      0x3c90c40cef04efb5, 0x449d2ccad399848e, 0x4aa12ccdffd9d2ec,
      0x5656f070b92d36ce, 0x6db06dcb74f76bcc, 0x7f1954e72ffd4596,
      0x5671e2f1628093e4, 0x73dac56e2bf1a951, 0x8001bc6879ea14c5,
  };

  for (uint64_t bits : INPUT_BITS) {
    double x = double(FPBits(bits));
    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log1p, x,
                                   __llvm_libc::log1p(x), 0.5);
  }
}
|
||||
|
||||
// Checks log1p(2^e) against MPFR in all rounding modes for every exponent e
// from -1074 up to 1023, starting at 0x1.0p-1074 and doubling each time.
TEST(LlvmLibcLog1pTest, AllExponents) {
  constexpr int FIRST_EXPONENT = -1074;
  constexpr int LAST_EXPONENT = 1023;

  double x = 0x1.0p-1074;
  for (int exponent = FIRST_EXPONENT; exponent <= LAST_EXPONENT; ++exponent) {
    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log1p, x,
                                   __llvm_libc::log1p(x), 0.5);
    // Move on to the next power of two.
    x *= 2.0;
  }
}
|
||||
|
||||
// Samples log1p over several ranges of non-negative doubles, in each of the
// four rounding modes, and compares every finite result against MPFR.
// Per range and rounding mode it logs how many samples failed and an upper
// bound on the observed error.
TEST(LlvmLibcLog1pTest, InDoubleRange) {
  // Number of steps taken across each range of bit patterns; each range is
  // visited at COUNT + 1 evenly spaced bit patterns.
  constexpr uint64_t COUNT = 1234561;

  // Tests the bit patterns start, start + step, ... under one rounding mode.
  //   start, stop   - bit patterns delimiting the range to sample.
  //   rounding_mode - rounding mode used for both log1p and the MPFR check.
  auto test = [&](uint64_t start, uint64_t stop,
                  mpfr::RoundingMode rounding_mode) {
    // Kept alive for the whole lambda body; presumably this installs
    // rounding_mode for its lifetime (see MPFRUtils.h to confirm).
    mpfr::ForceRoundingMode force_rounding(rounding_mode);
    uint64_t fails = 0; // samples that did not match at a tolerance of 0.5
    uint64_t count = 0; // samples with a finite result that were compared
    uint64_t cc = 0;    // samples on which log1p was actually called
    // Input and result of the most recent sample that forced `tol` to grow.
    // Both are initialised so that no indeterminate value can ever be read.
    double mx = 0.0, mr = 0.0;
    double tol = 0.5;

    uint64_t step = (stop - start) / COUNT;

    for (uint64_t i = 0, v = start; i <= COUNT; ++i, v += step) {
      double x = FPBits(v).get_val();
      // Only finite, non-negative inputs are exercised by this test.
      if (isnan(x) || isinf(x) || x < 0.0)
        continue;
      libc_errno = 0;
      double result = __llvm_libc::log1p(x);
      ++cc;
      if (isnan(result) || isinf(result))
        continue;

      ++count;
      if (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Log1p, x,
                                               result, 0.5, rounding_mode)) {
        ++fails;
        // Keep doubling the tolerance until this sample passes, remembering
        // the sample that required the increase.
        while (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(
            mpfr::Operation::Log1p, x, result, tol, rounding_mode)) {
          mx = x;
          mr = result;
          tol *= 2.0;
        }
      }
    }
    tlog << " Log1p failed: " << fails << "/" << count << "/" << cc
         << " tests.\n";
    tlog << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
    if (fails) {
      // Re-check the recorded sample with the non-silent matcher so that the
      // test itself is marked as failed.
      EXPECT_MPFR_MATCH(mpfr::Operation::Log1p, mx, mr, 0.5, rounding_mode);
    }
  };

  // Runs `test` over [start, stop] once per rounding mode; start_str and
  // stop_str are only used to label the log output.
  auto test_all_rounding = [&](uint64_t start, uint64_t stop,
                               const char *start_str, const char *stop_str) {
    tlog << "\n=== Test in range [" << start_str << ", " << stop_str
         << "] ===\n";

    tlog << "\n Test Rounding To Nearest...\n";
    test(start, stop, mpfr::RoundingMode::Nearest);

    tlog << "\n Test Rounding Downward...\n";
    test(start, stop, mpfr::RoundingMode::Downward);

    tlog << "\n Test Rounding Upward...\n";
    test(start, stop, mpfr::RoundingMode::Upward);

    tlog << "\n Test Rounding Toward Zero...\n";
    test(start, stop, mpfr::RoundingMode::TowardZero);
  };

  test_all_rounding(0x0000'0000'0000'0001ULL, 0x0010'0000'0000'0000ULL,
                    "2^-1074", "2^-1022");

  test_all_rounding(0x39B0'0000'0000'0000ULL, 0x3A50'0000'0000'0000ULL,
                    "2^-100", "2^-90");

  test_all_rounding(0x3CD0'0000'0000'0000ULL, 0x3D20'0000'0000'0000ULL, "2^-50",
                    "2^-45");

  test_all_rounding(0x3E10'0000'0000'0000ULL, 0x3E40'0000'0000'0000ULL, "2^-30",
                    "2^-27");

  test_all_rounding(0x3FD0'0000'0000'0000ULL, 0x4010'0000'0000'0000ULL, "0.25",
                    "4.0");

  test_all_rounding(0x4630'0000'0000'0000ULL, 0x4670'0000'0000'0000ULL, "2^100",
                    "2^104");

  test_all_rounding(0x7FD0'0000'0000'0000ULL, 0x7FF0'0000'0000'0000ULL,
                    "2^1022", "2^1024");
}
|
||||
Reference in New Issue
Block a user