mirror of
https://github.com/intel/llvm.git
synced 2026-01-16 05:32:28 +08:00
[libc][stdfix] Implement fxdivi functions (rdivi) (#154914)
This PR includes only one of the fxdivi functions (rdivi). It uses a polynomial function for initial approximation followed by 4 newton-raphson iterations to calculate the reciprocal and finally multiplies the numerator with it to get the result. --------- Signed-off-by: Shreeyash Pandey <shreeyash335@gmail.com>
This commit is contained in:
@@ -1022,6 +1022,7 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
|
||||
libc.src.stdfix.idivulr
|
||||
libc.src.stdfix.idivuk
|
||||
libc.src.stdfix.idivulk
|
||||
libc.src.stdfix.rdivi
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -1058,6 +1058,7 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
|
||||
libc.src.stdfix.idivulr
|
||||
libc.src.stdfix.idivuk
|
||||
libc.src.stdfix.idivulk
|
||||
libc.src.stdfix.rdivi
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@ The following functions are included in the ISO/IEC TR 18037:2008 standard.
|
||||
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+
|
||||
| muli | | | | | | | | | | | | |
|
||||
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+
|
||||
| \*divi | | | | | | | | | | | | |
|
||||
| \*divi | | | | |check| | | | | | | | | |
|
||||
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+
|
||||
| round | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| |
|
||||
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+
|
||||
|
||||
@@ -544,3 +544,11 @@ functions:
|
||||
arguments:
|
||||
- type: unsigned long accum
|
||||
guard: LIBC_COMPILER_HAS_FIXED_POINT
|
||||
- name: rdivi
|
||||
standards:
|
||||
- stdc_ext
|
||||
return_type: fract
|
||||
arguments:
|
||||
- type: int
|
||||
- type: int
|
||||
guard: LIBC_COMPILER_HAS_FIXED_POINT
|
||||
|
||||
@@ -10,9 +10,11 @@
|
||||
#define LLVM_LIBC_SRC___SUPPORT_FIXED_POINT_FX_BITS_H
|
||||
|
||||
#include "include/llvm-libc-macros/stdfix-macros.h"
|
||||
#include "src/__support/CPP/algorithm.h"
|
||||
#include "src/__support/CPP/bit.h"
|
||||
#include "src/__support/CPP/limits.h" // numeric_limits
|
||||
#include "src/__support/CPP/type_traits.h"
|
||||
#include "src/__support/libc_assert.h"
|
||||
#include "src/__support/macros/attributes.h" // LIBC_INLINE
|
||||
#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
|
||||
#include "src/__support/macros/null_check.h" // LIBC_CRASH_ON_VALUE
|
||||
@@ -21,6 +23,8 @@
|
||||
|
||||
#include "fx_rep.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LIBC_COMPILER_HAS_FIXED_POINT
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
@@ -224,6 +228,113 @@ idiv(T x, T y) {
|
||||
return static_cast<XType>(result);
|
||||
}
|
||||
|
||||
LIBC_INLINE long accum nrstep(long accum d, long accum x0) {
|
||||
auto v = x0 * (2.lk - (d * x0));
|
||||
return v;
|
||||
}
|
||||
|
||||
// Divide the two integers and return a fixed_point value
|
||||
//
|
||||
// For reference, see:
|
||||
// https://en.wikipedia.org/wiki/Division_algorithm#Newton%E2%80%93Raphson_division
|
||||
// https://stackoverflow.com/a/9231996
|
||||
|
||||
template <typename XType> LIBC_INLINE constexpr XType divi(int n, int d) {
|
||||
// If the value of the second operand of the / operator is zero, the
|
||||
// behavior is undefined. Ref: ISO/IEC TR 18037:2008(E) p.g. 16
|
||||
LIBC_CRASH_ON_VALUE(d, 0);
|
||||
|
||||
if (LIBC_UNLIKELY(n == 0)) {
|
||||
return FXRep<XType>::ZERO();
|
||||
}
|
||||
auto is_power_of_two = [](int n) { return (n > 0) && ((n & (n - 1)) == 0); };
|
||||
long accum max_val = static_cast<long accum>(FXRep<XType>::MAX());
|
||||
long accum min_val = static_cast<long accum>(FXRep<XType>::MIN());
|
||||
|
||||
if (is_power_of_two(cpp::abs(d))) {
|
||||
int k = cpp::countr_zero<uint32_t>(static_cast<uint32_t>(cpp::abs(d)));
|
||||
constexpr int F = FXRep<XType>::FRACTION_LEN;
|
||||
int64_t scaled_n = static_cast<int64_t>(n) << F;
|
||||
int64_t res64 = scaled_n >> k;
|
||||
constexpr int TOTAL_BITS = sizeof(XType) * 8;
|
||||
const int64_t max_limit = (1LL << (TOTAL_BITS - 1)) - 1;
|
||||
const int64_t min_limit = -(1LL << (TOTAL_BITS - 1));
|
||||
if (res64 > max_limit) {
|
||||
return FXRep<XType>::MAX();
|
||||
} else if (res64 < min_limit) {
|
||||
return FXRep<XType>::MIN();
|
||||
}
|
||||
long accum res_accum =
|
||||
static_cast<long accum>(res64) / static_cast<long accum>(1 << F);
|
||||
res_accum = (d < 0) ? static_cast<long accum>(-1) * res_accum : res_accum;
|
||||
if (res_accum > max_val) {
|
||||
return FXRep<XType>::MAX();
|
||||
} else if (res_accum < min_val) {
|
||||
return FXRep<XType>::MIN();
|
||||
}
|
||||
return static_cast<XType>(res_accum);
|
||||
}
|
||||
|
||||
bool result_is_negative = ((n < 0) != (d < 0));
|
||||
int64_t n64 = static_cast<int64_t>(n);
|
||||
int64_t d64 = static_cast<int64_t>(d);
|
||||
|
||||
uint64_t nv = static_cast<uint64_t>(n64 < 0 ? -n64 : n64);
|
||||
uint64_t dv = static_cast<uint64_t>(d64 < 0 ? -d64 : d64);
|
||||
|
||||
if (d == INT_MIN) {
|
||||
nv <<= 1;
|
||||
dv >>= 1;
|
||||
}
|
||||
|
||||
uint32_t clz = cpp::countl_zero<uint32_t>(static_cast<uint32_t>(dv)) - 1;
|
||||
uint64_t scaled_val = dv << clz;
|
||||
// Scale denominator to be in the range of [0.5,1]
|
||||
FXBits<long accum> d_scaled{scaled_val};
|
||||
uint64_t scaled_val_n = nv << clz;
|
||||
// Scale the numerator as much as the denominator to maintain correctness of
|
||||
// the original equation
|
||||
FXBits<long accum> n_scaled{scaled_val_n};
|
||||
long accum n_scaled_val = n_scaled.get_val();
|
||||
long accum d_scaled_val = d_scaled.get_val();
|
||||
// x0 = (48/17) - (32/17) * d_n
|
||||
long accum a = 0x2.d89d89d8p0lk; // 48/17 = 2.8235294...
|
||||
long accum b = 0x1.e1e1e1e1p0lk; // 32/17 = 1.8823529...
|
||||
// Error of the initial approximation, as derived
|
||||
// from the wikipedia article is
|
||||
// E0 = 1/17 = 0.059 (5.9%)
|
||||
long accum initial_approx = a - (b * d_scaled_val);
|
||||
// Since, 0.5 <= d_scaled_val <= 1.0, 0.9412 <= initial_approx <= 1.88235
|
||||
LIBC_ASSERT((initial_approx >= 0x0.78793dd9p0lk) &&
|
||||
(initial_approx <= 0x1.f0f0d845p0lk));
|
||||
// Each newton-raphson iteration will square the error, due
|
||||
// to quadratic convergence. So,
|
||||
// E1 = (0.059)^2 = 0.0034
|
||||
long accum val = nrstep(d_scaled_val, initial_approx);
|
||||
if constexpr (FXRep<XType>::FRACTION_LEN > 8) {
|
||||
// E2 = 0.0000121
|
||||
val = nrstep(d_scaled_val, val);
|
||||
if constexpr (FXRep<XType>::FRACTION_LEN > 16) {
|
||||
// E3 = 1.468e−10
|
||||
val = nrstep(d_scaled_val, val);
|
||||
}
|
||||
}
|
||||
long accum res = n_scaled_val * val;
|
||||
|
||||
if (result_is_negative) {
|
||||
res *= static_cast<long accum>(-1);
|
||||
}
|
||||
|
||||
// Per clause 7.18a.6.1, saturate values on overflow
|
||||
if (res > max_val) {
|
||||
return FXRep<XType>::MAX();
|
||||
} else if (res < min_val) {
|
||||
return FXRep<XType>::MIN();
|
||||
} else {
|
||||
return static_cast<XType>(res);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace fixed_point
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
|
||||
@@ -89,6 +89,20 @@ foreach(suffix IN ITEMS r lr k lk ur ulr uk ulk)
|
||||
)
|
||||
endforeach()
|
||||
|
||||
foreach(suffix IN ITEMS r)
|
||||
add_entrypoint_object(
|
||||
${suffix}divi
|
||||
HDRS
|
||||
${suffix}divi.h
|
||||
SRCS
|
||||
${suffix}divi.cpp
|
||||
COMPILE_OPTIONS
|
||||
${libc_opt_high_flag}
|
||||
DEPENDS
|
||||
libc.src.__support.fixed_point.fx_bits
|
||||
)
|
||||
endforeach()
|
||||
|
||||
add_entrypoint_object(
|
||||
uhksqrtus
|
||||
HDRS
|
||||
|
||||
21
libc/src/stdfix/rdivi.cpp
Normal file
21
libc/src/stdfix/rdivi.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
//===-- Implementation of rdivi function ---------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "rdivi.h"
|
||||
#include "include/llvm-libc-macros/stdfix-macros.h" // fract
|
||||
#include "src/__support/common.h" // LLVM_LIBC_FUNCTION
|
||||
#include "src/__support/fixed_point/fx_bits.h" // fixed_point
|
||||
#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
|
||||
LLVM_LIBC_FUNCTION(fract, rdivi, (int a, int b)) {
|
||||
return fixed_point::divi<fract>(a, b);
|
||||
}
|
||||
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
21
libc/src/stdfix/rdivi.h
Normal file
21
libc/src/stdfix/rdivi.h
Normal file
@@ -0,0 +1,21 @@
|
||||
//===-- Implementation header for rdivi ------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SRC_STDFIX_RDIVI_H
|
||||
#define LLVM_LIBC_SRC_STDFIX_RDIVI_H
|
||||
|
||||
#include "include/llvm-libc-macros/stdfix-macros.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
|
||||
fract rdivi(int a, int b);
|
||||
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
#endif // LLVM_LIBC_SRC_STDFIX_RDIVI_H
|
||||
@@ -120,6 +120,22 @@ foreach(suffix IN ITEMS r lr k lk ur ulr uk ulk)
|
||||
)
|
||||
endforeach()
|
||||
|
||||
foreach(suffix IN ITEMS r)
|
||||
add_libc_test(
|
||||
${suffix}divi_test
|
||||
SUITE
|
||||
libc-stdfix-tests
|
||||
HDRS
|
||||
DivITest.h
|
||||
SRCS
|
||||
${suffix}divi_test.cpp
|
||||
DEPENDS
|
||||
libc.src.stdfix.${suffix}divi
|
||||
libc.src.__support.fixed_point.fx_bits
|
||||
libc.hdr.signal_macros
|
||||
)
|
||||
endforeach()
|
||||
|
||||
add_libc_test(
|
||||
uhksqrtus_test
|
||||
SUITE
|
||||
|
||||
74
libc/test/src/stdfix/DivITest.h
Normal file
74
libc/test/src/stdfix/DivITest.h
Normal file
@@ -0,0 +1,74 @@
|
||||
//===-- Utility class to test fxdivi functions ------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/__support/CPP/type_traits.h"
|
||||
#include "src/__support/fixed_point/fx_bits.h"
|
||||
#include "src/__support/fixed_point/fx_rep.h"
|
||||
#include "test/UnitTest/Test.h"
|
||||
|
||||
template <typename XType> XType get_epsilon() = delete;
|
||||
template <> fract get_epsilon() { return FRACT_EPSILON; }
|
||||
template <> unsigned fract get_epsilon() { return UFRACT_EPSILON; }
|
||||
template <> long fract get_epsilon() { return LFRACT_EPSILON; }
|
||||
|
||||
template <typename XType>
|
||||
class DivITest : public LIBC_NAMESPACE::testing::Test {
|
||||
using FXRep = LIBC_NAMESPACE::fixed_point::FXRep<XType>;
|
||||
using FXBits = LIBC_NAMESPACE::fixed_point::FXBits<XType>;
|
||||
|
||||
public:
|
||||
typedef XType (*DivIFunc)(int, int);
|
||||
|
||||
void testBasic(DivIFunc func) {
|
||||
XType epsilon = get_epsilon<XType>();
|
||||
EXPECT_LT((func(2, 3) - 0.666656494140625r), epsilon);
|
||||
EXPECT_LT((func(3, 4) - 0.75r), epsilon);
|
||||
EXPECT_LT((func(1043, 2764) - 0.3773516643r), epsilon);
|
||||
EXPECT_LT((func(60000, 720293) - 0.08329943509r), epsilon);
|
||||
|
||||
EXPECT_EQ(func(128, 256), 0.5r);
|
||||
EXPECT_EQ(func(1, 2), 0.5r);
|
||||
EXPECT_EQ(func(1, 4), 0.25r);
|
||||
EXPECT_EQ(func(1, 8), 0.125r);
|
||||
EXPECT_EQ(func(1, 16), 0.0625r);
|
||||
|
||||
EXPECT_EQ(func(-1, 2), -0.5r);
|
||||
EXPECT_EQ(func(1, -4), -0.25r);
|
||||
EXPECT_EQ(func(-1, 8), -0.125r);
|
||||
EXPECT_EQ(func(1, -16), -0.0625r);
|
||||
}
|
||||
|
||||
void testSpecial(DivIFunc func) {
|
||||
XType epsilon = get_epsilon<XType>();
|
||||
EXPECT_EQ(func(0, 10), 0.r);
|
||||
EXPECT_EQ(func(0, -10), 0.r);
|
||||
EXPECT_EQ(func(-(1 << FRACT_FBIT), 1 << FRACT_FBIT), FRACT_MIN);
|
||||
EXPECT_EQ(func((1 << FRACT_FBIT) - 1, 1 << FRACT_FBIT), FRACT_MAX);
|
||||
// From Section 7.18a.6.1, functions returning a fixed-point value, the
|
||||
// return value is saturated on overflow.
|
||||
EXPECT_EQ(func(INT_MAX, INT_MAX), FRACT_MAX);
|
||||
EXPECT_LT(func(INT_MAX - 1, INT_MAX) - 0.99999999r, epsilon);
|
||||
EXPECT_EQ(func(INT_MIN, INT_MAX), FRACT_MIN);
|
||||
// Expecting 0 here as fract is not precise enough to
|
||||
// handle 1/INT_MAX
|
||||
EXPECT_LT(func(1, INT_MAX) - 0.r, epsilon);
|
||||
// This results in 1.1739, which should be saturated to FRACT_MAX
|
||||
EXPECT_EQ(func(27, 23), FRACT_MAX);
|
||||
|
||||
EXPECT_EQ(func(INT_MIN, 1), FRACT_MIN);
|
||||
EXPECT_LT(func(1, INT_MIN) - 0.r, epsilon);
|
||||
|
||||
EXPECT_EQ(func(INT_MIN, INT_MIN), 1.r);
|
||||
}
|
||||
};
|
||||
|
||||
#define LIST_DIVI_TESTS(Name, XType, func) \
|
||||
using LlvmLibc##Name##diviTest = DivITest<XType>; \
|
||||
TEST_F(LlvmLibc##Name##diviTest, Basic) { testBasic(&func); } \
|
||||
TEST_F(LlvmLibc##Name##diviTest, Special) { testSpecial(&func); } \
|
||||
static_assert(true, "Require semicolon.")
|
||||
14
libc/test/src/stdfix/rdivi_test.cpp
Normal file
14
libc/test/src/stdfix/rdivi_test.cpp
Normal file
@@ -0,0 +1,14 @@
|
||||
//===-- Unittests for rdivi -----------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "DivITest.h"
|
||||
|
||||
#include "llvm-libc-macros/stdfix-macros.h" // fract
|
||||
#include "src/stdfix/rdivi.h"
|
||||
|
||||
LIST_DIVI_TESTS(r, fract, LIBC_NAMESPACE::rdivi);
|
||||
Reference in New Issue
Block a user