[libc][math] Improve performance test framework (#134501)

- Merges the `BinaryOpSingleOutputPerf.h` and `SingleInputSingleOutputPerf.h` files into a unified `PerfTest.h` and updates all performance tests to use it. - Improves the output printed to the log file for tests. - Removes the unused `run_diff` method and the redundant `run_perf` call in `BINARY_INPUT_SINGLE_OUTPUT_PERF_EX` (previously `BINARY_OP_SINGLE_OUTPUT_PERF_EX`). - Changes `BINARY_INPUT_SINGLE_OUTPUT_PERF_EX` and `SINGLE_INPUT_SINGLE_OUTPUT_PERF` to not define `main`.
2026-01-26 12:26:52 +08:00 · 2025-04-24 16:52:21 +05:30
parent f218cd28d4
commit dde00f5e22
39 changed files with 424 additions and 475 deletions
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -1,148 +0,0 @@
-//===-- Common utility class for differential analysis --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/__support/CPP/algorithm.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/macros/config.h"
-#include "test/src/math/performance_testing/Timer.h"
-
-#include <cstddef>
-#include <fstream>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace testing {
-template <typename OutputType, typename InputType>
-class BinaryOpSingleOutputPerf {
-  using FPBits = fputil::FPBits<OutputType>;
-  using StorageType = typename FPBits::StorageType;
-  static constexpr StorageType UIntMax =
-      cpp::numeric_limits<StorageType>::max();
-
-public:
-  typedef OutputType Func(InputType, InputType);
-
-  static void run_perf_in_range(Func myFunc, Func otherFunc,
-                                StorageType startingBit, StorageType endingBit,
-                                size_t N, size_t rounds, std::ofstream &log) {
-    if (sizeof(StorageType) <= sizeof(size_t))
-      N = cpp::min(N, static_cast<size_t>(endingBit - startingBit));
-
-    auto runner = [=](Func func) {
-      [[maybe_unused]] volatile OutputType result;
-      if (endingBit < startingBit) {
-        return;
-      }
-
-      StorageType step = (endingBit - startingBit) / N;
-      for (size_t i = 0; i < rounds; i++) {
-        for (StorageType bitsX = startingBit, bitsY = endingBit;;
-             bitsX += step, bitsY -= step) {
-          InputType x = FPBits(bitsX).get_val();
-          InputType y = FPBits(bitsY).get_val();
-          result = func(x, y);
-          if (endingBit - bitsX < step) {
-            break;
-          }
-        }
-      }
-    };
-
-    Timer timer;
-    timer.start();
-    runner(myFunc);
-    timer.stop();
-
-    double my_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
-    log << "-- My function --\n";
-    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << my_average << " ns/op \n";
-    log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n";
-
-    timer.start();
-    runner(otherFunc);
-    timer.stop();
-
-    double other_average =
-        static_cast<double>(timer.nanoseconds()) / N / rounds;
-    log << "-- Other function --\n";
-    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << other_average << " ns/op \n";
-    log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
-
-    log << "-- Average runtime ratio --\n";
-    log << "     Mine / Other's  : " << my_average / other_average << " \n";
-  }
-
-  static void run_perf(Func myFunc, Func otherFunc, int rounds,
-                       const char *logFile) {
-    std::ofstream log(logFile);
-    log << " Performance tests with inputs in denormal range:\n";
-    run_perf_in_range(myFunc, otherFunc, /* startingBit= */ StorageType(0),
-                      /* endingBit= */ FPBits::max_subnormal().uintval(),
-                      1'000'001, rounds, log);
-    log << "\n Performance tests with inputs in normal range:\n";
-    run_perf_in_range(myFunc, otherFunc,
-                      /* startingBit= */ FPBits::min_normal().uintval(),
-                      /* endingBit= */ FPBits::max_normal().uintval(),
-                      1'000'001, rounds, log);
-    log << "\n Performance tests with inputs in normal range with exponents "
-           "close to each other:\n";
-    run_perf_in_range(
-        myFunc, otherFunc,
-        /* startingBit= */ FPBits(OutputType(0x1.0p-10)).uintval(),
-        /* endingBit= */ FPBits(OutputType(0x1.0p+10)).uintval(), 1'000'001,
-        rounds, log);
-  }
-
-  static void run_diff(Func myFunc, Func otherFunc, const char *logFile) {
-    uint64_t diffCount = 0;
-    std::ofstream log(logFile);
-    log << " Diff tests with inputs in denormal range:\n";
-    diffCount += run_diff_in_range(
-        myFunc, otherFunc, /* startingBit= */ StorageType(0),
-        /* endingBit= */ FPBits::max_subnormal().uintval(), 1'000'001, log);
-    log << "\n Diff tests with inputs in normal range:\n";
-    diffCount += run_diff_in_range(
-        myFunc, otherFunc,
-        /* startingBit= */ FPBits::min_normal().uintval(),
-        /* endingBit= */ FPBits::max_normal().uintval(), 100'000'001, log);
-    log << "\n Diff tests with inputs in normal range with exponents "
-           "close to each other:\n";
-    diffCount += run_diff_in_range(
-        myFunc, otherFunc,
-        /* startingBit= */ FPBits(OutputType(0x1.0p-10)).uintval(),
-        /* endingBit= */ FPBits(OutputType(0x1.0p+10)).uintval(), 10'000'001,
-        log);
-
-    log << "Total number of differing results: " << diffCount << '\n';
-  }
-};
-
-} // namespace testing
-} // namespace LIBC_NAMESPACE_DECL
-
-#define BINARY_OP_SINGLE_OUTPUT_PERF(OutputType, InputType, myFunc, otherFunc, \
-                                     filename)                                 \
-  int main() {                                                                 \
-    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
-        OutputType, InputType>::run_perf(&myFunc, &otherFunc, 1, filename);    \
-    return 0;                                                                  \
-  }
-
-#define BINARY_OP_SINGLE_OUTPUT_PERF_EX(OutputType, InputType, myFunc,         \
-                                        otherFunc, rounds, filename)           \
-  {                                                                            \
-    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
-        OutputType, InputType>::run_perf(&myFunc, &otherFunc, rounds,          \
-                                         filename);                            \
-    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
-        OutputType, InputType>::run_perf(&myFunc, &otherFunc, rounds,          \
-                                         filename);                            \
-  }
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -92,18 +92,9 @@ function(add_perf_binary target_name)
 endfunction()

 add_header_library(
-  single_input_single_output_diff
+  perf_test
  HDRS
-    SingleInputSingleOutputPerf.h
-  DEPENDS
-    libc.src.__support.CPP.algorithm
-    libc.src.__support.FPUtil.fp_bits
-)
-
-add_header_library(
-  binary_op_single_output_diff
-  HDRS
-    BinaryOpSingleOutputPerf.h
+    PerfTest.h
  DEPENDS
    libc.src.__support.CPP.algorithm
    libc.src.__support.FPUtil.fp_bits
@@ -114,7 +105,7 @@ add_perf_binary(
  SRCS
    sinf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.sinf
  COMPILE_OPTIONS
    -fno-builtin
@@ -125,7 +116,7 @@ add_perf_binary(
  SRCS
    cosf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.cosf
  COMPILE_OPTIONS
    -fno-builtin
@@ -136,7 +127,7 @@ add_perf_binary(
  SRCS
    expm1f_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.expm1f
  COMPILE_OPTIONS
    -fno-builtin
@@ -147,7 +138,7 @@ add_perf_binary(
  SRCS
    ceilf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.ceilf
  COMPILE_OPTIONS
    -fno-builtin
@@ -158,7 +149,7 @@ add_perf_binary(
  SRCS
    exp10f16_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.exp10f16
  COMPILE_OPTIONS
    -fno-builtin
@@ -169,7 +160,7 @@ add_perf_binary(
  SRCS
    exp2f_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.exp2f
  COMPILE_OPTIONS
    -fno-builtin
@@ -180,7 +171,7 @@ add_perf_binary(
  SRCS
    exp2f16_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.exp2f16
  COMPILE_OPTIONS
    -fno-builtin
@@ -191,7 +182,7 @@ add_perf_binary(
  SRCS
    expf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.expf
  COMPILE_OPTIONS
    -fno-builtin
@@ -202,7 +193,7 @@ add_perf_binary(
  SRCS
    expf16_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.expf16
  COMPILE_OPTIONS
    -fno-builtin
@@ -213,7 +204,7 @@ add_perf_binary(
  SRCS
    fabsf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.fabsf
  COMPILE_OPTIONS
    -fno-builtin
@@ -224,7 +215,7 @@ add_perf_binary(
  SRCS
    floorf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.floorf
  COMPILE_OPTIONS
    -fno-builtin
@@ -235,7 +226,7 @@ add_perf_binary(
  SRCS
    log10f_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.log10f
  COMPILE_OPTIONS
    -fno-builtin
@@ -246,7 +237,7 @@ add_perf_binary(
  SRCS
    log1pf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.log1pf
  COMPILE_OPTIONS
    -fno-builtin
@@ -257,7 +248,7 @@ add_perf_binary(
  SRCS
    log2f_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.log2f
  COMPILE_OPTIONS
    -fno-builtin
@@ -268,7 +259,7 @@ add_perf_binary(
  SRCS
    logf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.logf
  COMPILE_OPTIONS
    -fno-builtin
@@ -279,7 +270,7 @@ add_perf_binary(
  SRCS
    logbf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.logbf
  COMPILE_OPTIONS
    -fno-builtin
@@ -290,7 +281,7 @@ add_perf_binary(
  SRCS
    nearbyintf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.nearbyintf
  COMPILE_OPTIONS
    -fno-builtin
@@ -301,7 +292,7 @@ add_perf_binary(
  SRCS
    rintf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.rintf
  COMPILE_OPTIONS
    -fno-builtin
@@ -312,7 +303,7 @@ add_perf_binary(
  SRCS
    roundf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.roundf
  COMPILE_OPTIONS
    -fno-builtin
@@ -323,7 +314,7 @@ add_perf_binary(
  SRCS
    sqrtf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.sqrtf
  COMPILE_OPTIONS
    -fno-builtin
@@ -334,7 +325,7 @@ add_perf_binary(
  SRCS
    truncf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.truncf
  COMPILE_OPTIONS
    -fno-builtin
@@ -345,7 +336,7 @@ add_perf_binary(
  SRCS
    hypotf16_perf.cpp
  DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
    libc.src.math.hypotf16
    libc.src.__support.FPUtil.fp_bits
  COMPILE_OPTIONS
@@ -357,7 +348,7 @@ add_perf_binary(
  SRCS
    hypotf_perf.cpp
  DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
    libc.src.math.hypotf
  COMPILE_OPTIONS
    -fno-builtin
@@ -368,7 +359,7 @@ add_perf_binary(
  SRCS
    hypot_perf.cpp
  DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
    libc.src.math.hypot
  COMPILE_OPTIONS
    -fno-builtin
@@ -379,7 +370,7 @@ add_perf_binary(
  SRCS
    fmodf_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.fmodf
  COMPILE_OPTIONS
    -fno-builtin
@@ -390,7 +381,7 @@ add_perf_binary(
  SRCS
    fmod_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.fmod
  COMPILE_OPTIONS
    -fno-builtin
@@ -401,7 +392,7 @@ add_perf_binary(
  SRCS
    fmodl_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.fmodl
  COMPILE_OPTIONS
    -fno-builtin
@@ -412,7 +403,7 @@ add_perf_binary(
  SRCS
    fmodf16_perf.cpp
  DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
    libc.src.math.fmodf16
    libc.src.__support.FPUtil.generic.fmod
    libc.src.__support.macros.properties.types
@@ -423,8 +414,9 @@ add_perf_binary(
  SRCS
    fmodf128_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.fmodf128
+    libc.src.__support.macros.properties.types
  COMPILE_OPTIONS
    -fno-builtin
 )
@@ -457,8 +449,7 @@ add_perf_binary(
  SRCS
    misc_basic_ops_perf.cpp
  DEPENDS
-    .binary_op_single_output_diff
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.copysignf
    libc.src.math.copysignf16
    libc.src.math.fabsf
@@ -472,7 +463,7 @@ add_perf_binary(
  SRCS
    max_min_funcs_perf.cpp
  DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
    libc.src.math.fmaxf
    libc.src.math.fmaxf16
    libc.src.math.fmaximumf
@@ -494,7 +485,7 @@ add_perf_binary(
  SRCS
    fmul_perf.cpp
  DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
    libc.src.math.fmul
    libc.src.__support.FPUtil.generic.mul
    libc.src.__support.FPUtil.fp_bits
@@ -507,7 +498,7 @@ add_perf_binary(
  SRCS
    fmull_perf.cpp
  DEPENDS
-    .binary_op_single_output_diff
+    .perf_test
    libc.src.math.fmull
  COMPILE_OPTIONS
    -fno-builtin
@@ -518,6 +509,6 @@ add_perf_binary(
  SRCS
    sqrtf128_perf.cpp
  DEPENDS
-    .single_input_single_output_diff
+    .perf_test
    libc.src.math.sqrtf128
 )
--- a/libc/test/src/math/performance_testing/PerfTest.h
+++ b/libc/test/src/math/performance_testing/PerfTest.h
@@ -0,0 +1,159 @@
+//===-- Common utility class for differential analysis --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/algorithm.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/config.h"
+#include "test/src/math/performance_testing/Timer.h"
+
+#include <cstddef>
+#include <fstream>
+
+namespace LIBC_NAMESPACE_DECL {
+namespace testing {
+template <typename OutputType, typename InputType> class PerfTest {
+  using FPBits = fputil::FPBits<OutputType>;
+  using StorageType = typename FPBits::StorageType;
+  static constexpr StorageType U_INT_MAX =
+      cpp::numeric_limits<StorageType>::max();
+
+public:
+  using BinaryFuncPtr = OutputType (*)(InputType, InputType);
+  using UnaryFuncPtr = OutputType (*)(InputType);
+
+  template <bool binary, typename Func>
+  static void run_perf_in_range(Func FuncA, Func FuncB, StorageType startingBit,
+                                StorageType endingBit, size_t N, size_t rounds,
+                                const char *name_a, const char *name_b,
+                                std::ofstream &log) {
+    if (sizeof(StorageType) <= sizeof(size_t))
+      N = cpp::min(N, static_cast<size_t>(endingBit - startingBit));
+
+    auto runner = [=](Func func) {
+      [[maybe_unused]] volatile OutputType result;
+      if (endingBit < startingBit) {
+        return;
+      }
+
+      StorageType step = (endingBit - startingBit) / N;
+      if (step == 0)
+        step = 1;
+      for (size_t i = 0; i < rounds; i++) {
+        for (StorageType bits_x = startingBit, bits_y = endingBit;;
+             bits_x += step, bits_y -= step) {
+          InputType x = FPBits(bits_x).get_val();
+          if constexpr (binary) {
+            InputType y = FPBits(bits_y).get_val();
+            result = func(x, y);
+          } else {
+            result = func(x);
+          }
+          if (endingBit - bits_x < step) {
+            break;
+          }
+        }
+      }
+    };
+
+    Timer timer;
+    timer.start();
+    runner(FuncA);
+    timer.stop();
+
+    double a_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
+    log << "-- Function A: " << name_a << " --\n";
+    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
+    log << "     Average runtime : " << a_average << " ns/op \n";
+    log << "     Ops per second  : "
+        << static_cast<uint64_t>(1'000'000'000.0 / a_average) << " op/s \n";
+
+    timer.start();
+    runner(FuncB);
+    timer.stop();
+
+    double b_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
+    log << "-- Function B: " << name_b << " --\n";
+    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
+    log << "     Average runtime : " << b_average << " ns/op \n";
+    log << "     Ops per second  : "
+        << static_cast<uint64_t>(1'000'000'000.0 / b_average) << " op/s \n";
+
+    log << "-- Average ops per second ratio --\n";
+    log << "     A / B  : " << b_average / a_average << " \n";
+  }
+
+  template <bool binary, typename Func>
+  static void run_perf(Func FuncA, Func FuncB, int rounds, const char *name_a,
+                       const char *name_b, const char *logFile) {
+    std::ofstream log(logFile);
+    log << " Performance tests with inputs in denormal range:\n";
+    run_perf_in_range<binary>(
+        FuncA, FuncB, /* startingBit= */ StorageType(0),
+        /* endingBit= */ FPBits::max_subnormal().uintval(), 1'000'001, rounds,
+        name_a, name_b, log);
+    log << "\n Performance tests with inputs in normal range:\n";
+    run_perf_in_range<binary>(FuncA, FuncB,
+                              /* startingBit= */ FPBits::min_normal().uintval(),
+                              /* endingBit= */ FPBits::max_normal().uintval(),
+                              1'000'001, rounds, name_a, name_b, log);
+    log << "\n Performance tests with inputs in normal range with exponents "
+           "close to each other:\n";
+    run_perf_in_range<binary>(
+        FuncA, FuncB,
+        /* startingBit= */ FPBits(OutputType(0x1.0p-10)).uintval(),
+        /* endingBit= */ FPBits(OutputType(0x1.0p+10)).uintval(), 1'000'001,
+        rounds, name_a, name_b, log);
+  }
+};
+
+} // namespace testing
+} // namespace LIBC_NAMESPACE_DECL
+
+#define BINARY_INPUT_SINGLE_OUTPUT_PERF(OutputType, InputType, FuncA, FuncB,   \
+                                        filename)                              \
+  {                                                                            \
+    using TargetFuncPtr =                                                      \
+        typename LIBC_NAMESPACE::testing::PerfTest<OutputType,                 \
+                                                   InputType>::BinaryFuncPtr;  \
+    LIBC_NAMESPACE::testing::PerfTest<OutputType, InputType>::run_perf<true>(  \
+        static_cast<TargetFuncPtr>(&FuncA),                                    \
+        static_cast<TargetFuncPtr>(&FuncB), 1, #FuncA, #FuncB, filename);      \
+    return 0;                                                                  \
+  }
+
+#define BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(OutputType, InputType, FuncA,       \
+                                           FuncB, rounds, filename)            \
+  {                                                                            \
+    using TargetFuncPtr =                                                      \
+        typename LIBC_NAMESPACE::testing::PerfTest<OutputType,                 \
+                                                   InputType>::BinaryFuncPtr;  \
+    LIBC_NAMESPACE::testing::PerfTest<OutputType, InputType>::run_perf<true>(  \
+        static_cast<TargetFuncPtr>(&FuncA),                                    \
+        static_cast<TargetFuncPtr>(&FuncB), rounds, #FuncA, #FuncB, filename); \
+    return 0;                                                                  \
+  }
+
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, FuncA, FuncB, filename)             \
+  {                                                                            \
+    using TargetFuncPtr =                                                      \
+        typename LIBC_NAMESPACE::testing::PerfTest<T, T>::UnaryFuncPtr;        \
+    LIBC_NAMESPACE::testing::PerfTest<T, T>::run_perf<false>(                  \
+        static_cast<TargetFuncPtr>(&FuncA),                                    \
+        static_cast<TargetFuncPtr>(&FuncB), 1, #FuncA, #FuncB, filename);      \
+    return 0;                                                                  \
+  }
+
+#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, FuncA, FuncB, rounds, filename)  \
+  {                                                                            \
+    using TargetFuncPtr =                                                      \
+        typename LIBC_NAMESPACE::testing::PerfTest<T, T>::UnaryFuncPtr;        \
+    LIBC_NAMESPACE::testing::PerfTest<T, T>::run_perf<false>(                  \
+        static_cast<TargetFuncPtr>(&FuncA),                                    \
+        static_cast<TargetFuncPtr>(&FuncB), rounds, #FuncA, #FuncB, filename); \
+    return 0;                                                                  \
+  }
--- a/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/SingleInputSingleOutputPerf.h
@@ -1,105 +0,0 @@
-//===-- Common utility class for differential analysis --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/__support/CPP/algorithm.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/macros/config.h"
-#include "test/src/math/performance_testing/Timer.h"
-
-#include <fstream>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace testing {
-
-template <typename T> class SingleInputSingleOutputPerf {
-  using FPBits = fputil::FPBits<T>;
-  using StorageType = typename FPBits::StorageType;
-  static constexpr StorageType UIntMax =
-      cpp::numeric_limits<StorageType>::max();
-
-public:
-  typedef T Func(T);
-
-  static void runPerfInRange(Func myFunc, Func otherFunc,
-                             StorageType startingBit, StorageType endingBit,
-                             size_t rounds, std::ofstream &log) {
-    size_t n = 10'010'001;
-    if (sizeof(StorageType) <= sizeof(size_t))
-      n = cpp::min(n, static_cast<size_t>(endingBit - startingBit));
-
-    auto runner = [=](Func func) {
-      StorageType step = (endingBit - startingBit) / n;
-      if (step == 0)
-        step = 1;
-      [[maybe_unused]] volatile T result;
-      for (size_t i = 0; i < rounds; i++) {
-        for (StorageType bits = startingBit; bits < endingBit; bits += step) {
-          T x = FPBits(bits).get_val();
-          result = func(x);
-        }
-      }
-    };
-
-    Timer timer;
-    timer.start();
-    runner(myFunc);
-    timer.stop();
-
-    double myAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
-    log << "-- My function --\n";
-    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << myAverage << " ns/op \n";
-    log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / myAverage) << " op/s \n";
-
-    timer.start();
-    runner(otherFunc);
-    timer.stop();
-
-    double otherAverage = static_cast<double>(timer.nanoseconds()) / n / rounds;
-    log << "-- Other function --\n";
-    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
-    log << "     Average runtime : " << otherAverage << " ns/op \n";
-    log << "     Ops per second  : "
-        << static_cast<uint64_t>(1'000'000'000.0 / otherAverage) << " op/s \n";
-
-    log << "-- Average runtime ratio --\n";
-    log << "     Mine / Other's  : " << myAverage / otherAverage << " \n";
-  }
-
-  static void runPerf(Func myFunc, Func otherFunc, size_t rounds,
-                      const char *logFile) {
-    std::ofstream log(logFile);
-    log << " Performance tests with inputs in denormal range:\n";
-    runPerfInRange(myFunc, otherFunc, /* startingBit= */ StorageType(0),
-                   /* endingBit= */ FPBits::max_subnormal().uintval(), rounds,
-                   log);
-    log << "\n Performance tests with inputs in normal range:\n";
-    runPerfInRange(myFunc, otherFunc,
-                   /* startingBit= */ FPBits::min_normal().uintval(),
-                   /* endingBit= */ FPBits::max_normal().uintval(), rounds,
-                   log);
-  }
-};
-
-} // namespace testing
-} // namespace LIBC_NAMESPACE_DECL
-
-#define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename)        \
-  int main() {                                                                 \
-    LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
-        &myFunc, &otherFunc, 1, filename);                                     \
-    return 0;                                                                  \
-  }
-
-#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, myFunc, otherFunc, rounds,       \
-                                           filename)                           \
-  {                                                                            \
-    LIBC_NAMESPACE::testing::SingleInputSingleOutputPerf<T>::runPerf(          \
-        &myFunc, &otherFunc, rounds, filename);                                \
-  }
--- a/libc/test/src/math/performance_testing/ceilf_perf.cpp
+++ b/libc/test/src/math/performance_testing/ceilf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/ceilf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::ceilf, ::ceilf,
-                                "ceilf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::ceilf, ::ceilf,
+                                  "ceilf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/cosf_perf.cpp
+++ b/libc/test/src/math/performance_testing/cosf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/cosf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::cosf, ::cosf,
-                                "cosf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::cosf, ::cosf,
+                                  "cosf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/exp10f16_perf.cpp
+++ b/libc/test/src/math/performance_testing/exp10f16_perf.cpp
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/exp10f16.h"

 // LLVM libc might be the only libc implementation with support for float16 math
--- a/libc/test/src/math/performance_testing/exp2f16_perf.cpp
+++ b/libc/test/src/math/performance_testing/exp2f16_perf.cpp
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/exp2f16.h"

 // LLVM libc might be the only libc implementation with support for float16 math
--- a/libc/test/src/math/performance_testing/exp2f_perf.cpp
+++ b/libc/test/src/math/performance_testing/exp2f_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/exp2f.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::exp2f, ::exp2f,
-                                "exp2f_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::exp2f, ::exp2f,
+                                  "exp2f_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/expf16_perf.cpp
+++ b/libc/test/src/math/performance_testing/expf16_perf.cpp
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/expf16.h"

 // LLVM libc might be the only libc implementation with support for float16 math
--- a/libc/test/src/math/performance_testing/expf_perf.cpp
+++ b/libc/test/src/math/performance_testing/expf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/expf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::expf, ::expf,
-                                "expf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::expf, ::expf,
+                                  "expf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/expm1f_perf.cpp
+++ b/libc/test/src/math/performance_testing/expm1f_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/expm1f.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::expm1f, ::expm1f,
-                                "expm1f_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::expm1f, ::expm1f,
+                                  "expm1f_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/fabsf_perf.cpp
+++ b/libc/test/src/math/performance_testing/fabsf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/fabsf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::fabsf, ::fabsf,
-                                "fabsf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::fabsf, ::fabsf,
+                                  "fabsf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/floorf_perf.cpp
+++ b/libc/test/src/math/performance_testing/floorf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/floorf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::floorf, ::floorf,
-                                "floorf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::floorf, ::floorf,
+                                  "floorf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/fmod_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmod_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/fmod.h"

 #include <math.h>

-BINARY_OP_SINGLE_OUTPUT_PERF(double, double, LIBC_NAMESPACE::fmod, ::fmod,
-                             "fmod_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(double, double, LIBC_NAMESPACE::fmod, ::fmod,
+                                  "fmod_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/fmodf128_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmodf128_perf.cpp
@@ -6,11 +6,14 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputDiff.h"
-
+#include "PerfTest.h"
+#include "src/__support/macros/properties/types.h"
 #include "src/math/fmodf128.h"

 #include <math.h>

-BINARY_OP_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::fmodf128, ::fmodf128,
-                             "fmodf128_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(float128, float128, LIBC_NAMESPACE::fmodf128,
+                                  ::fmodf128, "fmodf128_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/fmodf16_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmodf16_perf.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"

 #include "src/__support/FPUtil/generic/FMod.h"
 #include "src/__support/macros/properties/types.h"
@@ -16,12 +16,12 @@
 #define FMOD_FUNC(U) (LIBC_NAMESPACE::fputil::generic::FMod<float16, U>::eval)

 int main() {
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, FMOD_FUNC(uint16_t),
-                                  FMOD_FUNC(uint32_t), 5000,
-                                  "fmodf16_u16_vs_u32_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float16, float16, FMOD_FUNC(uint16_t),
+                                     FMOD_FUNC(uint32_t), 5000,
+                                     "fmodf16_u16_vs_u32_perf.log")

-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, FMOD_FUNC(uint16_t),
-                                  FMOD_FUNC(uint64_t), 5000,
-                                  "fmodf16_u16_vs_u64_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float16, float16, FMOD_FUNC(uint16_t),
+                                     FMOD_FUNC(uint64_t), 5000,
+                                     "fmodf16_u16_vs_u64_perf.log")
  return 0;
 }
--- a/libc/test/src/math/performance_testing/fmodf_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmodf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/fmodf.h"

 #include <math.h>

-BINARY_OP_SINGLE_OUTPUT_PERF(float, float, LIBC_NAMESPACE::fmodf, ::fmodf,
-                             "fmodf_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(float, float, LIBC_NAMESPACE::fmodf, ::fmodf,
+                                  "fmodf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/fmodl_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmodl_perf.cpp
@@ -6,11 +6,14 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputDiff.h"
-
+#include "PerfTest.h"
 #include "src/math/fmodl.h"

 #include <math.h>

-BINARY_OP_SINGLE_OUTPUT_PERF(long double, LIBC_NAMESPACE::fmodl, ::fmodl,
-                             "fmodl_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(long double, long double,
+                                  LIBC_NAMESPACE::fmodl, ::fmodl,
+                                  "fmodl_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/fmul_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmul_perf.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"
 #include "src/__support/FPUtil/generic/mul.h"
 #include "src/math/fmul.h"

@@ -17,8 +17,8 @@ float fmul_placeholder_binary(double x, double y) {
 }

 int main() {
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, double, LIBC_NAMESPACE::fmul,
-                                  fmul_placeholder_binary, DOUBLE_ROUNDS,
-                                  "fmul_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, double, LIBC_NAMESPACE::fmul,
+                                     fmul_placeholder_binary, DOUBLE_ROUNDS,
+                                     "fmul_perf.log")
  return 0;
 }
--- a/libc/test/src/math/performance_testing/fmull_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmull_perf.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"
 #include "src/math/fmull.h"

 static constexpr size_t LONG_DOUBLE_ROUNDS = 40;
@@ -16,8 +16,8 @@ float fmull_placeholder_binary(long double x, long double y) {
 }

 int main() {
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, long double, LIBC_NAMESPACE::fmull,
-                                  fmull_placeholder_binary, LONG_DOUBLE_ROUNDS,
-                                  "fmull_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, long double, LIBC_NAMESPACE::fmull,
+                                     fmull_placeholder_binary,
+                                     LONG_DOUBLE_ROUNDS, "fmull_perf.log")
  return 0;
 }
--- a/libc/test/src/math/performance_testing/hypot_perf.cpp
+++ b/libc/test/src/math/performance_testing/hypot_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/hypot.h"

 #include <math.h>

-BINARY_OP_SINGLE_OUTPUT_PERF(double, double, LIBC_NAMESPACE::hypot, ::hypot,
-                             "hypot_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(double, double, LIBC_NAMESPACE::hypot,
+                                  ::hypot, "hypot_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/hypotf16_perf.cpp
+++ b/libc/test/src/math/performance_testing/hypotf16_perf.cpp
@@ -6,11 +6,14 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"

 #include "src/__support/FPUtil/Hypot.h"
 #include "src/math/hypotf16.h"

-BINARY_OP_SINGLE_OUTPUT_PERF(float16, float16, LIBC_NAMESPACE::hypotf16,
-                             LIBC_NAMESPACE::fputil::hypot<float16>,
-                             "hypotf16_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(float16, float16, LIBC_NAMESPACE::hypotf16,
+                                  LIBC_NAMESPACE::fputil::hypot<float16>,
+                                  "hypotf16_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/hypotf_perf.cpp
+++ b/libc/test/src/math/performance_testing/hypotf_perf.cpp
@@ -6,11 +6,14 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"

 #include "src/math/hypotf.h"

 #include <math.h>

-BINARY_OP_SINGLE_OUTPUT_PERF(float, float, LIBC_NAMESPACE::hypotf, ::hypotf,
-                             "hypotf_perf.log")
+int main() {
+  BINARY_INPUT_SINGLE_OUTPUT_PERF(float, float, LIBC_NAMESPACE::hypotf,
+                                  ::hypotf, "hypotf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/log10f_perf.cpp
+++ b/libc/test/src/math/performance_testing/log10f_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/log10f.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log10f, ::log10f,
-                                "log10f_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log10f, ::log10f,
+                                  "log10f_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/log1pf_perf.cpp
+++ b/libc/test/src/math/performance_testing/log1pf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/log1pf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log1pf, ::log1pf,
-                                "log1pf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log1pf, ::log1pf,
+                                  "log1pf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/log2f_perf.cpp
+++ b/libc/test/src/math/performance_testing/log2f_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/log2f.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log2f, ::log2f,
-                                "log2f_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::log2f, ::log2f,
+                                  "log2f_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/logbf_perf.cpp
+++ b/libc/test/src/math/performance_testing/logbf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/logbf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::logbf, ::logbf,
-                                "logbf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::logbf, ::logbf,
+                                  "logbf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/logf_perf.cpp
+++ b/libc/test/src/math/performance_testing/logf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/logf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::logf, ::logf,
-                                "logf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::logf, ::logf,
+                                  "logf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/max_min_funcs_perf.cpp
+++ b/libc/test/src/math/performance_testing/max_min_funcs_perf.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputPerf.h"
+#include "PerfTest.h"
 #include "src/math/fmaxf.h"
 #include "src/math/fmaxf16.h"
 #include "src/math/fmaximum_numf.h"
@@ -35,41 +35,40 @@ float16 placeholder_binaryf16(float16 x, float16 y) { return x; }
 float placeholder_binaryf(float x, float y) { return x; }

 int main() {
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fmaxf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "fmaxf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fminf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "fminf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fmaximumf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "fmaximumf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fminimumf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "fminimumf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fmaxf16,
+                                     placeholder_binaryf16, FLOAT16_ROUNDS,
+                                     "fmaxf16_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::fminf16,
+                                     placeholder_binaryf16, FLOAT16_ROUNDS,
+                                     "fminf16_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
+      float16, float16, LIBC_NAMESPACE::fmaximumf16, placeholder_binaryf16,
+      FLOAT16_ROUNDS, "fmaximumf16_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
+      float16, float16, LIBC_NAMESPACE::fminimumf16, placeholder_binaryf16,
+      FLOAT16_ROUNDS, "fminimumf16_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
      float16, float16, LIBC_NAMESPACE::fmaximum_numf16, placeholder_binaryf16,
      FLOAT16_ROUNDS, "fmaximum_numf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
      float16, float16, LIBC_NAMESPACE::fminimum_numf16, placeholder_binaryf16,
      FLOAT16_ROUNDS, "fminimum_numf16_perf.log")

-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fmaxf, ::fmaxf,
-                                  FLOAT_ROUNDS, "fmaxf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fminf, ::fminf,
-                                  FLOAT_ROUNDS, "fminf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fmaximumf,
-                                  placeholder_binaryf, FLOAT_ROUNDS,
-                                  "fmaximumf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fminimumf,
-                                  placeholder_binaryf, FLOAT_ROUNDS,
-                                  "fminimumf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fmaximum_numf,
-                                  placeholder_binaryf, FLOAT_ROUNDS,
-                                  "fmaximum_numf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fminimum_numf,
-                                  placeholder_binaryf, FLOAT_ROUNDS,
-                                  "fminimum_numf_perf.log")
-
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fmaxf,
+                                     ::fmaxf, FLOAT_ROUNDS, "fmaxf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fminf,
+                                     ::fminf, FLOAT_ROUNDS, "fminf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fmaximumf,
+                                     placeholder_binaryf, FLOAT_ROUNDS,
+                                     "fmaximumf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::fminimumf,
+                                     placeholder_binaryf, FLOAT_ROUNDS,
+                                     "fminimumf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
+      float, float, LIBC_NAMESPACE::fmaximum_numf, placeholder_binaryf,
+      FLOAT_ROUNDS, "fmaximum_numf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
+      float, float, LIBC_NAMESPACE::fminimum_numf, placeholder_binaryf,
+      FLOAT_ROUNDS, "fminimum_numf_perf.log")
  return 0;
 }
--- a/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
+++ b/libc/test/src/math/performance_testing/misc_basic_ops_perf.cpp
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//

-#include "BinaryOpSingleOutputPerf.h"
-#include "SingleInputSingleOutputPerf.h"
+#include "PerfTest.h"
 #include "src/math/copysignf.h"
 #include "src/math/copysignf16.h"
 #include "src/math/fabsf.h"
@@ -28,14 +27,15 @@ int main() {
  SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float16, LIBC_NAMESPACE::fabsf16,
                                     placeholder_unaryf16, FLOAT16_ROUNDS,
                                     "fabsf16_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, LIBC_NAMESPACE::copysignf16,
-                                  placeholder_binaryf16, FLOAT16_ROUNDS,
-                                  "copysignf16_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(
+      float16, float16, LIBC_NAMESPACE::copysignf16, placeholder_binaryf16,
+      FLOAT16_ROUNDS, "copysignf16_perf.log")

  SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(float, LIBC_NAMESPACE::fabsf, fabsf,
                                     FLOAT_ROUNDS, "fabsf_perf.log")
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::copysignf,
-                                  copysignf, FLOAT_ROUNDS, "copysignf_perf.log")
+  BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(float, float, LIBC_NAMESPACE::copysignf,
+                                     copysignf, FLOAT_ROUNDS,
+                                     "copysignf_perf.log")

  return 0;
 }
--- a/libc/test/src/math/performance_testing/nearbyintf_perf.cpp
+++ b/libc/test/src/math/performance_testing/nearbyintf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/nearbyintf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::nearbyintf, ::nearbyintf,
-                                "nearbyintf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::nearbyintf,
+                                  ::nearbyintf, "nearbyintf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
+++ b/libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
@@ -40,7 +40,8 @@ public:
  static void run_perf_in_range(Func my_func, Func other_func,
                                StorageType starting_bit,
                                StorageType ending_bit, StorageType step,
-                                size_t rounds, std::ofstream &log) {
+                                size_t rounds, const char *name_a,
+                                const char *name_b, std::ofstream &log) {
    auto runner = [=](Func func) {
      [[maybe_unused]] volatile T result;
      for (size_t i = 0; i < rounds; i++) {
@@ -60,7 +61,7 @@ public:
    size_t number_of_runs = (ending_bit - starting_bit) / step + 1;
    double my_average =
        static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds;
-    log << "-- My function --\n";
+    log << "-- Function A: " << name_a << " --\n";
    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
    log << "     Average runtime : " << my_average << " ns/op \n";
    log << "     Ops per second  : "
@@ -72,17 +73,18 @@ public:

    double other_average =
        static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds;
-    log << "-- Other function --\n";
+    log << "-- Function B: " << name_b << " --\n";
    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
    log << "     Average runtime : " << other_average << " ns/op \n";
    log << "     Ops per second  : "
        << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";

-    log << "-- Average runtime ratio --\n";
-    log << "     Mine / Other's  : " << my_average / other_average << " \n";
+    log << "-- Average ops per second ratio --\n";
+    log << "     A / B  : " << other_average / my_average << " \n";
  }

  static void run_perf(Func my_func, Func other_func, size_t rounds,
+                       const char *name_a, const char *name_b,
                       const char *log_file) {
    std::ofstream log(log_file);
    log << "Performance tests with inputs in normal integral range:\n";
@@ -93,14 +95,14 @@ public:
        StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN - 1)
                    << FPBits::SIG_LEN),
        /*step=*/StorageType(1 << FPBits::SIG_LEN),
-        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
+        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, name_a, name_b, log);
    log << "\n Performance tests with inputs in low integral range:\n";
    run_perf_in_range(
        my_func, other_func,
        /*starting_bit=*/StorageType(1 << FPBits::SIG_LEN),
        /*ending_bit=*/StorageType((FPBits::EXP_BIAS - 1) << FPBits::SIG_LEN),
        /*step_bit=*/StorageType(1 << FPBits::SIG_LEN),
-        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
+        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, name_a, name_b, log);
    log << "\n Performance tests with inputs in high integral range:\n";
    run_perf_in_range(
        my_func, other_func,
@@ -110,7 +112,7 @@ public:
        /*ending_bit=*/
        StorageType(FPBits::MAX_BIASED_EXPONENT << FPBits::SIG_LEN),
        /*step=*/StorageType(1 << FPBits::SIG_LEN),
-        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, log);
+        rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, name_a, name_b, log);
    log << "\n Performance tests with inputs in normal fractional range:\n";
    run_perf_in_range(
        my_func, other_func,
@@ -118,11 +120,11 @@ public:
        StorageType(((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN) + 1),
        /*ending_bit=*/
        StorageType(((FPBits::EXP_BIAS + 2) << FPBits::SIG_LEN) - 1),
-        /*step=*/StorageType(1), rounds * 2, log);
+        /*step=*/StorageType(1), rounds * 2, name_a, name_b, log);
    log << "\n Performance tests with inputs in subnormal fractional range:\n";
    run_perf_in_range(my_func, other_func, /*starting_bit=*/StorageType(1),
                      /*ending_bit=*/StorageType(FPBits::SIG_MASK),
-                      /*step=*/StorageType(1), rounds, log);
+                      /*step=*/StorageType(1), rounds, name_a, name_b, log);
  }
 };

@@ -131,9 +133,7 @@ public:
 #define NEAREST_INTEGER_PERF(T, my_func, other_func, rounds, filename)         \
  {                                                                            \
    LIBC_NAMESPACE::testing::NearestIntegerPerf<T>::run_perf(                  \
-        &my_func, &other_func, rounds, filename);                              \
-    LIBC_NAMESPACE::testing::NearestIntegerPerf<T>::run_perf(                  \
-        &my_func, &other_func, rounds, filename);                              \
+        &my_func, &other_func, rounds, #my_func, #other_func, filename);       \
  }

 static constexpr size_t FLOAT16_ROUNDS = 20'000;
--- a/libc/test/src/math/performance_testing/rintf_perf.cpp
+++ b/libc/test/src/math/performance_testing/rintf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/rintf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::rintf, ::rintf,
-                                "rintf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::rintf, ::rintf,
+                                  "rintf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/roundf_perf.cpp
+++ b/libc/test/src/math/performance_testing/roundf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/roundf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::roundf, ::roundf,
-                                "roundf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::roundf, ::roundf,
+                                  "roundf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/sinf_perf.cpp
+++ b/libc/test/src/math/performance_testing/sinf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/sinf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::sinf, ::sinf,
-                                "sinf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::sinf, ::sinf,
+                                  "sinf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/sqrtf128_perf.cpp
+++ b/libc/test/src/math/performance_testing/sqrtf128_perf.cpp
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/__support/FPUtil/sqrt.h"
 #include "src/math/sqrtf128.h"

@@ -16,5 +15,8 @@ float128 sqrtf128_placeholder(float128 x) {
  return LIBC_NAMESPACE::fputil::sqrt<float128>(x);
 }

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float128, LIBC_NAMESPACE::sqrtf128,
-                                ::sqrtf128_placeholder, "sqrtf128_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float128, LIBC_NAMESPACE::sqrtf128,
+                                  ::sqrtf128_placeholder, "sqrtf128_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/sqrtf_perf.cpp
+++ b/libc/test/src/math/performance_testing/sqrtf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/sqrtf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::sqrtf, ::sqrtf,
-                                "sqrtf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::sqrtf, ::sqrtf,
+                                  "sqrtf_perf.log")
+  return 0;
+}
--- a/libc/test/src/math/performance_testing/truncf_perf.cpp
+++ b/libc/test/src/math/performance_testing/truncf_perf.cpp
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//

-#include "SingleInputSingleOutputPerf.h"
-
+#include "PerfTest.h"
 #include "src/math/truncf.h"

 #include <math.h>

-SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::truncf, ::truncf,
-                                "truncf_perf.log")
+int main() {
+  SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::truncf, ::truncf,
+                                  "truncf_perf.log")
+  return 0;
+}