[libc] revamp memory function benchmark

The benchmarking infrastructure can now run in two modes: - Sweep Mode: generates a ramp of size values (same as before), - Distribution Mode: allows the user to select a distribution for the size parameter that is representative of production. The analysis tool has also been updated to handle both modes. Differential Revision: https://reviews.llvm.org/D93210
2026-01-18 16:50:51 +08:00 · 2020-12-17 13:16:14 +00:00
parent e7a3c4c11e
commit deae7e982a
20 changed files with 775 additions and 1001 deletions
--- a/libc/benchmarks/LibcMemoryBenchmarkMain.cpp
+++ b/libc/benchmarks/LibcMemoryBenchmarkMain.cpp
@@ -6,10 +6,10 @@
 //
 //===----------------------------------------------------------------------===//

-#include "LibcMemoryBenchmarkMain.h"
 #include "JSON.h"
 #include "LibcBenchmark.h"
 #include "LibcMemoryBenchmark.h"
+#include "MemorySizeDistributions.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
@@ -17,70 +17,310 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"

-#include <string>
+namespace __llvm_libc { // Functions under test: declared only, no definition appears in this diff.
+
+extern void *memcpy(void *__restrict, const void *__restrict, size_t); // Invoked by MemcpyBenchmark as (destination, source, byte count).
+extern void *memset(void *, int, size_t); // Invoked by MemsetBenchmark as (destination, fill value, byte count).
+
+} // namespace __llvm_libc

 namespace llvm {
 namespace libc_benchmarks {

-static cl::opt<std::string>
-    Configuration("conf", cl::desc("Specify configuration filename"),
-                  cl::value_desc("filename"), cl::init(""));
+enum Function { memcpy, memset }; // Functions selectable through the `function` option; switched on in getMemfunctionBenchmark().

-static cl::opt<std::string> Output("o", cl::desc("Specify output filename"),
+static cl::opt<std::string>
+    StudyName("study-name", cl::desc("The name for this study"), cl::Required); // Copied into Study.StudyName by MemfunctionBenchmark.
+
+static cl::opt<Function>
+    MemoryFunction("function", cl::desc("Sets the function to benchmark:"),
+                   cl::values(clEnumVal(memcpy, "__llvm_libc::memcpy"),
+                              clEnumVal(memset, "__llvm_libc::memset")),
+                   cl::Required); // Selects the Benchmark policy in getMemfunctionBenchmark().
+
+static cl::opt<std::string>
+    SizeDistributionName("size-distribution-name",
+                         cl::desc("The name of the distribution to use")); // Combining it with sweep-mode is a fatal error.
+
+static cl::opt<bool>
+    SweepMode("sweep-mode",
+              cl::desc("If set, benchmark all sizes from 0 to sweep-max-size")); // When unset, run() uses distribution mode.
+
+static cl::opt<uint32_t>
+    SweepMaxSize("sweep-max-size",
+                 cl::desc("The maximum size to use in sweep-mode"),
+                 cl::init(256)); // Inclusive upper bound of the loop in runSweepMode().
+
+static cl::opt<uint32_t>
+    AlignedAccess("aligned-access",
+                  cl::desc("The alignment to use when accessing the buffers\n"
+                           "Default is unaligned\n"
+                           "Use 0 to disable address randomization"),
+                  cl::init(1)); // Must be zero or a power of two; checked in MemfunctionBenchmark's constructor.
+
+static cl::opt<std::string> Output("output",
+                                   cl::desc("Specify output filename"),
                                   cl::value_desc("filename"), cl::init("-")); // Passed to raw_fd_ostream in writeStudy().

-extern std::unique_ptr<BenchmarkRunner>
-getRunner(const StudyConfiguration &Conf);
+static cl::opt<uint32_t>
+    NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"),
+              cl::init(1)); // Number of benchmark() invocations performed by each runTrials() call.

-void Main() {
-#ifndef NDEBUG
-  static_assert(
-      false,
-      "For reproducibility benchmarks should not be compiled in DEBUG mode.");
-#endif
-  checkRequirements();
-  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
-      MemoryBuffer::getFileOrSTDIN(Configuration);
-  if (!MB)
-    report_fatal_error(
-        Twine("Could not open configuration file: ").concat(Configuration));
-  auto ErrorOrStudy = ParseJsonStudy((*MB)->getBuffer());
-  if (!ErrorOrStudy)
-    report_fatal_error(ErrorOrStudy.takeError());
+static constexpr int64_t KiB = 1024; // Bytes per KiB.
+static constexpr int64_t ParameterStorageBytes = 4 * KiB; // Subtracted from the L1 size before sizing buffers; presumably room for the parameter batch - TODO confirm.
+static constexpr int64_t L1LeftAsideBytes = 1 * KiB; // Also subtracted from the L1 size before sizing buffers.

-  const auto StudyPrototype = *ErrorOrStudy;
+struct ParameterType { // Offset and size of one benchmarked call, stored as two 16-bit bit-fields.
+  unsigned OffsetBytes : 16; // max : 64 KiB - 1 (16 bits hold values up to 65535)
+  unsigned SizeBytes : 16;   // max : 64 KiB - 1 (16 bits hold values up to 65535)
+};

-  Study S;
-  S.Host = HostState::get();
-  S.Options = StudyPrototype.Options;
-  S.Configuration = StudyPrototype.Configuration;
+struct MemcpyBenchmark {
+  static constexpr auto GetDistributions = &getMemcpySizeDistributions;
+  static constexpr size_t BufferCount = 2;
+  static void amend(Study &S) { S.Configuration.Function = "memcpy"; }

-  const auto Runs = S.Configuration.Runs;
-  const auto &SR = S.Configuration.Size;
-  std::unique_ptr<BenchmarkRunner> Runner = getRunner(S.Configuration);
-  const size_t TotalSteps =
-      Runner->getFunctionNames().size() * Runs * ((SR.To - SR.From) / SR.Step);
-  size_t Steps = 0;
-  for (auto FunctionName : Runner->getFunctionNames()) {
-    FunctionMeasurements FM;
-    FM.Name = std::string(FunctionName);
-    for (size_t Run = 0; Run < Runs; ++Run) {
-      for (uint32_t Size = SR.From; Size <= SR.To; Size += SR.Step) {
-        const auto Result = Runner->benchmark(S.Options, FunctionName, Size);
-        Measurement Measurement;
-        Measurement.Runtime = Result.BestGuess;
-        Measurement.Size = Size;
-        FM.Measurements.push_back(Measurement);
-        outs() << format("%3d%% run: %2d / %2d size: %5d ",
-                         (Steps * 100 / TotalSteps), Run, Runs, Size)
-               << FunctionName
-               << "                                                  \r";
-        ++Steps;
-      }
-    }
-    S.Functions.push_back(std::move(FM));
+  MemcpyBenchmark(const size_t BufferSize)
+      : SrcBuffer(BufferSize), DstBuffer(BufferSize) {}
+
+  inline auto functor() {
+    return [this](ParameterType P) {
+      __llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes,
+                          P.SizeBytes);
+      return DstBuffer + P.OffsetBytes;
+    };
  }

+  AlignedBuffer SrcBuffer;
+  AlignedBuffer DstBuffer;
+};
+
+/// Benchmark policy for `__llvm_libc::memset`.
+///
+/// Owns the single destination buffer and provides the callable that performs
+/// one `memset` call for a given offset/size pair.
+struct MemsetBenchmark {
+  static constexpr auto GetDistributions = &getMemsetSizeDistributions;
+  static constexpr size_t BufferCount = 1;
+
+  /// Records the benchmarked function's name in the study configuration.
+  static void amend(Study &S) { S.Configuration.Function = "memset"; }
+
+  MemsetBenchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}
+
+  /// Returns a callable that fills `SizeBytes` bytes at `OffsetBytes` inside
+  /// the destination buffer and yields the address that was written. The fill
+  /// value is the low byte of the offset.
+  inline auto functor() {
+    return [this](ParameterType Param) {
+      const auto Destination = DstBuffer + Param.OffsetBytes;
+      const int FillValue = Param.OffsetBytes & 0xFF;
+      __llvm_libc::memset(Destination, FillValue, Param.SizeBytes);
+      return Destination;
+    };
+  }
+
+  AlignedBuffer DstBuffer;
+};
+
+/// Couples a benchmark policy with the batch of parameters it is called with.
+///
+/// `OffsetSampler` and `SizeSampler` are invoked once per parameter each time
+/// a batch is generated.
+template <typename Benchmark> struct Harness : Benchmark {
+  using Benchmark::functor;
+
+  Harness(const size_t BufferSize, size_t BatchParameterCount,
+          std::function<unsigned()> SizeSampler,
+          std::function<unsigned()> OffsetSampler)
+      : Benchmark(BufferSize), BufferSize(BufferSize),
+        BatchParameterCount(BatchParameterCount),
+        Parameters(BatchParameterCount), SizeSampler(SizeSampler),
+        OffsetSampler(OffsetSampler) {}
+
+  /// Redraws every stored parameter and returns them wrapped by
+  /// `cycle(..., Iterations)`. Aborts when a drawn offset/size pair does not
+  /// stay strictly below `BufferSize`.
+  CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
+    for (ParameterType &Param : Parameters) {
+      // The offset is drawn before the size; both are stored in the bit-fields
+      // before being checked.
+      Param.OffsetBytes = OffsetSampler();
+      Param.SizeBytes = SizeSampler();
+      const bool StaysInBuffer =
+          Param.OffsetBytes + Param.SizeBytes < BufferSize;
+      if (!StaysInBuffer)
+        report_fatal_error("Call would result in buffer overflow");
+    }
+    return cycle(makeArrayRef(Parameters), Iterations);
+  }
+
+private:
+  const size_t BufferSize;
+  const size_t BatchParameterCount;
+  std::vector<ParameterType> Parameters;
+  std::function<unsigned()> SizeSampler;
+  std::function<unsigned()> OffsetSampler;
+};
+
+/// Interface through which a benchmark is run regardless of the function it
+/// measures.
+struct IBenchmark {
+  virtual ~IBenchmark() = default;
+
+  /// Executes the benchmark and returns the resulting study.
+  virtual Study run() = 0;
+};
+
+/// Returns the `Size` of the first cache reported by the host whose type is
+/// "Data" and whose level is 1. Aborts when the host reports no such cache.
+size_t getL1DataCacheSize() {
+  const std::vector<CacheInfo> &Caches = HostState::get().Caches;
+  for (const CacheInfo &Cache : Caches)
+    if (Cache.Level == 1 && Cache.Type == "Data")
+      return Cache.Size;
+  report_fatal_error("Unable to read L1 Cache Data Size");
+}
+
+/// Drives the benchmark of one memory function described by the `Benchmark`
+/// policy (`MemcpyBenchmark` or `MemsetBenchmark`).
+///
+/// The constructor validates the command line flags, derives the buffer size
+/// from the L1 data cache size and fills in the study metadata. `run()` then
+/// executes sweep mode or distribution mode and returns the study.
+template <typename Benchmark> struct MemfunctionBenchmark : IBenchmark {
+  /// \param L1Size L1 data cache size the buffers are sized from; defaults to
+  /// the value reported for the host.
+  MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize())
+      : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes),
+        BufferSize(AvailableSize / Benchmark::BufferCount),
+        BatchParameterCount(BufferSize / sizeof(ParameterType)) {
+    // Handling command line flags
+    if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100)
+      report_fatal_error("Not enough L1 cache");
+
+    if (!isPowerOfTwoOrZero(AlignedAccess))
+      report_fatal_error(AlignedAccess.ArgStr +
+                         Twine(" must be a power of two or zero"));
+
+    const bool HasDistributionName = !SizeDistributionName.empty();
+    if (SweepMode && HasDistributionName)
+      report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
+                         "` or `--" + Twine(SizeDistributionName.ArgStr) + "`");
+
+    if (SweepMode) {
+      MaxSizeValue = SweepMaxSize;
+    } else {
+      // Index the available distributions by name. Elements are bound by
+      // reference so that no distribution is copied just to be inspected.
+      std::map<StringRef, MemorySizeDistribution> Map;
+      for (const MemorySizeDistribution &Distribution :
+           Benchmark::GetDistributions())
+        Map[Distribution.Name] = Distribution;
+      const auto Found = Map.find(SizeDistributionName);
+      if (Found == Map.end()) {
+        std::string Message;
+        raw_string_ostream Stream(Message);
+        Stream << "Unknown --" << SizeDistributionName.ArgStr << "='"
+               << SizeDistributionName << "', available distributions:\n";
+        for (const auto &Pair : Map)
+          Stream << "'" << Pair.first << "'\n";
+        report_fatal_error(Stream.str());
+      }
+      SizeDistribution = Found->second;
+      // Probabilities holds one entry per size, starting at size 0.
+      MaxSizeValue = SizeDistribution.Probabilities.size() - 1;
+    }
+
+    // Setup study.
+    Study.StudyName = StudyName;
+    Runtime &RI = Study.Runtime;
+    RI.Host = HostState::get();
+    RI.BufferSize = BufferSize;
+    RI.BatchParameterCount = BatchParameterCount;
+
+    BenchmarkOptions &BO = RI.BenchmarkOptions;
+    BO.MinDuration = std::chrono::milliseconds(1);
+    BO.MaxDuration = std::chrono::seconds(1);
+    BO.MaxIterations = 10'000'000U;
+    BO.MinSamples = 4;
+    BO.MaxSamples = 1000;
+    BO.Epsilon = 0.01; // 1%
+    BO.ScalingFactor = 1.4;
+
+    StudyConfiguration &SC = Study.Configuration;
+    SC.NumTrials = NumTrials;
+    SC.IsSweepMode = SweepMode;
+    if (SweepMode)
+      SC.SweepModeMaxSize = SweepMaxSize;
+    else
+      SC.SizeDistributionName = SizeDistributionName;
+    SC.AccessAlignment = MaybeAlign(AlignedAccess);
+
+    // Delegate specific flags and configuration.
+    Benchmark::amend(Study);
+  }
+
+  /// Runs the mode selected on the command line and returns the study with
+  /// its measurements filled in.
+  Study run() override {
+    if (SweepMode)
+      runSweepMode();
+    else
+      runDistributionMode();
+    return Study;
+  }
+
+private:
+  const int64_t AvailableSize;
+  const int64_t BufferSize;
+  const size_t BatchParameterCount;
+  size_t MaxSizeValue = 0;
+  // Number of measurements the current run will produce; set by the run*Mode
+  // functions and used as the denominator of the progress report.
+  size_t ExpectedMeasurementCount = 0;
+  MemorySizeDistribution SizeDistribution;
+  Study Study;
+  std::mt19937_64 Gen;
+
+  /// True for zero and for values with exactly one bit set.
+  static constexpr bool isPowerOfTwoOrZero(size_t Value) {
+    return (Value & (Value - 1U)) == 0;
+  }
+
+  /// Returns a sampler drawing buffer offsets from an `OffsetDistribution`.
+  // NOTE(review): the distribution is a function-local static, so it is built
+  // once from the first object that samples and then shared by every instance
+  // of this template specialization. Fine while one benchmark is created per
+  // process; revisit if that changes.
+  std::function<unsigned()> getOffsetSampler() {
+    return [this]() {
+      static OffsetDistribution OD(BufferSize, MaxSizeValue,
+                                   Study.Configuration.AccessAlignment);
+      return OD(Gen);
+    };
+  }
+
+  /// Returns a sampler drawing sizes according to `SizeDistribution`.
+  // NOTE(review): same function-local static caveat as getOffsetSampler().
+  std::function<unsigned()> getSizeSampler() {
+    return [this]() {
+      static std::discrete_distribution<unsigned> Distribution(
+          SizeDistribution.Probabilities.begin(),
+          SizeDistribution.Probabilities.end());
+      return Distribution(Gen);
+    };
+  }
+
+  /// Prints a 100 cell progress bar on stderr, followed by a carriage return
+  /// so that the next report overwrites it.
+  void reportProgress(BenchmarkStatus /*Status*/) {
+    const size_t Steps = Study.Measurements.size();
+    // The total comes from the explicit count rather than from
+    // vector::capacity(), which may exceed the amount that was reserved.
+    const size_t Percent = ExpectedMeasurementCount == 0
+                               ? 100
+                               : 100 * Steps / ExpectedMeasurementCount;
+    errs() << '[';
+    for (size_t I = 0; I < 100; ++I)
+      errs() << (I < Percent ? '#' : '_');
+    errs() << "] " << Percent << "%\r";
+  }
+
+  /// Benchmarks `NumTrials` times with the given samplers and appends each
+  /// best guess to the study's measurements.
+  void runTrials(const BenchmarkOptions &Options,
+                 std::function<unsigned()> SizeSampler,
+                 std::function<unsigned()> OffsetSampler) {
+    Harness<Benchmark> B(BufferSize, BatchParameterCount, SizeSampler,
+                         OffsetSampler);
+    for (size_t i = 0; i < NumTrials; ++i) {
+      const BenchmarkResult Result = benchmark(Options, B, B.functor());
+      Study.Measurements.push_back(Result.BestGuess);
+      reportProgress(Result.TerminationStatus);
+    }
+  }
+
+  /// Runs the trials once for every size in [0, SweepMaxSize].
+  void runSweepMode() {
+    const size_t MaxSize = SweepMaxSize;
+    const size_t TrialCount = NumTrials;
+    // Both bounds are inclusive, hence MaxSize + 1 distinct sizes. The
+    // product is computed in size_t to avoid 32-bit wrap-around.
+    ExpectedMeasurementCount = TrialCount * (MaxSize + 1);
+    Study.Measurements.reserve(ExpectedMeasurementCount);
+
+    BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
+    BO.MinDuration = std::chrono::milliseconds(1);
+    BO.InitialIterations = 100;
+
+    for (size_t Size = 0; Size <= MaxSize; ++Size) {
+      const auto SizeSampler = [Size]() { return Size; };
+      runTrials(BO, SizeSampler, getOffsetSampler());
+    }
+  }
+
+  /// Runs the trials once, drawing sizes from the selected distribution.
+  void runDistributionMode() {
+    ExpectedMeasurementCount = NumTrials;
+    Study.Measurements.reserve(ExpectedMeasurementCount);
+
+    BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
+    BO.MinDuration = std::chrono::milliseconds(10);
+    BO.InitialIterations = BatchParameterCount * 10;
+
+    runTrials(BO, getSizeSampler(), getOffsetSampler());
+  }
+};
+
+/// Creates the benchmark matching the `function` command line option.
+std::unique_ptr<IBenchmark> getMemfunctionBenchmark() {
+  // No `default` label on purpose, so the compiler can still warn when an
+  // enumerator is added to `Function` without being handled here.
+  switch (MemoryFunction) {
+  case memcpy:
+    return std::make_unique<MemfunctionBenchmark<MemcpyBenchmark>>();
+  case memset:
+    return std::make_unique<MemfunctionBenchmark<MemsetBenchmark>>();
+  }
+  // Every enumerator returns above, but flowing off the end of a non-void
+  // function is undefined behavior, so fail loudly instead.
+  report_fatal_error("Unhandled memory function");
+}
+
+void writeStudy(const Study &S) {
  std::error_code EC;
  raw_fd_ostream FOS(Output, EC);
  if (EC)
@@ -89,7 +329,13 @@ void Main() {
                           .concat(", ")
                           .concat(Output));
  json::OStream JOS(FOS);
-  SerializeToJson(S, JOS);
+  serializeToJson(S, JOS);
+}
+
+/// Checks the requirements, runs the selected benchmark and writes the
+/// resulting study to the output.
+void main() {
+  checkRequirements();
+  writeStudy(getMemfunctionBenchmark()->run());
 }

 } // namespace libc_benchmarks
@@ -97,6 +343,11 @@ void Main() {

 int main(int argc, char **argv) { // Process entry point.
  llvm::cl::ParseCommandLineOptions(argc, argv); // Parses argv into the cl::opt globals declared in this file.
-  llvm::libc_benchmarks::Main();
+#ifndef NDEBUG // Builds without NDEBUG are rejected at compile time.
+  static_assert(
+      false,
+      "For reproducibility benchmarks should not be compiled in DEBUG mode.");
+#endif
+  llvm::libc_benchmarks::main(); // Runs the benchmark and writes the study.
  return EXIT_SUCCESS;
 }