mirror of
https://github.com/intel/llvm.git
synced 2026-01-18 16:50:51 +08:00
[libc] revamp memory function benchmark
The benchmarking infrastructure can now run in two modes: - Sweep Mode: which generates a ramp of size values (same as before), - Distribution Mode: allows the user to select a distribution for the size paramater that is representative from production. The analysis tool has also been updated to handle both modes. Differential Revision: https://reviews.llvm.org/D93210
This commit is contained in:
@@ -6,10 +6,10 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "LibcMemoryBenchmarkMain.h"
|
||||
#include "JSON.h"
|
||||
#include "LibcBenchmark.h"
|
||||
#include "LibcMemoryBenchmark.h"
|
||||
#include "MemorySizeDistributions.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
@@ -17,70 +17,310 @@
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#include <string>
|
||||
namespace __llvm_libc {
|
||||
|
||||
extern void *memcpy(void *__restrict, const void *__restrict, size_t);
|
||||
extern void *memset(void *, int, size_t);
|
||||
|
||||
} // namespace __llvm_libc
|
||||
|
||||
namespace llvm {
|
||||
namespace libc_benchmarks {
|
||||
|
||||
static cl::opt<std::string>
|
||||
Configuration("conf", cl::desc("Specify configuration filename"),
|
||||
cl::value_desc("filename"), cl::init(""));
|
||||
enum Function { memcpy, memset };
|
||||
|
||||
static cl::opt<std::string> Output("o", cl::desc("Specify output filename"),
|
||||
static cl::opt<std::string>
|
||||
StudyName("study-name", cl::desc("The name for this study"), cl::Required);
|
||||
|
||||
static cl::opt<Function>
|
||||
MemoryFunction("function", cl::desc("Sets the function to benchmark:"),
|
||||
cl::values(clEnumVal(memcpy, "__llvm_libc::memcpy"),
|
||||
clEnumVal(memset, "__llvm_libc::memset")),
|
||||
cl::Required);
|
||||
|
||||
static cl::opt<std::string>
|
||||
SizeDistributionName("size-distribution-name",
|
||||
cl::desc("The name of the distribution to use"));
|
||||
|
||||
static cl::opt<bool>
|
||||
SweepMode("sweep-mode",
|
||||
cl::desc("If set, benchmark all sizes from 0 to sweep-max-size"));
|
||||
|
||||
static cl::opt<uint32_t>
|
||||
SweepMaxSize("sweep-max-size",
|
||||
cl::desc("The maximum size to use in sweep-mode"),
|
||||
cl::init(256));
|
||||
|
||||
static cl::opt<uint32_t>
|
||||
AlignedAccess("aligned-access",
|
||||
cl::desc("The alignment to use when accessing the buffers\n"
|
||||
"Default is unaligned\n"
|
||||
"Use 0 to disable address randomization"),
|
||||
cl::init(1));
|
||||
|
||||
static cl::opt<std::string> Output("output",
|
||||
cl::desc("Specify output filename"),
|
||||
cl::value_desc("filename"), cl::init("-"));
|
||||
|
||||
extern std::unique_ptr<BenchmarkRunner>
|
||||
getRunner(const StudyConfiguration &Conf);
|
||||
static cl::opt<uint32_t>
|
||||
NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"),
|
||||
cl::init(1));
|
||||
|
||||
void Main() {
|
||||
#ifndef NDEBUG
|
||||
static_assert(
|
||||
false,
|
||||
"For reproducibility benchmarks should not be compiled in DEBUG mode.");
|
||||
#endif
|
||||
checkRequirements();
|
||||
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
||||
MemoryBuffer::getFileOrSTDIN(Configuration);
|
||||
if (!MB)
|
||||
report_fatal_error(
|
||||
Twine("Could not open configuration file: ").concat(Configuration));
|
||||
auto ErrorOrStudy = ParseJsonStudy((*MB)->getBuffer());
|
||||
if (!ErrorOrStudy)
|
||||
report_fatal_error(ErrorOrStudy.takeError());
|
||||
static constexpr int64_t KiB = 1024;
|
||||
static constexpr int64_t ParameterStorageBytes = 4 * KiB;
|
||||
static constexpr int64_t L1LeftAsideBytes = 1 * KiB;
|
||||
|
||||
const auto StudyPrototype = *ErrorOrStudy;
|
||||
struct ParameterType {
|
||||
unsigned OffsetBytes : 16; // max : 16 KiB - 1
|
||||
unsigned SizeBytes : 16; // max : 16 KiB - 1
|
||||
};
|
||||
|
||||
Study S;
|
||||
S.Host = HostState::get();
|
||||
S.Options = StudyPrototype.Options;
|
||||
S.Configuration = StudyPrototype.Configuration;
|
||||
struct MemcpyBenchmark {
|
||||
static constexpr auto GetDistributions = &getMemcpySizeDistributions;
|
||||
static constexpr size_t BufferCount = 2;
|
||||
static void amend(Study &S) { S.Configuration.Function = "memcpy"; }
|
||||
|
||||
const auto Runs = S.Configuration.Runs;
|
||||
const auto &SR = S.Configuration.Size;
|
||||
std::unique_ptr<BenchmarkRunner> Runner = getRunner(S.Configuration);
|
||||
const size_t TotalSteps =
|
||||
Runner->getFunctionNames().size() * Runs * ((SR.To - SR.From) / SR.Step);
|
||||
size_t Steps = 0;
|
||||
for (auto FunctionName : Runner->getFunctionNames()) {
|
||||
FunctionMeasurements FM;
|
||||
FM.Name = std::string(FunctionName);
|
||||
for (size_t Run = 0; Run < Runs; ++Run) {
|
||||
for (uint32_t Size = SR.From; Size <= SR.To; Size += SR.Step) {
|
||||
const auto Result = Runner->benchmark(S.Options, FunctionName, Size);
|
||||
Measurement Measurement;
|
||||
Measurement.Runtime = Result.BestGuess;
|
||||
Measurement.Size = Size;
|
||||
FM.Measurements.push_back(Measurement);
|
||||
outs() << format("%3d%% run: %2d / %2d size: %5d ",
|
||||
(Steps * 100 / TotalSteps), Run, Runs, Size)
|
||||
<< FunctionName
|
||||
<< " \r";
|
||||
++Steps;
|
||||
}
|
||||
}
|
||||
S.Functions.push_back(std::move(FM));
|
||||
MemcpyBenchmark(const size_t BufferSize)
|
||||
: SrcBuffer(BufferSize), DstBuffer(BufferSize) {}
|
||||
|
||||
inline auto functor() {
|
||||
return [this](ParameterType P) {
|
||||
__llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes,
|
||||
P.SizeBytes);
|
||||
return DstBuffer + P.OffsetBytes;
|
||||
};
|
||||
}
|
||||
|
||||
AlignedBuffer SrcBuffer;
|
||||
AlignedBuffer DstBuffer;
|
||||
};
|
||||
|
||||
struct MemsetBenchmark {
|
||||
static constexpr auto GetDistributions = &getMemsetSizeDistributions;
|
||||
static constexpr size_t BufferCount = 1;
|
||||
static void amend(Study &S) { S.Configuration.Function = "memset"; }
|
||||
|
||||
MemsetBenchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}
|
||||
|
||||
inline auto functor() {
|
||||
return [this](ParameterType P) {
|
||||
__llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF,
|
||||
P.SizeBytes);
|
||||
return DstBuffer + P.OffsetBytes;
|
||||
};
|
||||
}
|
||||
|
||||
AlignedBuffer DstBuffer;
|
||||
};
|
||||
|
||||
template <typename Benchmark> struct Harness : Benchmark {
|
||||
using Benchmark::functor;
|
||||
|
||||
Harness(const size_t BufferSize, size_t BatchParameterCount,
|
||||
std::function<unsigned()> SizeSampler,
|
||||
std::function<unsigned()> OffsetSampler)
|
||||
: Benchmark(BufferSize), BufferSize(BufferSize),
|
||||
BatchParameterCount(BatchParameterCount),
|
||||
Parameters(BatchParameterCount), SizeSampler(SizeSampler),
|
||||
OffsetSampler(OffsetSampler) {}
|
||||
|
||||
CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
|
||||
for (auto &P : Parameters) {
|
||||
P.OffsetBytes = OffsetSampler();
|
||||
P.SizeBytes = SizeSampler();
|
||||
if (P.OffsetBytes + P.SizeBytes >= BufferSize)
|
||||
report_fatal_error("Call would result in buffer overflow");
|
||||
}
|
||||
return cycle(makeArrayRef(Parameters), Iterations);
|
||||
}
|
||||
|
||||
private:
|
||||
const size_t BufferSize;
|
||||
const size_t BatchParameterCount;
|
||||
std::vector<ParameterType> Parameters;
|
||||
std::function<unsigned()> SizeSampler;
|
||||
std::function<unsigned()> OffsetSampler;
|
||||
};
|
||||
|
||||
struct IBenchmark {
|
||||
virtual ~IBenchmark() {}
|
||||
virtual Study run() = 0;
|
||||
};
|
||||
|
||||
size_t getL1DataCacheSize() {
|
||||
const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches;
|
||||
const auto IsL1DataCache = [](const CacheInfo &CI) {
|
||||
return CI.Type == "Data" && CI.Level == 1;
|
||||
};
|
||||
const auto CacheIt = find_if(CacheInfos, IsL1DataCache);
|
||||
if (CacheIt != CacheInfos.end())
|
||||
return CacheIt->Size;
|
||||
report_fatal_error("Unable to read L1 Cache Data Size");
|
||||
}
|
||||
|
||||
template <typename Benchmark> struct MemfunctionBenchmark : IBenchmark {
|
||||
MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize())
|
||||
: AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes),
|
||||
BufferSize(AvailableSize / Benchmark::BufferCount),
|
||||
BatchParameterCount(BufferSize / sizeof(ParameterType)) {
|
||||
// Handling command line flags
|
||||
if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100)
|
||||
report_fatal_error("Not enough L1 cache");
|
||||
|
||||
if (!isPowerOfTwoOrZero(AlignedAccess))
|
||||
report_fatal_error(AlignedAccess.ArgStr +
|
||||
Twine(" must be a power of two or zero"));
|
||||
|
||||
const bool HasDistributionName = !SizeDistributionName.empty();
|
||||
if (SweepMode && HasDistributionName)
|
||||
report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
|
||||
"` or `--" + Twine(SizeDistributionName.ArgStr) + "`");
|
||||
|
||||
if (SweepMode) {
|
||||
MaxSizeValue = SweepMaxSize;
|
||||
} else {
|
||||
std::map<StringRef, MemorySizeDistribution> Map;
|
||||
for (MemorySizeDistribution Distribution : Benchmark::GetDistributions())
|
||||
Map[Distribution.Name] = Distribution;
|
||||
if (Map.count(SizeDistributionName) == 0) {
|
||||
std::string Message;
|
||||
raw_string_ostream Stream(Message);
|
||||
Stream << "Unknown --" << SizeDistributionName.ArgStr << "='"
|
||||
<< SizeDistributionName << "', available distributions:\n";
|
||||
for (const auto &Pair : Map)
|
||||
Stream << "'" << Pair.first << "'\n";
|
||||
report_fatal_error(Stream.str());
|
||||
}
|
||||
SizeDistribution = Map[SizeDistributionName];
|
||||
MaxSizeValue = SizeDistribution.Probabilities.size() - 1;
|
||||
}
|
||||
|
||||
// Setup study.
|
||||
Study.StudyName = StudyName;
|
||||
Runtime &RI = Study.Runtime;
|
||||
RI.Host = HostState::get();
|
||||
RI.BufferSize = BufferSize;
|
||||
RI.BatchParameterCount = BatchParameterCount;
|
||||
|
||||
BenchmarkOptions &BO = RI.BenchmarkOptions;
|
||||
BO.MinDuration = std::chrono::milliseconds(1);
|
||||
BO.MaxDuration = std::chrono::seconds(1);
|
||||
BO.MaxIterations = 10'000'000U;
|
||||
BO.MinSamples = 4;
|
||||
BO.MaxSamples = 1000;
|
||||
BO.Epsilon = 0.01; // 1%
|
||||
BO.ScalingFactor = 1.4;
|
||||
|
||||
StudyConfiguration &SC = Study.Configuration;
|
||||
SC.NumTrials = NumTrials;
|
||||
SC.IsSweepMode = SweepMode;
|
||||
if (SweepMode)
|
||||
SC.SweepModeMaxSize = SweepMaxSize;
|
||||
else
|
||||
SC.SizeDistributionName = SizeDistributionName;
|
||||
SC.AccessAlignment = MaybeAlign(AlignedAccess);
|
||||
|
||||
// Delegate specific flags and configuration.
|
||||
Benchmark::amend(Study);
|
||||
}
|
||||
|
||||
Study run() override {
|
||||
if (SweepMode)
|
||||
runSweepMode();
|
||||
else
|
||||
runDistributionMode();
|
||||
return Study;
|
||||
}
|
||||
|
||||
private:
|
||||
const int64_t AvailableSize;
|
||||
const int64_t BufferSize;
|
||||
const size_t BatchParameterCount;
|
||||
size_t MaxSizeValue = 0;
|
||||
MemorySizeDistribution SizeDistribution;
|
||||
Study Study;
|
||||
std::mt19937_64 Gen;
|
||||
|
||||
static constexpr bool isPowerOfTwoOrZero(size_t Value) {
|
||||
return (Value & (Value - 1U)) == 0;
|
||||
}
|
||||
|
||||
std::function<unsigned()> geOffsetSampler() {
|
||||
return [this]() {
|
||||
static OffsetDistribution OD(BufferSize, MaxSizeValue,
|
||||
Study.Configuration.AccessAlignment);
|
||||
return OD(Gen);
|
||||
};
|
||||
}
|
||||
|
||||
std::function<unsigned()> getSizeSampler() {
|
||||
return [this]() {
|
||||
static std::discrete_distribution<unsigned> Distribution(
|
||||
SizeDistribution.Probabilities.begin(),
|
||||
SizeDistribution.Probabilities.end());
|
||||
return Distribution(Gen);
|
||||
};
|
||||
}
|
||||
|
||||
void reportProgress(BenchmarkStatus BS) {
|
||||
const size_t TotalSteps = Study.Measurements.capacity();
|
||||
const size_t Steps = Study.Measurements.size();
|
||||
const size_t Percent = 100 * Steps / TotalSteps;
|
||||
size_t I = 0;
|
||||
errs() << '[';
|
||||
for (; I <= Percent; ++I)
|
||||
errs() << '#';
|
||||
for (; I <= 100; ++I)
|
||||
errs() << '_';
|
||||
errs() << "] " << Percent << "%\r";
|
||||
}
|
||||
|
||||
void runTrials(const BenchmarkOptions &Options,
|
||||
std::function<unsigned()> SizeSampler,
|
||||
std::function<unsigned()> OffsetSampler) {
|
||||
Harness<Benchmark> B(BufferSize, BatchParameterCount, SizeSampler,
|
||||
OffsetSampler);
|
||||
for (size_t i = 0; i < NumTrials; ++i) {
|
||||
const BenchmarkResult Result = benchmark(Options, B, B.functor());
|
||||
Study.Measurements.push_back(Result.BestGuess);
|
||||
reportProgress(Result.TerminationStatus);
|
||||
}
|
||||
}
|
||||
|
||||
void runSweepMode() {
|
||||
Study.Measurements.reserve(NumTrials * SweepMaxSize);
|
||||
|
||||
BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
|
||||
BO.MinDuration = std::chrono::milliseconds(1);
|
||||
BO.InitialIterations = 100;
|
||||
|
||||
for (size_t Size = 0; Size <= SweepMaxSize; ++Size) {
|
||||
const auto SizeSampler = [Size]() { return Size; };
|
||||
runTrials(BO, SizeSampler, geOffsetSampler());
|
||||
}
|
||||
}
|
||||
|
||||
void runDistributionMode() {
|
||||
Study.Measurements.reserve(NumTrials);
|
||||
|
||||
BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
|
||||
BO.MinDuration = std::chrono::milliseconds(10);
|
||||
BO.InitialIterations = BatchParameterCount * 10;
|
||||
|
||||
runTrials(BO, getSizeSampler(), geOffsetSampler());
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<IBenchmark> getMemfunctionBenchmark() {
|
||||
switch (MemoryFunction) {
|
||||
case memcpy:
|
||||
return std::make_unique<MemfunctionBenchmark<MemcpyBenchmark>>();
|
||||
case memset:
|
||||
return std::make_unique<MemfunctionBenchmark<MemsetBenchmark>>();
|
||||
}
|
||||
}
|
||||
|
||||
void writeStudy(const Study &S) {
|
||||
std::error_code EC;
|
||||
raw_fd_ostream FOS(Output, EC);
|
||||
if (EC)
|
||||
@@ -89,7 +329,13 @@ void Main() {
|
||||
.concat(", ")
|
||||
.concat(Output));
|
||||
json::OStream JOS(FOS);
|
||||
SerializeToJson(S, JOS);
|
||||
serializeToJson(S, JOS);
|
||||
}
|
||||
|
||||
void main() {
|
||||
checkRequirements();
|
||||
auto MB = getMemfunctionBenchmark();
|
||||
writeStudy(MB->run());
|
||||
}
|
||||
|
||||
} // namespace libc_benchmarks
|
||||
@@ -97,6 +343,11 @@ void Main() {
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
llvm::cl::ParseCommandLineOptions(argc, argv);
|
||||
llvm::libc_benchmarks::Main();
|
||||
#ifndef NDEBUG
|
||||
static_assert(
|
||||
false,
|
||||
"For reproducibility benchmarks should not be compiled in DEBUG mode.");
|
||||
#endif
|
||||
llvm::libc_benchmarks::main();
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user