mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 03:56:16 +08:00
[libc] Remove automemcpy folder (#118781)
The build is currently broken and we don't have the resources to keep it up to date :-/
This commit is contained in:
committed by
GitHub
parent
edbebda454
commit
4873968649
@@ -212,5 +212,3 @@ target_link_libraries(libc.benchmarks.memory_functions.opt_host
|
||||
benchmark_main
|
||||
)
|
||||
llvm_update_compile_flags(libc.benchmarks.memory_functions.opt_host)
|
||||
|
||||
add_subdirectory(automemcpy)
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
# automemcpy is opt-in: do nothing unless LIBC_BUILD_AUTOMEMCPY is set.
if(NOT LIBC_BUILD_AUTOMEMCPY)
  return()
endif()

# Z3 is a hard requirement, fail the configuration early when it is missing.
if(NOT LLVM_WITH_Z3)
  message(FATAL_ERROR "Building llvm-libc automemcpy requires Z3")
endif()

# Public headers shared by the targets defined in the subdirectories.
set(LIBC_AUTOMEMCPY_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)

add_subdirectory(lib)
add_subdirectory(unittests)
|
||||
@@ -1,111 +0,0 @@
|
||||
This folder contains an implementation of [automemcpy: A framework for automatic generation of fundamental memory operations](https://research.google/pubs/pub50338/).
|
||||
|
||||
It uses the [Z3 theorem prover](https://github.com/Z3Prover/z3) to enumerate a subset of valid memory function implementations. These implementations are then materialized as C++ code and can be [benchmarked](../) against various [size distributions](../distributions). This process helps the design of efficient implementations for a particular environment (size distribution, processor or custom compilation options).
|
||||
|
||||
This is not enabled by default, as it is mostly useful when working on tuning the library implementation. To build it, use `LIBC_BUILD_AUTOMEMCPY=ON` (see below).
|
||||
|
||||
## Prerequisites
|
||||
|
||||
You may need to install `Z3` from source if it's not available on your system.
|
||||
Here we show instructions to install it into `<Z3_INSTALL_DIR>`.
|
||||
You may need to `sudo` to `make install`.
|
||||
|
||||
```shell
|
||||
mkdir -p ~/git
|
||||
cd ~/git
|
||||
git clone https://github.com/Z3Prover/z3.git && cd z3
|
||||
python scripts/mk_make.py --prefix=<Z3_INSTALL_DIR>
|
||||
cd build
|
||||
make -j
|
||||
make install
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
```shell
|
||||
mkdir -p <BUILD_DIR>
|
||||
cd <LLVM_PROJECT_DIR>/llvm
|
||||
cmake -DCMAKE_C_COMPILER=/usr/bin/clang \
|
||||
-DCMAKE_CXX_COMPILER=/usr/bin/clang++ \
|
||||
-DLLVM_ENABLE_PROJECTS="libc" \
|
||||
-DLLVM_ENABLE_Z3_SOLVER=ON \
|
||||
-DLLVM_Z3_INSTALL_DIR=<Z3_INSTALL_DIR> \
|
||||
-DLIBC_BUILD_AUTOMEMCPY=ON \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-B<BUILD_DIR>
|
||||
```
|
||||
|
||||
## Targets and compilation
|
||||
|
||||
There are three main CMake targets
|
||||
1. `automemcpy_implementations`
|
||||
- runs `Z3` and materializes valid memory functions as C++ code; a message displays its on-disk location.
|
||||
- the source code is then compiled using the native host optimizations (i.e. `-march=native` or `-mcpu=native` depending on the architecture).
|
||||
2. `automemcpy`
|
||||
- the binary that benchmarks the autogenerated implementations.
|
||||
3. `automemcpy_result_analyzer`
|
||||
- the binary that analyses the benchmark results.
|
||||
|
||||
You only need to build the two binaries: both pull in the autogenerated code as a dependency.
|
||||
|
||||
```shell
|
||||
make -C <BUILD_DIR> -j automemcpy automemcpy_result_analyzer
|
||||
```
|
||||
|
||||
## Running the benchmarks
|
||||
|
||||
Make sure to save the results of the benchmark as a json file.
|
||||
|
||||
```shell
|
||||
<BUILD_DIR>/bin/automemcpy --benchmark_out_format=json --benchmark_out=<RESULTS_DIR>/results.json
|
||||
```
|
||||
|
||||
### Additional useful options
|
||||
|
||||
|
||||
- `--benchmark_min_time=.2`
|
||||
|
||||
By default, each function is benchmarked for at least one second, here we lower it to 200ms.
|
||||
|
||||
- `--benchmark_filter="BM_Memset|BM_Bzero"`
|
||||
|
||||
By default, all functions are benchmarked, here we restrict them to `memset` and `bzero`.
|
||||
|
||||
Other options might be useful, use `--help` for more information.
|
||||
|
||||
## Analyzing the benchmarks
|
||||
|
||||
Analysis is performed by running `automemcpy_result_analyzer` on one or more json result files.
|
||||
|
||||
```shell
|
||||
<BUILD_DIR>/bin/automemcpy_result_analyzer <RESULTS_DIR>/results.json
|
||||
```
|
||||
|
||||
What it does:
|
||||
1. Gathers all throughput values for each function / distribution pair and picks the median one.\
|
||||
This allows picking a representative value over many runs of the benchmark. Please make sure all the runs happen under similar circumstances.
|
||||
|
||||
2. For each distribution, looks at the span of throughputs for functions of the same type (e.g. for distribution `A`, memcpy throughput spans from 2GiB/s to 5GiB/s).
|
||||
|
||||
3. For each distribution, give a normalized score to each function (e.g. For distribution `A`, function `M` scores 0.65).\
|
||||
This score is then turned into a grade `EXCELLENT`, `VERY_GOOD`, `GOOD`, `PASSABLE`, `INADEQUATE`, `MEDIOCRE`, `BAD` - so that each distribution categorizes how functions perform according to it.
|
||||
|
||||
4. A [Majority Judgement](https://en.wikipedia.org/wiki/Majority_judgment) process is then used to categorize each function. This enables finer analysis of how distributions agree on which function is better. In the following example, `Function_1` and `Function_2` are rated `EXCELLENT` but looking at the grade's distribution might help decide which is best.
|
||||
|
||||
| | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | INADEQUATE | MEDIOCRE | BAD |
|
||||
|------------|:---------:|:---------:|:----:|:--------:|:----------:|:--------:|:---:|
|
||||
| Function_1 | 7 | 1 | 2 | | | | |
|
||||
| Function_2 | 6 | 4 | | | | | |
|
||||
|
||||
The tool outputs the histogram of grades for each function. In case of tie, other dimensions might help decide (e.g. code size, performance on other microarchitectures).
|
||||
|
||||
```
|
||||
EXCELLENT |█▁▂ | Function_0
|
||||
EXCELLENT |█▅ | Function_1
|
||||
VERY_GOOD |▂█▁ ▁ | Function_2
|
||||
GOOD | ▁█▄ | Function_3
|
||||
PASSABLE | ▂▆▄█ | Function_4
|
||||
INADEQUATE | ▃▃█▁ | Function_5
|
||||
MEDIOCRE | █▆▁| Function_6
|
||||
BAD | ▁▁█| Function_7
|
||||
```
|
||||
@@ -1,26 +0,0 @@
|
||||
//===-- C++ code generation from NamedFunctionDescriptors -------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LIBC_BENCHMARKS_AUTOMEMCPY_CODEGEN_H
|
||||
#define LIBC_BENCHMARKS_AUTOMEMCPY_CODEGEN_H
|
||||
|
||||
#include "automemcpy/FunctionDescriptor.h"
|
||||
#include <llvm/ADT/ArrayRef.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
namespace automemcpy {
|
||||
|
||||
// This function serializes the array of FunctionDescriptors as a C++ file.
|
||||
void Serialize(raw_ostream &Stream, ArrayRef<FunctionDescriptor> FD);
|
||||
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LIBC_BENCHMARKS_AUTOMEMCPY_CODEGEN_H
|
||||
@@ -1,159 +0,0 @@
|
||||
//===-- Pod structs to describe a memory function----------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H
|
||||
#define LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <llvm/ADT/ArrayRef.h>
|
||||
#include <llvm/ADT/Hashing.h>
|
||||
#include <llvm/ADT/StringRef.h>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
|
||||
namespace llvm {
|
||||
namespace automemcpy {
|
||||
|
||||
// Injects comparison and hashing boilerplate into struct `T`.
// The variadic arguments name the members taking part in the operations:
//  - `asTuple()` ties them into a tuple of references,
//  - `operator==` and `operator<` compare those tuples,
//  - the nested `Hasher` functor hashes the same tuple.
#define COMPARABLE_AND_HASHABLE(T, ...)                                        \
  auto asTuple() const { return std::tie(__VA_ARGS__); }                       \
  bool operator==(const T &Other) const {                                      \
    return asTuple() == Other.asTuple();                                       \
  }                                                                            \
  bool operator<(const T &Other) const {                                       \
    return asTuple() < Other.asTuple();                                        \
  }                                                                            \
  struct Hasher {                                                              \
    std::size_t operator()(const T &Key) const {                               \
      return llvm::hash_value(Key.asTuple());                                  \
    }                                                                          \
  };
|
||||
|
||||
// Represents the maximum value for the size parameter of a memory function.
|
||||
// This is an `int` so we can use it as an expression in Z3.
|
||||
// It also allows for a more readable and compact representation when storing
|
||||
// the SizeSpan in the autogenerated C++ file.
|
||||
static constexpr int kMaxSize = INT_MAX;
|
||||
|
||||
// This mimics the `Arg` type in libc/src/string/memory_utils/elements.h without
|
||||
// having to depend on it.
|
||||
enum class AlignArg {
  _1,         // Serialized as `Arg::P1` by the code generator.
  _2,         // Serialized as `Arg::P2` by the code generator.
  ARRAY_SIZE, // Number of enumerators above, not a valid argument.
};
|
||||
|
||||
// Describes a range of sizes.
|
||||
// We use the begin/end representation instead of first/last to allow for empty
|
||||
// range (i.e. Begin == End)
|
||||
struct SizeSpan {
|
||||
size_t Begin = 0;
|
||||
size_t End = 0;
|
||||
|
||||
COMPARABLE_AND_HASHABLE(SizeSpan, Begin, End)
|
||||
};
|
||||
|
||||
// Describes a contiguous region.
|
||||
// In such a region all sizes are handled individually.
|
||||
// e.g. with Span = {0, 2};
|
||||
// if(size == 0) return Handle<0>();
|
||||
// if(size == 1) return Handle<1>();
|
||||
struct Contiguous {
|
||||
SizeSpan Span;
|
||||
|
||||
COMPARABLE_AND_HASHABLE(Contiguous, Span)
|
||||
};
|
||||
|
||||
// This struct represents a range of sizes over which to use an overlapping
|
||||
// strategy. An overlapping strategy of size N handles all sizes from N to 2xN.
|
||||
// The span may represent several contiguous overlaps.
|
||||
// e.g. with Span = {16, 128};
|
||||
// if(size >= 16 and size < 32) return Handle<Overlap<16>>();
|
||||
// if(size >= 32 and size < 64) return Handle<Overlap<32>>();
|
||||
// if(size >= 64 and size < 128) return Handle<Overlap<64>>();
|
||||
struct Overlap {
|
||||
SizeSpan Span;
|
||||
|
||||
COMPARABLE_AND_HASHABLE(Overlap, Span)
|
||||
};
|
||||
|
||||
// Describes a region using a loop handling BlockSize bytes at a time. The
|
||||
// remaining bytes of the loop are handled with an overlapping operation.
|
||||
struct Loop {
|
||||
SizeSpan Span;
|
||||
size_t BlockSize = 0;
|
||||
|
||||
COMPARABLE_AND_HASHABLE(Loop, Span, BlockSize)
|
||||
};
|
||||
|
||||
// Same as `Loop` but starts by aligning a buffer on `Alignment` bytes.
|
||||
// A first operation handling 'Alignment` bytes is performed followed by a
|
||||
// sequence of Loop.BlockSize bytes operation. The Loop starts processing from
|
||||
// the next aligned byte in the chosen buffer. The remaining bytes of the loop
|
||||
// are handled with an overlapping operation.
|
||||
struct AlignedLoop {
|
||||
Loop Loop;
|
||||
size_t Alignment = 0; // Size of the alignment.
|
||||
AlignArg AlignTo = AlignArg::_1; // Which buffer to align.
|
||||
|
||||
COMPARABLE_AND_HASHABLE(AlignedLoop, Loop, Alignment, AlignTo)
|
||||
};
|
||||
|
||||
// Some processors offer special instruction to handle the memory function
|
||||
// completely, we refer to such instructions as accelerators.
|
||||
struct Accelerator {
|
||||
SizeSpan Span;
|
||||
|
||||
COMPARABLE_AND_HASHABLE(Accelerator, Span)
|
||||
};
|
||||
|
||||
// Family of primitives out of which a memory function is assembled.
enum class ElementTypeClass {
  SCALAR,  // Regular scalar operations.
  NATIVE,  // Vector or bitcount instructions.
  BUILTIN, // Deferred to the compiler.
};
|
||||
|
||||
// Identifies which memory function an implementation provides.
enum class FunctionType {
  MEMCPY,
  MEMCMP,
  BCMP,
  MEMSET,
  BZERO,
};
|
||||
|
||||
// This struct describes the skeleton of the implementation, it does not go into
|
||||
// every detail but is enough to uniquely identify the implementation.
|
||||
struct FunctionDescriptor {
|
||||
FunctionType Type;
|
||||
std::optional<Contiguous> Contiguous;
|
||||
std::optional<Overlap> Overlap;
|
||||
std::optional<Loop> Loop;
|
||||
std::optional<AlignedLoop> AlignedLoop;
|
||||
std::optional<Accelerator> Accelerator;
|
||||
ElementTypeClass ElementClass;
|
||||
|
||||
COMPARABLE_AND_HASHABLE(FunctionDescriptor, Type, Contiguous, Overlap, Loop,
|
||||
AlignedLoop, Accelerator, ElementClass)
|
||||
|
||||
inline size_t id() const { return llvm::hash_value(asTuple()); }
|
||||
};
|
||||
|
||||
// Same as above but with the function name.
|
||||
struct NamedFunctionDescriptor {
|
||||
StringRef Name;
|
||||
FunctionDescriptor Desc;
|
||||
};
|
||||
|
||||
// Hashes an ArrayRef by combining the hashes of its elements in order.
template <typename T> llvm::hash_code hash_value(const ArrayRef<T> &V) {
  const auto First = V.begin();
  const auto Last = V.end();
  return llvm::hash_combine_range(First, Last);
}
|
||||
// Hashes any type exposing `asTuple()` by hashing the tuple it returns.
template <typename T> llvm::hash_code hash_value(const T &O) {
  const auto Members = O.asTuple();
  return llvm::hash_value(Members);
}
|
||||
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
|
||||
#endif /* LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H */
|
||||
@@ -1,62 +0,0 @@
|
||||
//===-- Generate random but valid function descriptors ---------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_RANDOM_FUNCTION_GENERATOR_H
|
||||
#define LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_RANDOM_FUNCTION_GENERATOR_H
|
||||
|
||||
#include "automemcpy/FunctionDescriptor.h"
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <llvm/ADT/ArrayRef.h>
|
||||
#include <llvm/ADT/StringRef.h>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include <z3++.h>
|
||||
|
||||
namespace llvm {
|
||||
namespace automemcpy {
|
||||
|
||||
// Holds the state for the constraint solver.
|
||||
// It implements a single method that returns the next valid description.
|
||||
struct RandomFunctionGenerator {
|
||||
RandomFunctionGenerator();
|
||||
|
||||
// Get the next valid FunctionDescriptor or std::nullopt.
|
||||
std::optional<FunctionDescriptor> next();
|
||||
|
||||
private:
|
||||
// Returns an expression where `Variable` is forced to be one of the `Values`.
|
||||
z3::expr inSetConstraint(z3::expr &Variable, ArrayRef<int> Values) const;
|
||||
// Add constaints to `Begin` and `End` so that they are:
|
||||
// - between 0 and kMaxSize (inclusive)
|
||||
// - ordered (begin<=End)
|
||||
// - amongst a set of predefined values.
|
||||
void addBoundsAndAnchors(z3::expr &Begin, z3::expr &End);
|
||||
// Add constraints to make sure that the loop block size is amongst a set of
|
||||
// predefined values. Also makes sure that the loop that the loop is iterated
|
||||
// at least `LoopMinIter` times.
|
||||
void addLoopConstraints(const z3::expr &LoopBegin, const z3::expr &LoopEnd,
|
||||
z3::expr &LoopBlockSize, int LoopMinIter);
|
||||
|
||||
z3::context Context;
|
||||
z3::solver Solver;
|
||||
|
||||
z3::expr Type;
|
||||
z3::expr ContiguousBegin, ContiguousEnd;
|
||||
z3::expr OverlapBegin, OverlapEnd;
|
||||
z3::expr LoopBegin, LoopEnd, LoopBlockSize;
|
||||
z3::expr AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize,
|
||||
AlignedAlignment, AlignedArg;
|
||||
z3::expr AcceleratorBegin, AcceleratorEnd;
|
||||
z3::expr ElementClass;
|
||||
};
|
||||
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
|
||||
#endif /* LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_RANDOM_FUNCTION_GENERATOR_H */
|
||||
@@ -1,109 +0,0 @@
|
||||
//===-- Analyze benchmark JSON files ----------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H
|
||||
#define LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H
|
||||
|
||||
#include "automemcpy/FunctionDescriptor.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
namespace automemcpy {
|
||||
|
||||
// A Grade as in the Majority Judgment voting system.
|
||||
struct Grade {
|
||||
enum GradeEnum {
|
||||
EXCELLENT,
|
||||
VERY_GOOD,
|
||||
GOOD,
|
||||
PASSABLE,
|
||||
INADEQUATE,
|
||||
MEDIOCRE,
|
||||
BAD,
|
||||
ARRAY_SIZE,
|
||||
};
|
||||
|
||||
// Returns a human readable string of the enum.
|
||||
static StringRef getString(const GradeEnum &GE);
|
||||
|
||||
// Turns 'Score' into a GradeEnum.
|
||||
static GradeEnum judge(double Score);
|
||||
};
|
||||
|
||||
// A 'GradeEnum' indexed array with counts for each grade.
|
||||
using GradeHistogram = std::array<size_t, Grade::ARRAY_SIZE>;
|
||||
|
||||
// Identifies a Function by its name and type. Used as a key in a map.
|
||||
struct FunctionId {
|
||||
StringRef Name;
|
||||
FunctionType Type;
|
||||
COMPARABLE_AND_HASHABLE(FunctionId, Type, Name)
|
||||
};
|
||||
|
||||
struct PerDistributionData {
|
||||
std::vector<double> BytesPerSecondSamples;
|
||||
double BytesPerSecondMedian; // Median of samples for this distribution.
|
||||
double BytesPerSecondMean; // Mean of samples for this distribution.
|
||||
double BytesPerSecondVariance; // Variance of samples for this distribution.
|
||||
double Score; // Normalized score for this distribution.
|
||||
Grade::GradeEnum Grade; // Grade for this distribution.
|
||||
};
|
||||
|
||||
struct FunctionData {
|
||||
FunctionId Id;
|
||||
StringMap<PerDistributionData> PerDistributionData;
|
||||
double ScoresGeoMean; // Geomean of scores for each distribution.
|
||||
GradeHistogram GradeHisto = {}; // GradeEnum indexed array
|
||||
Grade::GradeEnum FinalGrade = Grade::BAD; // Overall grade for this function
|
||||
};
|
||||
|
||||
// Identifies a Distribution by its name. Used as a key in a map.
|
||||
struct DistributionId {
|
||||
StringRef Name;
|
||||
COMPARABLE_AND_HASHABLE(DistributionId, Name)
|
||||
};
|
||||
|
||||
// Identifies a Sample by its distribution and function. Used as a key in a map.
|
||||
struct SampleId {
|
||||
FunctionId Function;
|
||||
DistributionId Distribution;
|
||||
COMPARABLE_AND_HASHABLE(SampleId, Function.Type, Function.Name,
|
||||
Distribution.Name)
|
||||
};
|
||||
|
||||
// The type of Samples as reported by the Google Benchmark's JSON result file.
|
||||
// We are only interested in the "iteration" samples, the "aggregate" ones
|
||||
// represent derived metrics such as 'mean' or 'median'.
|
||||
enum class SampleType {
  UNKNOWN,   // Default value of `Sample::Type`.
  ITERATION, // An actual measurement, the only kind of interest.
  AGGREGATE, // A derived metric such as 'mean' or 'median'.
};
|
||||
|
||||
// A SampleId with an associated measured throughput.
|
||||
struct Sample {
|
||||
SampleId Id;
|
||||
SampleType Type = SampleType::UNKNOWN;
|
||||
double BytesPerSecond = 0;
|
||||
};
|
||||
|
||||
// This function collects Samples that belong to the same distribution and
|
||||
// function and retains the median one. It then stores each of them into a
|
||||
// 'FunctionData' and returns them as a vector.
|
||||
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples);
|
||||
|
||||
// Normalize the function's throughput per distribution.
|
||||
void fillScores(MutableArrayRef<FunctionData> Functions);
|
||||
|
||||
// Converts scores into grades, stores a histogram of grades for each function
|
||||
// and casts a median grade for each function.
|
||||
void castVotes(MutableArrayRef<FunctionData> Functions);
|
||||
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LIBC_BENCHMARKS_AUTOMEMCPY_RESULTANALYZER_H
|
||||
@@ -1,37 +0,0 @@
|
||||
# Library turning function descriptors into C++ source code.
add_library(automemcpy_codegen CodeGen.cpp)
target_link_libraries(automemcpy_codegen PUBLIC LLVMSupport)
target_include_directories(automemcpy_codegen PUBLIC ${LIBC_AUTOMEMCPY_INCLUDE_DIR})
llvm_update_compile_flags(automemcpy_codegen)

# Generator executable, it prints the implementations on its standard output.
add_executable(automemcpy_codegen_main CodeGenMain.cpp RandomFunctionGenerator.cpp)
target_link_libraries(automemcpy_codegen_main PUBLIC automemcpy_codegen ${Z3_LIBRARIES})
llvm_update_compile_flags(automemcpy_codegen_main)

# Run the generator and capture its output into Implementations.cpp.
# The executable is located with $<TARGET_FILE:...> so the command does not
# depend on CMAKE_RUNTIME_OUTPUT_DIRECTORY being set, nor on the generator
# placing binaries directly in that directory.
set(Implementations "${CMAKE_CURRENT_BINARY_DIR}/Implementations.cpp")
add_custom_command(
  OUTPUT ${Implementations}
  COMMAND $<TARGET_FILE:automemcpy_codegen_main> > "${Implementations}"
  COMMAND echo "automemcpy implementations generated in ${Implementations}"
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
  DEPENDS automemcpy_codegen_main
)

# The generated implementations, compiled with the native host options.
add_library(automemcpy_implementations "${Implementations}")
target_link_libraries(automemcpy_implementations PUBLIC LLVMSupport libc-memory-benchmark)
target_include_directories(automemcpy_implementations PRIVATE
  ${LIBC_SOURCE_DIR} ${LIBC_AUTOMEMCPY_INCLUDE_DIR})
target_compile_options(automemcpy_implementations PRIVATE ${LIBC_COMPILE_OPTIONS_NATIVE} "SHELL:-mllvm -combiner-global-alias-analysis" -fno-builtin)
llvm_update_compile_flags(automemcpy_implementations)

# Binary benchmarking the generated implementations.
add_executable(automemcpy EXCLUDE_FROM_ALL ${LIBC_SOURCE_DIR}/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp)
target_link_libraries(automemcpy PRIVATE libc-memory-benchmark benchmark_main automemcpy_implementations)
llvm_update_compile_flags(automemcpy)

# Library and binary analyzing the benchmark results.
add_library(automemcpy_result_analyzer_lib EXCLUDE_FROM_ALL ResultAnalyzer.cpp)
target_link_libraries(automemcpy_result_analyzer_lib PUBLIC LLVMSupport)
target_include_directories(automemcpy_result_analyzer_lib PUBLIC ${LIBC_AUTOMEMCPY_INCLUDE_DIR})
llvm_update_compile_flags(automemcpy_result_analyzer_lib)

add_executable(automemcpy_result_analyzer EXCLUDE_FROM_ALL ResultAnalyzerMain.cpp)
target_link_libraries(automemcpy_result_analyzer PRIVATE automemcpy_result_analyzer_lib automemcpy_implementations)
llvm_update_compile_flags(automemcpy_result_analyzer)
|
||||
@@ -1,644 +0,0 @@
|
||||
//===-- C++ code generation from NamedFunctionDescriptors -----------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This code is responsible for generating the "Implementations.cpp" file.
|
||||
// The file is composed like this:
|
||||
//
|
||||
// 1. Includes
|
||||
// 2. Using statements to help readability.
|
||||
// 3. Source code for all the mem function implementations.
|
||||
// 4. The function to retrieve all the function descriptors with their name.
|
||||
// llvm::ArrayRef<NamedFunctionDescriptor> getFunctionDescriptors();
|
||||
// 5. The functions for the benchmarking infrastructure:
|
||||
// llvm::ArrayRef<MemcpyConfiguration> getMemcpyConfigurations();
|
||||
// llvm::ArrayRef<MemcmpOrBcmpConfiguration> getMemcmpConfigurations();
|
||||
// llvm::ArrayRef<MemcmpOrBcmpConfiguration> getBcmpConfigurations();
|
||||
// llvm::ArrayRef<MemsetConfiguration> getMemsetConfigurations();
|
||||
// llvm::ArrayRef<BzeroConfiguration> getBzeroConfigurations();
|
||||
//
|
||||
//
|
||||
// Sections 3, 4 and 5 are handled by the following namespaces:
|
||||
// - codegen::functions
|
||||
// - codegen::descriptors
|
||||
// - codegen::configurations
|
||||
//
|
||||
// The programming style is functional. In each of these namespaces, the
|
||||
// original `NamedFunctionDescriptor` object is turned into a different type. We
|
||||
// make use of overloaded stream operators to format the resulting type into
|
||||
// either a function, a descriptor or a configuration. The entry point of each
|
||||
// namespace is the Serialize function.
|
||||
//
|
||||
// Note the code here is better understood by starting from the `Serialize`
|
||||
// function at the end of the file.
|
||||
|
||||
#include "automemcpy/CodeGen.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include <cassert>
|
||||
#include <llvm/ADT/STLExtras.h>
|
||||
#include <llvm/ADT/StringSet.h>
|
||||
#include <llvm/Support/FormatVariadic.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
|
||||
namespace llvm {
|
||||
namespace automemcpy {
|
||||
namespace codegen {
|
||||
|
||||
// The indentation string.
|
||||
static constexpr StringRef kIndent = " ";
|
||||
|
||||
// The codegen namespace handles the serialization of a NamedFunctionDescriptor
|
||||
// into source code for the function, the descriptor and the configuration.
|
||||
|
||||
namespace functions {
|
||||
|
||||
// This namespace turns a NamedFunctionDescriptor into an actual implementation.
|
||||
// -----------------------------------------------------------------------------
|
||||
// e.g.
|
||||
// static void memcpy_0xB20D4702493C397E(char *__restrict dst,
|
||||
// const char *__restrict src,
|
||||
// size_t size) {
|
||||
// using namespace LIBC_NAMESPACE::x86;
|
||||
// if(size == 0) return;
|
||||
// if(size == 1) return copy<_1>(dst, src);
|
||||
// if(size < 4) return copy<HeadTail<_2>>(dst, src, size);
|
||||
// if(size < 8) return copy<HeadTail<_4>>(dst, src, size);
|
||||
// if(size < 16) return copy<HeadTail<_8>>(dst, src, size);
|
||||
// if(size < 32) return copy<HeadTail<_16>>(dst, src, size);
|
||||
// return copy<Accelerator>(dst, src, size);
|
||||
// }
|
||||
|
||||
// The `Serialize` method turns a `NamedFunctionDescriptor` into a
|
||||
// `FunctionImplementation` which holds all the information needed to produce
|
||||
// the C++ source code.
|
||||
|
||||
// An element identified by its size. It is rendered as `_<Size>` in the
// generated code (e.g. `_16`).
struct ElementType {
  size_t Size; // Size of the element.
};
|
||||
// The case `if(size == 0)` is encoded as a the Zero type.
|
||||
struct Zero {
|
||||
StringRef DefaultReturnValue;
|
||||
};
|
||||
// An individual size `if(size == X)` is encoded as an Individual type.
|
||||
struct Individual {
|
||||
size_t IfEq;
|
||||
ElementType Element;
|
||||
};
|
||||
// An overlap strategy is encoded as an Overlap type.
|
||||
struct Overlap {
|
||||
size_t IfLt;
|
||||
ElementType Element;
|
||||
};
|
||||
// A loop strategy is encoded as a Loop type.
|
||||
struct Loop {
|
||||
size_t IfLt;
|
||||
ElementType Element;
|
||||
};
|
||||
// An aligned loop strategy is encoded as an AlignedLoop type.
|
||||
struct AlignedLoop {
|
||||
size_t IfLt;
|
||||
ElementType Element;
|
||||
ElementType Alignment;
|
||||
StringRef AlignTo;
|
||||
};
|
||||
// Encodes the accelerator strategy.
struct Accelerator {
  size_t IfLt; // Exclusive upper bound of the sizes handled.
};
|
||||
// The Context stores data about the function type.
|
||||
struct Context {
|
||||
StringRef FunctionReturnType; // e.g. void* or int
|
||||
StringRef FunctionArgs;
|
||||
StringRef ElementOp; // copy, three_way_compare, splat_set, ...
|
||||
StringRef FixedSizeArgs;
|
||||
StringRef RuntimeSizeArgs;
|
||||
StringRef DefaultReturnValue;
|
||||
};
|
||||
// A detailed representation of the function implementation mapped from the
|
||||
// NamedFunctionDescriptor.
|
||||
struct FunctionImplementation {
|
||||
Context Ctx;
|
||||
StringRef Name;
|
||||
std::vector<Individual> Individuals;
|
||||
std::vector<Overlap> Overlaps;
|
||||
std::optional<Loop> Loop;
|
||||
std::optional<AlignedLoop> AlignedLoop;
|
||||
std::optional<Accelerator> Accelerator;
|
||||
ElementTypeClass ElementClass;
|
||||
};
|
||||
|
||||
// Returns the Context for each FunctionType.
|
||||
static Context getCtx(FunctionType FT) {
|
||||
switch (FT) {
|
||||
case FunctionType::MEMCPY:
|
||||
return {"void",
|
||||
"(char *__restrict dst, const char *__restrict src, size_t size)",
|
||||
"copy",
|
||||
"(dst, src)",
|
||||
"(dst, src, size)",
|
||||
""};
|
||||
case FunctionType::MEMCMP:
|
||||
return {"int",
|
||||
"(const char * lhs, const char * rhs, size_t size)",
|
||||
"three_way_compare",
|
||||
"(lhs, rhs)",
|
||||
"(lhs, rhs, size)",
|
||||
"0"};
|
||||
case FunctionType::MEMSET:
|
||||
return {"void",
|
||||
"(char * dst, int value, size_t size)",
|
||||
"splat_set",
|
||||
"(dst, value)",
|
||||
"(dst, value, size)",
|
||||
""};
|
||||
case FunctionType::BZERO:
|
||||
return {"void", "(char * dst, size_t size)",
|
||||
"splat_set", "(dst, 0)",
|
||||
"(dst, 0, size)", ""};
|
||||
default:
|
||||
report_fatal_error("Not yet implemented");
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the text used in the generated code to designate the argument to
// align. `AlignArg::ARRAY_SIZE` is not a valid argument: it is reported as a
// fatal error, and so is any value outside of the enumeration, instead of
// flowing off the end of the function.
static StringRef getAligntoString(const AlignArg &AlignTo) {
  switch (AlignTo) {
  case AlignArg::_1:
    return "Arg::P1";
  case AlignArg::_2:
    return "Arg::P2";
  case AlignArg::ARRAY_SIZE:
    break;
  }
  report_fatal_error("logic error");
}
|
||||
|
||||
// Prints an element as `_<Size>`.
static raw_ostream &operator<<(raw_ostream &Stream, const ElementType &E) {
  Stream << '_' << E.Size;
  return Stream;
}

// An individual size prints as its bare element.
static raw_ostream &operator<<(raw_ostream &Stream, const Individual &O) {
  Stream << O.Element;
  return Stream;
}

// Prints an overlap as `HeadTail<element>`.
static raw_ostream &operator<<(raw_ostream &Stream, const Overlap &O) {
  Stream << "HeadTail<" << O.Element << '>';
  return Stream;
}

// Prints a loop as `Loop<element>`.
static raw_ostream &operator<<(raw_ostream &Stream, const Loop &O) {
  Stream << "Loop<" << O.Element << '>';
  return Stream;
}

// Prints an aligned loop as `Align<alignment,arg>::Then<Loop<element>>`.
static raw_ostream &operator<<(raw_ostream &Stream, const AlignedLoop &O) {
  const Loop Inner{O.IfLt, O.Element};
  Stream << "Align<" << O.Alignment << ',' << O.AlignTo << ">::Then<" << Inner
         << ">";
  return Stream;
}

// The accelerator has no parameter to print.
static raw_ostream &operator<<(raw_ostream &Stream, const Accelerator &) {
  Stream << "Accelerator";
  return Stream;
}
|
||||
|
||||
template <typename T> struct IfEq {
|
||||
StringRef Op;
|
||||
StringRef Args;
|
||||
const T ∈
|
||||
};
|
||||
|
||||
template <typename T> struct IfLt {
|
||||
StringRef Op;
|
||||
StringRef Args;
|
||||
const T ∈
|
||||
};
|
||||
|
||||
// Prints the `if(size == 0) return [value];` statement. The value is only
// printed when it is not empty.
static raw_ostream &operator<<(raw_ostream &Stream, const Zero &O) {
  Stream << kIndent << "if(size == 0) return";
  const bool HasReturnValue = !O.DefaultReturnValue.empty();
  if (HasReturnValue)
    Stream << ' ' << O.DefaultReturnValue;
  Stream << ";\n";
  return Stream;
}
|
||||
|
||||
template <typename T>
|
||||
static raw_ostream &operator<<(raw_ostream &Stream, const IfEq<T> &O) {
|
||||
return Stream << kIndent << "if(size == " << O.Element.IfEq << ") return "
|
||||
<< O.Op << '<' << O.Element << '>' << O.Args << ";\n";
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static raw_ostream &operator<<(raw_ostream &Stream, const IfLt<T> &O) {
|
||||
Stream << kIndent;
|
||||
if (O.Element.IfLt != kMaxSize)
|
||||
Stream << "if(size < " << O.Element.IfLt << ") ";
|
||||
return Stream << "return " << O.Op << '<' << O.Element << '>' << O.Args
|
||||
<< ";\n";
|
||||
}
|
||||
|
||||
// Prints the name of the namespace providing the primitives of `Class`.
// A value outside of the enumeration is reported as a fatal error instead of
// flowing off the end of the function.
static raw_ostream &operator<<(raw_ostream &Stream,
                               const ElementTypeClass &Class) {
  switch (Class) {
  case ElementTypeClass::SCALAR:
    return Stream << "scalar";
  case ElementTypeClass::BUILTIN:
    return Stream << "builtin";
  case ElementTypeClass::NATIVE:
    // FIXME: the framework should provide a `native` namespace that redirects
    // to x86, arm or other architectures.
    return Stream << "x86";
  }
  report_fatal_error("logic error");
}
|
||||
|
||||
// Emits the full C++ definition of one generated function: the signature, a
// `using namespace` line for the element class, one statement per strategy
// (individual sizes, overlaps, loop, aligned loop, accelerator, in that
// order) and the closing brace.
static raw_ostream &operator<<(raw_ostream &Stream,
                               const FunctionImplementation &FI) {
  const auto &Ctx = FI.Ctx;

  // Signature and opening brace.
  Stream << "static " << Ctx.FunctionReturnType << ' ' << FI.Name
         << Ctx.FunctionArgs << " {\n";
  Stream << kIndent << "using namespace LIBC_NAMESPACE::" << FI.ElementClass
         << ";\n";

  // Exact-size cases; an element of size 0 is emitted as the `Zero` guard.
  for (const auto &Single : FI.Individuals) {
    if (Single.Element.Size != 0)
      Stream << IfEq<Individual>{Ctx.ElementOp, Ctx.FixedSizeArgs, Single};
    else
      Stream << Zero{Ctx.DefaultReturnValue};
  }

  // Range-based strategies.
  for (const auto &Range : FI.Overlaps) {
    Stream << IfLt<Overlap>{Ctx.ElementOp, Ctx.RuntimeSizeArgs, Range};
  }
  if (FI.Loop) {
    Stream << IfLt<Loop>{Ctx.ElementOp, Ctx.RuntimeSizeArgs, *FI.Loop};
  }
  if (FI.AlignedLoop) {
    Stream << IfLt<AlignedLoop>{Ctx.ElementOp, Ctx.RuntimeSizeArgs,
                                *FI.AlignedLoop};
  }
  if (FI.Accelerator) {
    Stream << IfLt<Accelerator>{Ctx.ElementOp, Ctx.RuntimeSizeArgs,
                                *FI.Accelerator};
  }

  Stream << "}\n";
  return Stream;
}
|
||||
|
||||
// Turns a `NamedFunctionDescriptor` into a `FunctionImplementation` unfolding
|
||||
// the contiguous and overlap region into several statements. The zero case is
|
||||
// also mapped to its own type.
|
||||
static FunctionImplementation
|
||||
getImplementation(const NamedFunctionDescriptor &NamedFD) {
|
||||
const FunctionDescriptor &FD = NamedFD.Desc;
|
||||
FunctionImplementation Impl;
|
||||
Impl.Ctx = getCtx(FD.Type);
|
||||
Impl.Name = NamedFD.Name;
|
||||
Impl.ElementClass = FD.ElementClass;
|
||||
if (auto C = FD.Contiguous)
|
||||
for (size_t I = C->Span.Begin; I < C->Span.End; ++I)
|
||||
Impl.Individuals.push_back(Individual{I, ElementType{I}});
|
||||
if (auto C = FD.Overlap)
|
||||
for (size_t I = C->Span.Begin; I < C->Span.End; I *= 2)
|
||||
Impl.Overlaps.push_back(Overlap{2 * I, ElementType{I}});
|
||||
if (const auto &L = FD.Loop)
|
||||
Impl.Loop = Loop{L->Span.End, ElementType{L->BlockSize}};
|
||||
if (const auto &AL = FD.AlignedLoop)
|
||||
Impl.AlignedLoop =
|
||||
AlignedLoop{AL->Loop.Span.End, ElementType{AL->Loop.BlockSize},
|
||||
ElementType{AL->Alignment}, getAligntoString(AL->AlignTo)};
|
||||
if (const auto &A = FD.Accelerator)
|
||||
Impl.Accelerator = Accelerator{A->Span.End};
|
||||
return Impl;
|
||||
}
|
||||
|
||||
// Writes the generated implementation of every descriptor to `Stream`, in
// the order given.
static void Serialize(raw_ostream &Stream,
                      ArrayRef<NamedFunctionDescriptor> Descriptors) {
  for (const NamedFunctionDescriptor &Named : Descriptors) {
    Stream << getImplementation(Named);
  }
}
|
||||
|
||||
} // namespace functions
|
||||
|
||||
namespace descriptors {
|
||||
|
||||
// This namespace generates the getFunctionDescriptors function:
|
||||
// -------------------------------------------------------------
|
||||
// e.g.
|
||||
// ArrayRef<NamedFunctionDescriptor> getFunctionDescriptors() {
|
||||
// static constexpr NamedFunctionDescriptor kDescriptors[] = {
|
||||
// {"memcpy_0xE00E29EE73994E2B",{FunctionType::MEMCPY,std::nullopt,std::nullopt,std::nullopt,std::nullopt,Accelerator{{0,kMaxSize}},ElementTypeClass::NATIVE}},
|
||||
// {"memcpy_0x8661D80472487AB5",{FunctionType::MEMCPY,Contiguous{{0,1}},std::nullopt,std::nullopt,std::nullopt,Accelerator{{1,kMaxSize}},ElementTypeClass::NATIVE}},
|
||||
// ...
|
||||
// };
|
||||
// return ArrayRef(kDescriptors);
|
||||
// }
|
||||
|
||||
// Prints a span as `{Begin,End}`; an end equal to `kMaxSize` is printed
// symbolically as `kMaxSize`.
static raw_ostream &operator<<(raw_ostream &Stream, const SizeSpan &SS) {
  Stream << "{" << SS.Begin << ',';
  const bool IsUnbounded = SS.End == kMaxSize;
  if (!IsUnbounded) {
    Stream << SS.End;
  } else {
    Stream << "kMaxSize";
  }
  Stream << '}';
  return Stream;
}
|
||||
// Prints `Contiguous{<span>}`.
static raw_ostream &operator<<(raw_ostream &Stream, const Contiguous &O) {
  Stream << "Contiguous{";
  Stream << O.Span;
  Stream << '}';
  return Stream;
}
|
||||
// Prints `Overlap{<span>}`.
static raw_ostream &operator<<(raw_ostream &Stream, const Overlap &O) {
  Stream << "Overlap{";
  Stream << O.Span;
  Stream << '}';
  return Stream;
}
|
||||
// Prints `Loop{<span>,<block size>}`.
static raw_ostream &operator<<(raw_ostream &Stream, const Loop &O) {
  Stream << "Loop{" << O.Span;
  Stream << ',' << O.BlockSize;
  Stream << '}';
  return Stream;
}
|
||||
// Prints the qualified enumerator name of an `AlignArg`.
// `ARRAY_SIZE` is not a printable value; it, and any value outside the
// enumeration, aborts with a fatal error instead of running off the end of
// the function.
static raw_ostream &operator<<(raw_ostream &Stream, const AlignArg &O) {
  switch (O) {
  case AlignArg::_1:
    return Stream << "AlignArg::_1";
  case AlignArg::_2:
    return Stream << "AlignArg::_2";
  case AlignArg::ARRAY_SIZE:
    break;
  }
  report_fatal_error("logic error");
}
|
||||
// Prints `AlignedLoop{<loop>,<alignment>,<align arg>}`.
static raw_ostream &operator<<(raw_ostream &Stream, const AlignedLoop &O) {
  Stream << "AlignedLoop{" << O.Loop;
  Stream << ',' << O.Alignment;
  Stream << ',' << O.AlignTo;
  Stream << '}';
  return Stream;
}
|
||||
// Prints `Accelerator{<span>}`.
static raw_ostream &operator<<(raw_ostream &Stream, const Accelerator &O) {
  Stream << "Accelerator{";
  Stream << O.Span;
  Stream << '}';
  return Stream;
}
|
||||
// Prints the qualified enumerator name of an `ElementTypeClass`.
static raw_ostream &operator<<(raw_ostream &Stream, const ElementTypeClass &O) {
  switch (O) {
  case ElementTypeClass::SCALAR:
    Stream << "ElementTypeClass::SCALAR";
    break;
  case ElementTypeClass::BUILTIN:
    Stream << "ElementTypeClass::BUILTIN";
    break;
  case ElementTypeClass::NATIVE:
    Stream << "ElementTypeClass::NATIVE";
    break;
  }
  return Stream;
}
|
||||
// Prints the qualified enumerator name of a `FunctionType`.
static raw_ostream &operator<<(raw_ostream &Stream, const FunctionType &T) {
  switch (T) {
  case FunctionType::MEMCPY:
    Stream << "FunctionType::MEMCPY";
    break;
  case FunctionType::MEMCMP:
    Stream << "FunctionType::MEMCMP";
    break;
  case FunctionType::BCMP:
    Stream << "FunctionType::BCMP";
    break;
  case FunctionType::MEMSET:
    Stream << "FunctionType::MEMSET";
    break;
  case FunctionType::BZERO:
    Stream << "FunctionType::BZERO";
    break;
  }
  return Stream;
}
|
||||
template <typename T>
|
||||
static raw_ostream &operator<<(raw_ostream &Stream,
|
||||
const std::optional<T> &MaybeT) {
|
||||
if (MaybeT)
|
||||
return Stream << *MaybeT;
|
||||
return Stream << "std::nullopt";
|
||||
}
|
||||
// Prints a descriptor as a brace-enclosed, comma-separated list of its
// fields: type, contiguous, overlap, loop, aligned loop, accelerator and
// element class.
static raw_ostream &operator<<(raw_ostream &Stream,
                               const FunctionDescriptor &FD) {
  Stream << '{' << FD.Type;
  Stream << ',' << FD.Contiguous;
  Stream << ',' << FD.Overlap;
  Stream << ',' << FD.Loop;
  Stream << ',' << FD.AlignedLoop;
  Stream << ',' << FD.Accelerator;
  Stream << ',' << FD.ElementClass;
  Stream << '}';
  return Stream;
}
|
||||
// Prints `{"<name>",<descriptor>}`.
static raw_ostream &operator<<(raw_ostream &Stream,
                               const NamedFunctionDescriptor &NFD) {
  Stream << '{' << '"' << NFD.Name << '"';
  Stream << ',' << NFD.Desc;
  Stream << '}';
  return Stream;
}
|
||||
template <typename T>
|
||||
static raw_ostream &operator<<(raw_ostream &Stream,
|
||||
const std::vector<T> &VectorT) {
|
||||
Stream << '{';
|
||||
bool First = true;
|
||||
for (const auto &Obj : VectorT) {
|
||||
if (!First)
|
||||
Stream << ',';
|
||||
Stream << Obj;
|
||||
First = false;
|
||||
}
|
||||
return Stream << '}';
|
||||
}
|
||||
|
||||
// Emits the definition of `getFunctionDescriptors()`: a fixed prologue that
// opens a `static constexpr NamedFunctionDescriptor kDescriptors[]` array,
// one doubly-indented line per descriptor terminated by ",\n", and a fixed
// epilogue that closes the array and returns it as an `ArrayRef`.
static void Serialize(raw_ostream &Stream,
|
||||
ArrayRef<NamedFunctionDescriptor> Descriptors) {
|
||||
Stream << R"(ArrayRef<NamedFunctionDescriptor> getFunctionDescriptors() {
|
||||
static constexpr NamedFunctionDescriptor kDescriptors[] = {
|
||||
)";
|
||||
for (size_t I = 0, E = Descriptors.size(); I < E; ++I) {
|
||||
Stream << kIndent << kIndent << Descriptors[I] << ",\n";
|
||||
}
|
||||
Stream << R"( };
|
||||
return ArrayRef(kDescriptors);
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
} // namespace descriptors
|
||||
|
||||
namespace configurations {
|
||||
|
||||
// This namespace generates the getXXXConfigurations functions:
|
||||
// ------------------------------------------------------------
|
||||
// e.g.
|
||||
// llvm::ArrayRef<MemcpyConfiguration> getMemcpyConfigurations() {
|
||||
// using namespace LIBC_NAMESPACE;
|
||||
// static constexpr MemcpyConfiguration kConfigurations[] = {
|
||||
// {Wrap<memcpy_0xE00E29EE73994E2B>, "memcpy_0xE00E29EE73994E2B"},
|
||||
// {Wrap<memcpy_0x8661D80472487AB5>, "memcpy_0x8661D80472487AB5"},
|
||||
// ...
|
||||
// };
|
||||
// return llvm::ArrayRef(kConfigurations);
|
||||
// }
|
||||
|
||||
// The `Wrap` template function is provided in the `Main` function below.
|
||||
// It is used to adapt the generated code to the prototype of the C function.
|
||||
// For instance, the generated code for a `memcpy` takes `char*` pointers and
|
||||
// returns nothing but the original C `memcpy` function takes and returns `void*`
|
||||
// pointers.
|
||||
|
||||
struct FunctionName {
|
||||
FunctionType ForType;
|
||||
};
|
||||
|
||||
struct ReturnType {
|
||||
FunctionType ForType;
|
||||
};
|
||||
|
||||
struct Configuration {
|
||||
FunctionName Name;
|
||||
ReturnType Type;
|
||||
std::vector<const NamedFunctionDescriptor *> Descriptors;
|
||||
};
|
||||
|
||||
// Prints the name of the configuration getter for the wrapped function type.
static raw_ostream &operator<<(raw_ostream &Stream, const FunctionName &FN) {
  switch (FN.ForType) {
  case FunctionType::MEMCPY:
    Stream << "getMemcpyConfigurations";
    break;
  case FunctionType::MEMCMP:
    Stream << "getMemcmpConfigurations";
    break;
  case FunctionType::BCMP:
    Stream << "getBcmpConfigurations";
    break;
  case FunctionType::MEMSET:
    Stream << "getMemsetConfigurations";
    break;
  case FunctionType::BZERO:
    Stream << "getBzeroConfigurations";
    break;
  }
  return Stream;
}
|
||||
|
||||
// Prints the configuration type name for the wrapped function type.
// MEMCMP and BCMP share `MemcmpOrBcmpConfiguration`.
static raw_ostream &operator<<(raw_ostream &Stream, const ReturnType &RT) {
  switch (RT.ForType) {
  case FunctionType::MEMCPY:
    Stream << "MemcpyConfiguration";
    break;
  case FunctionType::MEMCMP:
  case FunctionType::BCMP:
    Stream << "MemcmpOrBcmpConfiguration";
    break;
  case FunctionType::MEMSET:
    Stream << "MemsetConfiguration";
    break;
  case FunctionType::BZERO:
    Stream << "BzeroConfiguration";
    break;
  }
  return Stream;
}
|
||||
|
||||
// Prints one configuration entry, `{Wrap<name>, "name"}`, for the descriptor
// pointed to by `FD`. `FD` is dereferenced unconditionally.
static raw_ostream &operator<<(raw_ostream &Stream,
                               const NamedFunctionDescriptor *FD) {
  const auto &Name = FD->Name;
  Stream << formatv("{Wrap<{0}>, \"{0}\"}", Name);
  return Stream;
}
|
||||
|
||||
// Prints one doubly-indented configuration entry per line, each followed by
// ",\n".
static raw_ostream &
operator<<(raw_ostream &Stream,
           const std::vector<const NamedFunctionDescriptor *> &Descriptors) {
  for (const NamedFunctionDescriptor *Entry : Descriptors) {
    Stream << kIndent << kIndent << Entry << ",\n";
  }
  return Stream;
}
|
||||
|
||||
// Prints a complete `getXXXConfigurations` function. With no descriptors the
// body is just `return {};`; otherwise it declares a `static constexpr`
// array of entries and returns it as an `llvm::ArrayRef`.
static raw_ostream &operator<<(raw_ostream &Stream, const Configuration &C) {
  Stream << "llvm::ArrayRef<" << C.Type << "> " << C.Name << "() {\n";
  const bool HasEntries = !C.Descriptors.empty();
  if (HasEntries) {
    Stream << kIndent << "using namespace LIBC_NAMESPACE;\n";
    Stream << kIndent << "static constexpr " << C.Type
           << " kConfigurations[] = {\n";
    Stream << C.Descriptors;
    Stream << kIndent << "};\n";
    Stream << kIndent << "return llvm::ArrayRef(kConfigurations);\n";
  } else {
    Stream << kIndent << "return {};\n";
  }
  Stream << "}\n";
  return Stream;
}
|
||||
|
||||
// Prints the `getXXXConfigurations` function for `FT`, listing only the
// descriptors whose type equals `FT` (in their original order).
static void Serialize(raw_ostream &Stream, FunctionType FT,
                      ArrayRef<NamedFunctionDescriptor> Descriptors) {
  Configuration Conf;
  Conf.Name = {FT};
  Conf.Type = {FT};
  for (const NamedFunctionDescriptor &Candidate : Descriptors) {
    const bool Matches = Candidate.Desc.Type == FT;
    if (Matches)
      Conf.Descriptors.push_back(&Candidate);
  }
  Stream << Conf;
}
|
||||
|
||||
} // namespace configurations
|
||||
// Writes the whole generated source file to `Stream`:
//  - a header comment with the number of functions, the includes and the
//    `using` declarations for the benchmark configuration types;
//  - the generated function bodies inside `LIBC_NAMESPACE_DECL`;
//  - `getFunctionDescriptors()` inside `llvm::automemcpy`;
//  - for each of memcpy, memcmp/bcmp, memset and bzero, a `Wrap` adapter
//    template followed by the matching `getXXXConfigurations()` function;
//  - an empty `getMemmoveConfigurations()`;
//  - a trailing comment repeating the number of functions.
static void Serialize(raw_ostream &Stream,
|
||||
ArrayRef<NamedFunctionDescriptor> Descriptors) {
|
||||
Stream << "// This file is auto-generated by libc/benchmarks/automemcpy.\n";
|
||||
Stream << "// Functions : " << Descriptors.size() << "\n";
|
||||
Stream << "\n";
|
||||
Stream << "#include \"LibcFunctionPrototypes.h\"\n";
|
||||
Stream << "#include \"automemcpy/FunctionDescriptor.h\"\n";
|
||||
Stream << "#include \"src/string/memory_utils/elements.h\"\n";
|
||||
Stream << "\n";
|
||||
Stream << "using llvm::libc_benchmarks::BzeroConfiguration;\n";
|
||||
Stream << "using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration;\n";
|
||||
Stream << "using llvm::libc_benchmarks::MemcpyConfiguration;\n";
|
||||
Stream << "using llvm::libc_benchmarks::MemmoveConfiguration;\n";
|
||||
Stream << "using llvm::libc_benchmarks::MemsetConfiguration;\n";
|
||||
Stream << "\n";
|
||||
Stream << "namespace LIBC_NAMESPACE_DECL {\n";
|
||||
Stream << "\n";
|
||||
codegen::functions::Serialize(Stream, Descriptors);
|
||||
Stream << "\n";
|
||||
Stream << "} // namespace LIBC_NAMESPACE_DECL\n";
|
||||
Stream << "\n";
|
||||
Stream << "namespace llvm {\n";
|
||||
Stream << "namespace automemcpy {\n";
|
||||
Stream << "\n";
|
||||
codegen::descriptors::Serialize(Stream, Descriptors);
|
||||
Stream << "\n";
|
||||
Stream << "} // namespace automemcpy\n";
|
||||
Stream << "} // namespace llvm\n";
|
||||
Stream << "\n";
|
||||
Stream << R"(
|
||||
using MemcpyStub = void (*)(char *__restrict, const char *__restrict, size_t);
|
||||
template <MemcpyStub Foo>
|
||||
void *Wrap(void *__restrict dst, const void *__restrict src, size_t size) {
|
||||
Foo(reinterpret_cast<char *__restrict>(dst),
|
||||
reinterpret_cast<const char *__restrict>(src), size);
|
||||
return dst;
|
||||
}
|
||||
)";
|
||||
codegen::configurations::Serialize(Stream, FunctionType::MEMCPY, Descriptors);
|
||||
Stream << R"(
|
||||
using MemcmpStub = int (*)(const char *, const char *, size_t);
|
||||
template <MemcmpStub Foo>
|
||||
int Wrap(const void *lhs, const void *rhs, size_t size) {
|
||||
return Foo(reinterpret_cast<const char *>(lhs),
|
||||
reinterpret_cast<const char *>(rhs), size);
|
||||
}
|
||||
)";
|
||||
codegen::configurations::Serialize(Stream, FunctionType::MEMCMP, Descriptors);
|
||||
codegen::configurations::Serialize(Stream, FunctionType::BCMP, Descriptors);
|
||||
Stream << R"(
|
||||
using MemsetStub = void (*)(char *, int, size_t);
|
||||
template <MemsetStub Foo> void *Wrap(void *dst, int value, size_t size) {
|
||||
Foo(reinterpret_cast<char *>(dst), value, size);
|
||||
return dst;
|
||||
}
|
||||
)";
|
||||
codegen::configurations::Serialize(Stream, FunctionType::MEMSET, Descriptors);
|
||||
Stream << R"(
|
||||
using BzeroStub = void (*)(char *, size_t);
|
||||
template <BzeroStub Foo> void Wrap(void *dst, size_t size) {
|
||||
Foo(reinterpret_cast<char *>(dst), size);
|
||||
}
|
||||
)";
|
||||
codegen::configurations::Serialize(Stream, FunctionType::BZERO, Descriptors);
|
||||
Stream << R"(
|
||||
llvm::ArrayRef<MemmoveConfiguration> getMemmoveConfigurations() {
|
||||
return {};
|
||||
}
|
||||
)";
|
||||
Stream << "// Functions : " << Descriptors.size() << "\n";
|
||||
}
|
||||
|
||||
} // namespace codegen
|
||||
|
||||
// Stores `VolatileStr` into a cache and returns a StringRef of the cached
|
||||
// version.
|
||||
StringRef getInternalizedString(std::string VolatileStr) {
|
||||
static llvm::StringSet StringCache;
|
||||
return StringCache.insert(std::move(VolatileStr)).first->getKey();
|
||||
}
|
||||
|
||||
// Returns the lowercase C function name for a `FunctionType`.
static StringRef getString(FunctionType FT) {
  StringRef Name;
  switch (FT) {
  case FunctionType::MEMCPY:
    Name = "memcpy";
    break;
  case FunctionType::MEMCMP:
    Name = "memcmp";
    break;
  case FunctionType::BCMP:
    Name = "bcmp";
    break;
  case FunctionType::MEMSET:
    Name = "memset";
    break;
  case FunctionType::BZERO:
    Name = "bzero";
    break;
  }
  return Name;
}
|
||||
|
||||
// Public entry point: names every descriptor, sorts them and writes the
// generated source file to `Stream`.
//
// Each descriptor is named `<function>_<id as 16 upper-case hex digits>`; the
// name is interned so the stored StringRef stays valid.
void Serialize(raw_ostream &Stream, ArrayRef<FunctionDescriptor> Descriptors) {
  std::vector<NamedFunctionDescriptor> FunctionDescriptors;
  FunctionDescriptors.reserve(Descriptors.size());
  for (const FunctionDescriptor &FD : Descriptors) {
    NamedFunctionDescriptor &Named = FunctionDescriptors.emplace_back();
    Named.Name = getInternalizedString(
        formatv("{0}_{1:X16}", getString(FD.Type), FD.id()));
    // `Descriptors` is a read-only view, so the descriptor is copied; a
    // `std::move` here would silently degrade to the same copy.
    Named.Desc = FD;
  }
  // Sort functions so they are easier to spot in the generated C++ file.
  std::sort(FunctionDescriptors.begin(), FunctionDescriptors.end(),
            [](const NamedFunctionDescriptor &A,
               const NamedFunctionDescriptor &B) { return A.Desc < B.Desc; });
  codegen::Serialize(Stream, FunctionDescriptors);
}
|
||||
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
@@ -1,29 +0,0 @@
|
||||
#include "automemcpy/CodeGen.h"
|
||||
#include "automemcpy/RandomFunctionGenerator.h"
|
||||
#include <optional>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace llvm {
|
||||
namespace automemcpy {
|
||||
|
||||
std::vector<FunctionDescriptor> generateFunctionDescriptors() {
|
||||
std::unordered_set<FunctionDescriptor, FunctionDescriptor::Hasher> Seen;
|
||||
std::vector<FunctionDescriptor> FunctionDescriptors;
|
||||
RandomFunctionGenerator P;
|
||||
while (std::optional<FunctionDescriptor> MaybeFD = P.next()) {
|
||||
FunctionDescriptor FD = *MaybeFD;
|
||||
if (Seen.count(FD)) // FIXME: Z3 sometimes returns twice the same object.
|
||||
continue;
|
||||
Seen.insert(FD);
|
||||
FunctionDescriptors.push_back(std::move(FD));
|
||||
}
|
||||
return FunctionDescriptors;
|
||||
}
|
||||
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
|
||||
// Generates all function descriptors and prints the resulting source file on
// standard output. Command-line arguments are ignored.
int main(int, char **) {
  const auto Descriptors = llvm::automemcpy::generateFunctionDescriptors();
  llvm::automemcpy::Serialize(llvm::outs(), Descriptors);
  return 0;
}
|
||||
@@ -1,280 +0,0 @@
|
||||
//===-- Generate random but valid function descriptors -------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "automemcpy/RandomFunctionGenerator.h"
|
||||
|
||||
#include <llvm/ADT/StringRef.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
|
||||
#include <optional>
|
||||
#include <set>
|
||||
|
||||
namespace llvm {
|
||||
namespace automemcpy {
|
||||
|
||||
// Exploration parameters
|
||||
// ----------------------
|
||||
// Here we define a set of values that will constrain the exploration and
|
||||
// limit combinatorial explosion.
|
||||
|
||||
// We limit the number of cases for individual sizes to sizes up to 4.
|
||||
// More individual sizes don't bring much over the overlapping strategy.
|
||||
static constexpr int kMaxIndividualSize = 4;
|
||||
|
||||
// We limit Overlapping Strategy to sizes up to 256.
|
||||
// An overlap of 256B means accessing 128B at once which is usually not
|
||||
// feasible by current CPUs. We rely on the compiler to generate multiple
|
||||
// loads/stores if needed but higher sizes are unlikely to benefit from hardware
|
||||
// acceleration.
|
||||
static constexpr int kMaxOverlapSize = 256;
|
||||
|
||||
// For the loop strategies, we make sure that they iterate at least a certain
|
||||
// number of times to amortize the cost of looping.
|
||||
static constexpr int kLoopMinIter = 3;
|
||||
static constexpr int kAlignedLoopMinIter = 2;
|
||||
|
||||
// We restrict the size of the block of data to handle in a loop.
|
||||
// Generally speaking block size <= 16 perform poorly.
|
||||
static constexpr int kLoopBlockSize[] = {16, 32, 64};
|
||||
|
||||
// We restrict alignment to the following values.
|
||||
static constexpr int kLoopAlignments[] = {16, 32, 64};
|
||||
|
||||
// We make sure that the region bounds are one of the following values.
|
||||
static constexpr int kAnchors[] = {0, 1, 2, 4, 8, 16, 32, 48,
|
||||
64, 96, 128, 256, 512, 1024, kMaxSize};
|
||||
|
||||
// We also allow disabling loops, aligned loops and accelerators.
|
||||
static constexpr bool kDisableLoop = false;
|
||||
static constexpr bool kDisableAlignedLoop = false;
|
||||
static constexpr bool kDisableAccelerator = false;
|
||||
|
||||
// For memcpy, we can also explore whether aligning on source or destination has
|
||||
// an effect.
|
||||
static constexpr bool kExploreAlignmentArg = true;
|
||||
|
||||
// The function we generate code for.
|
||||
// BCMP is specifically disabled for now.
|
||||
static constexpr int kFunctionTypes[] = {
|
||||
(int)FunctionType::MEMCPY,
|
||||
(int)FunctionType::MEMCMP,
|
||||
// (int)FunctionType::BCMP,
|
||||
(int)FunctionType::MEMSET,
|
||||
(int)FunctionType::BZERO,
|
||||
};
|
||||
|
||||
// The actual implementation of each function can be handled via primitive types
|
||||
// (SCALAR), vector types where available (NATIVE) or by the compiler (BUILTIN).
|
||||
// We want to move toward delegating the code generation entirely to the
|
||||
// compiler but for now we have to make use of -per microarchitecture- custom
|
||||
// implementations. Scalar being more portable but also less performant, we
|
||||
// remove it as well.
|
||||
static constexpr int kElementClasses[] = {
|
||||
// (int)ElementTypeClass::SCALAR,
|
||||
(int)ElementTypeClass::NATIVE,
|
||||
// (int)ElementTypeClass::BUILTIN
|
||||
};
|
||||
|
||||
// Creates one Z3 integer constant per descriptor field and registers every
// constraint that defines a valid function descriptor. Constraints are added
// to the solver in a fixed order, which is kept as-is.
RandomFunctionGenerator::RandomFunctionGenerator()
    : Solver(Context), Type(Context.int_const("Type")),
      ContiguousBegin(Context.int_const("ContiguousBegin")),
      ContiguousEnd(Context.int_const("ContiguousEnd")),
      OverlapBegin(Context.int_const("OverlapBegin")),
      OverlapEnd(Context.int_const("OverlapEnd")),
      LoopBegin(Context.int_const("LoopBegin")),
      LoopEnd(Context.int_const("LoopEnd")),
      LoopBlockSize(Context.int_const("LoopBlockSize")),
      AlignedLoopBegin(Context.int_const("AlignedLoopBegin")),
      AlignedLoopEnd(Context.int_const("AlignedLoopEnd")),
      AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")),
      AlignedAlignment(Context.int_const("AlignedAlignment")),
      AlignedArg(Context.int_const("AlignedArg")),
      AcceleratorBegin(Context.int_const("AcceleratorBegin")),
      AcceleratorEnd(Context.int_const("AcceleratorEnd")),
      ElementClass(Context.int_const("ElementClass")) {
  // All possible functions.
  Solver.add(inSetConstraint(Type, kFunctionTypes));

  // Every region has bounds taken from the anchor set.
  addBoundsAndAnchors(ContiguousBegin, ContiguousEnd);
  addBoundsAndAnchors(OverlapBegin, OverlapEnd);
  addBoundsAndAnchors(LoopBegin, LoopEnd);
  addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd);
  addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd);

  // Regions are chained in a fixed order, each one starting where the
  // previous one ends, and the `Accelerator` strategy always comes last as
  // it is typically more efficient for large sizes:
  // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator
  Solver.add(ContiguousEnd == OverlapBegin);
  Solver.add(OverlapEnd == LoopBegin);
  Solver.add(LoopEnd == AlignedLoopBegin);
  Solver.add(AlignedLoopEnd == AcceleratorBegin);

  // Fixed endpoints: the chain starts at size 0 and ends at `kMaxSize`.
  Solver.add(ContiguousBegin == 0);
  Solver.add(AcceleratorEnd == kMaxSize);

  // Contiguous
  Solver.add(ContiguousEnd <= kMaxIndividualSize + 1);

  // Overlap
  Solver.add(OverlapEnd <= kMaxOverlapSize + 1);
  // Overlap only ever makes sense when accessing multiple bytes at a time.
  // i.e. Overlap<1> is useless.
  Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2);

  // Loop
  addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter);

  // Aligned Loop
  addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize,
                     kAlignedLoopMinIter);
  Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments));
  Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64);
  Solver.add(AlignedLoopBlockSize >= AlignedAlignment);
  Solver.add(AlignedLoopBlockSize >= LoopBlockSize);

  // The alignment argument is only explored for memcpy (and only when
  // enabled); otherwise it is pinned to the first argument.
  const int FirstArg = static_cast<int>(AlignArg::_1);
  const int SecondArg = static_cast<int>(AlignArg::_2);
  z3::expr IsMemcpy = Type == static_cast<int>(FunctionType::MEMCPY);
  z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg;
  Solver.add(
      (ExploreAlignment && inSetConstraint(AlignedArg, {FirstArg, SecondArg})) ||
      (!ExploreAlignment && AlignedArg == FirstArg));

  // Accelerator: only Memcpy has accelerator for now.
  Solver.add(IsMemcpy || (AcceleratorBegin == AcceleratorEnd));

  // Element classes
  Solver.add(inSetConstraint(ElementClass, kElementClasses));

  // Optional switches forcing a strategy's region to be empty.
  if (kDisableLoop) {
    Solver.add(LoopBegin == LoopEnd);
  }
  if (kDisableAlignedLoop) {
    Solver.add(AlignedLoopBegin == AlignedLoopEnd);
  }
  if (kDisableAccelerator) {
    Solver.add(AcceleratorBegin == AcceleratorEnd);
  }
}
|
||||
|
||||
// Creates SizeSpan from Begin/End values.
|
||||
// Returns std::nullopt if Begin==End.
|
||||
static std::optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) {
|
||||
if (Begin == End)
|
||||
return std::nullopt;
|
||||
SizeSpan SS;
|
||||
SS.Begin = Begin;
|
||||
SS.End = End;
|
||||
return SS;
|
||||
}
|
||||
|
||||
// Generic method to create a `Region` struct with a Span or std::nullopt if
|
||||
// span is empty.
|
||||
template <typename Region>
|
||||
static std::optional<Region> As(size_t Begin, size_t End) {
|
||||
if (auto Span = AsSizeSpan(Begin, End)) {
|
||||
Region Output;
|
||||
Output.Span = *Span;
|
||||
return Output;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Returns a Loop struct or std::nullopt if span is empty.
|
||||
static std::optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) {
|
||||
if (auto Span = AsSizeSpan(Begin, End)) {
|
||||
Loop Output;
|
||||
Output.Span = *Span;
|
||||
Output.BlockSize = BlockSize;
|
||||
return Output;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Returns an AlignedLoop struct or std::nullopt if span is empty.
|
||||
static std::optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End,
|
||||
size_t BlockSize,
|
||||
size_t Alignment,
|
||||
AlignArg AlignTo) {
|
||||
if (auto Loop = AsLoop(Begin, End, BlockSize)) {
|
||||
AlignedLoop Output;
|
||||
Output.Loop = *Loop;
|
||||
Output.Alignment = Alignment;
|
||||
Output.AlignTo = AlignTo;
|
||||
return Output;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<FunctionDescriptor> RandomFunctionGenerator::next() {
|
||||
if (Solver.check() != z3::sat)
|
||||
return {};
|
||||
|
||||
z3::model m = Solver.get_model();
|
||||
|
||||
// Helper method to get the current numerical value of a z3::expr.
|
||||
const auto E = [&m](z3::expr &V) -> int {
|
||||
return m.eval(V).get_numeral_int();
|
||||
};
|
||||
|
||||
// Fill is the function descriptor to return.
|
||||
FunctionDescriptor R;
|
||||
R.Type = FunctionType(E(Type));
|
||||
R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd));
|
||||
R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd));
|
||||
R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize));
|
||||
R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd),
|
||||
E(AlignedLoopBlockSize), E(AlignedAlignment),
|
||||
AlignArg(E(AlignedArg)));
|
||||
R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd));
|
||||
R.ElementClass = ElementTypeClass(E(ElementClass));
|
||||
|
||||
// Express current state as a set of constraints.
|
||||
z3::expr CurrentLayout =
|
||||
(Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) &&
|
||||
(ContiguousEnd == E(ContiguousEnd)) &&
|
||||
(OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) &&
|
||||
(LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) &&
|
||||
(LoopBlockSize == E(LoopBlockSize)) &&
|
||||
(AlignedLoopBegin == E(AlignedLoopBegin)) &&
|
||||
(AlignedLoopEnd == E(AlignedLoopEnd)) &&
|
||||
(AlignedLoopBlockSize == E(AlignedLoopBlockSize)) &&
|
||||
(AlignedAlignment == E(AlignedAlignment)) &&
|
||||
(AlignedArg == E(AlignedArg)) &&
|
||||
(AcceleratorBegin == E(AcceleratorBegin)) &&
|
||||
(AcceleratorEnd == E(AcceleratorEnd)) &&
|
||||
(ElementClass == E(ElementClass));
|
||||
|
||||
// Ask solver to never show this configuration ever again.
|
||||
Solver.add(!CurrentLayout);
|
||||
return R;
|
||||
}
|
||||
|
||||
// Make sure `Variable` is one of the provided values.
|
||||
z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable,
|
||||
ArrayRef<int> Values) const {
|
||||
z3::expr_vector Args(Variable.ctx());
|
||||
for (int Value : Values)
|
||||
Args.push_back(Variable == Value);
|
||||
return z3::mk_or(Args);
|
||||
}
|
||||
|
||||
// Constrains a region's bounds: both are taken from `kAnchors`, and
// 0 <= Begin <= End <= kMaxSize.
void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin,
                                                  z3::expr &End) {
  // Begin and End are picked amongst a set of predefined values.
  const z3::expr BeginIsAnchor = inSetConstraint(Begin, kAnchors);
  Solver.add(BeginIsAnchor);
  const z3::expr EndIsAnchor = inSetConstraint(End, kAnchors);
  Solver.add(EndIsAnchor);
  // Ordering and range.
  Solver.add(Begin >= 0);
  Solver.add(Begin <= End);
  Solver.add(End <= kMaxSize);
}
|
||||
|
||||
// Constrains a loop region: the block size is one of `kLoopBlockSize`, and
// the region is either empty or starts strictly above
// `LoopMinIter * LoopBlockSize`.
void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin,
                                                 const z3::expr &LoopEnd,
                                                 z3::expr &LoopBlockSize,
                                                 int LoopMinIter) {
  Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize));
  const z3::expr IsEmpty = LoopBegin == LoopEnd;
  const z3::expr StartsHighEnough = LoopBegin > (LoopMinIter * LoopBlockSize);
  Solver.add(IsEmpty || StartsHighEnough);
}
|
||||
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
@@ -1,204 +0,0 @@
|
||||
//===-- Analyze benchmark JSON files --------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This code analyzes the json file produced by the `automemcpy` binary.
|
||||
//
|
||||
// As a reminder, `automemcpy` will benchmark each autogenerated memory
|
||||
// functions against one of the predefined distributions available in the
|
||||
// `libc/benchmarks/distributions` folder.
|
||||
//
|
||||
// It works as follows:
|
||||
// - Reads one or more json files.
|
||||
// - If there are several runs for the same function and distribution, picks the
|
||||
// median throughput (aka `BytesPerSecond`).
|
||||
// - Aggregates the throughput per distributions and scores them from worst (0)
|
||||
// to best (1).
|
||||
// - Each distribution categorizes each function into one of the following
|
||||
// categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
|
||||
// BAD.
|
||||
// - A process similar to the Majority Judgment voting system is used to `elect`
|
||||
// the best function. The histogram of grades is returned so we can
|
||||
// distinguish between functions with the same final grade. In the following
|
||||
// example both functions grade EXCELLENT but we may prefer the second one.
|
||||
//
|
||||
// | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
|
||||
// |------------|-----------|-----------|------|----------| ...
|
||||
// | Function_1 | 7 | 1 | 2 | | ...
|
||||
// | Function_2 | 6 | 4 | | | ...
|
||||
|
||||
#include "automemcpy/ResultAnalyzer.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include <numeric>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace automemcpy {
|
||||
|
||||
// Returns the upper-case name of a grade.
// `ARRAY_SIZE` is not a real grade; it, and any value outside the
// enumeration, aborts with a fatal error instead of running off the end of
// the function.
StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    break;
  }
  report_fatal_error("logic error");
}
|
||||
|
||||
// Maps a score to a grade using thresholds at multiples of 1/7: the first
// threshold (from 6/7 down to 1/7) that the score reaches decides the grade,
// and anything below 1/7 is BAD.
Grade::GradeEnum Grade::judge(double Score) {
  struct Step {
    double Threshold;
    GradeEnum Result;
  };
  const Step Ladder[] = {
      {6. / 7, EXCELLENT},  {5. / 7, VERY_GOOD}, {4. / 7, GOOD},
      {3. / 7, PASSABLE},   {2. / 7, INADEQUATE}, {1. / 7, MEDIOCRE},
  };
  for (const Step &Rung : Ladder) {
    if (Score >= Rung.Threshold)
      return Rung.Result;
  }
  return BAD;
}
|
||||
|
||||
// Returns the unbiased sample variance of `Samples`, i.e. the sum of squared
// deviations from `SampleMean` divided by (N - 1).
//
// `Samples` must not be empty. A single sample has no spread and yields 0.
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  const size_t Count = Samples.size();
  if (Count == 1)
    return 0;
  // Left fold over the samples, accumulating the squared deviations.
  const double SquaredDeviationsSum =
      std::accumulate(Samples.begin(), Samples.end(), 0.0,
                      [SampleMean](double Accumulated, double Sample) {
                        const double Deviation = Sample - SampleMean;
                        return Accumulated + Deviation * Deviation;
                      });
  return SquaredDeviationsSum / (Count - 1);
}
|
||||
|
||||
// Fills the mean, unbiased variance and median of the throughput samples stored
// in `Data`.
//
// The sample list must not be empty. It is partially reordered in place by the
// median selection. For an even number of samples the element at index N/2 of
// the sorted order is used as the median.
static void processPerDistributionData(PerDistributionData &Data) {
  auto &Throughputs = Data.BytesPerSecondSamples;
  assert(!Throughputs.empty());
  const size_t Count = Throughputs.size();

  // Mean: plain left-to-right summation divided by the number of samples.
  double Total = 0.0;
  for (const double Throughput : Throughputs)
    Total += Throughput;
  Data.BytesPerSecondMean = Total / Count;

  // Variance is computed around the mean obtained above.
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Throughputs, Data.BytesPerSecondMean);

  // Median: only the middle element needs to be at its sorted position.
  const auto MedianIt = Throughputs.begin() + Count / 2;
  std::nth_element(Throughputs.begin(), MedianIt, Throughputs.end());
  Data.BytesPerSecondMedian = *MedianIt;
}
|
||||
|
||||
// Buckets the throughput of every iteration sample by function and by
// distribution name, then computes mean, variance and median for each bucket.
//
// Samples whose type is not `SampleType::ITERATION` (e.g. aggregates) are
// discarded. The returned functions come out of an unordered map, so their
// order is unspecified.
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
    // Skip non-iteration samples instead of stopping at the first one:
    // iteration samples may follow an aggregate, for instance when the samples
    // of several input files have been appended one after another.
    if (S.Type != SampleType::ITERATION)
      continue;
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }

  std::vector<FunctionData> Output;
  Output.reserve(Functions.size());
  for (auto &[Id, Function] : Functions) {
    // The map key is the function identity; copy it into the value so the
    // output is self-describing once detached from the map.
    Function.Id = Id;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    Output.push_back(std::move(Function));
  }
  return Output;
}
|
||||
|
||||
// Computes a normalized score for every (function, distribution) pair.
//
// For each function type and distribution, the median throughputs of all
// functions are collected and linearly rescaled so that the slowest function
// scores 0 and the fastest scores 1. The result is stored in the `Score` field
// of the corresponding per-distribution data.
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;

    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };

  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    // `lowest()` is the most negative finite double. `min()` would be the
    // smallest *positive* value, which is never replaced when every observed
    // value is zero or negative.
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
    // Rescales `Value` from [Min, Max] to [0, 1].
    double normalize(double Value) const {
      // Degenerate range: a single candidate, or all candidates tied. There is
      // nothing to rank, so every candidate is treated as the best one rather
      // than producing 0/0 = NaN (which would poison the products and the
      // ordering computed from the scores later on).
      if (Max == Min)
        return 1.0;
      return (Value - Min) / (Max - Min);
    }
  };

  // First pass: gather the throughput range of every bucket.
  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }

  // Second pass: score each function relative to the range of its bucket.
  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      Pair.getValue().Score = ThroughputMinMax[K].normalize(Throughput);
    }
  }
}
|
||||
|
||||
// Turns the per-distribution scores of each function into grades and elects a
// final grade per function.
//
// For every function this:
//  - multiplies all its scores together into `ScoresGeoMean`,
//  - grades each distribution with `Grade::judge` and counts the grades in
//    `GradeHisto` (the histogram is incremented, not reset),
//  - walks the histogram from its first entry and picks as `FinalGrade` the
//    first grade at which the running vote count exceeds half of the votes,
//    falling back to `Grade::BAD` when no entry does.
void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions) {
    // One vote per distribution.
    Function.ScoresGeoMean = 1.0;
    for (auto &Entry : Function.PerDistributionData) {
      const double Score = Entry.getValue().Score;
      Function.ScoresGeoMean *= Score;
      const auto Vote = Grade::judge(Score);
      ++(Function.GradeHisto[Vote]);
      Entry.getValue().Grade = Vote;
    }

    // Election: locate the grade holding the median vote.
    const auto &Histogram = Function.GradeHisto;
    const size_t TotalVotes =
        std::accumulate(Histogram.begin(), Histogram.end(), 0U);
    const size_t HalfVotes = TotalVotes / 2;
    size_t RunningVotes = 0;
    size_t Index = 0;
    while (Index < Histogram.size()) {
      RunningVotes += Histogram[Index];
      if (RunningVotes > HalfVotes)
        break;
      ++Index;
    }
    Function.FinalGrade =
        Index < Histogram.size() ? Grade::GradeEnum(Index) : Grade::BAD;
  }
}
|
||||
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
@@ -1,175 +0,0 @@
|
||||
//===-- Application to analyze benchmark JSON files -----------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "automemcpy/ResultAnalyzer.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Support/JSON.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// User can specify one or more json filenames to process on the command line.
|
||||
static cl::list<std::string> InputFilenames(cl::Positional, cl::OneOrMore,
|
||||
cl::desc("<input json files>"));
|
||||
|
||||
// User can filter the distributions to be taken into account.
|
||||
static cl::list<std::string>
|
||||
KeepOnlyDistributions("keep-only-distributions",
|
||||
cl::desc("<comma separated list of distribution "
|
||||
"names, keeps all if unspecified>"));
|
||||
|
||||
namespace automemcpy {
|
||||
|
||||
// This is defined in the autogenerated 'Implementations.cpp' file.
|
||||
extern ArrayRef<NamedFunctionDescriptor> getFunctionDescriptors();
|
||||
|
||||
// Iterates over all functions and fills a map of function name to function
|
||||
// descriptor pointers.
|
||||
static StringMap<const FunctionDescriptor *> createFunctionDescriptorMap() {
|
||||
StringMap<const FunctionDescriptor *> Descriptors;
|
||||
for (const NamedFunctionDescriptor &FD : getFunctionDescriptors())
|
||||
Descriptors.insert_or_assign(FD.Name, &FD.Desc);
|
||||
return Descriptors;
|
||||
}
|
||||
|
||||
// Retrieves the function descriptor for a particular function name.
|
||||
static const FunctionDescriptor &getFunctionDescriptor(StringRef FunctionName) {
|
||||
static StringMap<const FunctionDescriptor *> Descriptors =
|
||||
createFunctionDescriptorMap();
|
||||
const auto *FD = Descriptors.lookup(FunctionName);
|
||||
if (!FD)
|
||||
report_fatal_error(
|
||||
Twine("No FunctionDescriptor for ").concat(FunctionName));
|
||||
return *FD;
|
||||
}
|
||||
|
||||
// Functions and distributions names are stored quite a few times so it's more
|
||||
// efficient to internalize these strings and refer to them through 'StringRef'.
|
||||
static StringRef getInternalizedString(StringRef VolatileStr) {
|
||||
static llvm::StringSet StringCache;
|
||||
return StringCache.insert(VolatileStr).first->getKey();
|
||||
}
|
||||
|
||||
// Helper function for the LLVM JSON API.
|
||||
bool fromJSON(const json::Value &V, Sample &Out, json::Path P) {
|
||||
std::string Label;
|
||||
std::string RunType;
|
||||
json::ObjectMapper O(V, P);
|
||||
if (O && O.map("bytes_per_second", Out.BytesPerSecond) &&
|
||||
O.map("run_type", RunType) && O.map("label", Label)) {
|
||||
const auto LabelPair = StringRef(Label).split(',');
|
||||
Out.Id.Function.Name = getInternalizedString(LabelPair.first);
|
||||
Out.Id.Function.Type = getFunctionDescriptor(LabelPair.first).Type;
|
||||
Out.Id.Distribution.Name = getInternalizedString(LabelPair.second);
|
||||
Out.Type = StringSwitch<SampleType>(RunType)
|
||||
.Case("aggregate", SampleType::AGGREGATE)
|
||||
.Case("iteration", SampleType::ITERATION);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// An object to represent the content of the JSON file.
|
||||
// This is easier to parse/serialize JSON when the structures of the json file
|
||||
// maps the structure of the object.
|
||||
struct JsonFile {
|
||||
std::vector<Sample> Samples;
|
||||
};
|
||||
|
||||
// Helper function for the LLVM JSON API.
|
||||
bool fromJSON(const json::Value &V, JsonFile &JF, json::Path P) {
|
||||
json::ObjectMapper O(V, P);
|
||||
return O && O.map("benchmarks", JF.Samples);
|
||||
}
|
||||
|
||||
// Global object to ease error reporting, it consumes errors and crash the
|
||||
// application with a meaningful message.
|
||||
static ExitOnError ExitOnErr;
|
||||
|
||||
// Main JSON parsing method. Reads the content of the file pointed to by
|
||||
// 'Filename' and returns a JsonFile object.
|
||||
JsonFile parseJsonResultFile(StringRef Filename) {
|
||||
auto Buf = ExitOnErr(errorOrToExpected(
|
||||
MemoryBuffer::getFile(Filename, /*bool IsText=*/true,
|
||||
/*RequiresNullTerminator=*/false)));
|
||||
auto JsonValue = ExitOnErr(json::parse(Buf->getBuffer()));
|
||||
json::Path::Root Root;
|
||||
JsonFile JF;
|
||||
if (!fromJSON(JsonValue, JF, Root))
|
||||
ExitOnErr(Root.getError());
|
||||
return JF;
|
||||
}
|
||||
|
||||
// Serializes the 'GradeHisto' to the provided 'Stream'.
|
||||
static void Serialize(raw_ostream &Stream, const GradeHistogram &GH) {
|
||||
static constexpr std::array<StringRef, 9> kCharacters = {
|
||||
" ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"};
|
||||
|
||||
const size_t Max = *std::max_element(GH.begin(), GH.end());
|
||||
for (size_t I = 0; I < GH.size(); ++I) {
|
||||
size_t Index = (float(GH[I]) / Max) * (kCharacters.size() - 1);
|
||||
Stream << kCharacters.at(Index);
|
||||
}
|
||||
}
|
||||
|
||||
int Main(int argc, char **argv) {
|
||||
ExitOnErr.setBanner("Automemcpy Json Results Analyzer stopped with error: ");
|
||||
cl::ParseCommandLineOptions(argc, argv, "Automemcpy Json Results Analyzer\n");
|
||||
|
||||
// Reads all samples stored in the input JSON files.
|
||||
std::vector<Sample> Samples;
|
||||
for (const auto &Filename : InputFilenames) {
|
||||
auto Result = parseJsonResultFile(Filename);
|
||||
llvm::append_range(Samples, Result.Samples);
|
||||
}
|
||||
|
||||
if (!KeepOnlyDistributions.empty()) {
|
||||
llvm::StringSet ValidDistributions;
|
||||
ValidDistributions.insert(KeepOnlyDistributions.begin(),
|
||||
KeepOnlyDistributions.end());
|
||||
llvm::erase_if(Samples, [&ValidDistributions](const Sample &S) {
|
||||
return !ValidDistributions.contains(S.Id.Distribution.Name);
|
||||
});
|
||||
}
|
||||
|
||||
// Extracts median of throughputs.
|
||||
std::vector<FunctionData> Functions = getThroughputs(Samples);
|
||||
fillScores(Functions);
|
||||
castVotes(Functions);
|
||||
|
||||
// Present data by function type, Grade and Geomean of scores.
|
||||
std::sort(Functions.begin(), Functions.end(),
|
||||
[](const FunctionData &A, const FunctionData &B) {
|
||||
const auto Less = [](const FunctionData &FD) {
|
||||
return std::make_tuple(FD.Id.Type, FD.FinalGrade,
|
||||
-FD.ScoresGeoMean);
|
||||
};
|
||||
return Less(A) < Less(B);
|
||||
});
|
||||
|
||||
// Print result.
|
||||
for (const FunctionData &Function : Functions) {
|
||||
outs() << formatv("{0,-10}", Grade::getString(Function.FinalGrade));
|
||||
outs() << " |";
|
||||
Serialize(outs(), Function.GradeHisto);
|
||||
outs() << "| ";
|
||||
outs().resetColor();
|
||||
outs() << formatv("{0,+25}", Function.Id.Name);
|
||||
outs() << "\n";
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
|
||||
int main(int argc, char **argv) { return llvm::automemcpy::Main(argc, argv); }
|
||||
@@ -1,9 +0,0 @@
|
||||
add_libc_benchmark_unittest(libc-automemcpy-codegen-test
|
||||
SRCS CodeGenTest.cpp
|
||||
DEPENDS automemcpy_codegen
|
||||
)
|
||||
|
||||
add_libc_benchmark_unittest(libc-automemcpy-result-analyzer-test
|
||||
SRCS ResultAnalyzerTest.cpp
|
||||
DEPENDS automemcpy_result_analyzer_lib
|
||||
)
|
||||
@@ -1,226 +0,0 @@
|
||||
//===-- Automemcpy CodeGen Test -------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "automemcpy/CodeGen.h"
|
||||
#include "automemcpy/RandomFunctionGenerator.h"
|
||||
#include "src/__support/macros/config.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include <optional>
|
||||
|
||||
using testing::AllOf;
|
||||
using testing::AnyOf;
|
||||
using testing::ElementsAre;
|
||||
using testing::Ge;
|
||||
using testing::Gt;
|
||||
using testing::Le;
|
||||
using testing::Lt;
|
||||
|
||||
namespace llvm {
|
||||
namespace automemcpy {
|
||||
namespace {
|
||||
|
||||
TEST(Automemcpy, Codegen) {
|
||||
static constexpr FunctionDescriptor kDescriptors[] = {
|
||||
{FunctionType::MEMCPY, std::nullopt, std::nullopt, std::nullopt, std::nullopt,
|
||||
Accelerator{{0, kMaxSize}}, ElementTypeClass::NATIVE},
|
||||
{FunctionType::MEMCPY, Contiguous{{0, 4}}, Overlap{{4, 256}},
|
||||
Loop{{256, kMaxSize}, 64}, std::nullopt, std::nullopt,
|
||||
ElementTypeClass::NATIVE},
|
||||
{FunctionType::MEMCMP, Contiguous{{0, 2}}, Overlap{{2, 64}}, std::nullopt,
|
||||
AlignedLoop{Loop{{64, kMaxSize}, 16}, 16, AlignArg::_1}, std::nullopt,
|
||||
ElementTypeClass::NATIVE},
|
||||
{FunctionType::MEMSET, Contiguous{{0, 2}}, Overlap{{2, 256}}, std::nullopt,
|
||||
AlignedLoop{Loop{{256, kMaxSize}, 32}, 16, AlignArg::_1}, std::nullopt,
|
||||
ElementTypeClass::NATIVE},
|
||||
{FunctionType::MEMSET, Contiguous{{0, 2}}, Overlap{{2, 256}}, std::nullopt,
|
||||
AlignedLoop{Loop{{256, kMaxSize}, 32}, 32, AlignArg::_1}, std::nullopt,
|
||||
ElementTypeClass::NATIVE},
|
||||
{FunctionType::BZERO, Contiguous{{0, 4}}, Overlap{{4, 128}}, std::nullopt,
|
||||
AlignedLoop{Loop{{128, kMaxSize}, 32}, 32, AlignArg::_1}, std::nullopt,
|
||||
ElementTypeClass::NATIVE},
|
||||
};
|
||||
|
||||
std::string Output;
|
||||
raw_string_ostream OutputStream(Output);
|
||||
Serialize(OutputStream, kDescriptors);
|
||||
|
||||
EXPECT_STREQ(Output.c_str(),
|
||||
R"(// This file is auto-generated by libc/benchmarks/automemcpy.
|
||||
// Functions : 6
|
||||
|
||||
#include "LibcFunctionPrototypes.h"
|
||||
#include "automemcpy/FunctionDescriptor.h"
|
||||
#include "src/string/memory_utils/elements.h"
|
||||
|
||||
using llvm::libc_benchmarks::BzeroConfiguration;
|
||||
using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration;
|
||||
using llvm::libc_benchmarks::MemcpyConfiguration;
|
||||
using llvm::libc_benchmarks::MemmoveConfiguration;
|
||||
using llvm::libc_benchmarks::MemsetConfiguration;
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
|
||||
static void memcpy_0xE00E29EE73994E2B(char *__restrict dst, const char *__restrict src, size_t size) {
|
||||
using namespace LIBC_NAMESPACE::x86;
|
||||
return copy<Accelerator>(dst, src, size);
|
||||
}
|
||||
static void memcpy_0x7381B60C7BE75EF9(char *__restrict dst, const char *__restrict src, size_t size) {
|
||||
using namespace LIBC_NAMESPACE::x86;
|
||||
if(size == 0) return;
|
||||
if(size == 1) return copy<_1>(dst, src);
|
||||
if(size == 2) return copy<_2>(dst, src);
|
||||
if(size == 3) return copy<_3>(dst, src);
|
||||
if(size < 8) return copy<HeadTail<_4>>(dst, src, size);
|
||||
if(size < 16) return copy<HeadTail<_8>>(dst, src, size);
|
||||
if(size < 32) return copy<HeadTail<_16>>(dst, src, size);
|
||||
if(size < 64) return copy<HeadTail<_32>>(dst, src, size);
|
||||
if(size < 128) return copy<HeadTail<_64>>(dst, src, size);
|
||||
if(size < 256) return copy<HeadTail<_128>>(dst, src, size);
|
||||
return copy<Loop<_64>>(dst, src, size);
|
||||
}
|
||||
static int memcmp_0x348D7BA6DB0EE033(const char * lhs, const char * rhs, size_t size) {
|
||||
using namespace LIBC_NAMESPACE::x86;
|
||||
if(size == 0) return 0;
|
||||
if(size == 1) return three_way_compare<_1>(lhs, rhs);
|
||||
if(size < 4) return three_way_compare<HeadTail<_2>>(lhs, rhs, size);
|
||||
if(size < 8) return three_way_compare<HeadTail<_4>>(lhs, rhs, size);
|
||||
if(size < 16) return three_way_compare<HeadTail<_8>>(lhs, rhs, size);
|
||||
if(size < 32) return three_way_compare<HeadTail<_16>>(lhs, rhs, size);
|
||||
if(size < 64) return three_way_compare<HeadTail<_32>>(lhs, rhs, size);
|
||||
return three_way_compare<Align<_16,Arg::Lhs>::Then<Loop<_16>>>(lhs, rhs, size);
|
||||
}
|
||||
static void memset_0x71E761699B999863(char * dst, int value, size_t size) {
|
||||
using namespace LIBC_NAMESPACE::x86;
|
||||
if(size == 0) return;
|
||||
if(size == 1) return splat_set<_1>(dst, value);
|
||||
if(size < 4) return splat_set<HeadTail<_2>>(dst, value, size);
|
||||
if(size < 8) return splat_set<HeadTail<_4>>(dst, value, size);
|
||||
if(size < 16) return splat_set<HeadTail<_8>>(dst, value, size);
|
||||
if(size < 32) return splat_set<HeadTail<_16>>(dst, value, size);
|
||||
if(size < 64) return splat_set<HeadTail<_32>>(dst, value, size);
|
||||
if(size < 128) return splat_set<HeadTail<_64>>(dst, value, size);
|
||||
if(size < 256) return splat_set<HeadTail<_128>>(dst, value, size);
|
||||
return splat_set<Align<_16,Arg::Dst>::Then<Loop<_32>>>(dst, value, size);
|
||||
}
|
||||
static void memset_0x3DF0F44E2ED6A50F(char * dst, int value, size_t size) {
|
||||
using namespace LIBC_NAMESPACE::x86;
|
||||
if(size == 0) return;
|
||||
if(size == 1) return splat_set<_1>(dst, value);
|
||||
if(size < 4) return splat_set<HeadTail<_2>>(dst, value, size);
|
||||
if(size < 8) return splat_set<HeadTail<_4>>(dst, value, size);
|
||||
if(size < 16) return splat_set<HeadTail<_8>>(dst, value, size);
|
||||
if(size < 32) return splat_set<HeadTail<_16>>(dst, value, size);
|
||||
if(size < 64) return splat_set<HeadTail<_32>>(dst, value, size);
|
||||
if(size < 128) return splat_set<HeadTail<_64>>(dst, value, size);
|
||||
if(size < 256) return splat_set<HeadTail<_128>>(dst, value, size);
|
||||
return splat_set<Align<_32,Arg::Dst>::Then<Loop<_32>>>(dst, value, size);
|
||||
}
|
||||
static void bzero_0x475977492C218AD4(char * dst, size_t size) {
|
||||
using namespace LIBC_NAMESPACE::x86;
|
||||
if(size == 0) return;
|
||||
if(size == 1) return splat_set<_1>(dst, 0);
|
||||
if(size == 2) return splat_set<_2>(dst, 0);
|
||||
if(size == 3) return splat_set<_3>(dst, 0);
|
||||
if(size < 8) return splat_set<HeadTail<_4>>(dst, 0, size);
|
||||
if(size < 16) return splat_set<HeadTail<_8>>(dst, 0, size);
|
||||
if(size < 32) return splat_set<HeadTail<_16>>(dst, 0, size);
|
||||
if(size < 64) return splat_set<HeadTail<_32>>(dst, 0, size);
|
||||
if(size < 128) return splat_set<HeadTail<_64>>(dst, 0, size);
|
||||
return splat_set<Align<_32,Arg::Dst>::Then<Loop<_32>>>(dst, 0, size);
|
||||
}
|
||||
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
||||
namespace llvm {
|
||||
namespace automemcpy {
|
||||
|
||||
ArrayRef<NamedFunctionDescriptor> getFunctionDescriptors() {
|
||||
static constexpr NamedFunctionDescriptor kDescriptors[] = {
|
||||
{"memcpy_0xE00E29EE73994E2B",{FunctionType::MEMCPY,std::nullopt,std::nullopt,std::nullopt,std::nullopt,Accelerator{{0,kMaxSize}},ElementTypeClass::NATIVE}},
|
||||
{"memcpy_0x7381B60C7BE75EF9",{FunctionType::MEMCPY,Contiguous{{0,4}},Overlap{{4,256}},Loop{{256,kMaxSize},64},std::nullopt,std::nullopt,ElementTypeClass::NATIVE}},
|
||||
{"memcmp_0x348D7BA6DB0EE033",{FunctionType::MEMCMP,Contiguous{{0,2}},Overlap{{2,64}},std::nullopt,AlignedLoop{Loop{{64,kMaxSize},16},16,AlignArg::_1},std::nullopt,ElementTypeClass::NATIVE}},
|
||||
{"memset_0x71E761699B999863",{FunctionType::MEMSET,Contiguous{{0,2}},Overlap{{2,256}},std::nullopt,AlignedLoop{Loop{{256,kMaxSize},32},16,AlignArg::_1},std::nullopt,ElementTypeClass::NATIVE}},
|
||||
{"memset_0x3DF0F44E2ED6A50F",{FunctionType::MEMSET,Contiguous{{0,2}},Overlap{{2,256}},std::nullopt,AlignedLoop{Loop{{256,kMaxSize},32},32,AlignArg::_1},std::nullopt,ElementTypeClass::NATIVE}},
|
||||
{"bzero_0x475977492C218AD4",{FunctionType::BZERO,Contiguous{{0,4}},Overlap{{4,128}},std::nullopt,AlignedLoop{Loop{{128,kMaxSize},32},32,AlignArg::_1},std::nullopt,ElementTypeClass::NATIVE}},
|
||||
};
|
||||
return ArrayRef(kDescriptors);
|
||||
}
|
||||
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
|
||||
|
||||
using MemcpyStub = void (*)(char *__restrict, const char *__restrict, size_t);
|
||||
template <MemcpyStub Foo>
|
||||
void *Wrap(void *__restrict dst, const void *__restrict src, size_t size) {
|
||||
Foo(reinterpret_cast<char *__restrict>(dst),
|
||||
reinterpret_cast<const char *__restrict>(src), size);
|
||||
return dst;
|
||||
}
|
||||
llvm::ArrayRef<MemcpyConfiguration> getMemcpyConfigurations() {
|
||||
using namespace LIBC_NAMESPACE;
|
||||
static constexpr MemcpyConfiguration kConfigurations[] = {
|
||||
{Wrap<memcpy_0xE00E29EE73994E2B>, "memcpy_0xE00E29EE73994E2B"},
|
||||
{Wrap<memcpy_0x7381B60C7BE75EF9>, "memcpy_0x7381B60C7BE75EF9"},
|
||||
};
|
||||
return llvm::ArrayRef(kConfigurations);
|
||||
}
|
||||
|
||||
using MemcmpStub = int (*)(const char *, const char *, size_t);
|
||||
template <MemcmpStub Foo>
|
||||
int Wrap(const void *lhs, const void *rhs, size_t size) {
|
||||
return Foo(reinterpret_cast<const char *>(lhs),
|
||||
reinterpret_cast<const char *>(rhs), size);
|
||||
}
|
||||
llvm::ArrayRef<MemcmpOrBcmpConfiguration> getMemcmpConfigurations() {
|
||||
using namespace LIBC_NAMESPACE;
|
||||
static constexpr MemcmpOrBcmpConfiguration kConfigurations[] = {
|
||||
{Wrap<memcmp_0x348D7BA6DB0EE033>, "memcmp_0x348D7BA6DB0EE033"},
|
||||
};
|
||||
return llvm::ArrayRef(kConfigurations);
|
||||
}
|
||||
llvm::ArrayRef<MemcmpOrBcmpConfiguration> getBcmpConfigurations() {
|
||||
return {};
|
||||
}
|
||||
|
||||
using MemsetStub = void (*)(char *, int, size_t);
|
||||
template <MemsetStub Foo> void *Wrap(void *dst, int value, size_t size) {
|
||||
Foo(reinterpret_cast<char *>(dst), value, size);
|
||||
return dst;
|
||||
}
|
||||
llvm::ArrayRef<MemsetConfiguration> getMemsetConfigurations() {
|
||||
using namespace LIBC_NAMESPACE;
|
||||
static constexpr MemsetConfiguration kConfigurations[] = {
|
||||
{Wrap<memset_0x71E761699B999863>, "memset_0x71E761699B999863"},
|
||||
{Wrap<memset_0x3DF0F44E2ED6A50F>, "memset_0x3DF0F44E2ED6A50F"},
|
||||
};
|
||||
return llvm::ArrayRef(kConfigurations);
|
||||
}
|
||||
|
||||
using BzeroStub = void (*)(char *, size_t);
|
||||
template <BzeroStub Foo> void Wrap(void *dst, size_t size) {
|
||||
Foo(reinterpret_cast<char *>(dst), size);
|
||||
}
|
||||
llvm::ArrayRef<BzeroConfiguration> getBzeroConfigurations() {
|
||||
using namespace LIBC_NAMESPACE;
|
||||
static constexpr BzeroConfiguration kConfigurations[] = {
|
||||
{Wrap<bzero_0x475977492C218AD4>, "bzero_0x475977492C218AD4"},
|
||||
};
|
||||
return llvm::ArrayRef(kConfigurations);
|
||||
}
|
||||
|
||||
llvm::ArrayRef<MemmoveConfiguration> getMemmoveConfigurations() {
|
||||
return {};
|
||||
}
|
||||
// Functions : 6
|
||||
)");
|
||||
}
|
||||
} // namespace
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
@@ -1,191 +0,0 @@
|
||||
//===-- Automemcpy Json Results Analyzer Test ----------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "automemcpy/ResultAnalyzer.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using testing::DoubleNear;
|
||||
using testing::ElementsAre;
|
||||
using testing::Pair;
|
||||
using testing::SizeIs;
|
||||
|
||||
namespace llvm {
|
||||
namespace automemcpy {
|
||||
namespace {
|
||||
|
||||
TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsOneSample) {
|
||||
static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
|
||||
static constexpr DistributionId DistA = {{"A"}};
|
||||
static constexpr SampleId Id = {Foo1, DistA};
|
||||
static constexpr Sample kSamples[] = {
|
||||
Sample{Id, SampleType::ITERATION, 4},
|
||||
Sample{Id, SampleType::AGGREGATE, -1}, // Aggegates gets discarded
|
||||
};
|
||||
|
||||
const std::vector<FunctionData> Data = getThroughputs(kSamples);
|
||||
EXPECT_THAT(Data, SizeIs(1));
|
||||
EXPECT_THAT(Data[0].Id, Foo1);
|
||||
EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
|
||||
// A single value is provided.
|
||||
const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name);
|
||||
EXPECT_THAT(DistributionData.BytesPerSecondMedian, 4);
|
||||
EXPECT_THAT(DistributionData.BytesPerSecondMean, 4);
|
||||
EXPECT_THAT(DistributionData.BytesPerSecondVariance, 0);
|
||||
}
|
||||
|
||||
TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) {
|
||||
static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
|
||||
static constexpr DistributionId DistA = {{"A"}};
|
||||
static constexpr SampleId Id = {Foo1, DistA};
|
||||
static constexpr Sample kSamples[] = {Sample{Id, SampleType::ITERATION, 4},
|
||||
Sample{Id, SampleType::ITERATION, 5},
|
||||
Sample{Id, SampleType::ITERATION, 5}};
|
||||
|
||||
const std::vector<FunctionData> Data = getThroughputs(kSamples);
|
||||
EXPECT_THAT(Data, SizeIs(1));
|
||||
EXPECT_THAT(Data[0].Id, Foo1);
|
||||
EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
|
||||
// When multiple values are provided we pick the median one (here median of 4,
|
||||
// 5, 5).
|
||||
const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name);
|
||||
EXPECT_THAT(DistributionData.BytesPerSecondMedian, 5);
|
||||
EXPECT_THAT(DistributionData.BytesPerSecondMean, DoubleNear(4.6, 0.1));
|
||||
EXPECT_THAT(DistributionData.BytesPerSecondVariance, DoubleNear(0.33, 0.01));
|
||||
}
|
||||
|
||||
TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsServeralFunctionAndDist) {
|
||||
static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
|
||||
static constexpr DistributionId DistA = {{"A"}};
|
||||
static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY};
|
||||
static constexpr DistributionId DistB = {{"B"}};
|
||||
static constexpr Sample kSamples[] = {
|
||||
Sample{{Foo1, DistA}, SampleType::ITERATION, 1},
|
||||
Sample{{Foo1, DistB}, SampleType::ITERATION, 2},
|
||||
Sample{{Foo2, DistA}, SampleType::ITERATION, 3},
|
||||
Sample{{Foo2, DistB}, SampleType::ITERATION, 4}};
|
||||
// Data is aggregated per function.
|
||||
const std::vector<FunctionData> Data = getThroughputs(kSamples);
|
||||
EXPECT_THAT(Data, SizeIs(2)); // 2 functions Foo1 and Foo2.
|
||||
// Each function has data for both distributions DistA and DistB.
|
||||
EXPECT_THAT(Data[0].PerDistributionData, SizeIs(2));
|
||||
EXPECT_THAT(Data[1].PerDistributionData, SizeIs(2));
|
||||
}
|
||||
|
||||
TEST(AutomemcpyJsonResultsAnalyzer, getScore) {
|
||||
static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
|
||||
static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY};
|
||||
static constexpr FunctionId Foo3 = {"memcpy3", FunctionType::MEMCPY};
|
||||
static constexpr DistributionId Dist = {{"A"}};
|
||||
static constexpr Sample kSamples[] = {
|
||||
Sample{{Foo1, Dist}, SampleType::ITERATION, 1},
|
||||
Sample{{Foo2, Dist}, SampleType::ITERATION, 2},
|
||||
Sample{{Foo3, Dist}, SampleType::ITERATION, 3}};
|
||||
|
||||
// Data is aggregated per function.
|
||||
std::vector<FunctionData> Data = getThroughputs(kSamples);
|
||||
|
||||
// Sort Data by function name so we can test them.
|
||||
std::sort(
|
||||
Data.begin(), Data.end(),
|
||||
[](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; });
|
||||
|
||||
EXPECT_THAT(Data[0].Id, Foo1);
|
||||
EXPECT_THAT(Data[0].PerDistributionData.lookup("A").BytesPerSecondMedian, 1);
|
||||
EXPECT_THAT(Data[1].Id, Foo2);
|
||||
EXPECT_THAT(Data[1].PerDistributionData.lookup("A").BytesPerSecondMedian, 2);
|
||||
EXPECT_THAT(Data[2].Id, Foo3);
|
||||
EXPECT_THAT(Data[2].PerDistributionData.lookup("A").BytesPerSecondMedian, 3);
|
||||
|
||||
// Normalizes throughput per distribution.
|
||||
fillScores(Data);
|
||||
EXPECT_THAT(Data[0].PerDistributionData.lookup("A").Score, 0);
|
||||
EXPECT_THAT(Data[1].PerDistributionData.lookup("A").Score, 0.5);
|
||||
EXPECT_THAT(Data[2].PerDistributionData.lookup("A").Score, 1);
|
||||
}
|
||||
|
||||
TEST(AutomemcpyJsonResultsAnalyzer, castVotes) {
|
||||
static constexpr double kAbsErr = 0.01;
|
||||
|
||||
static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
|
||||
static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY};
|
||||
static constexpr FunctionId Foo3 = {"memcpy3", FunctionType::MEMCPY};
|
||||
static constexpr DistributionId DistA = {{"A"}};
|
||||
static constexpr DistributionId DistB = {{"B"}};
|
||||
static constexpr Sample kSamples[] = {
|
||||
Sample{{Foo1, DistA}, SampleType::ITERATION, 0},
|
||||
Sample{{Foo1, DistB}, SampleType::ITERATION, 30},
|
||||
Sample{{Foo2, DistA}, SampleType::ITERATION, 1},
|
||||
Sample{{Foo2, DistB}, SampleType::ITERATION, 100},
|
||||
Sample{{Foo3, DistA}, SampleType::ITERATION, 7},
|
||||
Sample{{Foo3, DistB}, SampleType::ITERATION, 100},
|
||||
};
|
||||
|
||||
// DistA Throughput ranges from 0 to 7.
|
||||
// DistB Throughput ranges from 30 to 100.
|
||||
|
||||
// Data is aggregated per function.
|
||||
std::vector<FunctionData> Data = getThroughputs(kSamples);
|
||||
|
||||
// Sort Data by function name so we can test them.
|
||||
std::sort(
|
||||
Data.begin(), Data.end(),
|
||||
[](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; });
|
||||
|
||||
// Normalizes throughput per distribution.
|
||||
fillScores(Data);
|
||||
|
||||
// Cast votes
|
||||
castVotes(Data);
|
||||
|
||||
EXPECT_THAT(Data[0].Id, Foo1);
|
||||
EXPECT_THAT(Data[1].Id, Foo2);
|
||||
EXPECT_THAT(Data[2].Id, Foo3);
|
||||
|
||||
const auto GetDistData = [&Data](size_t Index, StringRef Name) {
|
||||
return Data[Index].PerDistributionData.lookup(Name);
|
||||
};
|
||||
|
||||
// Distribution A
|
||||
// Throughput is 0, 1 and 7, so normalized scores are 0, 1/7 and 1.
|
||||
EXPECT_THAT(GetDistData(0, "A").Score, DoubleNear(0, kAbsErr));
|
||||
EXPECT_THAT(GetDistData(1, "A").Score, DoubleNear(1. / 7, kAbsErr));
|
||||
EXPECT_THAT(GetDistData(2, "A").Score, DoubleNear(1, kAbsErr));
|
||||
// which are turned into grades BAD, MEDIOCRE and EXCELLENT.
|
||||
EXPECT_THAT(GetDistData(0, "A").Grade, Grade::BAD);
|
||||
EXPECT_THAT(GetDistData(1, "A").Grade, Grade::MEDIOCRE);
|
||||
EXPECT_THAT(GetDistData(2, "A").Grade, Grade::EXCELLENT);
|
||||
|
||||
// Distribution B
|
||||
// Throughput is 30, 100 and 100, so normalized scores are 0, 1 and 1.
|
||||
EXPECT_THAT(GetDistData(0, "B").Score, DoubleNear(0, kAbsErr));
|
||||
EXPECT_THAT(GetDistData(1, "B").Score, DoubleNear(1, kAbsErr));
|
||||
EXPECT_THAT(GetDistData(2, "B").Score, DoubleNear(1, kAbsErr));
|
||||
// which are turned into grades BAD, EXCELLENT and EXCELLENT.
|
||||
EXPECT_THAT(GetDistData(0, "B").Grade, Grade::BAD);
|
||||
EXPECT_THAT(GetDistData(1, "B").Grade, Grade::EXCELLENT);
|
||||
EXPECT_THAT(GetDistData(2, "B").Grade, Grade::EXCELLENT);
|
||||
|
||||
// Now looking from the functions point of view.
|
||||
EXPECT_THAT(Data[0].ScoresGeoMean, DoubleNear(0, kAbsErr));
|
||||
EXPECT_THAT(Data[1].ScoresGeoMean, DoubleNear(1. * (1. / 7), kAbsErr));
|
||||
EXPECT_THAT(Data[2].ScoresGeoMean, DoubleNear(1, kAbsErr));
|
||||
|
||||
// Note the array is indexed by GradeEnum values (EXCELLENT=0 / BAD = 6)
|
||||
EXPECT_THAT(Data[0].GradeHisto, ElementsAre(0, 0, 0, 0, 0, 0, 2));
|
||||
EXPECT_THAT(Data[1].GradeHisto, ElementsAre(1, 0, 0, 0, 0, 1, 0));
|
||||
EXPECT_THAT(Data[2].GradeHisto, ElementsAre(2, 0, 0, 0, 0, 0, 0));
|
||||
|
||||
EXPECT_THAT(Data[0].FinalGrade, Grade::BAD);
|
||||
EXPECT_THAT(Data[1].FinalGrade, Grade::MEDIOCRE);
|
||||
EXPECT_THAT(Data[2].FinalGrade, Grade::EXCELLENT);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace automemcpy
|
||||
} // namespace llvm
|
||||
@@ -29,8 +29,7 @@ The ``benchmarks`` directory
|
||||
----------------------------
|
||||
|
||||
The ``benchmarks`` directory contains LLVM-libc's benchmarking utilities. These
|
||||
are mostly used for the memory functions. This also includes the automemcpy
|
||||
subdirectory for automatic generation of optimized memory functions.
|
||||
are mostly used for the memory functions.
|
||||
|
||||
The ``config`` directory
|
||||
------------------------
|
||||
|
||||
Reference in New Issue
Block a user