[Support] On Windows, add optional support for {rpmalloc|snmalloc|mimalloc}

This patch optionally replaces the CRT allocator (i.e., malloc and free) with rpmalloc (mixed public domain licence/MIT licence) or snmalloc (MIT licence) or mimalloc (MIT licence). Please note that the source code for these allocators must be available outside of LLVM's tree.

To enable, use `cmake ... -DLLVM_INTEGRATED_CRT_ALLOC=D:/git/rpmalloc -DLLVM_USE_CRT_RELEASE=MT` where `D:/git/rpmalloc` has already been git clone'd from `https://github.com/mjansson/rpmalloc`. The same applies to snmalloc and mimalloc.

When enabled, the allocator will be embedded (statically linked) into the LLVM tools & libraries. This currently only works with the static CRT (/MT), although using the dynamic CRT (/MD) could potentially work as well in the future.

When enabled, this changes the memory stack from:
  new/delete -> MS VC++ CRT malloc/free -> HeapAlloc -> VirtualAlloc
to:
  new/delete -> {rpmalloc|snmalloc|mimalloc} -> VirtualAlloc

The goal of this patch is to bypass the application's global heap - which is thread-safe thus inducing locking - and instead take advantage of a modern lock-free, thread cache, allocator. On a 6-core Xeon Skylake we observe a 2.5x decrease in execution time when linking a large scale application with LLD and ThinLTO (12 min 20 sec -> 5 min 34 sec), when all hardware threads are being used (using LLD's flag /opt:lldltojobs=all). On a dual 36-core Xeon Skylake with all hardware threads used, we observe a 24x decrease in execution time (1 h 2 min -> 2 min 38 sec) when linking a large application with LLD and ThinLTO. Clang build times also see a decrease in the range 5-10% depending on the configuration.

Differential Revision: https://reviews.llvm.org/D71786
This commit is contained in:
Alexandre Ganea
2020-08-27 11:09:20 -04:00
parent 6923b0a76e
commit a6a37a2fcd
6 changed files with 79 additions and 0 deletions

View File

@@ -567,6 +567,19 @@ option (LLVM_BUILD_EXTERNAL_COMPILER_RT
option (LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
"Show target and host info when tools are invoked with --version." ON)
# LLVM_INTEGRATED_CRT_ALLOC holds the path to a checkout of the replacement
# allocator (rpmalloc, mimalloc or snmalloc). It is therefore declared as a
# PATH cache entry rather than a boolean option(); the empty default means
# "disabled" and evaluates to false in if().
set(LLVM_INTEGRATED_CRT_ALLOC "" CACHE PATH "Replace the Windows CRT allocator with any of {rpmalloc|mimalloc|snmalloc}. Only works with /MT enabled.")
if(LLVM_INTEGRATED_CRT_ALLOC)
  # Reject, at configure time, the setups the allocator override cannot be
  # combined with: non-Windows hosts, sanitizer builds and Debug builds.
  if(NOT WIN32)
    message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC is only supported on Windows.")
  endif()
  if(LLVM_USE_SANITIZER)
    message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC cannot be used along with LLVM_USE_SANITIZER!")
  endif()
  # CMAKE_BUILD_TYPE is empty for multi-config generators, in which case this
  # check is skipped.
  if(CMAKE_BUILD_TYPE AND uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
    message(FATAL_ERROR "The Debug target isn't supported along with LLVM_INTEGRATED_CRT_ALLOC!")
  endif()
endif()
# You can configure which libraries from LLVM you want to include in the
# shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited
# list of LLVM components. All component names handled by llvm-config are valid.

View File

@@ -461,6 +461,23 @@ LLVM-specific variables
**LLVM_PARALLEL_LINK_JOBS**:STRING
Define the maximum number of concurrent link jobs.
**LLVM_USE_CRT_{target}**:STRING
On Windows, tells which version of the C runtime library (CRT) should be used.
For example, -DLLVM_USE_CRT_RELEASE=MT would statically link the CRT into the
LLVM tools and library.
**LLVM_INTEGRATED_CRT_ALLOC**:PATH
On Windows, allows embedding a different C runtime allocator into the LLVM
tools and libraries. Using a lock-free allocator such as the ones listed below
greatly decreases ThinLTO link time by about an order of magnitude. It also
mildly improves Clang build times, by about 5-10%. At the moment, rpmalloc,
snmalloc and mimalloc are supported. Use the path to `git clone` to select
the respective allocator, for example:
D:\git> git clone https://github.com/mjansson/rpmalloc
D:\llvm-project> cmake ... -DLLVM_INTEGRATED_CRT_ALLOC=D:\git\rpmalloc
This flag needs to be used along with the static CRT, i.e. if building the
Release target, add -DLLVM_USE_CRT_RELEASE=MT.
**LLVM_BUILD_DOCS**:BOOL
Adds all *enabled* documentation targets (i.e. Doxygen and Sphinx targets) as
dependencies of the default build targets. This results in all of the (enabled)

View File

@@ -60,6 +60,34 @@ if(LLVM_WITH_Z3)
set(system_libs ${system_libs} ${Z3_LIBRARIES})
endif()
# Override the C runtime allocator on Windows and embed it into LLVM tools & libraries.
#
# LLVM_INTEGRATED_CRT_ALLOC is the path to the allocator's `git clone`; the
# allocator is selected from the last component of that path. Depending on the
# allocator, this either adds its sources to ALLOCATOR_FILES or appends a
# prebuilt library to system_libs.
if(LLVM_INTEGRATED_CRT_ALLOC)
  # The CRT selection is quoted so that an unset or empty LLVM_USE_CRT_<CONFIG>
  # produces the diagnostic below instead of a malformed if() expression.
  if(CMAKE_BUILD_TYPE AND NOT "${LLVM_USE_CRT_${uppercase_CMAKE_BUILD_TYPE}}" MATCHES "^(MT|MTd)$")
    message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC only works with /MT or /MTd. Use LLVM_USE_CRT_${uppercase_CMAKE_BUILD_TYPE} to set the appropriate option.")
  endif()

  # Drop a single trailing '/' or '\' so the "<name>$" matches below work.
  string(REGEX REPLACE "(/|\\\\)$" "" LLVM_INTEGRATED_CRT_ALLOC "${LLVM_INTEGRATED_CRT_ALLOC}")

  if(NOT EXISTS "${LLVM_INTEGRATED_CRT_ALLOC}")
    message(FATAL_ERROR "Cannot find the path to `git clone` for the CRT allocator! (${LLVM_INTEGRATED_CRT_ALLOC}). Currently, rpmalloc, snmalloc and mimalloc are supported.")
  endif()

  if(LLVM_INTEGRATED_CRT_ALLOC MATCHES "rpmalloc$")
    # Preprocessor definitions for rpmalloc.c, which is compiled as part of
    # the library through ALLOCATOR_FILES.
    add_definitions(-DENABLE_OVERRIDE -DENABLE_PRELOAD)
    set(ALLOCATOR_FILES "${LLVM_INTEGRATED_CRT_ALLOC}/rpmalloc/rpmalloc.c")
  elseif(LLVM_INTEGRATED_CRT_ALLOC MATCHES "snmalloc$")
    # NOTE(review): PARENT_SCOPE changes CMAKE_CXX_FLAGS in the parent scope
    # only, not in this directory's scope -- confirm this is what is intended.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /std:c++17" PARENT_SCOPE)
    set(ALLOCATOR_FILES "${LLVM_INTEGRATED_CRT_ALLOC}/src/override/malloc.cc" "${LLVM_INTEGRATED_CRT_ALLOC}/src/override/new.cc")
    list(APPEND system_libs "mincore.lib" "-INCLUDE:malloc")
  elseif(LLVM_INTEGRATED_CRT_ALLOC MATCHES "mimalloc$")
    # mimalloc is not compiled here; a prebuilt static library is expected at
    # a fixed location inside the checkout.
    set(MIMALLOC_LIB "${LLVM_INTEGRATED_CRT_ALLOC}/out/msvc-x64/Release/mimalloc-static.lib")
    if(NOT EXISTS "${MIMALLOC_LIB}")
      message(FATAL_ERROR "Cannot find the mimalloc static library. To build it, first apply the patch from https://github.com/microsoft/mimalloc/issues/268 then build the Release x64 target through ${LLVM_INTEGRATED_CRT_ALLOC}\\ide\\vs2019\\mimalloc.sln")
    endif()
    list(APPEND system_libs "${MIMALLOC_LIB}" "-INCLUDE:malloc")
  else()
    # The path exists but does not end in a known allocator name. Fail loudly
    # rather than silently building without the requested allocator.
    message(FATAL_ERROR "Cannot determine which CRT allocator to use from '${LLVM_INTEGRATED_CRT_ALLOC}'. The path must end with one of: rpmalloc, snmalloc, mimalloc.")
  endif()
endif()
add_llvm_component_library(LLVMSupport
AArch64TargetParser.cpp
ABIBreak.cpp
@@ -181,6 +209,8 @@ add_llvm_component_library(LLVMSupport
xxhash.cpp
Z3Solver.cpp
${ALLOCATOR_FILES}
# System
Atomic.cpp
DynamicLibrary.cpp

View File

@@ -176,4 +176,9 @@ if(LLVM_BUILD_LLVM_C_DYLIB AND MSVC)
# Finally link the target.
add_llvm_library(LLVM-C SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS intrinsics_gen)
if(MSVC AND LLVM_INTEGRATED_CRT_ALLOC)
  # With the integrated allocator enabled, add -INCLUDE:malloc to the shared
  # library link flags so that LLVMSupport is searched before the CRT libs.
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " -INCLUDE:malloc")
endif()
endif()

View File

@@ -10,6 +10,11 @@ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/Remarks.exports)
add_llvm_library(Remarks SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES})
if(MSVC AND LLVM_INTEGRATED_CRT_ALLOC)
  # With the integrated allocator enabled, add -INCLUDE:malloc to the shared
  # library link flags so that LLVMSupport is searched before the CRT libs.
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " -INCLUDE:malloc")
endif()
install(FILES ${LLVM_MAIN_INCLUDE_DIR}/llvm-c/Remarks.h
DESTINATION include/llvm-c
COMPONENT Remarks)

View File

@@ -38,6 +38,15 @@ function(dynlib_add_module NAME)
)
add_dependencies(DynamicLibraryTests ${NAME})
# We need to link in the Support lib for the Memory allocator override,
# otherwise the DynamicLibrary.Shutdown test will fail, because it would
# allocate memory with the CRT allocator, and release it with our custom
# allocator (see llvm/lib/Support/Windows/Memory.inc).
# /INCLUDE:malloc is there to force searching into LLVMSupport before libucrt.
# This is only done when the allocator override is enabled, so that other
# configurations (including non-Windows linkers, which do not understand
# -INCLUDE:) are left untouched.
if(LLVM_INTEGRATED_CRT_ALLOC)
  llvm_map_components_to_libnames(llvm_libs Support)
  target_link_libraries(${NAME} ${llvm_libs} "-INCLUDE:malloc")
endif()
endfunction(dynlib_add_module)
# Revert -Wl,-z,nodelete on this test since it relies on the file