From bffc94b08f713cc9916009575664b132aee76bcf Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Thu, 10 Dec 2020 13:50:31 -0800 Subject: [PATCH 1/2] compilers: Add support for using multiple threads with lto Both Clang and GCC support using multiple threads for performing link time optimizations, and they can now be configured using the `-Db_lto_threads` option. Fixes #7820 --- docs/markdown/Builtin-options.md | 43 ++++++++++++++------------- docs/markdown/snippets/lto_threads.md | 7 +++++ mesonbuild/compilers/compilers.py | 20 +++++++++++-- mesonbuild/compilers/mixins/clang.py | 10 ++++++- mesonbuild/compilers/mixins/gnu.py | 17 +++++++++-- run_unittests.py | 21 +++++++++++++ 6 files changed, 93 insertions(+), 25 deletions(-) create mode 100644 docs/markdown/snippets/lto_threads.md diff --git a/docs/markdown/Builtin-options.md b/docs/markdown/Builtin-options.md index ef327e3b8..f57755c06 100644 --- a/docs/markdown/Builtin-options.md +++ b/docs/markdown/Builtin-options.md @@ -119,30 +119,33 @@ no options. The following options are available. 
Note that they may not be available on all platforms or with all compilers: -| Option | Default value | Possible values | Description | -| ----------- | ------------- | --------------- | ----------- | -| b_asneeded | true | true, false | Use -Wl,--as-needed when linking | -| b_bitcode | false | true, false | Embed Apple bitcode, see below | -| b_colorout | always | auto, always, never | Use colored output | -| b_coverage | false | true, false | Enable coverage tracking | -| b_lundef | true | true, false | Don't allow undefined symbols when linking | -| b_lto | false | true, false | Use link time optimization | -| b_ndebug | false | true, false, if-release | Disable asserts | -| b_pch | true | true, false | Use precompiled headers | -| b_pgo | off | off, generate, use | Use profile guided optimization | -| b_sanitize | none | see below | Code sanitizer to use | -| b_staticpic | true | true, false | Build static libraries as position independent | -| b_pie | false | true, false | Build position-independent executables (since 0.49.0)| -| b_vscrt | from_buildtype| none, md, mdd, mt, mtd, from_buildtype, static_from_buildtype | VS runtime library to use (since 0.48.0) (static_from_buildtype since 0.56.0) | +| Option | Default value | Possible values | Description | +|---------------|----------------|------------------------------------------------------------------|-------------------------------------------------------------------------------| +| b_asneeded | true | true, false | Use -Wl,--as-needed when linking | +| b_bitcode | false | true, false | Embed Apple bitcode, see below | +| b_colorout | always | auto, always, never | Use colored output | +| b_coverage | false | true, false | Enable coverage tracking | +| b_lundef | true | true, false | Don't allow undefined symbols when linking | +| b_lto | false | true, false | Use link time optimization | +| b_lto_threads | 0 | Any integer* | Use multiple threads for lto. 
*(Added in 0.57.0)* | +| b_ndebug | false | true, false, if-release | Disable asserts | +| b_pch | true | true, false | Use precompiled headers | +| b_pgo | off | off, generate, use | Use profile guided optimization | +| b_sanitize | none | see below | Code sanitizer to use | +| b_staticpic | true | true, false | Build static libraries as position independent | +| b_pie | false | true, false | Build position-independent executables (since 0.49.0) | +| b_vscrt | from_buildtype | none, md, mdd, mt, mtd, from_buildtype, static_from_buildtype | VS runtime library to use (since 0.48.0) (static_from_buildtype since 0.56.0) | The value of `b_sanitize` can be one of: `none`, `address`, `thread`, `undefined`, `memory`, `address,undefined`. - The default value of `b_vscrt` -is `from_buildtype`. The following table is used internally to pick -the CRT compiler arguments for `from_buildtype` or -`static_from_buildtype` *(since 0.56)* based on the value of the -`buildtype` option: +* < 0 means disable, == 0 means automatic selection, > 0 sets a specific number to use + + +The default value of `b_vscrt` is `from_buildtype`. The following table is +used internally to pick the CRT compiler arguments for `from_buildtype` or +`static_from_buildtype` *(since 0.56)* based on the value of the `buildtype` +option: | buildtype | from_buildtype | static_from_buildtype | | -------- | -------------- | --------------------- | diff --git a/docs/markdown/snippets/lto_threads.md b/docs/markdown/snippets/lto_threads.md new file mode 100644 index 000000000..a6f761481 --- /dev/null +++ b/docs/markdown/snippets/lto_threads.md @@ -0,0 +1,7 @@ +## Knob to control LTO thread + +Both the gnu linker and lld support using threads for speeding up LTO, meson +now provides a knob for this: `-Db_lto_threads`. Currently this is only +supported for clang and gcc. Any positive integer is supported, `0` means +`auto`. 
If the compiler or linker implements its own `auto` we use that, +otherwise the number of threads on the machine is used. diff --git a/mesonbuild/compilers/compilers.py b/mesonbuild/compilers/compilers.py index 0f83f4cae..07569a727 100644 --- a/mesonbuild/compilers/compilers.py +++ b/mesonbuild/compilers/compilers.py @@ -268,6 +268,8 @@ clike_debug_args = {False: [], base_options: 'KeyedOptionDictType' = { OptionKey('b_pch'): coredata.UserBooleanOption('Use precompiled headers', True), OptionKey('b_lto'): coredata.UserBooleanOption('Use link time optimization', False), + OptionKey('b_lto'): coredata.UserBooleanOption('Use link time optimization', False), + OptionKey('b_lto_threads'): coredata.UserIntegerOption('Use multiple threads for Link Time Optimization', (None, None,0)), OptionKey('b_sanitize'): coredata.UserComboOption('Code sanitizer to use', ['none', 'address', 'thread', 'undefined', 'memory', 'address,undefined'], 'none'), @@ -300,11 +302,25 @@ def option_enabled(boptions: T.Set[OptionKey], options: 'KeyedOptionDictType', except KeyError: return False + +def get_option_value(options: 'KeyedOptionDictType', opt: OptionKey, fallback: '_T') -> '_T': + """Get the value of an option, or the fallback value.""" + try: + v: '_T' = options[opt].value + except KeyError: + return fallback + + assert isinstance(v, type(fallback)), f'Should have {type(fallback)!r} but was {type(v)!r}' + # Mypy doesn't understand that the above assert ensures that v is type _T + return v + + def get_base_compile_args(options: 'KeyedOptionDictType', compiler: 'Compiler') -> T.List[str]: args = [] # type T.List[str] try: if options[OptionKey('b_lto')].value: - args.extend(compiler.get_lto_compile_args()) + args.extend(compiler.get_lto_compile_args( + threads=get_option_value(options, OptionKey('b_lto_threads'), 0))) except KeyError: pass try: @@ -926,7 +942,7 @@ class Compiler(metaclass=abc.ABCMeta): ret.append(arg) return ret - def get_lto_compile_args(self) -> T.List[str]: + def 
get_lto_compile_args(self, *, threads: int = 0) -> T.List[str]: return [] def get_lto_link_args(self) -> T.List[str]: diff --git a/mesonbuild/compilers/mixins/clang.py b/mesonbuild/compilers/mixins/clang.py index fcb22258e..9c17a55bd 100644 --- a/mesonbuild/compilers/mixins/clang.py +++ b/mesonbuild/compilers/mixins/clang.py @@ -49,7 +49,8 @@ class ClangCompiler(GnuLikeCompiler): super().__init__() self.id = 'clang' self.defines = defines or {} - self.base_options.add(OptionKey('b_colorout')) + self.base_options.update({OptionKey('b_colorout'), OptionKey('b_lto_threads')}) + # TODO: this really should be part of the linker base_options, but # linkers don't have base_options. if isinstance(self.linker, AppleDynamicLinker): @@ -135,3 +136,10 @@ class ClangCompiler(GnuLikeCompiler): def get_coverage_link_args(self) -> T.List[str]: return ['--coverage'] + + def get_lto_compile_args(self, *, threads: int = 0) -> T.List[str]: + args = super().get_lto_compile_args(threads=threads) + # In clang -flto=0 means auto + if threads >= 0: + args.append(f'-flto-jobs={threads}') + return args diff --git a/mesonbuild/compilers/mixins/gnu.py b/mesonbuild/compilers/mixins/gnu.py index 95bcd7cc3..5afbb83d5 100644 --- a/mesonbuild/compilers/mixins/gnu.py +++ b/mesonbuild/compilers/mixins/gnu.py @@ -17,6 +17,7 @@ import abc import functools import os +import multiprocessing import pathlib import re import subprocess @@ -281,7 +282,9 @@ class GnuLikeCompiler(Compiler, metaclass=abc.ABCMeta): return self._split_fetch_real_dirs(line.split('=', 1)[1]) return [] - def get_lto_compile_args(self) -> T.List[str]: + def get_lto_compile_args(self, *, threads: int = 0) -> T.List[str]: + # This provides a base for many compilers, GCC and Clang override this + # for their specific arguments return ['-flto'] def sanitizer_compile_args(self, value: str) -> T.List[str]: @@ -330,7 +333,7 @@ class GnuCompiler(GnuLikeCompiler): super().__init__() self.id = 'gcc' self.defines = defines or {} - 
self.base_options.add(OptionKey('b_colorout')) + self.base_options.update({OptionKey('b_colorout'), OptionKey('b_lto_threads')}) def get_colorout_args(self, colortype: str) -> T.List[str]: if mesonlib.version_compare(self.version, '>=4.9.0'): @@ -383,3 +386,13 @@ class GnuCompiler(GnuLikeCompiler): def get_prelink_args(self, prelink_name: str, obj_list: T.List[str]) -> T.List[str]: return ['-r', '-o', prelink_name] + obj_list + + def get_lto_compile_args(self, *, threads: int = 0) -> T.List[str]: + if threads == 0: + if mesonlib.version_compare(self.version, '>= 10.0'): + return ['-flto=auto'] + # This matches clang's behavior of using the number of cpus + return [f'-flto={multiprocessing.cpu_count()}'] + elif threads > 0: + return [f'-flto={threads}'] + return super().get_lto_compile_args(threads=threads) diff --git a/run_unittests.py b/run_unittests.py index 2a14f7800..aff94c49b 100755 --- a/run_unittests.py +++ b/run_unittests.py @@ -2843,6 +2843,27 @@ class AllPlatformTests(BasePlatformTests): self.build() self.run_tests() + @skip_if_not_base_option('b_lto_threads') + def test_lto_threads(self): + testdir = os.path.join(self.common_test_dir, '6 linkshared') + + env = get_fake_env(testdir, self.builddir, self.prefix) + cc = env.detect_c_compiler(MachineChoice.HOST) + if cc.get_id() == 'clang' and is_windows(): + raise unittest.SkipTest('LTO not (yet) supported by windows clang') + + self.init(testdir, extra_args=['-Db_lto=true', '-Db_lto_threads=8']) + self.build() + self.run_tests() + + expected = set(cc.get_lto_compile_args(threads=8)) + targets = self.introspect('--targets') + # This assumes all of the targets support lto + for t in targets: + for s in t['target_sources']: + for e in expected: + self.assertIn(e, s['parameters']) + def test_dist_git(self): if not shutil.which('git'): raise unittest.SkipTest('Git not found') From 6f532b72c85e38880cf7953098bb91e8f3feb696 Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Thu, 10 Dec 2020 14:16:45 -0800 Subject: 
[PATCH 2/2] Add support for LLVM's thinLTO This uses a separate option, b_lto_mode. It works in conjunction with b_lto_threads. Fixes #7493 --- docs/markdown/Builtin-options.md | 3 +++ docs/markdown/snippets/lto_mode.md | 5 +++++ mesonbuild/compilers/compilers.py | 8 ++++++-- mesonbuild/compilers/mixins/clang.py | 17 +++++++++++++---- mesonbuild/compilers/mixins/gnu.py | 4 ++-- run_unittests.py | 25 +++++++++++++++++++++++++ 6 files changed, 54 insertions(+), 8 deletions(-) create mode 100644 docs/markdown/snippets/lto_mode.md diff --git a/docs/markdown/Builtin-options.md b/docs/markdown/Builtin-options.md index f57755c06..2d7c01cae 100644 --- a/docs/markdown/Builtin-options.md +++ b/docs/markdown/Builtin-options.md @@ -128,6 +128,7 @@ available on all platforms or with all compilers: | b_lundef | true | true, false | Don't allow undefined symbols when linking | | b_lto | false | true, false | Use link time optimization | | b_lto_threads | 0 | Any integer* | Use multiple threads for lto. *(Added in 0.57.0)* | +| b_lto_mode | default | default, thin | Select between lto modes, thin and default. *(Added in 0.57.0)* | | b_ndebug | false | true, false, if-release | Disable asserts | | b_pch | true | true, false | Use precompiled headers | | b_pgo | off | off, generate, use | Use profile guided optimization | @@ -141,6 +142,8 @@ The value of `b_sanitize` can be one of: `none`, `address`, `thread`, * < 0 means disable, == 0 means automatic selection, > 0 sets a specific number to use +LLVM supports `thin` lto, for more discussion see [LLVM's documentation](https://clang.llvm.org/docs/ThinLTO.html) + The default value of `b_vscrt` is `from_buildtype`. 
The following table is used internally to pick the CRT compiler arguments for `from_buildtype` or diff --git a/docs/markdown/snippets/lto_mode.md b/docs/markdown/snippets/lto_mode.md new file mode 100644 index 000000000..c1df0661e --- /dev/null +++ b/docs/markdown/snippets/lto_mode.md @@ -0,0 +1,5 @@ +## Support added for LLVM's thinLTO + +A new `b_lto_mode` option has been added, which may be set to `default` or +`thin`. Thin only works for clang, and only with gnu gold, lld variants, or +ld64. diff --git a/mesonbuild/compilers/compilers.py b/mesonbuild/compilers/compilers.py index 07569a727..08db6d72e 100644 --- a/mesonbuild/compilers/compilers.py +++ b/mesonbuild/compilers/compilers.py @@ -270,6 +270,9 @@ base_options: 'KeyedOptionDictType' = { OptionKey('b_lto'): coredata.UserBooleanOption('Use link time optimization', False), OptionKey('b_lto'): coredata.UserBooleanOption('Use link time optimization', False), OptionKey('b_lto_threads'): coredata.UserIntegerOption('Use multiple threads for Link Time Optimization', (None, None,0)), + OptionKey('b_lto_mode'): coredata.UserComboOption('Select between different LTO modes.', + ['default', 'thin'], + 'default'), OptionKey('b_sanitize'): coredata.UserComboOption('Code sanitizer to use', ['none', 'address', 'thread', 'undefined', 'memory', 'address,undefined'], 'none'), @@ -320,7 +323,8 @@ def get_base_compile_args(options: 'KeyedOptionDictType', compiler: 'Compiler') try: if options[OptionKey('b_lto')].value: args.extend(compiler.get_lto_compile_args( - threads=get_option_value(options, OptionKey('b_lto_threads'), 0))) + threads=get_option_value(options, OptionKey('b_lto_threads'), 0), + mode=get_option_value(options, OptionKey('b_lto_mode'), 'default'))) except KeyError: pass try: @@ -942,7 +946,7 @@ class Compiler(metaclass=abc.ABCMeta): ret.append(arg) return ret - def get_lto_compile_args(self, *, threads: int = 0) -> T.List[str]: + def get_lto_compile_args(self, *, threads: int = 0, mode: str = 'default') -> 
T.List[str]: return [] def get_lto_link_args(self) -> T.List[str]: diff --git a/mesonbuild/compilers/mixins/clang.py b/mesonbuild/compilers/mixins/clang.py index 9c17a55bd..1778c3131 100644 --- a/mesonbuild/compilers/mixins/clang.py +++ b/mesonbuild/compilers/mixins/clang.py @@ -19,7 +19,7 @@ import shutil import typing as T from ... import mesonlib -from ...linkers import AppleDynamicLinker +from ...linkers import AppleDynamicLinker, ClangClDynamicLinker, LLVMDynamicLinker, GnuGoldDynamicLinker from ...mesonlib import OptionKey from ..compilers import CompileCheckMode from .gnu import GnuLikeCompiler @@ -49,7 +49,8 @@ class ClangCompiler(GnuLikeCompiler): super().__init__() self.id = 'clang' self.defines = defines or {} - self.base_options.update({OptionKey('b_colorout'), OptionKey('b_lto_threads')}) + self.base_options.update( + {OptionKey('b_colorout'), OptionKey('b_lto_threads'), OptionKey('b_lto_mode')}) # TODO: this really should be part of the linker base_options, but # linkers don't have base_options. 
@@ -137,8 +138,16 @@ class ClangCompiler(GnuLikeCompiler): def get_coverage_link_args(self) -> T.List[str]: return ['--coverage'] - def get_lto_compile_args(self, *, threads: int = 0) -> T.List[str]: - args = super().get_lto_compile_args(threads=threads) + def get_lto_compile_args(self, *, threads: int = 0, mode: str = 'default') -> T.List[str]: + args: T.List[str] = [] + if mode == 'thin': + # Thin LTO requires the use of gold, lld, ld64, or lld-link + if not isinstance(self.linker, (AppleDynamicLinker, ClangClDynamicLinker, LLVMDynamicLinker, GnuGoldDynamicLinker)): + raise mesonlib.MesonException(f"LLVM's thinLTO only works with gnu gold, lld, lld-link, and ld64, not {self.linker.id}") + args.append(f'-flto={mode}') + else: + assert mode == 'default', 'someone forgot to wire something up' + args.extend(super().get_lto_compile_args(threads=threads)) # In clang -flto=0 means auto if threads >= 0: args.append(f'-flto-jobs={threads}') diff --git a/mesonbuild/compilers/mixins/gnu.py b/mesonbuild/compilers/mixins/gnu.py index 5afbb83d5..464c66418 100644 --- a/mesonbuild/compilers/mixins/gnu.py +++ b/mesonbuild/compilers/mixins/gnu.py @@ -282,7 +282,7 @@ class GnuLikeCompiler(Compiler, metaclass=abc.ABCMeta): return self._split_fetch_real_dirs(line.split('=', 1)[1]) return [] - def get_lto_compile_args(self, *, threads: int = 0) -> T.List[str]: + def get_lto_compile_args(self, *, threads: int = 0, mode: str = 'default') -> T.List[str]: # This provides a base for many compilers, GCC and Clang override this # for their specific arguments return ['-flto'] @@ -387,7 +387,7 @@ class GnuCompiler(GnuLikeCompiler): def get_prelink_args(self, prelink_name: str, obj_list: T.List[str]) -> T.List[str]: return ['-r', '-o', prelink_name] + obj_list - def get_lto_compile_args(self, *, threads: int = 0) -> T.List[str]: + def get_lto_compile_args(self, *, threads: int = 0, mode: str = 'default') -> T.List[str]: if threads == 0: if mesonlib.version_compare(self.version, '>= 10.0'): 
return ['-flto=auto'] diff --git a/run_unittests.py b/run_unittests.py index aff94c49b..2b8812a7c 100755 --- a/run_unittests.py +++ b/run_unittests.py @@ -2864,6 +2864,31 @@ class AllPlatformTests(BasePlatformTests): for e in expected: self.assertIn(e, s['parameters']) + @skip_if_not_base_option('b_lto_mode') + @skip_if_not_base_option('b_lto_threads') + def test_lto_mode(self): + testdir = os.path.join(self.common_test_dir, '6 linkshared') + + env = get_fake_env(testdir, self.builddir, self.prefix) + cc = env.detect_c_compiler(MachineChoice.HOST) + if cc.get_id() != 'clang': + raise unittest.SkipTest('Only clang currently supports thinLTO') + if cc.linker.id not in {'ld.lld', 'ld.gold', 'ld64', 'lld-link'}: + raise unittest.SkipTest('thinLTO requires ld.lld, ld.gold, ld64, or lld-link') + elif is_windows(): + raise unittest.SkipTest('LTO not (yet) supported by windows clang') + + self.init(testdir, extra_args=['-Db_lto=true', '-Db_lto_mode=thin', '-Db_lto_threads=8']) + self.build() + self.run_tests() + + expected = set(cc.get_lto_compile_args(threads=8, mode='thin')) + targets = self.introspect('--targets') + # This assumes all of the targets support lto + for t in targets: + for s in t['target_sources']: + assert expected.issubset(set(s['parameters'])), f'Incorrect values for {t["name"]}' + def test_dist_git(self): if not shutil.which('git'): raise unittest.SkipTest('Git not found')