Merge pull request #13021 from dcbaker/submit/depscanner-fixes-and-speedups

Cleanups, fixes, and speedups for the depscanner
Jussi Pakkanen 2024-03-29 23:54:06 +02:00 committed by GitHub
commit 516a485136
2 changed files with 81 additions and 83 deletions

mesonbuild/backend/ninjabackend.py

@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2012-2017 The Meson development team
# Copyright © 2023 Intel Corporation
from __future__ import annotations
@ -134,10 +135,23 @@ Please report this error with a test case to the Meson bug tracker.'''
raise MesonException(errmsg)
return quote_re.sub(r'$\g<0>', text)
@dataclass
class TargetDependencyScannerInfo:
def __init__(self, private_dir: str, source2object: T.Dict[str, str]):
self.private_dir = private_dir
self.source2object = source2object
"""Information passed to the depscanner about a target.
:param private_dir: The private scratch directory for the target.
:param source2object: A mapping of source file names to the objects that
will be created from them.
:param sources: a list of (source, language) pairs, mapping each source to
the language rule used to scan it.
"""
private_dir: str
source2object: T.Dict[str, str]
sources: T.List[T.Tuple[str, Literal['cpp', 'fortran']]]
@unique
class Quoting(Enum):
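
The hunk above turns TargetDependencyScannerInfo into a @dataclass, which also supplies a field-wise __eq__; that equality is what later lets the backend skip rewriting an unchanged pickle. A minimal self-contained sketch of the idea (ScanInfo is a stand-in name, not the Meson class):

import typing as T
from dataclasses import dataclass

@dataclass
class ScanInfo:
    private_dir: str
    source2object: T.Dict[str, str]
    sources: T.List[T.Tuple[str, str]]

a = ScanInfo('libfoo.p', {'a.cpp': 'a.o'}, [('a.cpp', 'cpp')])
b = ScanInfo('libfoo.p', {'a.cpp': 'a.o'}, [('a.cpp', 'cpp')])
assert a == b                             # generated __eq__ compares every field
assert a != ScanInfo('libfoo.p', {}, [])  # any field change breaks equality
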
@ -480,6 +494,7 @@ class NinjaBackend(backends.Backend):
self.created_llvm_ir_rule = PerMachine(False, False)
self.rust_crates: T.Dict[str, RustCrate] = {}
self.implicit_meson_outs = []
self._uses_dyndeps = False
def create_phony_target(self, dummy_outfile: str, rulename: str, phony_infilename: str) -> NinjaBuildElement:
'''
@ -655,7 +670,8 @@ class NinjaBackend(backends.Backend):
os.replace(tempfilename, outfilename)
mlog.cmd_ci_include(outfilename) # For CI debugging
# Refresh Ninja's caches. https://github.com/ninja-build/ninja/pull/1685
if mesonlib.version_compare(self.ninja_version, '>=1.10.0') and os.path.exists(os.path.join(self.environment.build_dir, '.ninja_log')):
# Cannot use when running with dyndeps: https://github.com/ninja-build/ninja/issues/1952
if mesonlib.version_compare(self.ninja_version, '>=1.10.0') and os.path.exists(os.path.join(self.environment.build_dir, '.ninja_log')) and not self._uses_dyndeps:
subprocess.call(self.ninja_command + ['-t', 'restat'], cwd=self.environment.build_dir)
subprocess.call(self.ninja_command + ['-t', 'cleandead'], cwd=self.environment.build_dir)
self.generate_compdb()
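
The added condition skips Ninja's restat/cleandead cache refresh whenever any target uses dyndeps, since those tools misbehave with dyndep edges (ninja-build/ninja#1952). A rough standalone sketch of the same guard, with ninja assumed on PATH and the version passed as a plain tuple instead of mesonlib.version_compare:

import os
import subprocess

def refresh_ninja_caches(build_dir: str, ninja_version: tuple, uses_dyndeps: bool) -> None:
    # Mirror of the guard above: a new enough ninja, a log to refresh,
    # and no dyndep edges anywhere in the build.
    log = os.path.join(build_dir, '.ninja_log')
    if ninja_version >= (1, 10, 0) and os.path.exists(log) and not uses_dyndeps:
        subprocess.call(['ninja', '-t', 'restat'], cwd=build_dir)
        subprocess.call(['ninja', '-t', 'cleandead'], cwd=build_dir)
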
@ -844,8 +860,8 @@ class NinjaBackend(backends.Backend):
self.generate_custom_target(target)
if isinstance(target, build.RunTarget):
self.generate_run_target(target)
compiled_sources = []
source2object = {}
compiled_sources: T.List[str] = []
source2object: T.Dict[str, str] = {}
name = target.get_id()
if name in self.processed_targets:
return
@ -928,7 +944,7 @@ class NinjaBackend(backends.Backend):
# this target's sources (generated sources and preexisting sources).
# This will be set as dependencies of all the target's sources. At the
# same time, also deal with generated sources that need to be compiled.
generated_source_files = []
generated_source_files: T.List[File] = []
for rel_src in generated_sources.keys():
raw_src = File.from_built_relative(rel_src)
if self.environment.is_source(rel_src):
@ -1073,48 +1089,53 @@ class NinjaBackend(backends.Backend):
return False
return True
def generate_dependency_scan_target(self, target: build.BuildTarget, compiled_sources, source2object, generated_source_files: T.List[mesonlib.File],
def generate_dependency_scan_target(self, target: build.BuildTarget,
compiled_sources: T.List[str],
source2object: T.Dict[str, str],
generated_source_files: T.List[mesonlib.File],
object_deps: T.List['mesonlib.FileOrString']) -> None:
if not self.should_use_dyndeps_for_target(target):
return
self._uses_dyndeps = True
depscan_file = self.get_dep_scan_file_for(target)
pickle_base = target.name + '.dat'
pickle_file = os.path.join(self.get_target_private_dir(target), pickle_base).replace('\\', '/')
pickle_abs = os.path.join(self.get_target_private_dir_abs(target), pickle_base).replace('\\', '/')
json_abs = os.path.join(self.get_target_private_dir_abs(target), f'{target.name}-deps.json').replace('\\', '/')
rule_name = 'depscan'
scan_sources = self.select_sources_to_scan(compiled_sources)
scan_sources = list(self.select_sources_to_scan(compiled_sources))
# Dump the sources as a json list. This avoids potential problems where
# the number of sources passed to depscan exceeds the limit imposed by
# the OS.
with open(json_abs, 'w', encoding='utf-8') as f:
json.dump(scan_sources, f)
elem = NinjaBuildElement(self.all_outputs, depscan_file, rule_name, json_abs)
elem.add_item('picklefile', pickle_file)
scaninfo = TargetDependencyScannerInfo(
self.get_target_private_dir(target), source2object, scan_sources)
write = True
if os.path.exists(pickle_abs):
with open(pickle_abs, 'rb') as p:
old = pickle.load(p)
write = old != scaninfo
if write:
with open(pickle_abs, 'wb') as p:
pickle.dump(scaninfo, p)
elem = NinjaBuildElement(self.all_outputs, depscan_file, rule_name, pickle_file)
# Add any generated outputs to the order deps of the scan target, so
# that those sources are present
for g in generated_source_files:
elem.orderdeps.add(g.relative_name())
elem.orderdeps.update(object_deps)
scaninfo = TargetDependencyScannerInfo(self.get_target_private_dir(target), source2object)
with open(pickle_abs, 'wb') as p:
pickle.dump(scaninfo, p)
self.add_build(elem)
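
The new body above compares the freshly built TargetDependencyScannerInfo against whatever pickle is already on disk and rewrites it only on change, so an unchanged pickle keeps its old timestamp and the depscan edge does not go dirty on every reconfigure. A generic, hedged sketch of that pattern:

import os
import pickle
import typing as T

def dump_if_changed(obj: T.Any, path: str) -> bool:
    '''Rewrite path only when its pickled payload differs from obj.'''
    if os.path.exists(path):
        try:
            with open(path, 'rb') as f:
                if pickle.load(f) == obj:
                    return False   # identical payload, keep the old timestamp
        except (EOFError, pickle.UnpicklingError):
            pass                   # unreadable pickle, just rewrite it
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
    return True
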
def select_sources_to_scan(self, compiled_sources):
def select_sources_to_scan(self, compiled_sources: T.List[str]
) -> T.Iterable[T.Tuple[str, Literal['cpp', 'fortran']]]:
# In practice this picks up C++ and Fortran files. If some other language
# requires scanning (possibly Java, to deal with inner class files)
# then add it here.
all_suffixes = set(compilers.lang_suffixes['cpp']) | set(compilers.lang_suffixes['fortran'])
selected_sources = []
for source in compiled_sources:
ext = os.path.splitext(source)[1][1:]
if ext != 'C':
ext = ext.lower()
if ext in all_suffixes:
selected_sources.append(source)
return selected_sources
if ext.lower() in compilers.lang_suffixes['cpp'] or ext == 'C':
yield source, 'cpp'
elif ext.lower() in compilers.lang_suffixes['fortran']:
yield source, 'fortran'
def process_target_dependencies(self, target):
for t in target.get_dependencies():
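
select_sources_to_scan now yields (source, language) pairs lazily instead of building a filtered list, and the uppercase '.C' suffix still counts as C++ while all other suffixes are matched case-insensitively. A standalone sketch of the classification (the suffix sets are abbreviated stand-ins for compilers.lang_suffixes):

import os
import typing as T

CPP_SUFFIXES = {'cc', 'cpp', 'cxx', 'c++'}            # abbreviated stand-in
FORTRAN_SUFFIXES = {'f', 'f90', 'f95', 'f03', 'f08'}  # abbreviated stand-in

def classify(sources: T.Iterable[str]) -> T.Iterator[T.Tuple[str, str]]:
    for source in sources:
        ext = os.path.splitext(source)[1][1:]
        if ext.lower() in CPP_SUFFIXES or ext == 'C':
            yield source, 'cpp'
        elif ext.lower() in FORTRAN_SUFFIXES:
            yield source, 'fortran'

print(list(classify(['main.cpp', 'legacy.C', 'mod.f90', 'util.c'])))
# [('main.cpp', 'cpp'), ('legacy.C', 'cpp'), ('mod.f90', 'fortran')]
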
@ -2752,7 +2773,7 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
def get_link_debugfile_args(self, linker, target):
return linker.get_link_debugfile_args(self.get_target_debug_filename(target))
def generate_llvm_ir_compile(self, target, src):
def generate_llvm_ir_compile(self, target, src: mesonlib.FileOrString):
base_proxy = target.get_options()
compiler = get_compiler_for_source(target.compilers.values(), src)
commands = compiler.compiler_args()
@ -2771,10 +2792,11 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
rel_obj = os.path.join(self.get_target_private_dir(target), obj_basename)
rel_obj += '.' + self.environment.machines[target.for_machine].get_object_suffix()
commands += self.get_compile_debugfile_args(compiler, target, rel_obj)
if isinstance(src, File) and src.is_built:
rel_src = src.fname
elif isinstance(src, File):
rel_src = src.rel_to_builddir(self.build_to_src)
if isinstance(src, File):
if src.is_built:
rel_src = src.fname
else:
rel_src = src.rel_to_builddir(self.build_to_src)
else:
raise InvalidArguments(f'Invalid source type: {src!r}')
# Write the Ninja build command
@ -2913,7 +2935,8 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
is_generated: bool = False, header_deps=None,
order_deps: T.Optional[T.List['mesonlib.FileOrString']] = None,
extra_args: T.Optional[T.List[str]] = None,
unity_sources: T.Optional[T.List[mesonlib.FileOrString]] = None) -> None:
unity_sources: T.Optional[T.List[mesonlib.FileOrString]] = None,
) -> T.Tuple[str, str]:
"""
Compiles C/C++, ObjC/ObjC++, Fortran, and D sources
"""

mesonbuild/scripts/depscan.py

@ -1,20 +1,20 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2020 The Meson development team
# Copyright © 2023 Intel Corporation
from __future__ import annotations
import json
import collections
import os
import pathlib
import pickle
import re
import sys
import typing as T
from ..backend.ninjabackend import ninja_quote
from ..compilers.compilers import lang_suffixes
if T.TYPE_CHECKING:
from typing_extensions import Literal
from ..backend.ninjabackend import TargetDependencyScannerInfo
CPP_IMPORT_RE = re.compile(r'\w*import ([a-zA-Z0-9]+);')
@ -30,26 +30,21 @@ FORTRAN_SUBMOD_RE = re.compile(FORTRAN_SUBMOD_PAT, re.IGNORECASE)
FORTRAN_USE_RE = re.compile(FORTRAN_USE_PAT, re.IGNORECASE)
class DependencyScanner:
def __init__(self, pickle_file: str, outfile: str, sources: T.List[str]):
def __init__(self, pickle_file: str, outfile: str):
with open(pickle_file, 'rb') as pf:
self.target_data: TargetDependencyScannerInfo = pickle.load(pf)
self.outfile = outfile
self.sources = sources
self.sources = self.target_data.sources
self.provided_by: T.Dict[str, str] = {}
self.exports: T.Dict[str, str] = {}
self.needs: T.Dict[str, T.List[str]] = {}
self.needs: collections.defaultdict[str, T.List[str]] = collections.defaultdict(list)
self.sources_with_exports: T.List[str] = []
def scan_file(self, fname: str) -> None:
suffix = os.path.splitext(fname)[1][1:]
if suffix != 'C':
suffix = suffix.lower()
if suffix in lang_suffixes['fortran']:
def scan_file(self, fname: str, lang: Literal['cpp', 'fortran']) -> None:
if lang == 'fortran':
self.scan_fortran_file(fname)
elif suffix in lang_suffixes['cpp']:
self.scan_cpp_file(fname)
else:
sys.exit(f'Can not scan files with suffix .{suffix}.')
self.scan_cpp_file(fname)
def scan_fortran_file(self, fname: str) -> None:
fpath = pathlib.Path(fname)
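
Two simplifications land in this hunk: scan_file dispatches on the language recorded in the pickle instead of re-sniffing file suffixes, and needs becomes a defaultdict so the repeated "create the list if missing" branches further down can collapse into a single append. A quick illustration of the latter:

import collections
import typing as T

needs: T.DefaultDict[str, T.List[str]] = collections.defaultdict(list)
needs['a.cpp'].append('modB')   # first access creates the empty list
needs['a.cpp'].append('modC')
print(dict(needs))              # {'a.cpp': ['modB', 'modC']}
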
@ -63,10 +58,7 @@ class DependencyScanner:
# In Fortran you have an using declaration also for the module
# you define in the same file. Prevent circular dependencies.
if needed not in modules_in_this_file:
if fname in self.needs:
self.needs[fname].append(needed)
else:
self.needs[fname] = [needed]
self.needs[fname].append(needed)
if export_match:
exported_module = export_match.group(1).lower()
assert exported_module not in modules_in_this_file
@ -97,10 +89,7 @@ class DependencyScanner:
# submodule (a1:a2) a3 <- requires a1@a2.smod
#
# a3 does not depend on the a1 parent module directly, only transitively.
if fname in self.needs:
self.needs[fname].append(parent_module_name_full)
else:
self.needs[fname] = [parent_module_name_full]
self.needs[fname].append(parent_module_name_full)
def scan_cpp_file(self, fname: str) -> None:
fpath = pathlib.Path(fname)
@ -109,10 +98,7 @@ class DependencyScanner:
export_match = CPP_EXPORT_RE.match(line)
if import_match:
needed = import_match.group(1)
if fname in self.needs:
self.needs[fname].append(needed)
else:
self.needs[fname] = [needed]
self.needs[fname].append(needed)
if export_match:
exported_module = export_match.group(1)
if exported_module in self.provided_by:
@ -121,14 +107,8 @@ class DependencyScanner:
self.provided_by[exported_module] = fname
self.exports[fname] = exported_module
def objname_for(self, src: str) -> str:
objname = self.target_data.source2object[src]
assert isinstance(objname, str)
return objname
def module_name_for(self, src: str) -> str:
suffix = os.path.splitext(src)[1][1:].lower()
if suffix in lang_suffixes['fortran']:
def module_name_for(self, src: str, lang: Literal['cpp', 'fortran']) -> str:
if lang == 'fortran':
exported = self.exports[src]
# Module foo:bar goes to a file name foo@bar.smod
# Module Foo goes to a file name foo.mod
@ -138,23 +118,20 @@ class DependencyScanner:
else:
extension = 'mod'
return os.path.join(self.target_data.private_dir, f'{namebase}.{extension}')
elif suffix in lang_suffixes['cpp']:
return '{}.ifc'.format(self.exports[src])
else:
raise RuntimeError('Unreachable code.')
return '{}.ifc'.format(self.exports[src])
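
module_name_for now receives the language explicitly, so the suffix check disappears: Fortran exports follow the naming rule spelled out in the comments above, and C++ exports simply map to '<module>.ifc'. A self-contained sketch of the Fortran half (the private_dir prefix is omitted, and the separator handling approximates lines elided from this hunk):

def fortran_modfile(exported: str) -> str:
    # Module Foo -> foo.mod; submodule foo:bar -> foo@bar.smod
    if ':' in exported:
        return exported.lower().replace(':', '@') + '.smod'
    return exported.lower() + '.mod'

print(fortran_modfile('Foo'))      # foo.mod
print(fortran_modfile('foo:bar'))  # foo@bar.smod
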
def scan(self) -> int:
for s in self.sources:
self.scan_file(s)
for s, lang in self.sources:
self.scan_file(s, lang)
with open(self.outfile, 'w', encoding='utf-8') as ofile:
ofile.write('ninja_dyndep_version = 1\n')
for src in self.sources:
objfilename = self.objname_for(src)
for src, lang in self.sources:
objfilename = self.target_data.source2object[src]
mods_and_submods_needed = []
module_files_generated = []
module_files_needed = []
if src in self.sources_with_exports:
module_files_generated.append(self.module_name_for(src))
module_files_generated.append(self.module_name_for(src, lang))
if src in self.needs:
for modname in self.needs[src]:
if modname not in self.provided_by:
@ -167,7 +144,7 @@ class DependencyScanner:
for modname in mods_and_submods_needed:
provider_src = self.provided_by[modname]
provider_modfile = self.module_name_for(provider_src)
provider_modfile = self.module_name_for(provider_src, lang)
# Prune self-dependencies
if provider_src != src:
module_files_needed.append(provider_modfile)
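
scan() now iterates (source, language) pairs and, for each object, records which module files it generates and which it needs, then emits one dyndep edge per object. A hedged illustration of the kind of file it writes, held in a Python constant (paths and module names are invented; the real output goes through ninja_quote):

EXAMPLE_DYNDEP = '''\
ninja_dyndep_version = 1
build objs/speech.cpp.o | libfoo.p/speech.ifc: dyndep
build objs/main.cpp.o: dyndep | libfoo.p/speech.ifc
'''
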
@ -190,9 +167,7 @@ class DependencyScanner:
return 0
def run(args: T.List[str]) -> int:
assert len(args) == 3, 'got wrong number of arguments!'
pickle_file, outfile, jsonfile = args
with open(jsonfile, encoding='utf-8') as f:
sources = json.load(f)
scanner = DependencyScanner(pickle_file, outfile, sources)
assert len(args) == 2, 'got wrong number of arguments!'
outfile, pickle_file = args
scanner = DependencyScanner(pickle_file, outfile)
return scanner.scan()
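
run() now takes exactly two arguments, because the source list travels inside the pickle rather than a separate JSON file, and the order differs from the old (pickle_file, outfile, jsonfile) signature: the dyndep output comes first, the pickle second. A tiny sketch of the new contract, with invented paths:

import typing as T

def parse_depscan_args(args: T.List[str]) -> T.Tuple[str, str]:
    # Same contract as the new run() above: dyndep output first, pickle second.
    assert len(args) == 2, 'got wrong number of arguments!'
    outfile, pickle_file = args
    return outfile, pickle_file

print(parse_depscan_args(['libfoo.p/depscan.dd', 'libfoo.p/libfoo.dat']))
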