2023-12-14 03:38:41 +08:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
2020-11-22 04:34:08 +08:00
|
|
|
# Copyright 2020 The Meson development team
|
backend/ninja: use a two step process for dependency scanning
This splits the scanner into two discrete steps, one that scans the
source files, and one that reads in the dependency information and
produces a dyndep.
The scanner uses the JSON format from
https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p1689r5.html,
which is the same format that MSVC and Clang use for C++ modules
scanning. This will allow us to more easily move to using MSVC and
clang-scan-deps when possible.
As an added bonus, this correctly tracks dependencies across TU and
Target boundaries, unlike the previous implementation, which assumed
that everything was fine if it couldn't find a provider, but could
run into issues. Because of that limitation Fortran code had to fully
depend on all of its dependencies, transitive or not. Now, when using
the dep scanner, we can remove that restriction, allowing more
parallelism.
2024-03-12 03:35:25 +08:00
|
|
|
# Copyright © 2023-2024 Intel Corporation
|
2020-11-22 04:34:08 +08:00
|
|
|
|
2022-03-23 08:28:59 +08:00
|
|
|
from __future__ import annotations
|
2020-11-22 04:34:08 +08:00
|
|
|
|
2023-11-21 15:29:20 +08:00
|
|
|
import collections
|
backend/ninja: use a two step process for dependency scanning
This splits the scanner into two discrete steps, one that scans the
source files, and one that reads in the dependency information and
produces a dyndep.
The scanner uses the JSON format from
https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p1689r5.html,
which is the same format that MSVC and Clang use for C++ modules
scanning. This will allow us to more easily move to using MSVC and
clang-scan-deps when possible.
As an added bonus, this correctly tracks dependencies across TU and
Target boundaries, unlike the previous implementation, which assumed
that everything was fine if it couldn't find a provider, but could
run into issues. Because of that limitation Fortran code had to fully
depend on all of its dependencies, transitive or not. Now, when using
the dep scanner, we can remove that restriction, allowing more
parallelism.
2024-03-12 03:35:25 +08:00
|
|
|
import json
|
2021-08-18 03:37:21 +08:00
|
|
|
import os
|
2020-11-22 04:34:08 +08:00
|
|
|
import pathlib
|
|
|
|
import pickle
|
|
|
|
import re
|
|
|
|
import typing as T
|
|
|
|
|
2022-03-23 08:28:59 +08:00
|
|
|
if T.TYPE_CHECKING:
    # Everything in this branch exists for static type checkers only; none of
    # these names are bound when the module actually runs.
    from typing_extensions import Literal, TypedDict, NotRequired

    from ..backend.ninjabackend import TargetDependencyScannerInfo

    # The TypedDicts below describe the JSON document that
    # DependencyScanner.scan() writes. The functional TypedDict syntax is
    # required because the keys contain hyphens and so cannot be spelled as
    # class attributes.

    # One module that a source file needs. scan() always sets 'logical-name'
    # and adds 'source-path' and 'compiled-module-path' only when the
    # provider of the module is known.
    Require = TypedDict(
        'Require',
        {
            'logical-name': str,
            'compiled-module-path': NotRequired[str],
            'source-path': NotRequired[str],
            'unique-on-source-path': NotRequired[bool],
            'lookup-method': NotRequired[Literal['by-name', 'include-angle', 'include-quote']]
        },
    )

    # One module that a source file defines. scan() fills in 'logical-name',
    # 'source-path' and 'compiled-module-path'.
    Provide = TypedDict(
        'Provide',
        {
            'logical-name': str,
            'compiled-module-path': NotRequired[str],
            'source-path': NotRequired[str],
            'unique-on-source-path': NotRequired[bool],
            'is-interface': NotRequired[bool],
        },
    )

    # The per-source entry: the object file built from the source
    # ('primary-output'), any module file it produces ('outputs') and the
    # modules it provides and requires.
    Rule = TypedDict(
        'Rule',
        {
            'primary-output': NotRequired[str],
            'outputs': NotRequired[T.List[str]],
            'provides': NotRequired[T.List[Provide]],
            'requires': NotRequired[T.List[Require]],
        }
    )

    # The top-level document: scan() writes version 1, revision 0 and one Rule
    # per scanned source.
    class Description(TypedDict):

        version: int
        revision: int
        rules: T.List[Rule]
|
|
|
|
|
|
|
|
|
2021-03-05 06:02:02 +08:00
|
|
|
# C++ module declarations. These are applied with ``.match()``, i.e. anchored
# at the start of a line, so the leading ``\s*`` lets indented declarations be
# recognised. (The prefix used to be ``\w*``, which skipped word characters
# instead of whitespace: indented imports were missed while something like
# ``fooimport x;`` was accepted.)
CPP_IMPORT_RE = re.compile(r'\s*import ([a-zA-Z0-9]+);')
CPP_EXPORT_RE = re.compile(r'\s*export module ([a-zA-Z0-9]+);')

# Fortran statements of interest; all of them are matched case-insensitively.
FORTRAN_INCLUDE_PAT = r"^\s*include\s*['\"](\w+\.\w+)['\"]"
# ``module <name>``, optionally followed by a ``!`` comment.
FORTRAN_MODULE_PAT = r"^\s*\bmodule\b\s+(\w+)\s*(?:!+.*)*$"
# ``submodule (<parent>[:<ancestor submodule>]) <name>``. The parent part is
# written as ``\w+(?::\w+)?`` so that single-character module names match too;
# group 1 is still the full ``parent[:submodule]`` text and group 2 the name.
FORTRAN_SUBMOD_PAT = r"^\s*\bsubmodule\b\s*\((\w+(?::\w+)?)\)\s*(\w+)"
# ``use [, non_intrinsic] [::] <name>``.
FORTRAN_USE_PAT = r"^\s*use,?\s*(?:non_intrinsic)?\s*(?:::)?\s*(\w+)"

FORTRAN_MODULE_RE = re.compile(FORTRAN_MODULE_PAT, re.IGNORECASE)
FORTRAN_SUBMOD_RE = re.compile(FORTRAN_SUBMOD_PAT, re.IGNORECASE)
FORTRAN_USE_RE = re.compile(FORTRAN_USE_PAT, re.IGNORECASE)
|
2020-11-22 04:34:08 +08:00
|
|
|
|
|
|
|
class DependencyScanner:
    """Scan a target's C++ and Fortran sources for module imports and exports.

    The findings are written to ``outfile`` as a JSON document with the keys
    ``version``, ``revision`` and ``rules``, holding one rule per source file.
    """

    def __init__(self, pickle_file: str, outfile: str):
        """Load the pickled target description and start with empty scan state.

        :param pickle_file: path of the pickle holding the target data; its
            ``sources``, ``source2object`` and ``private_dir`` attributes are
            read by this class.
        :param outfile: path that ``scan()`` writes the JSON description to.
        """
        # NOTE(review): pickle.load() can execute arbitrary code from the file.
        # This presumes the pickle is produced by the build system itself and
        # is never user-supplied -- confirm against the caller.
        with open(pickle_file, 'rb') as pf:
            self.target_data: TargetDependencyScannerInfo = pickle.load(pf)
        self.outfile = outfile
        self.sources = self.target_data.sources
        # module name -> source file that defines it
        self.provided_by: T.Dict[str, str] = {}
        # source file -> module name it defines (the last one found)
        self.exports: T.Dict[str, str] = {}
        # source file -> module names it needs, in order of appearance
        self.imports: collections.defaultdict[str, T.List[str]] = collections.defaultdict(list)
        self.sources_with_exports: T.List[str] = []

    def _record_export(self, fname: str, module: str) -> None:
        """Note that ``fname`` defines ``module`` in all export bookkeeping."""
        self.sources_with_exports.append(fname)
        self.provided_by[module] = fname
        self.exports[fname] = module

    def scan_file(self, fname: str, lang: Literal['cpp', 'fortran']) -> None:
        """Scan ``fname`` with the scanner for ``lang``.

        Any ``lang`` other than ``'fortran'`` is handled by the C++ scanner.
        """
        if lang == 'fortran':
            self.scan_fortran_file(fname)
        else:
            self.scan_cpp_file(fname)

    def scan_fortran_file(self, fname: str) -> None:
        """Record the modules and submodules ``fname`` uses and defines.

        Module names are lower-cased before they are stored.

        :raises RuntimeError: if the file declares the same module twice, or
            if a module it declares is already provided by another file.
        """
        fpath = pathlib.Path(fname)
        modules_in_this_file: T.Set[str] = set()
        for line in fpath.read_text(encoding='utf-8', errors='ignore').split('\n'):
            import_match = FORTRAN_USE_RE.match(line)
            export_match = FORTRAN_MODULE_RE.match(line)
            submodule_export_match = FORTRAN_SUBMOD_RE.match(line)
            if import_match:
                needed = import_match.group(1).lower()
                # In Fortran you have an using declaration also for the module
                # you define in the same file. Prevent circular dependencies.
                if needed not in modules_in_this_file:
                    self.imports[fname].append(needed)
            if export_match:
                exported_module = export_match.group(1).lower()
                # Raised explicitly rather than asserted, so the check is not
                # stripped when Python runs with -O.
                if exported_module in modules_in_this_file:
                    raise RuntimeError(
                        f'Module {exported_module} is declared more than once in {fname}.')
                modules_in_this_file.add(exported_module)
                if exported_module in self.provided_by:
                    raise RuntimeError(f'Multiple files provide module {exported_module}.')
                self._record_export(fname, exported_module)
            if submodule_export_match:
                # Store submodule "Foo" "Bar" as "foo:bar".
                # A submodule declaration can be both an import and an export declaration:
                #
                # submodule (a1:a2) a3
                #  - requires a1@a2.smod
                #  - produces a1@a3.smod
                parent_module_name_full = submodule_export_match.group(1).lower()
                parent_module_name = parent_module_name_full.split(':')[0]
                submodule_name = submodule_export_match.group(2).lower()
                concat_name = f'{parent_module_name}:{submodule_name}'
                self._record_export(fname, concat_name)
                # Fortran requires that the immediate parent module must be built
                # before the current one. Thus:
                #
                # submodule (parent) parent <- requires parent.mod (really parent.smod, but they are created at the same time)
                # submodule (a1:a2) a3      <- requires a1@a2.smod
                #
                # a3 does not depend on the a1 parent module directly, only transitively.
                self.imports[fname].append(parent_module_name_full)

    def scan_cpp_file(self, fname: str) -> None:
        """Record the C++ modules ``fname`` imports and exports.

        :raises RuntimeError: if a module it exports is already provided by a
            previously scanned file.
        """
        fpath = pathlib.Path(fname)
        for line in fpath.read_text(encoding='utf-8', errors='ignore').split('\n'):
            import_match = CPP_IMPORT_RE.match(line)
            export_match = CPP_EXPORT_RE.match(line)
            if import_match:
                self.imports[fname].append(import_match.group(1))
            if export_match:
                exported_module = export_match.group(1)
                if exported_module in self.provided_by:
                    raise RuntimeError(f'Multiple files provide module {exported_module}.')
                self._record_export(fname, exported_module)

    def module_name_for(self, src: str, lang: Literal['cpp', 'fortran']) -> str:
        """Return the compiled module file name for the module ``src`` exports.

        :raises KeyError: if no export was recorded for ``src``.
        """
        exported = self.exports[src]
        if lang != 'fortran':
            return f'{exported}.ifc'
        # Module foo:bar goes to a file name foo@bar.smod
        # Module Foo goes to a file name foo.mod
        namebase = exported.replace(':', '@')
        extension = 'smod' if ':' in exported else 'mod'
        return os.path.join(self.target_data.private_dir, f'{namebase}.{extension}')

    def scan(self) -> int:
        """Scan every source, then write the JSON description to ``outfile``.

        :return: always 0.
        """
        for s, lang in self.sources:
            self.scan_file(s, lang)

        description: Description = {
            'version': 1,
            'revision': 0,
            'rules': [],
        }
        for src, lang in self.sources:
            rule: Rule = {
                'primary-output': self.target_data.source2object[src],
                'requires': [],
                'provides': [],
            }
            # self.exports has a key for exactly the files listed in
            # self.sources_with_exports; the dict lookup avoids a linear list
            # search for every source.
            has_export = src in self.exports
            if has_export:
                rule['outputs'] = [self.module_name_for(src, lang)]
            # .get() does not create an entry in the defaultdict.
            for modname in self.imports.get(src, ()):
                provider_src = self.provided_by.get(modname)
                if provider_src == src:
                    # Provided by this very file: not an external requirement.
                    continue
                requirement: Require = {'logical-name': modname}
                if provider_src:
                    requirement['source-path'] = provider_src
                    requirement['compiled-module-path'] = self.module_name_for(provider_src, lang)
                rule['requires'].append(requirement)
            if has_export:
                rule['provides'].append({
                    'logical-name': self.exports[src],
                    'source-path': src,
                    'compiled-module-path': self.module_name_for(src, lang),
                })
            description['rules'].append(rule)

        with open(self.outfile, 'w', encoding='utf-8') as f:
            json.dump(description, f)

        return 0
|
|
|
|
|
|
|
|
def run(args: T.List[str]) -> int:
    """Scan the target described by a pickle file and write the result.

    :param args: exactly two items, the output file followed by the pickle
        file.
    :return: the value returned by ``DependencyScanner.scan()``.
    """
    assert len(args) == 2, 'got wrong number of arguments!'
    out_path, data_path = args
    # The scanner takes the pickle first, i.e. the reverse of the argv order.
    return DependencyScanner(data_path, out_path).scan()
|