From fdc369a235ad41cd318e6a3030bc6c2284bfa16a Mon Sep 17 00:00:00 2001 From: Kristoffer Richardsson Date: Mon, 24 Apr 2023 16:27:27 +0200 Subject: [PATCH] Add dependency checker tool --- .gitignore | 2 + docs/development/dependency_tool.md | 67 +++++ tools/dependency/deplib/graph.py | 396 +++++++++++++++++++++++++ tools/dependency/deplib/visualize.py | 47 +++ tools/dependency/deps.py | 82 +++++ tools/dependency/web/dependencies.html | 55 ++++ 6 files changed, 649 insertions(+) create mode 100644 docs/development/dependency_tool.md create mode 100644 tools/dependency/deplib/graph.py create mode 100644 tools/dependency/deplib/visualize.py create mode 100755 tools/dependency/deps.py create mode 100644 tools/dependency/web/dependencies.html diff --git a/.gitignore b/.gitignore index cca7bcdc33..96545b0665 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,5 @@ build/ **/*.cmd **/*.a *.elf + +tools/dependency/web/data.js diff --git a/docs/development/dependency_tool.md b/docs/development/dependency_tool.md new file mode 100644 index 0000000000..0dc574c755 --- /dev/null +++ b/docs/development/dependency_tool.md @@ -0,0 +1,67 @@ +--- +title: Dependency tool +page_id: dependency_tool +--- + +There is a tool that can be used to visualize and analyze dependencies in the source file tree. The code for the tool is +located in `tools/dependency` and is called by running the `deps.py` file. + +The tool extracts subsets of the full file tree, based on user input, and either visualizes the subset or counts the +number of files. + +The tool uses information from the source files, but also from intermediate files generated by kbuild; for that reason +valid results will only be available after a successful build. The results are based on the current build configuration, +for instance created by `make menuconfig`. Remember to run `make` before using the tool. + +In visualizations, c-files are shown as triangles and h-files as circles. 
The color is based on the first part of the +path. Hovering over a file shows the full path. A node with dependencies can be highlighted by clicking it, and it can +be moved by dragging. + +## Example uses + +### Visualize dependencies of commander.c + +Use the `-d` flag for finding dependencies and the `-v` flag for visualizing the result. + +`./tools/dependency/deps.py -d -v src/modules/src/commander.c` + +The tool will first try to find the full target (`src/modules/src/commander.c` in this case) in the file tree; if not +found, it will try to find the target in the file names. This makes it possible to do +`./tools/dependency/deps.py -d -v commander.c` instead. Note: if there exist two `commander.c` files in the +file tree, at different paths, both of them will be included. + +It is possible to limit the number of levels of dependencies to search for using the `-l` flag. To only show one level +of dependencies, use `./tools/dependency/deps.py -d -l 1 -v commander.c` + +### Visualize all files that include cpx.h + +Use the `-u` flag to find the files that use a file. + +`./tools/dependency/deps.py -u -v cpx.h` + +### Visualize multiple targets + +More than one target can be used: `./tools/dependency/deps.py -d -l 1 -v commander.c cpx.c` will show first level +dependencies of commander.c and cpx.c. + +It is also possible to use regex for targets. If the target does not match a full path, or a file name, the tool will +try to interpret it as a regex. For instance, to show all files in src/hal/interface/, use +`./tools/dependency/deps.py -d -l 0 -v src/hal/interface/.*` + +### Visualizing dependencies from one location to another + +Use the `-f` flag. The first target is the "from" location and the rest is the "to" location. To visualize dependencies +from /src/platform to /src/utils, use `./tools/dependency/deps.py -f -v src/platform/.* src/utils/.*` + +To show dependencies from a directory to "all other directories", you can use regex. 
The following essentially means +find dependencies from "platform" to "anything but platform": +`./tools/dependency/deps.py -f -v src/platform/.* '^src/(?!platform/).*$'` + +### Count files + +The `-t` flag prints the total number of files found, `./tools/dependency/deps.py -d -t src/modules/src/commander.c`. +`-cc` counts the number of c-files and `-hh` counts the number of h-files. + +The `-z` flag returns a non-zero result (fail) if the total file count is not zero. This is useful in build tools to +make sure there are no "backwards" dependencies. For instance, if we want to make sure files in src/utils do not have +dependencies to files outside src/utils, use `./tools/dependency/deps.py -f -z src/utils/.* '^src/(?!utils/).*$'` diff --git a/tools/dependency/deplib/graph.py b/tools/dependency/deplib/graph.py new file mode 100644 index 0000000000..664567d0c1 --- /dev/null +++ b/tools/dependency/deplib/graph.py @@ -0,0 +1,396 @@ +import re +from collections.abc import Iterable +from os import path +from pathlib import Path + + +class _CmdFileReader: + def __init__(self, build_root: str, c_file: str) -> None: + self.c_file = c_file + + dir_name, file_name = path.split(c_file) + # TODO krri handle .cpp files? 
+ file_name_base = file_name[:-2] + self._cmd_file = f'{build_root}/{dir_name}/.{file_name_base}.o.cmd' + self._values = self._read_cmd_file(self._cmd_file) + + dep_key = f'deps_{dir_name}/{file_name_base}.o' + self._dependencies = self._clean_deps(self._values[dep_key].split()) + + def _read_cmd_file(self, cmd_file: str) -> dict[str, str]: + result: dict[str, str] = {} + + with open(cmd_file, 'r', encoding='utf8') as f: + lines = f.readlines() + + key = '' + value = '' + is_reading_value = False + for line in lines: + remaining = line + if not is_reading_value: + if ':=' in line: + parts = line.split(':=') + key = parts[0].strip() + is_reading_value = True + remaining = parts[1] + else: + continue + else: + remaining = line + + if '\\' in remaining: + value += ' ' + remaining.replace('\\', '').strip() + is_reading_value = True + else: + value += ' ' + remaining.strip() + result[key] = value + is_reading_value = False + key = '' + value = '' + + return result + + def _clean_deps(self, deps: list[str]) -> dict[str, str]: + result: dict[str, str] = {} + + for dep in deps: + # Only keep dependencies in our tree + if dep.startswith('../'): + file_name = path.basename(dep) + + # Fix relative path by removing ../ + clean_dep = dep[3:] + + result[file_name] = clean_dep + + return result + + def map_to_dependency(self, include_file) -> None | str: + if include_file in self._dependencies: + return self._dependencies[include_file] + return None + + +class _DependencyNode: + def __init__(self, source_file: str) -> None: + self.source_file = source_file + self.name = path.basename(source_file) + self.is_processed = False + self.children: set['_DependencyNode'] = set() + self.parents: set['_DependencyNode'] = set() + self.is_h_file = source_file.endswith('.h') + self.is_c_file = source_file.endswith('.c') + + def add_child(self, child: "_DependencyNode"): + self.children.add(child) + + def add_parent(self, parent: "_DependencyNode"): + self.parents.add(parent) + + def 
mark_processed(self) -> None: + self.is_processed = True + + def print(self, level=0) -> None: + for i in range(level): + print(' ', end='') + print(self.name, end='') + if not self.is_processed: + print(" (not processed)") + print() + + +class DependencyGraph: + """ + A DependencyGraph represents the dependencies in the source tree, all files or a subset. The graph is built from + .c and .h files, but also .cmd files generated by kbuild, this ensures that the graph is valid for the current + configuration. It also means that the dependency graph can only be built after a successful make. + """ + def __init__(self, source_root: str = '.', + build_root: str = 'build', + exclude_dirs: Iterable[str] = ('src/lib', 'vendor')) -> None: + """ + Create an empty Dependency graph. + + C-files in excluded directories will never be included. Dependencies to h-files in excluded directories will be + included to the first level, but not further. + + Args: + source_root (str, optional): The root of the file tree. Defaults to '.'. + build_root (str, optional): The root of the build directory generated by kbuild. Defaults to 'build'. + exclude_dirs (Iterable[str], optional): Directories to exclude. Defaults to ('src/lib', 'vendor'). + """ + self._dep_graph: dict[str, _DependencyNode] = {} + self._ignored_files: list[str] = [] + + self._source_root = source_root + self._build_root = build_root + self._exclude_dirs = exclude_dirs + + def add_and_process_c_file(self, c_file: str) -> None: + """ + Add a c-file with dependencies to the graph + + Args: + c_file (str): path to the c-file to add + """ + try: + cmd_reader = _CmdFileReader(self._build_root, c_file) + except FileNotFoundError: + self._ignored_files.append(c_file) + return + + self._add_to_graph(c_file) + self._process_nodes(cmd_reader, self._source_root) + + def add_and_process_dir(self, dir_name: str) -> None: + """ + Add all c-files with dependencies in a directory with sub-directories to the dependency graph. 
+ + Args: + dir_name (str): path to the directory + """ + for p in Path(dir_name).rglob('*.c'): + file_name = str(p) + if self._include_file(file_name): + self.add_and_process_c_file(file_name) + + def print_tree(self, file_name: str) -> None: + """ + Print a dependency tree, starting from a file. + + Note: the same file might be printed multiple times as this is a tree derived from the graph. + + Args: + file_name (str): The file to start from + """ + node = self._dep_graph[file_name] + self._print_node(node) + + def get_file_count(self) -> int: + """ + Get total number of files in the graph + + Returns: + int: the number of files + """ + return len(self._dep_graph) + + def get_c_file_count(self) -> int: + """ + Get the number of c-files in the graph + + Returns: + int: the number of files + """ + c_files = filter(lambda node: node.is_c_file, self._dep_graph.values()) + return len(list(c_files)) + + def get_h_file_count(self) -> int: + """ + Get the number of h-files in the graph + + Returns: + int: the number of files + """ + c_files = filter(lambda node: node.is_h_file, self._dep_graph.values()) + return len(list(c_files)) + + def get_ignored_files(self) -> list[str]: + """ + Get a list of all ignored files + + Returns: + list[str]: List of ignored files + """ + return self._ignored_files + + def find(self, target: str) -> "DependencyGraph": + """ + Find the subset defined by the target + + Args: + target (str): The search string + + Returns: + DependencyGraph: The subset + """ + result = DependencyGraph() + for node in self._find_targets(target): + result._append(node) + return result + + def depends_on(self, target: str, levels: int) -> "DependencyGraph": + """ + Find the subset that target depends on + + Args: + target (str): the target + levels (int): the maximum number of levels to follow. Negative means no limit. 
+ + Returns: + DependencyGraph: _description_ + """ + result = DependencyGraph() + if levels != 0: + for node in self._find_targets(target): + self._append_depends_on(result, node, levels, 1) + return result + + def used_by(self, target: str, levels: int) -> "DependencyGraph": + """ + Find the subset that uses target + + Args: + target (str): the target + levels (int): the maximum number of levels to follow. Negative means no limit. + + Returns: + DependencyGraph: _description_ + """ + result = DependencyGraph() + if levels != 0: + for node in self._find_targets(target): + self._append_used_by(result, node, levels, 1) + return result + + def with_dependency_to(self, other: "DependencyGraph") -> "DependencyGraph": + """ + Select all nodes that has a dependency to other set + + Args: + other (DependencyGraph): The other set + + Returns: + DependencyGraph: The result set + """ + result = DependencyGraph() + + other_set: set[_DependencyNode] = set(other._dep_graph.values()) + + for node in self._dep_graph.values(): + for child in node.children: + if child in other_set: + result._append(node) + result._append(child) + + return result + + def union(self, other: "DependencyGraph"): + """ + Create the union of this and another graph, that is, add the elements of the other graph to this graph. 
+ + Args: + other (DependencyGraph): Another graph + """ + for node in other._dep_graph.values(): + self._append(node) + + def export(self) -> tuple[dict[str, dict[str, str | int | bool]], list[list[str]]]: + nodes: dict[str, dict[str, str | int | bool]] = {} + edges: list[list[str]] = [] + categories: dict[str, int] = {} + + for file_name, node in self._dep_graph.items(): + category = self._get_category(file_name, categories) + nodes[file_name] = { + 'name': node.name, + 'file_name': file_name, + 'is-c-file': node.is_c_file, + 'category': category} + + for child in node.children: + if child.source_file in self._dep_graph: + edges.append([node.source_file, child.source_file]) + + return nodes, edges + + def _find_targets(self, target: str) -> list[_DependencyNode]: + result: list[_DependencyNode] = [] + + if target in self._dep_graph: + result.append(self._dep_graph[target]) + + if len(result) == 0: + for node in self._dep_graph.values(): + if node.name == target: + result.append(node) + + if len(result) == 0: + for node in self._dep_graph.values(): + if re.fullmatch(target, node.source_file): + result.append(node) + + return result + + def _append_depends_on(self, result: "DependencyGraph", node: _DependencyNode, max_level: int, curr_level: int): + for child in node.children: + if result._append(child): + if curr_level < max_level or max_level < 0: + self._append_depends_on(result, child, max_level, curr_level + 1) + + def _append_used_by(self, result: "DependencyGraph", node: _DependencyNode, max_level: int, curr_level: int): + for parent in node.parents: + if result._append(parent): + if curr_level < max_level or max_level < 0: + self._append_used_by(result, parent, max_level, curr_level + 1) + + def _get_category(self, source_file: str, categories: dict[str, int]) -> int: + result = 0 + parts = source_file.split('/') + if len(parts) >= 2: + if parts[0] == 'src': + fragment = parts[1] + + if fragment not in categories: + categories[fragment] = len(categories) + 
1 + result = categories[fragment] + return result + + def _print_node(self, node: _DependencyNode, level=0) -> None: + node.print(level) + for child in node.children: + self._print_node(child, level + 1) + + def _add_to_graph(self, source_file: str) -> _DependencyNode: + if source_file not in self._dep_graph: + self._dep_graph[source_file] = _DependencyNode(source_file) + return self._dep_graph[source_file] + + def _append(self, node: _DependencyNode) -> bool: + if node.source_file not in self._dep_graph: + self._dep_graph[node.source_file] = node + return True + return False + + def _process_nodes(self, cmd_reader: _CmdFileReader, source_root: str) -> None: + while True: + to_process = list(filter(lambda node: not node.is_processed, self._dep_graph.values())) + for node in to_process: + self._process_node(cmd_reader, node, source_root) + if len(to_process) == 0: + break + + def _process_node(self, cmd_reader: _CmdFileReader, node: _DependencyNode, source_root: str) -> None: + if self._include_file(node.source_file): + include_files = self._parse_includes(f'{source_root}/{node.source_file}') + for include_file in include_files: + source_file = cmd_reader.map_to_dependency(include_file) + if source_file: + child = self._add_to_graph(source_file) + node.add_child(child) + child.add_parent(node) + + node.mark_processed() + + def _parse_includes(self, source_file: str) -> list[str]: + with open(source_file, 'r', encoding="utf8") as f: + contents = f.read() + pattern = '#include +(?:"|<)(.*)(?:"|>)' + return re.findall(pattern, contents) + + def _include_file(self, source_file: str): + for exclude in self._exclude_dirs: + if source_file.startswith(exclude): + return False + return True diff --git a/tools/dependency/deplib/visualize.py b/tools/dependency/deplib/visualize.py new file mode 100644 index 0000000000..e606ccc080 --- /dev/null +++ b/tools/dependency/deplib/visualize.py @@ -0,0 +1,47 @@ + +import json +import os +import webbrowser + + +colors = [ + 0xaa0000, + 
0x00aa00, + 0x0000aa, + 0x00aaaa, + 0xaa00aa, + 0xaaaa00, + 0xaa6600, + 0xaa0066, + 0x00aa66, +] + + +def get_color(category: int) -> str: + base = 0x808080 + if category > 0 and category < (len(colors) + 1): + base = colors[category - 1] + + return f'#{base:06x}' + + +def render(nodes: dict[str, dict[str, str | int | bool]], edges: list[list[str]]): + vis_nodes: list[dict[str, str]] = [] + for file_name, data in nodes.items(): + color = get_color(int(data['category'])) + + shape = 'dot' + if data['is-c-file']: + shape = 'triangle' + + vis_nodes.append({'id': file_name, 'label': str(data['name']), 'title': file_name, 'shape': shape, 'color': color}) + + vis_edges = [] + for edge in edges: + vis_edges.append({"from": edge[0], "to": edge[1], "arrows": "to"}) + + with open(os.path.realpath('tools/dependency/web/data.js'), 'w', encoding='utf8') as f: + f.write('let nodes = new vis.DataSet(' + json.dumps(vis_nodes) + ');\n') + f.write('let edges = new vis.DataSet(' + json.dumps(vis_edges) + ');\n') + + webbrowser.open('file://' + os.path.realpath('tools/dependency/web/dependencies.html')) diff --git a/tools/dependency/deps.py b/tools/dependency/deps.py new file mode 100755 index 0000000000..d1da3b986c --- /dev/null +++ b/tools/dependency/deps.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +import argparse +import sys +from deplib.graph import DependencyGraph +import deplib.visualize as visualize + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--print', help='Print dependencies for a file.') + + parser.add_argument('-d', '--dependson', help='Select files that the target(s) depends on', action='store_true') + parser.add_argument('-u', '--usedby', help='Select files that are used by the target(s)', action='store_true') + parser.add_argument('-l', '--levels', help='The number of levels of dependencies to add. 
Negative numbers are ' + + 'interpreted as all levels.', default=-1) + parser.add_argument('-f', '--from-to', help='Select files with dependencies from target_1 to target_2. ' + + 'Example usage, to find all dependencies from utils to other directories, use targets: ' + + "src/utils/.* '^src/(?!utils/).*$'", action='store_true') + + parser.add_argument('-t', '--total', help='Output the total number of files in the selected set', + action='store_true') + parser.add_argument('-cc', '--cfiles', help='Output the number of c files in the selected set', action='store_true') + parser.add_argument('-hh', '--hfiles', help='Output the number of h files in the selected set', action='store_true') + parser.add_argument('-z', '--zero', help='Assert that the total count is zero, fail if not', action='store_true') + + parser.add_argument('-v', '--visualize', help='Visualize the selected set in a web browser', action='store_true') + parser.add_argument('-i', '--ignored', help='Print files that were ignored', action='store_true') + + parser.add_argument('targets', help='The files to operate on. 
The full graph is selected by default.', + nargs=argparse.REMAINDER) + + args = parser.parse_args() + + graph = DependencyGraph() + graph.add_and_process_dir('src') + + subset = DependencyGraph() + if args.from_to: + if len(args.targets) < 2: + print("Error: must have 2 or more targets") + sys.exit(1) + + from_set = graph.find(args.targets[0]) + to_set = DependencyGraph() + for target in args.targets[1:]: + to_set.union(graph.find(target)) + subset = from_set.with_dependency_to(to_set) + else: + for target in args.targets: + subset.union(graph.find(target)) + + if args.dependson: + for target in args.targets: + subset.union(graph.depends_on(target, int(args.levels))) + + if args.usedby: + for target in args.targets: + subset.union(graph.used_by(target, int(args.levels))) + + if args.ignored: + print('Ignored files:') + print(graph.get_ignored_files()) + + if args.total: + print(subset.get_file_count()) + + if args.cfiles: + print(subset.get_c_file_count()) + + if args.hfiles: + print(subset.get_h_file_count()) + + if args.visualize: + nodes, edges = subset.export() + visualize.render(nodes, edges) + + if args.zero: + count = subset.get_file_count() + if count != 0: + print(f'Error: file count is not 0 ({count})', file=sys.stderr) + sys.exit(1) diff --git a/tools/dependency/web/dependencies.html b/tools/dependency/web/dependencies.html new file mode 100644 index 0000000000..d7d56fe9fe --- /dev/null +++ b/tools/dependency/web/dependencies.html @@ -0,0 +1,55 @@ + + + + + + + + + + +
+
+
+ + + + +