From 616fb0d750555690d7bc6b080685185d3d434ae4 Mon Sep 17 00:00:00 2001 From: Weizhong Xia Date: Tue, 13 Aug 2024 17:17:57 +0000 Subject: [PATCH 1/2] Allow patially update to wpt manifest To run WPTs locally, developers usually only run a few of tests each time. Updating WPT manifest for the whole WPT Repo is not necessary and took ~3 seconds on a performant develop machine. And this can take forever if the developer is using a remote file system. This change allows updating WPT manifest for tests only intended to be run. This brings down the test time for a few tests to ~0.5 seconds. Additional work is needed to plug in this into the test runner. --- tools/manifest/manifest.py | 7 ++++--- tools/manifest/spec.py | 2 +- tools/manifest/update.py | 9 +++++++-- tools/manifest/vcs.py | 27 ++++++++++++++++++++++----- 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/tools/manifest/manifest.py b/tools/manifest/manifest.py index e04872c6d53765..faa6ac256e04f5 100644 --- a/tools/manifest/manifest.py +++ b/tools/manifest/manifest.py @@ -3,8 +3,8 @@ from copy import deepcopy from logging import Logger from multiprocessing import Pool -from typing import (Any, Callable, Container, Dict, IO, Iterator, Iterable, Optional, Set, Text, Tuple, Type, - Union) +from typing import (Any, Callable, Container, Dict, IO, Iterator, Iterable, List, Optional, Set, Text, + Tuple, Type, Union) from . import jsonlib from . import vcs @@ -363,6 +363,7 @@ def load_and_update(tests_root: Text, url_base: Text, update: bool = True, rebuild: bool = False, + subdirs_to_update: Optional[List[Text]] = None, metadata_path: Optional[Text] = None, cache_root: Optional[Text] = None, working_copy: bool = True, @@ -401,7 +402,7 @@ def load_and_update(tests_root: Text, for retry in range(2): try: tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root, - working_copy, rebuild) + subdirs_to_update, working_copy, rebuild) changed = manifest.update(tree, parallel) break except InvalidCacheError: diff --git a/tools/manifest/spec.py b/tools/manifest/spec.py index 5404e2ac25e793..0ef0389dabdab5 100644 --- a/tools/manifest/spec.py +++ b/tools/manifest/spec.py @@ -28,7 +28,7 @@ def update_spec(tests_root: Text, logger.info("Updating SPEC_MANIFEST") try: tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root, - working_copy, True) + None, working_copy, True) changed = manifest.update(tree, parallel, compute_manifest_spec_items) except InvalidCacheError: logger.error("Manifest cache in spec.py was invalid.") diff --git a/tools/manifest/update.py b/tools/manifest/update.py index 72cb3b83b722c1..ef7eb047134e45 100755 --- a/tools/manifest/update.py +++ b/tools/manifest/update.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import argparse import os -from typing import Any, Optional, TYPE_CHECKING +from typing import Any, List, Optional, Text, TYPE_CHECKING from . import manifest from . import vcs @@ -23,6 +23,7 @@ def update(tests_root: str, manifest_path: Optional[str] = None, working_copy: bool = True, cache_root: Optional[str] = None, + subdirs_to_update: Optional[List[Text]] = None, rebuild: bool = False, parallel: bool = True ) -> bool: @@ -30,7 +31,7 @@ def update(tests_root: str, logger.info("Updating manifest") tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root, - working_copy, rebuild) + subdirs_to_update, working_copy, rebuild) return manifest.update(tree, parallel) @@ -45,6 +46,7 @@ def update_from_cli(**kwargs: Any) -> None: manifest.load_and_update(tests_root, path, kwargs["url_base"], + subdirs_to_update=kwargs['tests'], update=True, rebuild=kwargs["rebuild"], cache_root=kwargs["cache_root"], @@ -79,6 +81,9 @@ def create_parser() -> argparse.ArgumentParser: parser.add_argument( "--no-parallel", dest="parallel", action="store_false", help="Do not parallelize building the manifest") + parser.add_argument('tests', + nargs='*', + help='Paths of test files or directories to update.') return parser diff --git a/tools/manifest/vcs.py b/tools/manifest/vcs.py index 7b6b73d8779682..aac71018e2e80c 100644 --- a/tools/manifest/vcs.py +++ b/tools/manifest/vcs.py @@ -24,6 +24,7 @@ def get_tree(tests_root: Text, manifest: "Manifest", manifest_path: Optional[Text], cache_root: Optional[Text], + subdirs_to_update: Optional[List[Text]], working_copy: bool = True, rebuild: bool = False) -> "FileSystem": tree = None @@ -43,7 +44,9 @@ def get_tree(tests_root: Text, manifest.url_base, manifest_path=manifest_path, cache_path=cache_root, - rebuild=rebuild) + subdirs_to_update=subdirs_to_update, + rebuild=rebuild, + ) return tree @@ -91,10 +94,12 @@ def __init__(self, tests_root: Text, url_base: Text, cache_path: Optional[Text], + subdirs_to_update: Optional[List[Text]] = None, manifest_path: Optional[Text] = None, rebuild: bool = False) -> None: self.tests_root = tests_root self.url_base = url_base + self.subdirs_to_update = subdirs_to_update or [''] self.ignore_cache = None self.mtime_cache = None tests_root_bytes = tests_root.encode("utf8") @@ -111,15 +116,27 @@ def __init__(self, def __iter__(self) -> Iterator[Tuple[Text, Optional[Text], bool]]: mtime_cache = self.mtime_cache - for dirpath, dirnames, filenames in self.path_filter( - walk(self.tests_root.encode("utf8"))): - for filename, path_stat in filenames: - path = os.path.join(dirpath, filename).decode("utf8") + for subdir in self.subdirs_to_update: + path = os.path.join(self.tests_root, subdir) + if os.path.isfile(path): + path_stat = os.stat(path) + path = subdir if mtime_cache is None or mtime_cache.updated(path, path_stat): file_hash = self.hash_cache.get(path, None) yield path, file_hash, True else: yield path, None, False + elif os.path.isdir(path): + for dirpath, dirnames, filenames in self.path_filter( + walk(path.encode("utf8"))): + for filename, path_stat in filenames: + path = os.path.join(subdir, + os.path.join(dirpath, filename).decode("utf8")) + if mtime_cache is None or mtime_cache.updated(path, path_stat): + file_hash = self.hash_cache.get(path, None) + yield os.path.join(subdir, path), file_hash, True + else: + yield os.path.join(subdir, path), None, False def dump_caches(self) -> None: for cache in [self.mtime_cache, self.ignore_cache]: From f367c07cd10ff1eef730ab430fed927925a91274 Mon Sep 17 00:00:00 2001 From: Weizhong Xia Date: Wed, 13 Nov 2024 20:02:21 +0000 Subject: [PATCH 2/2] update per review comments --- tools/manifest/manifest.py | 4 ++-- tools/manifest/update.py | 17 +++++++++++++---- tools/manifest/vcs.py | 38 +++++++++++++++++++------------------- 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/tools/manifest/manifest.py b/tools/manifest/manifest.py index faa6ac256e04f5..c4eca5f26eb77c 100644 --- a/tools/manifest/manifest.py +++ b/tools/manifest/manifest.py @@ -363,7 +363,7 @@ def load_and_update(tests_root: Text, url_base: Text, update: bool = True, rebuild: bool = False, - subdirs_to_update: Optional[List[Text]] = None, + paths_to_update: Optional[List[Text]] = None, metadata_path: Optional[Text] = None, cache_root: Optional[Text] = None, working_copy: bool = True, @@ -402,7 +402,7 @@ def load_and_update(tests_root: Text, for retry in range(2): try: tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root, - subdirs_to_update, working_copy, rebuild) + paths_to_update, working_copy, rebuild) changed = manifest.update(tree, parallel) break except InvalidCacheError: diff --git a/tools/manifest/update.py b/tools/manifest/update.py index ef7eb047134e45..ef6846380ec3b9 100755 --- a/tools/manifest/update.py +++ b/tools/manifest/update.py @@ -23,7 +23,7 @@ def update(tests_root: str, manifest_path: Optional[str] = None, working_copy: bool = True, cache_root: Optional[str] = None, - subdirs_to_update: Optional[List[Text]] = None, + paths_to_update: Optional[List[Text]] = None, rebuild: bool = False, parallel: bool = True ) -> bool: @@ -31,7 +31,7 @@ def update(tests_root: str, logger.info("Updating manifest") tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root, - subdirs_to_update, working_copy, rebuild) + paths_to_update, working_copy, rebuild) return manifest.update(tree, parallel) @@ -43,10 +43,17 @@ def update_from_cli(**kwargs: Any) -> None: if not kwargs["rebuild"] and kwargs["download"]: download_from_github(path, tests_root) + paths_to_update = [] + for path_to_update in kwargs["tests"]: + if path_to_update.startswith(tests_root): + paths_to_update.append(os.path.relpath(path_to_update, tests_root)) + else: + logger.warning(f"{path_to_update} is not a WPT path") + manifest.load_and_update(tests_root, path, kwargs["url_base"], - subdirs_to_update=kwargs['tests'], + paths_to_update=paths_to_update, update=True, rebuild=kwargs["rebuild"], cache_root=kwargs["cache_root"], @@ -82,8 +89,10 @@ def create_parser() -> argparse.ArgumentParser: "--no-parallel", dest="parallel", action="store_false", help="Do not parallelize building the manifest") parser.add_argument('tests', + type=abs_path, nargs='*', - help='Paths of test files or directories to update.') + help=('Test files or directories to update. ' + 'Omit to update all items under the test root.')) return parser diff --git a/tools/manifest/vcs.py b/tools/manifest/vcs.py index aac71018e2e80c..02e1f56df63ab7 100644 --- a/tools/manifest/vcs.py +++ b/tools/manifest/vcs.py @@ -24,7 +24,7 @@ def get_tree(tests_root: Text, manifest: "Manifest", manifest_path: Optional[Text], cache_root: Optional[Text], - subdirs_to_update: Optional[List[Text]], + paths_to_update: Optional[List[Text]], working_copy: bool = True, rebuild: bool = False) -> "FileSystem": tree = None @@ -44,7 +44,7 @@ def get_tree(tests_root: Text, manifest.url_base, manifest_path=manifest_path, cache_path=cache_root, - subdirs_to_update=subdirs_to_update, + paths_to_update=paths_to_update, rebuild=rebuild, ) return tree @@ -94,12 +94,12 @@ def __init__(self, tests_root: Text, url_base: Text, cache_path: Optional[Text], - subdirs_to_update: Optional[List[Text]] = None, + paths_to_update: Optional[List[Text]] = None, manifest_path: Optional[Text] = None, rebuild: bool = False) -> None: self.tests_root = tests_root self.url_base = url_base - self.subdirs_to_update = subdirs_to_update or [''] + self.paths_to_update = paths_to_update or [''] self.ignore_cache = None self.mtime_cache = None tests_root_bytes = tests_root.encode("utf8") @@ -114,29 +114,29 @@ def __init__(self, git = GitHasher(tests_root) self.hash_cache = git.hash_cache() - def __iter__(self) -> Iterator[Tuple[Text, Optional[Text], bool]]: + def _make_file_info(self, + path: Text, + path_stat: os.stat_result) -> Tuple[Text, Optional[Text], bool]: mtime_cache = self.mtime_cache - for subdir in self.subdirs_to_update: - path = os.path.join(self.tests_root, subdir) + if mtime_cache is None or mtime_cache.updated(path, path_stat): + file_hash = self.hash_cache.get(path, None) + return path, file_hash, True + else: + return path, None, False + + def __iter__(self) -> Iterator[Tuple[Text, Optional[Text], bool]]: + for path_to_update in self.paths_to_update: + path = os.path.join(self.tests_root, path_to_update) if os.path.isfile(path): path_stat = os.stat(path) - path = subdir - if mtime_cache is None or mtime_cache.updated(path, path_stat): - file_hash = self.hash_cache.get(path, None) - yield path, file_hash, True - else: - yield path, None, False + yield self._make_file_info(path_to_update, path_stat) elif os.path.isdir(path): for dirpath, dirnames, filenames in self.path_filter( walk(path.encode("utf8"))): for filename, path_stat in filenames: - path = os.path.join(subdir, + path = os.path.join(path_to_update, os.path.join(dirpath, filename).decode("utf8")) - if mtime_cache is None or mtime_cache.updated(path, path_stat): - file_hash = self.hash_cache.get(path, None) - yield os.path.join(subdir, path), file_hash, True - else: - yield os.path.join(subdir, path), None, False + yield self._make_file_info(path, path_stat) def dump_caches(self) -> None: for cache in [self.mtime_cache, self.ignore_cache]: