From 6b9ef5113a2157ab8a4aebde6e6dac77beba319c Mon Sep 17 00:00:00 2001 From: "Daniel Richard G." Date: Mon, 7 Oct 2024 08:46:44 -0400 Subject: [PATCH] Enhancements to downloads.py and filescfg.py (#3034) --- devutils/run_utils_pylint.py | 2 +- utils/downloads.py | 62 ++++++++++++++++++++++++++++++------ utils/filescfg.py | 59 ++++++++++++++++++++++++++-------- 3 files changed, 99 insertions(+), 24 deletions(-) diff --git a/devutils/run_utils_pylint.py b/devutils/run_utils_pylint.py index 5b77118a35..227cfb2d08 100755 --- a/devutils/run_utils_pylint.py +++ b/devutils/run_utils_pylint.py @@ -31,7 +31,7 @@ def main(): pylint_options = [ '--disable={}'.format(','.join(disable)), '--jobs=4', - '--max-args=6', + '--max-args=7', '--score=n', '--persistent=n', ] diff --git a/utils/downloads.py b/utils/downloads.py index cb2fd2b91a..2681424fa4 100755 --- a/utils/downloads.py +++ b/utils/downloads.py @@ -19,8 +19,8 @@ import urllib.request from pathlib import Path -from _common import ENCODING, USE_REGISTRY, ExtractorEnum, get_logger, \ - get_chromium_version, add_common_params +from _common import ENCODING, USE_REGISTRY, ExtractorEnum, PlatformEnum, \ + get_logger, get_chromium_version, get_running_platform, add_common_params from _extraction import extract_tar_file, extract_with_7z, extract_with_winrar sys.path.insert(0, str(Path(__file__).parent / 'third_party')) @@ -151,6 +151,14 @@ def properties_iter(self): return sorted(map(lambda x: (x, self[x]), self), key=(lambda x: str(Path(x[1].output_path)))) + def check_sections_exist(self, section_names): + """...""" + if not section_names: + return + for name in section_names: + if name not in self: + raise KeyError('"{}" has no section "{}"'.format(type(self).__name__, name)) + class _UrlRetrieveReportHook: #pylint: disable=too-few-public-methods """Hook for urllib.request.urlretrieve to log progress information to console""" @@ -256,12 +264,17 @@ def _get_hash_pairs(download_properties, cache_dir): yield entry_type, entry_value -def retrieve_downloads(download_info, cache_dir, show_progress, disable_ssl_verification=False): +def retrieve_downloads(download_info, + cache_dir, + components, + show_progress, + disable_ssl_verification=False): """ Retrieve downloads into the downloads cache. download_info is the DowloadInfo of downloads to retrieve. cache_dir is the pathlib.Path to the downloads cache. + components is a list of component names to download, if not empty. show_progress is a boolean indicating if download progress is printed to the console. disable_ssl_verification is a boolean indicating if certificate verification should be disabled for downloads using HTTPS. @@ -274,6 +287,8 @@ def retrieve_downloads(download_info, cache_dir, show_progress, disable_ssl_veri if not cache_dir.is_dir(): raise NotADirectoryError(cache_dir) for download_name, download_properties in download_info.properties_iter(): + if components and not download_name in components: + continue get_logger().info('Downloading "%s" to "%s" ...', download_name, download_properties.download_filename) download_path = cache_dir / download_properties.download_filename @@ -286,16 +301,19 @@ def retrieve_downloads(download_info, cache_dir, show_progress, disable_ssl_veri disable_ssl_verification) -def check_downloads(download_info, cache_dir): +def check_downloads(download_info, cache_dir, components): """ Check integrity of the downloads cache. download_info is the DownloadInfo of downloads to unpack. cache_dir is the pathlib.Path to the downloads cache. + components is a list of component names to check, if not empty. Raises source_retrieval.HashMismatchError when the computed and expected hashes do not match. """ for download_name, download_properties in download_info.properties_iter(): + if components and not download_name in components: + continue get_logger().info('Verifying hashes for "%s" ...', download_name) download_path = cache_dir / download_properties.download_filename with download_path.open('rb') as file_obj: @@ -307,12 +325,19 @@ def check_downloads(download_info, cache_dir): raise HashMismatchError(download_path) -def unpack_downloads(download_info, cache_dir, output_dir, skip_unused, sysroot, extractors=None): +def unpack_downloads(download_info, + cache_dir, + components, + output_dir, + skip_unused, + sysroot, + extractors=None): """ Unpack downloads in the downloads cache to output_dir. Assumes all downloads are retrieved. download_info is the DownloadInfo of downloads to unpack. cache_dir is the pathlib.Path directory containing the download cache + components is a list of component names to unpack, if not empty. output_dir is the pathlib.Path directory to unpack the downloads to. skip_unused is a boolean that determines if unused paths should be extracted. sysroot is a string containing a sysroot to unpack if any. @@ -322,6 +347,8 @@ def unpack_downloads(download_info, cache_dir, output_dir, skip_unused, sysroot, May raise undetermined exceptions during archive unpacking. """ for download_name, download_properties in download_info.properties_iter(): + if components and not download_name in components: + continue download_path = cache_dir / download_properties.download_filename get_logger().info('Unpacking "%s" to %s ...', download_name, download_properties.output_path) @@ -363,10 +390,12 @@ def _add_common_args(parser): def _retrieve_callback(args): - retrieve_downloads(DownloadInfo(args.ini), args.cache, args.show_progress, + info = DownloadInfo(args.ini) + info.check_sections_exist(args.components) + retrieve_downloads(info, args.cache, args.components, args.show_progress, args.disable_ssl_verification) try: - check_downloads(DownloadInfo(args.ini), args.cache) + check_downloads(info, args.cache, args.components) except HashMismatchError as exc: get_logger().error('File checksum does not match: %s', exc) sys.exit(1) @@ -378,8 +407,10 @@ def _unpack_callback(args): ExtractorEnum.WINRAR: args.winrar_path, ExtractorEnum.TAR: args.tar_path, } - unpack_downloads(DownloadInfo(args.ini), args.cache, args.output, args.skip_unused, - args.sysroot, extractors) + info = DownloadInfo(args.ini) + info.check_sections_exist(args.components) + unpack_downloads(info, args.cache, args.components, args.output, args.skip_unused, args.sysroot, + extractors) def main(): @@ -397,6 +428,10 @@ def main(): 'If it is not present, Python\'s urllib will be used. However, only ' 'the CLI-based downloaders can be resumed if the download is aborted.')) _add_common_args(retrieve_parser) + retrieve_parser.add_argument('--components', + nargs='+', + metavar='COMP', + help='Retrieve only these components. Default: all') retrieve_parser.add_argument('--hide-progress-bar', action='store_false', dest='show_progress', @@ -407,12 +442,19 @@ def main(): help='Disables certification verification for downloads using HTTPS.') retrieve_parser.set_defaults(callback=_retrieve_callback) + def _default_extractor_path(name): + return USE_REGISTRY if get_running_platform() == PlatformEnum.WINDOWS else name + # unpack unpack_parser = subparsers.add_parser( 'unpack', help='Unpack download files', description='Verifies hashes of and unpacks download files into the specified directory.') _add_common_args(unpack_parser) + unpack_parser.add_argument('--components', + nargs='+', + metavar='COMP', + help='Unpack only these components. Default: all') unpack_parser.add_argument('--tar-path', default='tar', help=('(Linux and macOS only) Command or path to the BSD or GNU tar ' @@ -420,7 +462,7 @@ def main(): unpack_parser.add_argument( '--7z-path', dest='sevenz_path', - default=USE_REGISTRY, + default=_default_extractor_path('7z'), help=('Command or path to 7-Zip\'s "7z" binary. If "_use_registry" is ' 'specified, determine the path from the registry. Default: %(default)s')) unpack_parser.add_argument( diff --git a/utils/filescfg.py b/utils/filescfg.py index 68ead31074..59036ac79d 100755 --- a/utils/filescfg.py +++ b/utils/filescfg.py @@ -9,6 +9,7 @@ """ import argparse +import datetime import platform import sys import tarfile @@ -18,7 +19,7 @@ from _common import get_logger, add_common_params -def filescfg_generator(cfg_path, build_outputs, cpu_arch): +def filescfg_generator(cfg_path, build_outputs, cpu_arch, excluded_files=None): """ Generator that yields pathlib.Path relative to the build outputs according to FILES.cfg @@ -42,29 +43,44 @@ def filescfg_generator(cfg_path, build_outputs, cpu_arch): # Do not package Windows debugging symbols if file_path.suffix.lower() == '.pdb': continue - yield file_path.relative_to(resolved_build_outputs) + file_path_rel = file_path.relative_to(resolved_build_outputs) + if excluded_files and file_path_rel in excluded_files: + continue + yield file_path_rel -def _get_archive_writer(output_path): +def _get_archive_writer(output_path, timestamp=None): """ Detects and returns the appropriate archive writer - output_path is the pathlib.Path of the archive to write + output_path is the pathlib.Path of the archive to write. + timestamp is a file timestamp to use for all files, if set. """ if not output_path.suffixes: raise ValueError('Output name has no suffix: %s' % output_path.name) if output_path.suffixes[-1].lower() == '.zip': archive_root = Path(output_path.stem) output_archive = zipfile.ZipFile(str(output_path), 'w', zipfile.ZIP_DEFLATED) + zip_date_time = None + if timestamp: + zip_date_time = datetime.datetime.fromtimestamp(timestamp).timetuple()[:6] + + def zip_write(in_path, arc_path): + if zip_date_time: + info = zipfile.ZipInfo.from_file(in_path, arc_path) + info.date_time = zip_date_time + with open(in_path, 'rb') as in_file: + output_archive.writestr(info, in_file.read()) + else: + output_archive.write(in_path, arc_path) def add_func(in_path, arc_path): """Add files to zip archive""" if in_path.is_dir(): for sub_path in in_path.rglob('*'): - output_archive.write(str(sub_path), - str(arc_path / sub_path.relative_to(in_path))) + zip_write(str(sub_path), str(arc_path / sub_path.relative_to(in_path))) else: - output_archive.write(str(in_path), str(arc_path)) + zip_write(str(in_path), str(arc_path)) elif '.tar' in output_path.name.lower(): if len(output_path.suffixes) >= 2 and output_path.suffixes[-2].lower() == '.tar': tar_mode = 'w:%s' % output_path.suffixes[-1][1:] @@ -74,22 +90,39 @@ def add_func(in_path, arc_path): archive_root = Path(output_path.stem) else: raise ValueError('Could not detect tar format for output: %s' % output_path.name) - output_archive = tarfile.open(str(output_path), tar_mode) + if timestamp: + + class TarInfoFixedTimestamp(tarfile.TarInfo): + """TarInfo class with predefined constant mtime""" + @property + def mtime(self): + """Return predefined timestamp""" + return timestamp + + @mtime.setter + def mtime(self, value): + """Ignore incoming value""" + + tarinfo_class = TarInfoFixedTimestamp + else: + tarinfo_class = tarfile.TarInfo + output_archive = tarfile.open(str(output_path), tar_mode, tarinfo=tarinfo_class) add_func = lambda in_path, arc_path: output_archive.add(str(in_path), str(arc_path)) else: raise ValueError('Unknown archive extension with name: %s' % output_path.name) return output_archive, add_func, archive_root -def create_archive(file_iter, include_iter, build_outputs, output_path): +def create_archive(file_iter, include_iter, build_outputs, output_path, timestamp=None): """ Create an archive of the build outputs. Supports zip and compressed tar archives. - file_iter is an iterable of files to include in the zip archive. - output_path is the pathlib.Path to write the new zip archive. - build_outputs is a pathlib.Path to the build outputs + file_iter is an iterable of files to include in the archive. + output_path is the pathlib.Path to write the new archive. + build_outputs is a pathlib.Path to the build outputs. + timestamp is a file timestamp (Unix format) to use for all files, if set. """ - output_archive, add_func, archive_root = _get_archive_writer(output_path) + output_archive, add_func, archive_root = _get_archive_writer(output_path, timestamp) with output_archive: for relative_path in file_iter: add_func(build_outputs / relative_path, archive_root / relative_path)