Skip to content

Commit

Permalink
Enhancements to downloads.py and filescfg.py (#3034)
Browse files Browse the repository at this point in the history
  • Loading branch information
iskunk authored Oct 7, 2024
1 parent 477f402 commit 6b9ef51
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 24 deletions.
2 changes: 1 addition & 1 deletion devutils/run_utils_pylint.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def main():
pylint_options = [
'--disable={}'.format(','.join(disable)),
'--jobs=4',
'--max-args=6',
'--max-args=7',
'--score=n',
'--persistent=n',
]
Expand Down
62 changes: 52 additions & 10 deletions utils/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import urllib.request
from pathlib import Path

from _common import ENCODING, USE_REGISTRY, ExtractorEnum, get_logger, \
get_chromium_version, add_common_params
from _common import ENCODING, USE_REGISTRY, ExtractorEnum, PlatformEnum, \
get_logger, get_chromium_version, get_running_platform, add_common_params
from _extraction import extract_tar_file, extract_with_7z, extract_with_winrar

sys.path.insert(0, str(Path(__file__).parent / 'third_party'))
Expand Down Expand Up @@ -151,6 +151,14 @@ def properties_iter(self):
return sorted(map(lambda x: (x, self[x]), self),
key=(lambda x: str(Path(x[1].output_path))))

def check_sections_exist(self, section_names):
    """
    Verify that every requested section name is present in this object.

    section_names is an iterable of section names to look up. If it is None or empty,
    nothing is checked.

    Raises KeyError naming the first requested section that is not present.
    """
    if not section_names:
        # Nothing was requested, so there is nothing to validate
        return
    for name in section_names:
        if name not in self:
            raise KeyError('"{}" has no section "{}"'.format(type(self).__name__, name))


class _UrlRetrieveReportHook: #pylint: disable=too-few-public-methods
"""Hook for urllib.request.urlretrieve to log progress information to console"""
Expand Down Expand Up @@ -256,12 +264,17 @@ def _get_hash_pairs(download_properties, cache_dir):
yield entry_type, entry_value


def retrieve_downloads(download_info, cache_dir, show_progress, disable_ssl_verification=False):
def retrieve_downloads(download_info,
cache_dir,
components,
show_progress,
disable_ssl_verification=False):
"""
Retrieve downloads into the downloads cache.
download_info is the DownloadInfo of downloads to retrieve.
cache_dir is the pathlib.Path to the downloads cache.
components is a list of component names to download, if not empty.
show_progress is a boolean indicating if download progress is printed to the console.
disable_ssl_verification is a boolean indicating if certificate verification
should be disabled for downloads using HTTPS.
Expand All @@ -274,6 +287,8 @@ def retrieve_downloads(download_info, cache_dir, show_progress, disable_ssl_veri
if not cache_dir.is_dir():
raise NotADirectoryError(cache_dir)
for download_name, download_properties in download_info.properties_iter():
if components and not download_name in components:
continue
get_logger().info('Downloading "%s" to "%s" ...', download_name,
download_properties.download_filename)
download_path = cache_dir / download_properties.download_filename
Expand All @@ -286,16 +301,19 @@ def retrieve_downloads(download_info, cache_dir, show_progress, disable_ssl_veri
disable_ssl_verification)


def check_downloads(download_info, cache_dir):
def check_downloads(download_info, cache_dir, components):
"""
Check integrity of the downloads cache.
download_info is the DownloadInfo of downloads to check.
cache_dir is the pathlib.Path to the downloads cache.
components is a list of component names to check, if not empty.
Raises source_retrieval.HashMismatchError when the computed and expected hashes do not match.
"""
for download_name, download_properties in download_info.properties_iter():
if components and not download_name in components:
continue
get_logger().info('Verifying hashes for "%s" ...', download_name)
download_path = cache_dir / download_properties.download_filename
with download_path.open('rb') as file_obj:
Expand All @@ -307,12 +325,19 @@ def check_downloads(download_info, cache_dir):
raise HashMismatchError(download_path)


def unpack_downloads(download_info, cache_dir, output_dir, skip_unused, sysroot, extractors=None):
def unpack_downloads(download_info,
cache_dir,
components,
output_dir,
skip_unused,
sysroot,
extractors=None):
"""
Unpack downloads in the downloads cache to output_dir. Assumes all downloads are retrieved.
download_info is the DownloadInfo of downloads to unpack.
cache_dir is the pathlib.Path directory containing the download cache
components is a list of component names to unpack, if not empty.
output_dir is the pathlib.Path directory to unpack the downloads to.
skip_unused is a boolean that determines if unused paths should be extracted.
sysroot is a string containing a sysroot to unpack if any.
Expand All @@ -322,6 +347,8 @@ def unpack_downloads(download_info, cache_dir, output_dir, skip_unused, sysroot,
May raise undetermined exceptions during archive unpacking.
"""
for download_name, download_properties in download_info.properties_iter():
if components and not download_name in components:
continue
download_path = cache_dir / download_properties.download_filename
get_logger().info('Unpacking "%s" to %s ...', download_name,
download_properties.output_path)
Expand Down Expand Up @@ -363,10 +390,12 @@ def _add_common_args(parser):


def _retrieve_callback(args):
retrieve_downloads(DownloadInfo(args.ini), args.cache, args.show_progress,
info = DownloadInfo(args.ini)
info.check_sections_exist(args.components)
retrieve_downloads(info, args.cache, args.components, args.show_progress,
args.disable_ssl_verification)
try:
check_downloads(DownloadInfo(args.ini), args.cache)
check_downloads(info, args.cache, args.components)
except HashMismatchError as exc:
get_logger().error('File checksum does not match: %s', exc)
sys.exit(1)
Expand All @@ -378,8 +407,10 @@ def _unpack_callback(args):
ExtractorEnum.WINRAR: args.winrar_path,
ExtractorEnum.TAR: args.tar_path,
}
unpack_downloads(DownloadInfo(args.ini), args.cache, args.output, args.skip_unused,
args.sysroot, extractors)
info = DownloadInfo(args.ini)
info.check_sections_exist(args.components)
unpack_downloads(info, args.cache, args.components, args.output, args.skip_unused, args.sysroot,
extractors)


def main():
Expand All @@ -397,6 +428,10 @@ def main():
'If it is not present, Python\'s urllib will be used. However, only '
'the CLI-based downloaders can be resumed if the download is aborted.'))
_add_common_args(retrieve_parser)
retrieve_parser.add_argument('--components',
nargs='+',
metavar='COMP',
help='Retrieve only these components. Default: all')
retrieve_parser.add_argument('--hide-progress-bar',
action='store_false',
dest='show_progress',
Expand All @@ -407,20 +442,27 @@ def main():
help='Disables certification verification for downloads using HTTPS.')
retrieve_parser.set_defaults(callback=_retrieve_callback)

def _default_extractor_path(name):
    """
    Return the default value for an extractor path argument.

    name is the extractor command name to use when not running on Windows.

    On Windows, USE_REGISTRY is returned instead of name.
    """
    return USE_REGISTRY if get_running_platform() == PlatformEnum.WINDOWS else name

# unpack
unpack_parser = subparsers.add_parser(
'unpack',
help='Unpack download files',
description='Verifies hashes of and unpacks download files into the specified directory.')
_add_common_args(unpack_parser)
unpack_parser.add_argument('--components',
nargs='+',
metavar='COMP',
help='Unpack only these components. Default: all')
unpack_parser.add_argument('--tar-path',
default='tar',
help=('(Linux and macOS only) Command or path to the BSD or GNU tar '
'binary for extraction. Default: %(default)s'))
unpack_parser.add_argument(
'--7z-path',
dest='sevenz_path',
default=USE_REGISTRY,
default=_default_extractor_path('7z'),
help=('Command or path to 7-Zip\'s "7z" binary. If "_use_registry" is '
'specified, determine the path from the registry. Default: %(default)s'))
unpack_parser.add_argument(
Expand Down
59 changes: 46 additions & 13 deletions utils/filescfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

import argparse
import datetime
import platform
import sys
import tarfile
Expand All @@ -18,7 +19,7 @@
from _common import get_logger, add_common_params


def filescfg_generator(cfg_path, build_outputs, cpu_arch):
def filescfg_generator(cfg_path, build_outputs, cpu_arch, excluded_files=None):
"""
Generator that yields pathlib.Path relative to the build outputs according to FILES.cfg
Expand All @@ -42,29 +43,44 @@ def filescfg_generator(cfg_path, build_outputs, cpu_arch):
# Do not package Windows debugging symbols
if file_path.suffix.lower() == '.pdb':
continue
yield file_path.relative_to(resolved_build_outputs)
file_path_rel = file_path.relative_to(resolved_build_outputs)
if excluded_files and file_path_rel in excluded_files:
continue
yield file_path_rel


def _get_archive_writer(output_path):
def _get_archive_writer(output_path, timestamp=None):
"""
Detects and returns the appropriate archive writer
output_path is the pathlib.Path of the archive to write
output_path is the pathlib.Path of the archive to write.
timestamp is a file timestamp to use for all files, if set.
"""
if not output_path.suffixes:
raise ValueError('Output name has no suffix: %s' % output_path.name)
if output_path.suffixes[-1].lower() == '.zip':
archive_root = Path(output_path.stem)
output_archive = zipfile.ZipFile(str(output_path), 'w', zipfile.ZIP_DEFLATED)
zip_date_time = None
if timestamp:
zip_date_time = datetime.datetime.fromtimestamp(timestamp).timetuple()[:6]

def zip_write(in_path, arc_path):
    """
    Add one filesystem entry to the zip archive.

    in_path is the string path of the file or directory to add.
    arc_path is the string path of the entry inside the archive.

    If zip_date_time is set, the entry is stored with that fixed timestamp instead of
    the modification time found on disk.
    """
    if not zip_date_time:
        output_archive.write(in_path, arc_path)
        return
    info = zipfile.ZipInfo.from_file(in_path, arc_path)
    info.date_time = zip_date_time
    if info.is_dir():
        # Directory walks also yield directories, which cannot be opened for reading;
        # store them as empty entries, as ZipFile.write() would.
        output_archive.writestr(info, b'')
        return
    # ZipInfo.from_file() leaves compress_type at ZIP_STORED and writestr() honours the
    # ZipInfo's own setting, so the archive's compression has to be copied over.
    info.compress_type = output_archive.compression
    with open(in_path, 'rb') as in_file:
        output_archive.writestr(info, in_file.read())

def add_func(in_path, arc_path):
"""Add files to zip archive"""
if in_path.is_dir():
for sub_path in in_path.rglob('*'):
output_archive.write(str(sub_path),
str(arc_path / sub_path.relative_to(in_path)))
zip_write(str(sub_path), str(arc_path / sub_path.relative_to(in_path)))
else:
output_archive.write(str(in_path), str(arc_path))
zip_write(str(in_path), str(arc_path))
elif '.tar' in output_path.name.lower():
if len(output_path.suffixes) >= 2 and output_path.suffixes[-2].lower() == '.tar':
tar_mode = 'w:%s' % output_path.suffixes[-1][1:]
Expand All @@ -74,22 +90,39 @@ def add_func(in_path, arc_path):
archive_root = Path(output_path.stem)
else:
raise ValueError('Could not detect tar format for output: %s' % output_path.name)
output_archive = tarfile.open(str(output_path), tar_mode)
if timestamp:

class TarInfoFixedTimestamp(tarfile.TarInfo):
    """
    TarInfo class with predefined constant mtime

    Reading mtime always yields the enclosing timestamp value, and assignments to
    mtime are discarded.
    """

    @property
    def mtime(self):
        """Return predefined timestamp"""
        return timestamp

    @mtime.setter
    def mtime(self, value):
        """Ignore incoming value"""

tarinfo_class = TarInfoFixedTimestamp
else:
tarinfo_class = tarfile.TarInfo
output_archive = tarfile.open(str(output_path), tar_mode, tarinfo=tarinfo_class)
add_func = lambda in_path, arc_path: output_archive.add(str(in_path), str(arc_path))
else:
raise ValueError('Unknown archive extension with name: %s' % output_path.name)
return output_archive, add_func, archive_root


def create_archive(file_iter, include_iter, build_outputs, output_path):
def create_archive(file_iter, include_iter, build_outputs, output_path, timestamp=None):
"""
Create an archive of the build outputs. Supports zip and compressed tar archives.
file_iter is an iterable of files to include in the zip archive.
output_path is the pathlib.Path to write the new zip archive.
build_outputs is a pathlib.Path to the build outputs
file_iter is an iterable of files to include in the archive.
output_path is the pathlib.Path to write the new archive.
build_outputs is a pathlib.Path to the build outputs.
timestamp is a file timestamp (Unix format) to use for all files, if set.
"""
output_archive, add_func, archive_root = _get_archive_writer(output_path)
output_archive, add_func, archive_root = _get_archive_writer(output_path, timestamp)
with output_archive:
for relative_path in file_iter:
add_func(build_outputs / relative_path, archive_root / relative_path)
Expand Down

0 comments on commit 6b9ef51

Please sign in to comment.