diff --git a/src/ocrd/cli/resmgr.py b/src/ocrd/cli/resmgr.py
index 1289e498e..6ddc9a91b 100644
--- a/src/ocrd/cli/resmgr.py
+++ b/src/ocrd/cli/resmgr.py
@@ -13,29 +13,20 @@ import requests
 
 import click
 
-from ocrd_utils import (
-    initLogging,
-    directory_size,
-    getLogger,
-    get_ocrd_tool_json,
-    get_moduledir,
-    RESOURCE_LOCATIONS,
-)
+from ocrd_utils import directory_size, getLogger, get_moduledir, get_ocrd_tool_json, initLogging, RESOURCE_LOCATIONS
 from ocrd.constants import RESOURCE_USER_LIST_COMMENT
 
 from ..resource_manager import OcrdResourceManager
 
+
 def print_resources(executable, reslist, resmgr):
-    print('%s' % executable)
+    print(f"{executable}")
     for resdict in reslist:
-        print('- %s %s (%s)\n %s' % (
-            resdict['name'],
-            '@ %s' % resmgr.resource_dir_to_location(resdict['path']) if 'path' in resdict else '',
-            resdict['url'],
-            resdict['description']
-        ))
+        res_loc = f"@ {resmgr.resource_dir_to_location(resdict['path'])}" if 'path' in resdict else ''
+        print(f"- {resdict['name']} {res_loc} ({resdict['url']})\n {resdict['description']}")
     print()
 
+
 @click.group("resmgr")
 def resmgr_cli():
     """
@@ -43,9 +34,12 @@ def resmgr_cli():
     """
     initLogging()
 
+
 @resmgr_cli.command('list-available')
-@click.option('-D', '--no-dynamic', is_flag=True, default=False, help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
-@click.option('-e', '--executable', help='Show only resources for executable beginning with EXEC', metavar='EXEC', default='ocrd-*')
+@click.option('-D', '--no-dynamic', is_flag=True, default=False,
+              help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
+@click.option('-e', '--executable', metavar='EXEC', default='ocrd-*',
+              help='Show only resources for executable beginning with EXEC', )
 def list_available(executable, no_dynamic):
     """
     List available resources
@@ -54,6 +48,7 @@ def list_available(executable, no_dynamic):
     for executable, reslist in resmgr.list_available(executable=executable, dynamic=not no_dynamic):
         print_resources(executable, reslist, resmgr)
 
+
 @resmgr_cli.command('list-installed')
 @click.option('-e', '--executable', help='Show only resources for executable EXEC', metavar='EXEC')
 def list_installed(executable=None):
@@ -64,17 +59,24 @@ def list_installed(executable=None):
     for executable, reslist in resmgr.list_installed(executable):
         print_resources(executable, reslist, resmgr)
 
+
 @resmgr_cli.command('download')
-@click.option('-n', '--any-url', help='URL of unregistered resource to download/copy from', default='')
-@click.option('-D', '--no-dynamic', is_flag=True, default=False, help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
-@click.option('-t', '--resource-type', help='Type of resource', type=click.Choice(['file', 'directory', 'archive']), default='file')
-@click.option('-P', '--path-in-archive', help='Path to extract in case of archive type', default='.')
-@click.option('-a', '--allow-uninstalled', help="Allow installing resources for uninstalled processors", is_flag=True)
+@click.option('-n', '--any-url', default='', help='URL of unregistered resource to download/copy from')
+@click.option('-D', '--no-dynamic', default=False, is_flag=True,
+              help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
+@click.option('-t', '--resource-type', type=click.Choice(['file', 'directory', 'archive']), default='file',
+              help='Type of resource',)
+@click.option('-P', '--path-in-archive', default='.', help='Path to extract in case of archive type')
+@click.option('-a', '--allow-uninstalled', is_flag=True,
+              help="Allow installing resources for uninstalled processors",)
 @click.option('-o', '--overwrite', help='Overwrite existing resources', is_flag=True)
-@click.option('-l', '--location', help="Where to store resources - defaults to first location in processor's 'resource_locations' list or finally 'data'", type=click.Choice(RESOURCE_LOCATIONS))
+@click.option('-l', '--location', type=click.Choice(RESOURCE_LOCATIONS),
+              help="Where to store resources - defaults to first location in processor's 'resource_locations' "
+                   "list or finally 'data'")
 @click.argument('executable', required=True)
 @click.argument('name', required=False)
-def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstalled, overwrite, location, executable, name):
+def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstalled, overwrite, location, executable,
+             name):
     """
     Download resource NAME for processor EXECUTABLE.
 
@@ -91,7 +93,7 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
     log = getLogger('ocrd.cli.resmgr')
     resmgr = OcrdResourceManager()
     if executable != '*' and not name:
-        log.error("Unless EXECUTABLE ('%s') is the '*' wildcard, NAME is required" % executable)
+        log.error(f"Unless EXECUTABLE ('{executable}') is the '*' wildcard, NAME is required")
         sys.exit(1)
     elif executable == '*':
         executable = None
@@ -101,19 +103,21 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
     is_filename = Path(any_url).exists() if any_url else False
     if executable and not which(executable):
         if not allow_uninstalled:
-            log.error("Executable '%s' is not installed. " \
-                      "To download resources anyway, use the -a/--allow-uninstalled flag", executable)
+            log.error(f"Executable '{executable}' is not installed. "
+                      f"To download resources anyway, use the -a/--allow-uninstalled flag")
             sys.exit(1)
         else:
-            log.info("Executable %s is not installed, but " \
-                     "downloading resources anyway", executable)
+            log.info(f"Executable '{executable}' is not installed, but downloading resources anyway")
     reslist = resmgr.list_available(executable=executable, dynamic=not no_dynamic, name=name)
     if not any(r[1] for r in reslist):
         log.info(f"No resources {name} found in registry for executable {executable}")
         if executable and name:
-            reslist = [(executable, [{'url': any_url or '???', 'name': name,
-                                      'type': resource_type,
-                                      'path_in_archive': path_in_archive}])]
+            reslist = [(executable, [{
+                'url': any_url or '???',
+                'name': name,
+                'type': resource_type,
+                'path_in_archive': path_in_archive}]
+            )]
     for this_executable, this_reslist in reslist:
         for resdict in this_reslist:
             if 'size' in resdict:
@@ -123,15 +127,15 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
             if any_url:
                 resdict['url'] = any_url
             if resdict['url'] == '???':
-                log.warning("Cannot download user resource %s", resdict['name'])
+                log.warning(f"Cannot download user resource {resdict['name']}")
                 continue
             if resdict['url'].startswith('https://') or resdict['url'].startswith('http://'):
-                log.info("Downloading %s resource '%s' (%s)", registered, resdict['name'], resdict['url'])
+                log.info(f"Downloading {registered} resource '{resdict['name']}' ({resdict['url']})")
                 if 'size' not in resdict:
                     with requests.head(resdict['url']) as r:
                         resdict['size'] = int(r.headers.get('content-length', 0))
             else:
-                log.info("Copying %s resource '%s' (%s)", registered, resdict['name'], resdict['url'])
+                log.info(f"Copying {registered} resource '{resdict['name']}' ({resdict['url']})")
                 urlpath = Path(resdict['url'])
                 resdict['url'] = str(urlpath.resolve())
                 if Path(urlpath).is_dir():
@@ -141,7 +145,8 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
             if not location:
                 location = get_ocrd_tool_json(this_executable)['resource_locations'][0]
             elif location not in get_ocrd_tool_json(this_executable)['resource_locations']:
-                log.error("The selected --location {location} is not in the {this_executable}'s resource search path, refusing to install to invalid location")
+                log.error(f"The selected --location {location} is not in the {this_executable}'s resource search path, "
+                          f"refusing to install to invalid location")
                 sys.exit(1)
             if location != 'module':
                 basedir = resmgr.location_to_resource_dir(location)
@@ -164,13 +169,16 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
                         progress_cb=lambda delta: bar.update(delta)
                     )
                 if registered == 'unregistered':
-                    log.info("%s resource '%s' (%s) not a known resource, creating stub in %s'", this_executable, name, any_url, resmgr.user_list)
+                    log.info(f"{this_executable} resource '{name}' ({any_url}) not a known resource, creating stub "
+                             f"in {resmgr.user_list}")
                     resmgr.add_to_user_database(this_executable, fpath, url=any_url)
                 resmgr.save_user_list()
-                log.info("Installed resource %s under %s", resdict['url'], fpath)
+                log.info(f"Installed resource {resdict['url']} under {fpath}")
             except FileExistsError as exc:
                 log.info(str(exc))
-            log.info("Use in parameters as '%s'", resmgr.parameter_usage(resdict['name'], usage=resdict.get('parameter_usage', 'as-is')))
+            log.info(f"Use in parameters as "
+                     f"'{resmgr.parameter_usage(resdict['name'], usage=resdict.get('parameter_usage', 'as-is'))}'")
 
+
 @resmgr_cli.command('migrate')
 @click.argument('migration', type=click.Choice(['2.37.0']))
@@ -203,7 +211,6 @@ def migrate(migration):
                         v_out = 'directory'
                 resdict_out[k_out] = v_out
             yaml_out[executable].append(resdict_out)
-    resmgr.user_list.write_text(RESOURCE_USER_LIST_COMMENT +
-            '\n# migrated with ocrd resmgr migrate {migration}\n' +
-            safe_dump(yaml_out))
+    resmgr.user_list.write_text(
+        RESOURCE_USER_LIST_COMMENT + f'\n# migrated with ocrd resmgr migrate {migration}\n' + safe_dump(yaml_out))
     log.info(f'Applied migration {migration} to {resmgr.user_list}')
diff --git a/src/ocrd/resource_manager.py b/src/ocrd/resource_manager.py
index c668028e9..c74940c16 100644
--- a/src/ocrd/resource_manager.py
+++ b/src/ocrd/resource_manager.py
@@ -24,7 +24,8 @@ from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd, get_ocrd_tool_json
 
 from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT
 
-class OcrdResourceManager():
+
+class OcrdResourceManager:
     """
     Managing processor resources
 
@@ -81,7 +82,7 @@ def load_resource_list(self, list_filename, database=None):
             report = OcrdResourceListValidator.validate(list_loaded)
             if not report.is_valid:
                 self.log.error('\n'.join(report.errors))
-                raise ValueError("Resource list %s is invalid!" % (list_filename))
+                raise ValueError(f"Resource list {list_filename} is invalid!")
             for executable, resource_list in list_loaded.items():
                 if executable not in database:
                     database[executable] = []
@@ -176,7 +177,8 @@ def add_to_user_database(self, executable, res_filename, url=None, resource_type
         Add a stub entry to the user resource.yml
         """
         res_name = Path(res_filename).name
-        self.log.info("%s resource '%s' (%s) not a known resource, creating stub in %s'", executable, res_name, str(res_filename), self.user_list)
+        self.log.info(f"{executable} resource '{res_name}' ({str(res_filename)}) not a known resource, "
+                      f"creating stub in {self.user_list}")
         if Path(res_filename).is_dir():
             res_size = directory_size(res_filename)
         else:
@@ -190,7 +192,7 @@ def add_to_user_database(self, executable, res_filename, url=None, resource_type
         resdict = {
             'name': res_name,
             'url': url if url else '???',
-            'description': 'Found at %s on %s' % (self.resource_dir_to_location(res_filename), datetime.now()),
+            'description': f'Found at {self.resource_dir_to_location(res_filename)} on {datetime.now()}',
             'version_range': '???',
             'type': resource_type,
             'size': res_size
@@ -218,37 +220,45 @@ def resource_dir_to_location(self, resource_path):
             'cwd' if resource_path.startswith(getcwd()) else \
             resource_path
 
-    def parameter_usage(self, name, usage='as-is'):
+    @staticmethod
+    def parameter_usage(name, usage='as-is'):
         if usage == 'as-is':
             return name
         elif usage == 'without-extension':
             return Path(name).stem
-        raise ValueError("No such usage '%s'" % usage)
+        raise ValueError(f"No such usage '{usage}'")
 
-    def _download_impl(self, url, filename, progress_cb=None, size=None):
+    @staticmethod
+    def _download_impl(url, filename, progress_cb=None, size=None):
         log = getLogger('ocrd.resource_manager._download_impl')
-        log.info("Downloading %s to %s" % (url, filename))
-        with open(filename, 'wb') as f:
+        log.info(f"Downloading {url} to {filename}")
+        try:
             gdrive_file_id, is_gdrive_download_link = gparse_url(url, warning=False)
             if gdrive_file_id:
                 if not is_gdrive_download_link:
-                    url = "https://drive.google.com/uc?id={id}".format(id=gdrive_file_id)
+                    url = f"https://drive.google.com/uc?id={gdrive_file_id}"
                 try:
                     with requests.get(url, stream=True) as r:
                         if "Content-Disposition" not in r.headers:
                             url = get_url_from_gdrive_confirmation(r.text)
                 except RuntimeError as e:
-                    log.warning("Cannot unwrap Google Drive URL: ", e)
+                    log.warning("Cannot unwrap Google Drive URL: %s", e)
-            with requests.get(url, stream=True) as r:
-                r.raise_for_status()
-                for data in r.iter_content(chunk_size=4096):
-                    if progress_cb:
-                        progress_cb(len(data))
-                    f.write(data)
+            with open(filename, 'wb') as f:
+                with requests.get(url, stream=True) as r:
+                    r.raise_for_status()
+                    for data in r.iter_content(chunk_size=4096):
+                        if progress_cb:
+                            progress_cb(len(data))
+                        f.write(data)
+        except Exception:
+            rmtree(filename, ignore_errors=True)
+            Path(filename).unlink(missing_ok=True)
+            raise
 
-    def _copy_impl(self, src_filename, filename, progress_cb=None):
+    @staticmethod
+    def _copy_impl(src_filename, filename, progress_cb=None):
         log = getLogger('ocrd.resource_manager._copy_impl')
-        log.info("Copying %s to %s", src_filename, filename)
+        log.info(f"Copying {src_filename} to {filename}")
         if Path(src_filename).is_dir():
             log.info(f"Copying recursively from {src_filename} to {filename}")
             for child in Path(src_filename).rglob('*'):
@@ -276,16 +286,8 @@ def _copy_impl(self, src_filename, filename, progress_cb=None):
 
     # TODO Proper caching (make head request for size, If-Modified etc)
     def download(
-        self,
-        executable,
-        url,
-        basedir,
-        overwrite=False,
-        no_subdir=False,
-        name=None,
-        resource_type='file',
-        path_in_archive='.',
-        progress_cb=None,
+            self, executable, url, basedir, overwrite=False, no_subdir=False, name=None, resource_type='file',
+            path_in_archive='.', progress_cb=None,
     ):
         """
         Download a resource by URL
@@ -299,12 +301,15 @@ def download(
         is_url = url.startswith('https://') or url.startswith('http://')
         if fpath.exists():
             if not overwrite:
-                raise FileExistsError("%s %s already exists but --overwrite is not set" % ('Directory' if fpath.is_dir() else 'File', fpath))
+                fpath_type = 'Directory' if fpath.is_dir() else 'File'
+                log.warning(f"{fpath_type} {fpath} already exists but --overwrite is not set, skipping the download")
+                # raise FileExistsError(f"{fpath_type} {fpath} already exists but --overwrite is not set")
+                return fpath
             if fpath.is_dir():
-                log.info("Removing existing target directory {fpath}")
+                log.info(f"Removing existing target directory {fpath}")
                 rmtree(str(fpath))
             else:
-                log.info("Removing existing target file {fpath}")
+                log.info(f"Removing existing target file {fpath}")
                 unlink(str(fpath))
         destdir.mkdir(parents=True, exist_ok=True)
         if resource_type in ('file', 'directory'):
@@ -322,7 +327,7 @@ def download(
                 Path('out').mkdir()
                 with pushd_popd('out'):
                     mimetype = guess_media_type(f'../{archive_fname}', fallback='application/octet-stream')
-                    log.info("Extracting %s archive to %s/out" % (mimetype, tempdir))
+                    log.info(f"Extracting {mimetype} archive to {tempdir}/out")
                     if mimetype == 'application/zip':
                         with ZipFile(f'../{archive_fname}', 'r') as zipf:
                             zipf.extractall()
@@ -330,8 +335,8 @@ def download(
                         with open_tarfile(f'../{archive_fname}', 'r:*') as tar:
                             tar.extractall()
                     else:
-                        raise RuntimeError("Unable to handle extraction of %s archive %s" % (mimetype, url))
-                    log.info("Copying '%s' from archive to %s" % (path_in_archive, fpath))
+                        raise RuntimeError(f"Unable to handle extraction of {mimetype} archive {url}")
+                    log.info(f"Copying '{path_in_archive}' from archive to {fpath}")
                     if Path(path_in_archive).is_dir():
                         copytree(path_in_archive, str(fpath))
                     else: