From 53944010fd38365de759215e65eb4cc86d52b7d9 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Mon, 16 Dec 2024 00:04:30 +0000 Subject: [PATCH] Use ``resource_path()`` to access datatypes_conf.xml.sample as a package resource This required other changes included here as well: 1) Extend `parse_xml()` to accept a `Traversable` (i.e. the output of `resource_path()`. Open the file before passing it to `etree.parse()`, since lxml's implementation probably doesn't support a `Traversable`. This also simplifies the exception handling, since the workaround for the missing `errno` in the `OSError` exception is not needed any more. ------ 2) Fix Galaxy startup when the ``data_manager_config_file`` config option is set to an empty value When does this happen? ---------------------- planemo (which we use in the converters tests) set this option to an empty value when not testing data managers: https://github.com/galaxyproject/planemo/blob/master/planemo/galaxy/config.py#L452 This empty config value was resolved to the `path_resolves_to` directory for this option. With the change in 1) above, this results in the following traceback at Galaxy startup: ``` Traceback (most recent call last): File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/webapps/galaxy/buildapp.py", line 68, in app_pair app = galaxy.app.UniverseApplication(global_conf=global_conf, is_webapp=True, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/app.py", line 777, in __init__ self.data_managers = self._register_singleton(DataManagers) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/di.py", line 27, in _register_singleton instance = self[dep_type] ~~~~^^^^^^^^^^ File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/.venv/lib/python3.12/site-packages/lagom/container.py", line 381, in __getitem__ return self.resolve(dep) ^^^^^^^^^^^^^^^^^ File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/.venv/lib/python3.12/site-packages/lagom/container.py", line 265, in resolve return self._reflection_build_with_err_handling(dep_type, suppress_error) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/.venv/lib/python3.12/site-packages/lagom/container.py", line 392, in _reflection_build_with_err_handling return self._reflection_build(dep_type) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/.venv/lib/python3.12/site-packages/lagom/container.py", line 408, in _reflection_build return dep_type(**sub_deps) # type: ignore ^^^^^^^^^^^^^^^^^^^^ File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/tools/data_manager/manager.py", line 44, in __init__ self.load_from_xml(filename) File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/tools/data_manager/manager.py", line 58, in load_from_xml tree = util.parse_xml(xml_filename) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/users/ga002/soranzon/software/nsoranzo_galaxy/lib/galaxy/util/__init__.py", line 360, in parse_xml with source.open("rb") as f: ^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.12/pathlib.py", line 1013, in open return io.open(self, mode, buffering, encoding, errors, newline) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ IsADirectoryError: [Errno 21] Is a directory: '/tmp/tmps2qq5htq' ``` Why is this a problem now? -------------------------- Before this commit, `util.parse_xml()` was raising a `XMLSyntaxError` exception if passed a directory, but now it raises an `IsADirectoryError`, which is a subclass of `OSError` with errno 21. As a consequence, when `load_from_xml()` in lib/galaxy/tools/data_manager/manager.py calls `util.parse_xml()` passing a directory, the resulting exception is now re-raised in the `except OSError as e:` branch instead of just being logged, which is actually good since this is a misconfiguration. Solutions -------- - Do not resolve an empty/None path to the `path_resolves_to` directory. - Do not check empty/None paths against root - Adjst unit tests accordingly - Raise an OSError if the `job_config_file` config option is set to null (previously this would be resolved to a directory, also raising an OSError when trying to open that as a file). N.B.: This will also need to be fixed in Planemo by not setting the `data_manager_config_file` option when not needed. --- lib/galaxy/config/__init__.py | 8 +++++--- lib/galaxy/jobs/__init__.py | 2 ++ lib/galaxy/tool_util/linters/datatypes.py | 7 +++++-- lib/galaxy/util/__init__.py | 25 +++++++++++++++-------- test/unit/config/test_path_graph.py | 6 +++--- test/unit/config/test_path_resolves_to.py | 8 ++++---- 6 files changed, 36 insertions(+), 20 deletions(-) diff --git a/lib/galaxy/config/__init__.py b/lib/galaxy/config/__init__.py index d253b3d0d9b6..909df2ba1483 100644 --- a/lib/galaxy/config/__init__.py +++ b/lib/galaxy/config/__init__.py @@ -511,10 +511,10 @@ def resolve(key): if not parent: # base case: nothing else needs resolving return path parent_path = resolve(parent) # recursively resolve parent path - if path is not None: + if path: path = os.path.join(parent_path, path) # resolve path else: - path = parent_path # or use parent path + log.warning("Trying to resolve path for the '%s' option but it's empty/None", key) setattr(self, key, path) # update property _cache[key] = path # cache it! @@ -538,7 +538,7 @@ def resolve(key): if self.is_set(key) and self.paths_to_check_against_root and key in self.paths_to_check_against_root: self._check_against_root(key) - def _check_against_root(self, key): + def _check_against_root(self, key: str): def get_path(current_path, initial_path): # if path does not exist and was set as relative: if not self._path_exists(current_path) and not os.path.isabs(initial_path): @@ -557,6 +557,8 @@ def get_path(current_path, initial_path): return current_path current_value = getattr(self, key) # resolved path or list of resolved paths + if not current_value: + return if isinstance(current_value, list): initial_paths = listify(self._raw_config[key], do_strip=True) # initial unresolved paths updated_paths = [] diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index 3e9a306e4116..f55562de63b0 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -384,6 +384,8 @@ def __init__(self, app: MinimalManagerApp): f" release of Galaxy. Please convert to YAML at {self.app.config.job_config_file} or" f" explicitly set `job_config_file` to {job_config_file} to remove this message" ) + if not job_config_file: + raise OSError() if ".xml" in job_config_file: tree = load(job_config_file) job_config_dict = self.__parse_job_conf_xml(tree) diff --git a/lib/galaxy/tool_util/linters/datatypes.py b/lib/galaxy/tool_util/linters/datatypes.py index 534868ed01ec..f05ae5c7ab23 100644 --- a/lib/galaxy/tool_util/linters/datatypes.py +++ b/lib/galaxy/tool_util/linters/datatypes.py @@ -2,6 +2,7 @@ from typing import ( Set, TYPE_CHECKING, + Union, ) # from galaxy import config @@ -10,15 +11,17 @@ listify, parse_xml, ) +from galaxy.util.resources import resource_path if TYPE_CHECKING: from galaxy.tool_util.lint import LintContext from galaxy.tool_util.parser import ToolSource + from galaxy.util.resources import Traversable -DATATYPES_CONF = os.path.join(os.path.dirname(__file__), "datatypes_conf.xml.sample") +DATATYPES_CONF = resource_path(__package__, "datatypes_conf.xml.sample") -def _parse_datatypes(datatype_conf_path: str) -> Set[str]: +def _parse_datatypes(datatype_conf_path: Union[str, "Traversable"]) -> Set[str]: datatypes = set() tree = parse_xml(datatype_conf_path) root = tree.getroot() diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py index d2c8617c08bd..797ba6f4428c 100644 --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -45,6 +45,7 @@ Optional, overload, Tuple, + TYPE_CHECKING, TypeVar, Union, ) @@ -75,6 +76,7 @@ LXML_AVAILABLE = True try: from lxml import etree + from lxml.etree import DocumentInvalid # lxml.etree.Element is a function that returns a new instance of the # lxml.etree._Element class. This class doesn't have a proper __init__() @@ -123,6 +125,10 @@ def XML(text: Union[str, bytes]) -> Element: XML, ) + class DocumentInvalid(Exception): # type: ignore[no-redef] + pass + + from . import requests from .custom_logging import get_logger from .inflection import Inflector @@ -142,6 +148,9 @@ def shlex_join(split_command): return " ".join(map(shlex.quote, split_command)) +if TYPE_CHECKING: + from galaxy.util.resources import Traversable + inflector = Inflector() log = get_logger(__name__) @@ -333,7 +342,10 @@ def unique_id(KEY_SIZE=128): def parse_xml( - fname: StrPath, strip_whitespace=True, remove_comments=True, schemafname: Union[StrPath, None] = None + fname: Union[StrPath, "Traversable"], + strip_whitespace: bool = True, + remove_comments: bool = True, + schemafname: Union[StrPath, None] = None, ) -> ElementTree: """Returns a parsed xml tree""" parser = None @@ -348,8 +360,10 @@ def parse_xml( schema_root = etree.XML(schema_file.read()) schema = etree.XMLSchema(schema_root) + source = Path(fname) if isinstance(fname, (str, os.PathLike)) else fname try: - tree = cast(ElementTree, etree.parse(str(fname), parser=parser)) + with source.open("rb") as f: + tree = cast(ElementTree, etree.parse(f, parser=parser)) root = tree.getroot() if strip_whitespace: for elem in root.iter("*"): @@ -359,15 +373,10 @@ def parse_xml( elem.tail = elem.tail.strip() if schema: schema.assertValid(tree) - except OSError as e: - if e.errno is None and not os.path.exists(fname): # type: ignore[unreachable] - # lxml doesn't set errno - e.errno = errno.ENOENT # type: ignore[unreachable] - raise except etree.ParseError: log.exception("Error parsing file %s", fname) raise - except etree.DocumentInvalid: + except DocumentInvalid: log.exception("Validation of file %s failed", fname) raise return tree diff --git a/test/unit/config/test_path_graph.py b/test/unit/config/test_path_graph.py index 02f33787081f..77afe7333c55 100644 --- a/test/unit/config/test_path_graph.py +++ b/test/unit/config/test_path_graph.py @@ -142,7 +142,7 @@ def test_resolves_to_invalid_type(monkeypatch): def test_resolves_with_empty_component(monkeypatch): - # A path can be None (root path is never None; may be asigned elsewhere) + # A path can be None (root path is never None; may be assigned elsewhere) mock_schema = { "path0": { "type": "str", @@ -155,7 +155,7 @@ def test_resolves_with_empty_component(monkeypatch): "path2": { "type": "str", "default": "value2", - "path_resolves_to": "path1", + "path_resolves_to": "path0", }, } monkeypatch.setattr(AppSchema, "_read_schema", lambda a, b: get_schema(mock_schema)) @@ -163,5 +163,5 @@ def test_resolves_with_empty_component(monkeypatch): config = BaseAppConfiguration() assert config.path0 == "value0" - assert config.path1 == "value0" + assert config.path1 is None assert config.path2 == "value0/value2" diff --git a/test/unit/config/test_path_resolves_to.py b/test/unit/config/test_path_resolves_to.py index 97953b3d0787..d7e7dd0918d6 100644 --- a/test/unit/config/test_path_resolves_to.py +++ b/test/unit/config/test_path_resolves_to.py @@ -132,21 +132,21 @@ def test_kwargs_relative_path_old_prefix_for_other_option(mock_init): def test_kwargs_relative_path_old_prefix_empty_after_strip(mock_init): # Expect: use value from kwargs, strip old prefix, then resolve - new_path1 = "old-config" + new_path1 = "old-config/foo" config = BaseAppConfiguration(path1=new_path1) - assert config.path1 == "my-config/" # stripped of old prefix, then resolved + assert config.path1 == "my-config/foo" # stripped of old prefix, then resolved assert config.path2 == "my-data/my-data-files" # stripped of old prefix, then resolved assert config.path3 == "my-other-files" # no change def test_kwargs_set_to_null(mock_init): - # Expected: allow overriding with null, then resolve + # Expected: allow overriding with null # This is not a common scenario, but it does happen: one example is # `job_config` set to `None` when testing config = BaseAppConfiguration(path1=None) - assert config.path1 == "my-config" # resolved + assert config.path1 is None assert config.path2 == "my-data/my-data-files" # resolved assert config.path3 == "my-other-files" # no change