diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 0c06163..313bccd 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -12,4 +12,4 @@ jobs: - uses: TrueBrain/actions-flake8@v2 with: flake8_version: 6.0.0 - plugins: flake8-isort==6.0.0 flake8-quotes==3.3.2 + plugins: flake8-isort==6.1.1 flake8-quotes==3.4.0 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7f5ae0f..c5aa28f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] fail-fast: false steps: diff --git a/mandible/metadata_mapper/builder.py b/mandible/metadata_mapper/builder.py new file mode 100644 index 0000000..36bfff5 --- /dev/null +++ b/mandible/metadata_mapper/builder.py @@ -0,0 +1,239 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Callable, Dict, Type, TypeVar + +from .directive import ( + Add, + FloorDiv, + Mapped, + Mul, + Reformatted, + Sub, + TemplateDirective, + TrueDiv, +) +from .types import Key, Template + +# For testing purposes to ensure we implement builders for all directives +_DIRECTIVE_BUILDER_REGISTRY: Dict[str, Callable[..., "DirectiveBuilder"]] = {} + + +@dataclass +class BuildConfig: + directive_marker: str + + +class Builder(ABC): + @abstractmethod + def build(self, config: BuildConfig) -> Template: + pass + + +class DirectiveBuilder(Builder): + def __init__( + self, + name: str, + params: Dict[str, Any], + ): + self.name = name + self.params = params + + def build(self, config: BuildConfig) -> Template: + return { + f"{config.directive_marker}{self.name}": { + k: v.build(config) if isinstance(v, Builder) else v + for k, v in self.params.items() + }, + } + + def __add__(self, other: Any) -> "DirectiveBuilder": + return add(self, other) + + def __radd__(self, other: Any) -> 
"DirectiveBuilder": + return add(other, self) + + def __floordiv__(self, other: Any) -> "DirectiveBuilder": + return floordiv(self, other) + + def __rfloordiv__(self, other: Any) -> "DirectiveBuilder": + return floordiv(other, self) + + def __mul__(self, other: Any) -> "DirectiveBuilder": + return mul(self, other) + + def __rmul__(self, other: Any) -> "DirectiveBuilder": + return mul(other, self) + + def __sub__(self, other: Any) -> "DirectiveBuilder": + return sub(self, other) + + def __rsub__(self, other: Any) -> "DirectiveBuilder": + return sub(other, self) + + def __truediv__(self, other: Any) -> "DirectiveBuilder": + return truediv(self, other) + + def __rtruediv__(self, other: Any) -> "DirectiveBuilder": + return truediv(other, self) + + +T = TypeVar("T") + + +def _directive_builder(directive: Type["TemplateDirective"]) -> Callable[[T], T]: + directive_name = directive.directive_name + assert directive_name is not None + + def decorator(func): + func.__doc__ = directive.__doc__ + + _DIRECTIVE_BUILDER_REGISTRY[directive_name] = func + return func + + return decorator + + +@_directive_builder(Mapped) +def mapped( + source: str, + key: Key, +) -> DirectiveBuilder: + directive_name = Mapped.directive_name + assert directive_name is not None + + return DirectiveBuilder( + directive_name, + { + "source": source, + "key": key, + }, + ) + + +@_directive_builder(Reformatted) +def reformatted( + format: str, + value: Any, + key: Key, +) -> DirectiveBuilder: + directive_name = Reformatted.directive_name + assert directive_name is not None + + return DirectiveBuilder( + directive_name, + { + "format": format, + "value": value, + "key": key, + }, + ) + +# +# Operations +# + + +def _binop_directive(directive_name: str, left: Any, right: Any): + return DirectiveBuilder( + directive_name, + { + "left": left, + "right": right, + }, + ) + + +@_directive_builder(Add) +def add( + left: Any, + right: Any, +) -> DirectiveBuilder: + directive_name = Add.directive_name + assert 
directive_name is not None + + return _binop_directive(directive_name, left, right) + + +@_directive_builder(FloorDiv) +def floordiv( + left: Any, + right: Any, +) -> DirectiveBuilder: + directive_name = FloorDiv.directive_name + assert directive_name is not None + + return _binop_directive(directive_name, left, right) + + +@_directive_builder(Mul) +def mul( + left: Any, + right: Any, +) -> DirectiveBuilder: + directive_name = Mul.directive_name + assert directive_name is not None + + return _binop_directive(directive_name, left, right) + + +@_directive_builder(Sub) +def sub( + left: Any, + right: Any, +) -> DirectiveBuilder: + directive_name = Sub.directive_name + assert directive_name is not None + + return _binop_directive(directive_name, left, right) + + +@_directive_builder(TrueDiv) +def truediv( + left: Any, + right: Any, +) -> DirectiveBuilder: + directive_name = TrueDiv.directive_name + assert directive_name is not None + + return _binop_directive(directive_name, left, right) + + +def build(template: Any, directive_marker: str = "@") -> Template: + """Convert a template created with builder classes to a standard template + that is ready to be used with the metadata mapper. + + When using builder classes, you must convert your template using `build`. + + :param template: template with possible `Builder` values + :param directive_marker: marker to use for identifying directives + :returns: Template - a standard template with all `Builder`s replaced + """ + config = BuildConfig(directive_marker=directive_marker) + + return build_with_config(template, config) + + +def build_with_config(template: Any, config: BuildConfig) -> Template: + """Same as build but takes configuration options as a BuildConfig object. 
+ + :param template: template with possible `Builder` values + :param config: BuildConfig object to customize generation + :returns: Template - a standard template with all `Builder`s replaced + """ + if isinstance(template, dict): + return { + k: build_with_config(v, config) + for k, v in template.items() + } + elif isinstance(template, list): + return [build_with_config(v, config) for v in template] + elif isinstance(template, Builder): + return template.build(config) + elif isinstance(template, tuple): + return tuple(build_with_config(v, config) for v in template) + elif isinstance(template, set): + return set(build_with_config(v, config) for v in template) + elif isinstance(template, (str, int, float, bool)): + return template + + raise ValueError(template) diff --git a/mandible/metadata_mapper/directive.py b/mandible/metadata_mapper/directive.py deleted file mode 100644 index ac0e232..0000000 --- a/mandible/metadata_mapper/directive.py +++ /dev/null @@ -1,105 +0,0 @@ -import io -from abc import ABC, abstractmethod -from typing import Any, Callable, Dict, Union - -from .context import Context -from .exception import MetadataMapperError -from .format import FORMAT_REGISTRY -from .source import Source - -Key = Union[str, Callable[[Context], str]] - - -def get_key(key: Key, context: Context) -> str: - if callable(key): - key = key(context) - - return key - - -class TemplateDirective(ABC): - """Base class for directives in a metadata template. - - A directive is a special marker in the metadata template which will be - replaced by the MetadataMapper. - """ - - def __init__(self, context: Context, sources: Dict[str, Source]): - self.context = context - self.sources = sources - - @abstractmethod - def call(self): - pass - - def prepare(self): - pass - - -class Mapped(TemplateDirective): - """A value mapped to the template from a metadata Source. - - The directive will be replaced by looking at the specified Source and - extracting the defined key. 
- """ - def __init__( - self, - context: Context, - sources: Dict[str, Source], - source: str, - key: Key - ): - super().__init__(context, sources) - - if source not in sources: - raise MetadataMapperError(f"source '{source}' does not exist") - - self.source = sources[source] - self.key = get_key(key, context) - - def call(self): - return self.source.get_value(self.key) - - def prepare(self): - self.source.add_key(self.key) - - -class Reformatted(TemplateDirective): - """A value mapped to the template from a metadata Source. - - The directive will be replaced by looking at the specified Source and - extracting the defined key. - """ - def __init__( - self, - context: Context, - sources: Dict[str, Source], - format: str, - value: Any, - key: Key - ): - super().__init__(context, sources) - - format_cls = FORMAT_REGISTRY.get(format) - if format_cls is None: - raise MetadataMapperError(f"format '{format}' does not exist") - - self.format = format_cls() - self.value = value - self.key = get_key(key, context) - - def call(self): - if isinstance(self.value, bytes): - value = self.value - elif isinstance(self.value, str): - value = self.value.encode() - else: - raise MetadataMapperError( - "value must be of type 'bytes' or 'str' but got " - f"'{type(self.value).__name__}'" - ) - - return self.format.get_value( - io.BytesIO(value), - self.key - ) diff --git a/mandible/metadata_mapper/directive/__init__.py b/mandible/metadata_mapper/directive/__init__.py new file mode 100644 index 0000000..43ad588 --- /dev/null +++ b/mandible/metadata_mapper/directive/__init__.py @@ -0,0 +1,18 @@ +from .directive import DIRECTIVE_REGISTRY, Key, TemplateDirective, get_key +from .mapped import Mapped +from .operations import Add, FloorDiv, Mul, Sub, TrueDiv +from .reformatted import Reformatted + +__all__ = ( + "Add", + "DIRECTIVE_REGISTRY", + "FloorDiv", + "Key", + "Mapped", + "Mul", + "Reformatted", + "Sub", + "TemplateDirective", + "TrueDiv", + "get_key", +) diff --git 
a/mandible/metadata_mapper/directive/directive.py b/mandible/metadata_mapper/directive/directive.py new file mode 100644 index 0000000..3a06d45 --- /dev/null +++ b/mandible/metadata_mapper/directive/directive.py @@ -0,0 +1,51 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import ClassVar, Dict, Optional, Type + +from ..context import Context +from ..source import Source +from ..types import Key + +DIRECTIVE_REGISTRY: Dict[str, Type["TemplateDirective"]] = {} + + +def get_key(key: Key, context: Context) -> str: + if callable(key): + key = key(context) + + return key + + +@dataclass +class TemplateDirective(ABC): + """Base class for directives in a metadata template. + + A directive is a special marker in the metadata template which will be + replaced by the MetadataMapper. + """ + # Registry boilerplate + def __init_subclass__( + cls, + register: bool = True, + name: Optional[str] = None, + **kwargs, + ): + if register: + name = name or cls.__name__.lower() + DIRECTIVE_REGISTRY[name] = cls + cls.directive_name = name + + super().__init_subclass__(**kwargs) + + # Begin class definition + directive_name: ClassVar[Optional[str]] = None + + context: Context + sources: Dict[str, Source] + + @abstractmethod + def call(self): + pass + + def prepare(self): + pass diff --git a/mandible/metadata_mapper/directive/mapped.py b/mandible/metadata_mapper/directive/mapped.py new file mode 100644 index 0000000..cc48979 --- /dev/null +++ b/mandible/metadata_mapper/directive/mapped.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass + +from ..exception import MetadataMapperError +from ..types import Key +from .directive import TemplateDirective, get_key + + +@dataclass +class Mapped(TemplateDirective): + """A value mapped to the template from a metadata Source. + + The directive will be replaced by looking at the specified Source and + extracting the defined key. 
+ """ + + source: str + key: Key + + def __post_init__(self): + if self.source not in self.sources: + raise MetadataMapperError(f"source '{self.source}' does not exist") + + self.source_obj = self.sources[self.source] + self.key_str = get_key(self.key, self.context) + + def call(self): + return self.source_obj.get_value(self.key_str) + + def prepare(self): + self.source_obj.add_key(self.key_str) diff --git a/mandible/metadata_mapper/directive/operations.py b/mandible/metadata_mapper/directive/operations.py new file mode 100644 index 0000000..2b8985f --- /dev/null +++ b/mandible/metadata_mapper/directive/operations.py @@ -0,0 +1,50 @@ +from dataclasses import dataclass +from typing import Any + +from .directive import TemplateDirective + + +@dataclass +class _BinOp(TemplateDirective, register=False): + left: Any + right: Any + + +@dataclass +class Add(_BinOp): + """Perform a python + operation on two values.""" + + def call(self) -> Any: + return self.left + self.right + + +@dataclass +class FloorDiv(_BinOp): + """Perform a python // operation on two values.""" + + def call(self) -> Any: + return self.left // self.right + + +@dataclass +class Mul(_BinOp): + """Perform a python * operation on two values.""" + + def call(self) -> Any: + return self.left * self.right + + +@dataclass +class Sub(_BinOp): + """Perform a python - operation on two values.""" + + def call(self) -> Any: + return self.left - self.right + + +@dataclass +class TrueDiv(_BinOp): + """Perform a python / operation on two values.""" + + def call(self) -> Any: + return self.left / self.right diff --git a/mandible/metadata_mapper/directive/reformatted.py b/mandible/metadata_mapper/directive/reformatted.py new file mode 100644 index 0000000..835ef2d --- /dev/null +++ b/mandible/metadata_mapper/directive/reformatted.py @@ -0,0 +1,44 @@ +import io +from dataclasses import dataclass +from typing import Any + +from ..exception import MetadataMapperError +from ..format import FORMAT_REGISTRY +from .directive 
import Key, TemplateDirective, get_key + + +@dataclass +class Reformatted(TemplateDirective): + """A value extracted by re-parsing another value with a named Format. + + The directive will be replaced by reading `value` (str or bytes) using + the format registered under `format` and extracting the defined key. + """ + + format: str + value: Any + key: Key + + def __post_init__(self): + format_cls = FORMAT_REGISTRY.get(self.format) + if format_cls is None: + raise MetadataMapperError(f"format '{self.format}' does not exist") + + self.format_obj = format_cls() + self.key_str = get_key(self.key, self.context) + + def call(self): + if isinstance(self.value, bytes): + value = self.value + elif isinstance(self.value, str): + value = self.value.encode() + else: + raise MetadataMapperError( + "value must be of type 'bytes' or 'str' but got " + f"'{type(self.value).__name__}'", + ) + + return self.format_obj.get_value( + io.BytesIO(value), + self.key_str, + ) diff --git a/mandible/metadata_mapper/format/format.py b/mandible/metadata_mapper/format/format.py index 51704f2..835c865 100644 --- a/mandible/metadata_mapper/format/format.py +++ b/mandible/metadata_mapper/format/format.py @@ -21,10 +21,12 @@ def __str__(self): @dataclass class Format(ABC): # Registry boilerplate - def __init_subclass__(cls, register: bool = True): + def __init_subclass__(cls, register: bool = True, **kwargs): if register: FORMAT_REGISTRY[cls.__name__] = cls + super().__init_subclass__(**kwargs) + # Begin class definition def get_values(self, file: IO[bytes], keys: Iterable[str]): with self._parse_data(file) as data: diff --git a/mandible/metadata_mapper/mapper.py b/mandible/metadata_mapper/mapper.py index ec002a8..bfaced3 100644 --- a/mandible/metadata_mapper/mapper.py +++ b/mandible/metadata_mapper/mapper.py @@ -1,11 +1,12 @@ import inspect import logging -from typing import Dict, Optional +from typing import Any, Dict, Optional from .context import Context -from .directive import Mapped, Reformatted, TemplateDirective +from .directive import 
DIRECTIVE_REGISTRY, TemplateDirective from .exception import MetadataMapperError, TemplateError from .source import Source, SourceProvider +from .types import Template log = logging.getLogger(__name__) @@ -13,20 +14,16 @@ class MetadataMapper: def __init__( self, - template, + template: Template, source_provider: SourceProvider = None, *, - directive_marker: str = "@" + directive_marker: str = "@", ): self.template = template self.source_provider = source_provider self.directive_marker = directive_marker - self.directives = { - "mapped": Mapped, - "reformatted": Reformatted, - } - def get_metadata(self, context: Context) -> Dict: + def get_metadata(self, context: Context) -> Template: if self.source_provider is not None: sources = self.source_provider.get_sources() else: @@ -78,7 +75,7 @@ def _cache_source_keys(self, context: Context, sources: Dict[str, Source]): def _replace_template( self, context: Context, - template, + template: Template, sources: Dict[str, Source], debug_path: str = "$", ): @@ -104,7 +101,12 @@ def _replace_template( }, debug_path ) - return directive.call() + try: + return directive.call() + except Exception as e: + raise MetadataMapperError( + f"failed to call directive at {debug_path}: {e}" + ) from e return { k: self._replace_template( @@ -158,7 +160,7 @@ def _get_directive( config: dict, debug_path: str, ) -> TemplateDirective: - cls = self.directives.get(directive_name[1:]) + cls = DIRECTIVE_REGISTRY.get(directive_name[len(self.directive_marker):]) if cls is None: raise TemplateError( f"invalid directive '{directive_name}'", @@ -200,7 +202,7 @@ def _get_directive( raise TemplateError(str(e), debug_path) from e -def _walk_values(obj, debug_path: str = "$"): +def _walk_values(obj: Any, debug_path: str = "$"): yield obj, debug_path if isinstance(obj, dict): for key, val in obj.items(): diff --git a/mandible/metadata_mapper/storage.py b/mandible/metadata_mapper/storage.py index e8e7394..c37d850 100644 --- 
a/mandible/metadata_mapper/storage.py +++ b/mandible/metadata_mapper/storage.py @@ -18,10 +18,12 @@ class StorageError(Exception): class Storage(ABC): # Registry boilerplate - def __init_subclass__(cls, register: bool = True): + def __init_subclass__(cls, register: bool = True, **kwargs): if register: STORAGE_REGISTRY[cls.__name__] = cls + super().__init_subclass__(**kwargs) + @abstractmethod def open_file(self, context: Context) -> IO[bytes]: """Get a filelike object to access the data.""" diff --git a/mandible/metadata_mapper/types.py b/mandible/metadata_mapper/types.py new file mode 100644 index 0000000..553b6c0 --- /dev/null +++ b/mandible/metadata_mapper/types.py @@ -0,0 +1,17 @@ +from typing import Callable, Dict, List, Set, Tuple, Union + +from .context import Context + +KeyFunc = Callable[[Context], str] +Key = Union[str, KeyFunc] +Template = Union[ + Dict[str, "Template"], + List["Template"], + Tuple["Template", ...], + Set["Template"], + str, + int, + float, + bool, + KeyFunc, +] diff --git a/mandible/py.typed b/mandible/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml index ed647cf..0c23ace 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mandible" -version = "0.4.0" +version = "0.5.0" description = "A generic framework for writing satellite data ingest systems" authors = ["Rohan Weeden ", "Matt Perry "] license = "APACHE-2" @@ -33,7 +33,7 @@ xml = ["lxml"] [tool.poetry.group.dev.dependencies] boto3 = "^1.18" moto = "^4.0.1" -pytest = "^7.1.2" +pytest = "^8.0.2" pytest-cov = "^4.0.0" pytest-mock = "^3.8.2" diff --git a/tests/data/fixed_name_file.json b/tests/data/fixed_name_file.json index de45595..935bc6d 100644 --- a/tests/data/fixed_name_file.json +++ b/tests/data/fixed_name_file.json @@ -2,5 +2,7 @@ "foo": "value for foo", "nested": { "key": "value for nested" - } + }, + "integer": 10, + "list": [1, 2, 3] } diff --git a/tests/data/match_me.json 
b/tests/data/match_me.json index d64acd3..b4fd2d5 100644 --- a/tests/data/match_me.json +++ b/tests/data/match_me.json @@ -1,3 +1,4 @@ { - "bar": "value for bar" + "bar": "value for bar", + "list": ["A", "B", "C"] } diff --git a/tests/integration_tests/conftest.py b/tests/integration_tests/conftest.py new file mode 100644 index 0000000..2e055f6 --- /dev/null +++ b/tests/integration_tests/conftest.py @@ -0,0 +1,139 @@ +import pytest + +from mandible.metadata_mapper import Context + + +@pytest.fixture +def fixed_name_file_config(): + return { + "storage": { + "class": "LocalFile", + "filters": { + "name": r"fixed_name_file\.json", + }, + }, + "format": { + "class": "Json", + }, + } + + +@pytest.fixture +def config(fixed_name_file_config): + return { + "sources": { + "fixed_name_file": fixed_name_file_config, + "name_match_file": { + "storage": { + "class": "LocalFile", + "filters": { + "name": r".*match_me\.json", + }, + }, + "format": { + "class": "Json", + }, + }, + "fixed_xml_file": { + "storage": { + "class": "LocalFile", + "filters": { + "name": "fixed_xml_file.xml", + }, + }, + "format": { + "class": "Xml", + }, + }, + "namespace_xml_file": { + "storage": { + "class": "LocalFile", + "filters": { + "name": "xml_with_namespace.xml", + }, + }, + "format": { + "class": "Xml", + }, + }, + }, + "template": { + "foo": { + "@mapped": { + "source": "fixed_name_file", + "key": "foo", + }, + }, + "outer": { + "nested": { + "@mapped": { + "source": "fixed_name_file", + "key": "nested.key", + }, + }, + "bar": { + "@mapped": { + "source": "name_match_file", + "key": "bar", + }, + }, + }, + "xml_foobar_1": { + "@mapped": { + "source": "fixed_xml_file", + "key": "./foo/bar[1]/foobar", + }, + }, + "xml_foobar_2": { + "@mapped": { + "source": "fixed_xml_file", + "key": "./foo/bar[2]/foobar", + }, + }, + "namespace_xml_foobar_1": { + "@mapped": { + "source": "namespace_xml_file", + "key": "./foo:foo/foo:bar[1]/foo:foobar", + }, + }, + "namespace_xml_foobar_2": { + "@mapped": { + 
"source": "namespace_xml_file", + "key": "./foo:foo/foo:bar[2]/foo:foobar", + }, + }, + }, + } + + +@pytest.fixture +def context(data_path): + return Context( + files=[ + { + "name": "fixed_name_file.json", + "path": str(data_path / "fixed_name_file.json"), + }, + { + "name": "fixed_xml_file.xml", + "path": str(data_path / "fixed_xml_file.xml"), + }, + { + "name": "xml_with_namespace.xml", + "path": str(data_path / "xml_with_namespace.xml"), + }, + { + "name": "another_file.json", + }, + { + "name": "yet_another_file.json", + }, + { + "name": "first_match_me.json", + "path": str(data_path / "match_me.json"), + }, + { + "name": "dont_match_me.json", + }, + ], + ) diff --git a/tests/integration_tests/test_builder.py b/tests/integration_tests/test_builder.py new file mode 100644 index 0000000..bb14494 --- /dev/null +++ b/tests/integration_tests/test_builder.py @@ -0,0 +1,30 @@ +import pytest + +from mandible.metadata_mapper import ConfigSourceProvider, MetadataMapper +from mandible.metadata_mapper.builder import build, mapped + + +@pytest.fixture +def source_provider(config): + return ConfigSourceProvider({ + "fixed_name_file": config["sources"]["fixed_name_file"], + "name_match_file": config["sources"]["name_match_file"], + }) + + +def test_template(source_provider, context): + mapper = MetadataMapper( + template=build({ + "list": ( + mapped("fixed_name_file", "list") + + mapped("name_match_file", "list") + ), + "number": mapped("fixed_name_file", "integer") + 20.5, + }), + source_provider=source_provider, + ) + + assert mapper.get_metadata(context) == { + "list": [1, 2, 3, "A", "B", "C"], + "number": 30.5, + } diff --git a/tests/integration_tests/test_directives.py b/tests/integration_tests/test_directives.py new file mode 100644 index 0000000..6d8e12d --- /dev/null +++ b/tests/integration_tests/test_directives.py @@ -0,0 +1,499 @@ +import pytest + +from mandible.metadata_mapper import ( + ConfigSourceProvider, + Context, + MetadataMapper, + MetadataMapperError, +) 
+ + +@pytest.mark.xml +def test_mapped_key_callable(config, context): + mapper = MetadataMapper( + template={ + "bar": { + "@mapped": { + "source": "name_match_file", + "key": lambda ctx: ctx.meta["foo"], + }, + }, + }, + source_provider=ConfigSourceProvider(config["sources"]), + ) + context.meta["foo"] = "bar" + + assert mapper.get_metadata(context) == { + "bar": "value for bar", + } + + +def test_mapped_non_existent_source(context): + mapper = MetadataMapper( + template={ + "foo": { + "@mapped": { + "source": "does not exist", + "key": "foo", + }, + }, + }, + source_provider=ConfigSourceProvider({}), + ) + + with pytest.raises( + MetadataMapperError, + match=( + r"failed to process template at \$\.foo\.@mapped: " + "source 'does not exist' does not exist" + ), + ): + mapper.get_metadata(context) + + +def test_mapped_missing_key(context): + mapper = MetadataMapper( + template={ + "foo": { + "@mapped": { + "source": "fixed_name_file", + }, + }, + }, + source_provider=ConfigSourceProvider({}), + ) + + with pytest.raises( + MetadataMapperError, + match=( + r"failed to process template at \$\.foo\.@mapped: " + "missing key: 'key'" + ), + ): + mapper.get_metadata(context) + + +def test_mapped_missing_source(context): + mapper = MetadataMapper( + template={ + "foo": { + "@mapped": { + "key": "does not exist", + }, + }, + }, + source_provider=ConfigSourceProvider({}), + ) + + with pytest.raises( + MetadataMapperError, + match=( + r"failed to process template at \$\.foo\.@mapped: " + "missing key: 'source'" + ), + ): + mapper.get_metadata(context) + + +def test_mapped_missing_source_path(context): + mapper = MetadataMapper( + template={ + "foo": { + "bar": [ + "ignored", + "ignored", + { + "@mapped": { + "key": "does not exist", + }, + }, + ], + }, + }, + source_provider=ConfigSourceProvider({}), + ) + + with pytest.raises( + MetadataMapperError, + match=( + r"failed to process template at \$\.foo\.bar\[2\]\.@mapped: " + "missing key: 'source'" + ), + ): + 
mapper.get_metadata(context) + + +def test_mapped_missing_source_and_key(context): + mapper = MetadataMapper( + template={ + "foo": { + "@mapped": {}, + }, + }, + source_provider=ConfigSourceProvider({}), + ) + + with pytest.raises( + MetadataMapperError, + match=( + r"failed to process template at \$\.foo\.@mapped: " + "missing keys: 'key', 'source'" + ), + ): + mapper.get_metadata(context) + + +def test_mapped_extra_parameter(context, fixed_name_file_config): + mapper = MetadataMapper( + template={ + "foo": { + "@mapped": { + "source": "fixed_name_file", + "key": "foo", + "does_not_exist": "does not exist", + "does_not_exist_2": "does not exist", + }, + }, + }, + source_provider=ConfigSourceProvider({ + "fixed_name_file": fixed_name_file_config, + }), + ) + + assert mapper.get_metadata(context) == {"foo": "value for foo"} + + +def test_reformatted_json_field_in_json(): + mapper = MetadataMapper( + template={ + "@reformatted": { + "format": "Json", + "value": { + "@mapped": { + "source": "file", + "key": "some-field", + }, + }, + "key": "foo", + }, + }, + source_provider=ConfigSourceProvider({ + "file": { + "storage": { + "class": "Dummy", + "data": br""" + { + "some-field": "{\"foo\": \"bar\"}" + } + """, + }, + "format": { + "class": "Json", + }, + }, + }), + ) + + context = Context() + + assert mapper.get_metadata(context) == "bar" + + +@pytest.mark.xml +def test_reformatted_json_field_in_xml(): + mapper = MetadataMapper( + template={ + "@reformatted": { + "format": "Json", + "value": { + "@mapped": { + "source": "file", + "key": "/root/json-field", + }, + }, + "key": "foo", + }, + }, + source_provider=ConfigSourceProvider({ + "file": { + "storage": { + "class": "Dummy", + "data": b""" + + {"foo": "bar"} + + """, + }, + "format": { + "class": "Xml", + }, + }, + }), + ) + + context = Context() + + assert mapper.get_metadata(context) == "bar" + + +@pytest.mark.xml +def test_reformatted_json_field_in_xml_get_entire_value(): + mapper = MetadataMapper( + template={ + 
"@reformatted": { + "format": "Json", + "value": { + "@mapped": { + "source": "file", + "key": "/root/json-field", + }, + }, + "key": "$", + }, + }, + source_provider=ConfigSourceProvider({ + "file": { + "storage": { + "class": "Dummy", + "data": b""" + + {"foo": "bar"} + + """, + }, + "format": { + "class": "Xml", + }, + }, + }), + ) + + context = Context() + + assert mapper.get_metadata(context) == {"foo": "bar"} + + +@pytest.mark.xml +def test_reformatted_xml_field_in_json(): + mapper = MetadataMapper( + template={ + "@reformatted": { + "format": "Xml", + "value": { + "@mapped": { + "source": "file", + "key": "foo", + }, + }, + "key": "/root/field", + }, + }, + source_provider=ConfigSourceProvider({ + "file": { + "storage": { + "class": "Dummy", + "data": b""" + { + "foo": "bar" + } + """, + }, + "format": { + "class": "Json", + }, + }, + }), + ) + + context = Context() + + assert mapper.get_metadata(context) == "bar" + + +def test_reformatted_bad_type(): + mapper = MetadataMapper( + template={ + "@reformatted": { + "format": "Json", + "value": { + "@mapped": { + "source": "file", + "key": "foo", + }, + }, + "key": "$", + }, + }, + source_provider=ConfigSourceProvider({ + "file": { + "storage": { + "class": "Dummy", + "data": b'{"foo": true}', + }, + "format": { + "class": "Json", + }, + }, + }), + ) + + context = Context() + + with pytest.raises(MetadataMapperError, match="but got 'bool'"): + mapper.get_metadata(context) + + +@pytest.mark.xml +def test_reformatted_nested(): + mapper = MetadataMapper( + template={ + "@reformatted": { + "format": "Xml", + "value": { + "@reformatted": { + "format": "Json", + "value": '{"foo": "bar"}', + "key": "foo", + }, + }, + "key": "/root/field", + }, + }, + source_provider=ConfigSourceProvider({ + "file": { + "storage": { + "class": "Dummy", + "data": b""" + { + "foo": "bar" + } + """, + }, + "format": { + "class": "Json", + }, + }, + }), + ) + + context = Context() + + assert mapper.get_metadata(context) == "bar" + + +def 
test_reformatted_nested_missing_parameter(): + mapper = MetadataMapper( + template={ + "@reformatted": { + "format": "Json", + "value": { + "@reformatted": { + "format": "Json", + "key": "foo", + }, + }, + "key": "/root/field", + }, + }, + source_provider=ConfigSourceProvider({}), + ) + + context = Context() + + with pytest.raises( + MetadataMapperError, + match=( + "failed to process template at " + r"\$\.@reformatted\.value\.@reformatted: " + "missing key: 'value'" + ), + ): + mapper.get_metadata(context) + + +def test_add_constant_values(): + mapper = MetadataMapper( + template={ + "integers": { + "@add": { + "left": 1, + "right": 2, + }, + }, + "floats": { + "@add": { + "left": 1.5, + "right": 2, + }, + }, + "strings": { + "@add": { + "left": "hello ", + "right": "world", + }, + }, + "lists": { + "@add": { + "left": [1, 2], + "right": [3, 4], + }, + }, + }, + ) + + context = Context() + + assert mapper.get_metadata(context) == { + "integers": 3, + "floats": 3.5, + "strings": "hello world", + "lists": [1, 2, 3, 4], + } + + +def test_add_constant_values_bad_types(): + mapper = MetadataMapper( + template={ + "@add": { + "left": "foo", + "right": 10, + }, + }, + ) + + context = Context() + + with pytest.raises( + MetadataMapperError, + match=( + r"failed to evaluate template: failed to call directive at \$\.@add: " + r'can only concatenate str \(not "int"\) to str' + ), + ): + mapper.get_metadata(context) + + +def test_add_mapped_values(context, fixed_name_file_config): + mapper = MetadataMapper( + template={ + "@add": { + "left": { + "@mapped": { + "source": "fixed_name_file", + "key": "foo", + }, + }, + "right": { + "@mapped": { + "source": "fixed_name_file", + "key": "nested.key", + }, + }, + }, + }, + source_provider=ConfigSourceProvider({ + "fixed_name_file": fixed_name_file_config, + }), + ) + + assert mapper.get_metadata(context) == "value for foovalue for nested" diff --git a/tests/integration_tests/test_metadata_mapper.py 
b/tests/integration_tests/test_metadata_mapper.py new file mode 100644 index 0000000..8bf5eee --- /dev/null +++ b/tests/integration_tests/test_metadata_mapper.py @@ -0,0 +1,308 @@ +import re + +import pytest + +from mandible.metadata_mapper import ( + ConfigSourceProvider, + Context, + MetadataMapper, + MetadataMapperError, + PySourceProvider, + Source, +) +from mandible.metadata_mapper.format import Json, Xml +from mandible.metadata_mapper.storage import LocalFile + + +@pytest.fixture +def mapper(config): + return MetadataMapper( + template=config["template"], + source_provider=ConfigSourceProvider(config["sources"]), + ) + + +def test_empty_mapping_empty_context(): + mapper = MetadataMapper({}) + assert mapper.get_metadata(Context()) == {} + + +def test_constant_mapping_empty_context(): + template = { + "foo": "bar", + "baz": "qux", + } + mapper = MetadataMapper(template) + + assert mapper.get_metadata(Context()) == template + + +def test_empty_context(fixed_name_file_config): + mapper = MetadataMapper( + template={ + "foo": { + "@mapped": { + "source": "fixed_name_file", + "key": "foo", + }, + }, + }, + source_provider=ConfigSourceProvider({ + "fixed_name_file": fixed_name_file_config, + }), + ) + context = Context() + + with pytest.raises( + MetadataMapperError, + match=( + "failed to query source 'fixed_name_file': " + "no files in context" + ), + ): + mapper.get_metadata(context) + + +@pytest.mark.xml +def test_basic(mapper, context): + assert mapper.get_metadata(context) == { + "foo": "value for foo", + "outer": { + "nested": "value for nested", + "bar": "value for bar", + }, + "namespace_xml_foobar_1": "testing_1", + "namespace_xml_foobar_2": "2", + "xml_foobar_1": "testing_1", + "xml_foobar_2": "2", + } + + +def test_custom_directive_marker(context, fixed_name_file_config): + mapper = MetadataMapper( + template={ + "foo": { + "#mapped": { + "source": "fixed_name_file", + "key": "foo", + }, + }, + }, + source_provider=ConfigSourceProvider({ + 
"fixed_name_file": fixed_name_file_config, + }), + directive_marker="#", + ) + assert mapper.get_metadata(context) == { + "foo": "value for foo", + } + + +def test_custom_directive_marker_long(context, fixed_name_file_config): + mapper = MetadataMapper( + template={ + "foo": { + "###mapped": { + "source": "fixed_name_file", + "key": "foo", + }, + }, + }, + source_provider=ConfigSourceProvider({ + "fixed_name_file": fixed_name_file_config, + }), + directive_marker="###", + ) + assert mapper.get_metadata(context) == { + "foo": "value for foo", + } + + +@pytest.mark.xml +def test_basic_py_source_provider(config, context): + mapper = MetadataMapper( + template=config["template"], + source_provider=PySourceProvider({ + "fixed_name_file": Source( + storage=LocalFile( + filters={ + "name": "fixed_name_file.json", + }, + ), + format=Json(), + ), + "fixed_xml_file": Source( + storage=LocalFile( + filters={ + "name": "fixed_xml_file.xml", + }, + ), + format=Xml(), + ), + "namespace_xml_file": Source( + storage=LocalFile( + filters={ + "name": "xml_with_namespace.xml", + }, + ), + format=Xml(), + ), + "name_match_file": Source( + storage=LocalFile( + filters={ + "name": r".*match_me\.json", + }, + ), + format=Json(), + ), + "name_match_file2": Source( + storage=LocalFile( + filters={ + "name": re.compile(r".*match_me\.json"), + }, + ), + format=Json(), + ), + }), + ) + assert mapper.get_metadata(context) == { + "foo": "value for foo", + "outer": { + "nested": "value for nested", + "bar": "value for bar", + }, + "namespace_xml_foobar_1": "testing_1", + "namespace_xml_foobar_2": "2", + "xml_foobar_1": "testing_1", + "xml_foobar_2": "2", + } + + +@pytest.mark.xml +def test_basic_s3_file(s3_resource, config, context): + s3_resource.create_bucket(Bucket="test") + s3_resource.Object("test", "fixed_name_file.json").put( + Body=open(context.files[0]["path"]).read(), + ) + context.files[0]["bucket"] = "test" + context.files[0]["key"] = "fixed_name_file.json" + + 
config["sources"]["fixed_name_file"]["storage"]["class"] = "S3File" + + mapper = MetadataMapper( + template=config["template"], + source_provider=ConfigSourceProvider(config["sources"]), + ) + + assert mapper.get_metadata(context) == { + "foo": "value for foo", + "outer": { + "nested": "value for nested", + "bar": "value for bar", + }, + "namespace_xml_foobar_1": "testing_1", + "namespace_xml_foobar_2": "2", + "xml_foobar_1": "testing_1", + "xml_foobar_2": "2", + } + + +def test_no_matching_files(context): + mapper = MetadataMapper( + template={ + "foo": { + "@mapped": { + "source": "source_file", + "key": "foo", + }, + }, + }, + source_provider=ConfigSourceProvider({ + "source_file": { + "storage": { + "class": "LocalFile", + "filters": { + "name": "does not exist", + }, + }, + "format": { + "class": "Json", + }, + }, + }), + ) + + with pytest.raises( + MetadataMapperError, + match=( + "failed to query source 'source_file': " + "no files matched filters" + ), + ): + mapper.get_metadata(context) + + +def test_source_non_existent_key(context, fixed_name_file_config): + mapper = MetadataMapper( + template={ + "foo": { + "@mapped": { + "source": "fixed_name_file", + "key": "does_not_exist", + }, + }, + }, + source_provider=ConfigSourceProvider({ + "fixed_name_file": fixed_name_file_config, + }), + ) + + with pytest.raises( + MetadataMapperError, + match=( + "failed to query source 'fixed_name_file': " + "key not found 'does_not_exist'" + ), + ): + mapper.get_metadata(context) + + +def test_invalid_directive(context): + mapper = MetadataMapper( + template={ + "foo": { + "@does_not_exist": {}, + }, + }, + source_provider=ConfigSourceProvider({}), + ) + + with pytest.raises( + MetadataMapperError, + match=( + r"failed to process template at \$\.foo\.@does_not_exist: " + "invalid directive '@does_not_exist'" + ), + ): + mapper.get_metadata(context) + + +def test_multiple_directives(context): + mapper = MetadataMapper( + template={ + "foo": { + "@mapped": {}, + "@invalid": 
{}, + }, + }, + source_provider=ConfigSourceProvider({}), + ) + + with pytest.raises( + MetadataMapperError, + match=( + r"failed to process template at \$\.foo: " + "multiple directives found in config: '@mapped', '@invalid'" + ), + ): + mapper.get_metadata(context) diff --git a/tests/test_builder.py b/tests/test_builder.py new file mode 100644 index 0000000..6fddd67 --- /dev/null +++ b/tests/test_builder.py @@ -0,0 +1,335 @@ +from mandible.metadata_mapper.builder import ( + add, + build, + floordiv, + mapped, + mul, + reformatted, + sub, + truediv, +) + + +def test_build_noop(): + template = { + "something": ["simple"], + "with": { + "nested": [ + {"data": ["structures"]}, + ], + }, + } + + assert build(template) == template + + +def test_build_mapped(): + template = mapped( + source="some_source", + key="some.key", + ) + + assert build(template) == { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + } + + +def test_build_directive_marker(): + template = mapped( + source="some_source", + key="some.key", + ) + + assert build(template, directive_marker="#%^") == { + "#%^mapped": { + "source": "some_source", + "key": "some.key", + }, + } + + +def test_build_reformatted(): + template = reformatted( + format="Json", + value=mapped( + source="some_source", + key="some.key", + ), + key="foo", + ) + + assert build(template) == { + "@reformatted": { + "format": "Json", + "value": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + "key": "foo", + }, + } + + +def test_build_add(): + template = add( + left=1, + right=2, + ) + + assert build(template) == { + "@add": { + "left": 1, + "right": 2, + }, + } + + +def test_build_add_automatic(): + template = mapped( + source="some_source", + key="some.key", + ) + 10 + + assert build(template) == { + "@add": { + "left": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + "right": 10, + }, + } + + +def test_build_add_automatic_right(): + template = 10 + mapped( + 
source="some_source", + key="some.key", + ) + + assert build(template) == { + "@add": { + "left": 10, + "right": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + }, + } + + +def test_build_floordiv(): + template = floordiv( + left=1, + right=2, + ) + + assert build(template) == { + "@floordiv": { + "left": 1, + "right": 2, + }, + } + + +def test_build_floordiv_automatic(): + template = mapped( + source="some_source", + key="some.key", + ) // 10 + + assert build(template) == { + "@floordiv": { + "left": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + "right": 10, + }, + } + + +def test_build_floordiv_automatic_right(): + template = 10 // mapped( + source="some_source", + key="some.key", + ) + + assert build(template) == { + "@floordiv": { + "left": 10, + "right": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + }, + } + + +def test_build_mul(): + template = mul( + left=1, + right=2, + ) + + assert build(template) == { + "@mul": { + "left": 1, + "right": 2, + }, + } + + +def test_build_mul_automatic(): + template = mapped( + source="some_source", + key="some.key", + ) * 10 + + assert build(template) == { + "@mul": { + "left": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + "right": 10, + }, + } + + +def test_build_mul_automatic_right(): + template = 10 * mapped( + source="some_source", + key="some.key", + ) + + assert build(template) == { + "@mul": { + "left": 10, + "right": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + }, + } + + +def test_build_sub(): + template = sub( + left=1, + right=2, + ) + + assert build(template) == { + "@sub": { + "left": 1, + "right": 2, + }, + } + + +def test_build_sub_automatic(): + template = mapped( + source="some_source", + key="some.key", + ) - 10 + + assert build(template) == { + "@sub": { + "left": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + "right": 10, + 
}, + } + + +def test_build_sub_automatic_right(): + template = 10 - mapped( + source="some_source", + key="some.key", + ) + + assert build(template) == { + "@sub": { + "left": 10, + "right": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + }, + } + + +def test_build_truediv(): + template = truediv( + left=1, + right=2, + ) + + assert build(template) == { + "@truediv": { + "left": 1, + "right": 2, + }, + } + + +def test_build_truediv_automatic(): + template = mapped( + source="some_source", + key="some.key", + ) / 10 + + assert build(template) == { + "@truediv": { + "left": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + "right": 10, + }, + } + + +def test_build_truediv_automatic_right(): + template = 10 / mapped( + source="some_source", + key="some.key", + ) + + assert build(template) == { + "@truediv": { + "left": 10, + "right": { + "@mapped": { + "source": "some_source", + "key": "some.key", + }, + }, + }, + } diff --git a/tests/test_directives.py b/tests/test_directives.py new file mode 100644 index 0000000..bf4b2bf --- /dev/null +++ b/tests/test_directives.py @@ -0,0 +1,48 @@ +from mandible.metadata_mapper import Context +from mandible.metadata_mapper.builder import _DIRECTIVE_BUILDER_REGISTRY +from mandible.metadata_mapper.directive import ( + DIRECTIVE_REGISTRY, + Add, + FloorDiv, + Mul, + Sub, + TrueDiv, +) + + +def test_all_directives_have_builder_class(): + directive_names = set(DIRECTIVE_REGISTRY) + builder_names = set(_DIRECTIVE_BUILDER_REGISTRY) + + assert directive_names <= builder_names, \ + "Some directives don't have a builder class!" 
+ + +def test_add(): + add = Add(Context(), {}, 1, 2) + + assert add.call() == 3 + + +def test_floordiv(): + floordiv = FloorDiv(Context(), {}, 1, 2) + + assert floordiv.call() == 0 + + +def test_mul(): + mul = Mul(Context(), {}, 1, 2) + + assert mul.call() == 2 + + +def test_sub(): + sub = Sub(Context(), {}, 1, 2) + + assert sub.call() == -1 + + +def test_truediv(): + truediv = TrueDiv(Context(), {}, 1, 2) + + assert truediv.call() == 0.5 diff --git a/tests/test_format.py b/tests/test_format.py index 4d439fd..7a0e567 100644 --- a/tests/test_format.py +++ b/tests/test_format.py @@ -69,7 +69,7 @@ def test_h5_empty_key(): format = H5() - with pytest.raises(FormatError, match="cannot be an empty string"): + with pytest.raises(FormatError, match="key not found ''"): format.get_value(file, "") diff --git a/tests/test_metadata_mapper.py b/tests/test_metadata_mapper.py deleted file mode 100644 index 73ffce3..0000000 --- a/tests/test_metadata_mapper.py +++ /dev/null @@ -1,827 +0,0 @@ -import re - -import pytest - -from mandible.metadata_mapper import ( - ConfigSourceProvider, - Context, - MetadataMapper, - MetadataMapperError, - PySourceProvider, - Source, -) -from mandible.metadata_mapper.format import Json, Xml -from mandible.metadata_mapper.storage import LocalFile - - -@pytest.fixture -def fixed_name_file_config(): - return { - "storage": { - "class": "LocalFile", - "filters": { - "name": r"fixed_name_file\.json" - } - }, - "format": { - "class": "Json", - } - } - - -@pytest.fixture -def config(fixed_name_file_config): - return { - "sources": { - "fixed_name_file": fixed_name_file_config, - "name_match_file": { - "storage": { - "class": "LocalFile", - "filters": { - "name": r".*match_me\.json" - } - }, - "format": { - "class": "Json" - } - }, - "fixed_xml_file": { - "storage": { - "class": "LocalFile", - "filters": { - "name": "fixed_xml_file.xml" - } - }, - "format": { - "class": "Xml" - } - }, - "namespace_xml_file": { - "storage": { - "class": "LocalFile", - 
"filters": { - "name": "xml_with_namespace.xml" - } - }, - "format": { - "class": "Xml" - } - } - }, - "template": { - "foo": { - "@mapped": { - "source": "fixed_name_file", - "key": "foo" - } - }, - "outer": { - "nested": { - "@mapped": { - "source": "fixed_name_file", - "key": "nested.key" - } - }, - "bar": { - "@mapped": { - "source": "name_match_file", - "key": "bar" - } - } - }, - "xml_foobar_1": { - "@mapped": { - "source": "fixed_xml_file", - "key": "./foo/bar[1]/foobar" - } - }, - "xml_foobar_2": { - "@mapped": { - "source": "fixed_xml_file", - "key": "./foo/bar[2]/foobar" - } - }, - "namespace_xml_foobar_1": { - "@mapped": { - "source": "namespace_xml_file", - "key": "./foo:foo/foo:bar[1]/foo:foobar" - } - }, - "namespace_xml_foobar_2": { - "@mapped": { - "source": "namespace_xml_file", - "key": "./foo:foo/foo:bar[2]/foo:foobar" - } - }, - } - } - - -@pytest.fixture -def context(data_path): - return Context( - files=[ - { - "name": "fixed_name_file.json", - "path": str(data_path / "fixed_name_file.json") - }, - { - "name": "fixed_xml_file.xml", - "path": str(data_path / "fixed_xml_file.xml") - }, - { - "name": "xml_with_namespace.xml", - "path": str(data_path / "xml_with_namespace.xml") - }, - { - "name": "another_file.json" - }, - { - "name": "yet_another_file.json" - }, - { - "name": "first_match_me.json", - "path": str(data_path / "match_me.json") - }, - { - "name": "dont_match_me.json" - }, - ] - ) - - -@pytest.fixture -def mapper(config): - return MetadataMapper( - template=config["template"], - source_provider=ConfigSourceProvider(config["sources"]) - ) - - -def test_empty_mapping_empty_context(): - mapper = MetadataMapper({}) - assert mapper.get_metadata(Context()) == {} - - -def test_constant_mapping_empty_context(): - template = { - "foo": "bar", - "baz": "qux" - } - mapper = MetadataMapper(template) - - assert mapper.get_metadata(Context()) == template - - -def test_empty_context(fixed_name_file_config): - mapper = MetadataMapper( - template={ - 
"foo": { - "@mapped": { - "source": "fixed_name_file", - "key": "foo" - } - } - }, - source_provider=ConfigSourceProvider({ - "fixed_name_file": fixed_name_file_config - }) - ) - context = Context() - - with pytest.raises( - MetadataMapperError, - match=( - "failed to query source 'fixed_name_file': " - "no files in context" - ) - ): - mapper.get_metadata(context) - - -@pytest.mark.xml -def test_basic(mapper, context): - assert mapper.get_metadata(context) == { - "foo": "value for foo", - "outer": { - "nested": "value for nested", - "bar": "value for bar" - }, - "namespace_xml_foobar_1": "testing_1", - "namespace_xml_foobar_2": "2", - "xml_foobar_1": "testing_1", - "xml_foobar_2": "2", - } - - -@pytest.mark.xml -def test_mapped_key_callable(config, context): - mapper = MetadataMapper( - template={ - "bar": { - "@mapped": { - "source": "name_match_file", - "key": lambda ctx: ctx.meta["foo"] - } - }, - }, - source_provider=ConfigSourceProvider(config["sources"]) - ) - context.meta["foo"] = "bar" - - assert mapper.get_metadata(context) == { - "bar": "value for bar" - } - - -def test_custom_directive(context, fixed_name_file_config): - mapper = MetadataMapper( - template={ - "foo": { - "#mapped": { - "source": "fixed_name_file", - "key": "foo" - } - }, - }, - source_provider=ConfigSourceProvider({ - "fixed_name_file": fixed_name_file_config - }), - directive_marker="#" - ) - assert mapper.get_metadata(context) == { - "foo": "value for foo" - } - - -@pytest.mark.xml -def test_basic_py_source_provider(config, context): - mapper = MetadataMapper( - template=config["template"], - source_provider=PySourceProvider({ - "fixed_name_file": Source( - storage=LocalFile( - filters={ - "name": "fixed_name_file.json" - } - ), - format=Json() - ), - "fixed_xml_file": Source( - storage=LocalFile( - filters={ - "name": "fixed_xml_file.xml" - } - ), - format=Xml() - ), - "namespace_xml_file": Source( - storage=LocalFile( - filters={ - "name": "xml_with_namespace.xml" - } - ), - 
format=Xml() - ), - "name_match_file": Source( - storage=LocalFile( - filters={ - "name": r".*match_me\.json" - } - ), - format=Json() - ), - "name_match_file2": Source( - storage=LocalFile( - filters={ - "name": re.compile(r".*match_me\.json") - } - ), - format=Json() - ) - }) - ) - assert mapper.get_metadata(context) == { - "foo": "value for foo", - "outer": { - "nested": "value for nested", - "bar": "value for bar" - }, - "namespace_xml_foobar_1": "testing_1", - "namespace_xml_foobar_2": "2", - "xml_foobar_1": "testing_1", - "xml_foobar_2": "2", - } - - -@pytest.mark.xml -def test_basic_s3_file(s3_resource, config, context): - s3_resource.create_bucket(Bucket="test") - s3_resource.Object("test", "fixed_name_file.json").put( - Body=open(context.files[0]["path"]).read() - ) - context.files[0]["bucket"] = "test" - context.files[0]["key"] = "fixed_name_file.json" - - config["sources"]["fixed_name_file"]["storage"]["class"] = "S3File" - - mapper = MetadataMapper( - template=config["template"], - source_provider=ConfigSourceProvider(config["sources"]) - ) - - assert mapper.get_metadata(context) == { - "foo": "value for foo", - "outer": { - "nested": "value for nested", - "bar": "value for bar" - }, - "namespace_xml_foobar_1": "testing_1", - "namespace_xml_foobar_2": "2", - "xml_foobar_1": "testing_1", - "xml_foobar_2": "2", - } - - -def test_no_matching_files(context): - mapper = MetadataMapper( - template={ - "foo": { - "@mapped": { - "source": "source_file", - "key": "foo" - } - } - }, - source_provider=ConfigSourceProvider({ - "source_file": { - "storage": { - "class": "LocalFile", - "filters": { - "name": "does not exist" - } - }, - "format": { - "class": "Json" - } - } - }) - ) - - with pytest.raises( - MetadataMapperError, - match=( - "failed to query source 'source_file': " - "no files matched filters" - ) - ): - mapper.get_metadata(context) - - -def test_source_non_existent_key(context, fixed_name_file_config): - mapper = MetadataMapper( - template={ - "foo": 
{ - "@mapped": { - "source": "fixed_name_file", - "key": "does_not_exist", - } - } - }, - source_provider=ConfigSourceProvider({ - "fixed_name_file": fixed_name_file_config - }) - ) - - with pytest.raises( - MetadataMapperError, - match=( - "failed to query source 'fixed_name_file': " - "key not found 'does_not_exist'" - ), - ): - mapper.get_metadata(context) - - -def test_mapped_non_existent_source(context): - mapper = MetadataMapper( - template={ - "foo": { - "@mapped": { - "source": "does not exist", - "key": "foo" - } - } - }, - source_provider=ConfigSourceProvider({}) - ) - - with pytest.raises( - MetadataMapperError, - match=( - r"failed to process template at \$\.foo\.@mapped: " - "source 'does not exist' does not exist" - ) - ): - mapper.get_metadata(context) - - -def test_mapped_missing_key(context): - mapper = MetadataMapper( - template={ - "foo": { - "@mapped": { - "source": "fixed_name_file", - } - } - }, - source_provider=ConfigSourceProvider({}) - ) - - with pytest.raises( - MetadataMapperError, - match=( - r"failed to process template at \$\.foo\.@mapped: " - "missing key: 'key'" - ) - ): - mapper.get_metadata(context) - - -def test_mapped_missing_source(context): - mapper = MetadataMapper( - template={ - "foo": { - "@mapped": { - "key": "does not exist", - } - } - }, - source_provider=ConfigSourceProvider({}) - ) - - with pytest.raises( - MetadataMapperError, - match=( - r"failed to process template at \$\.foo\.@mapped: " - "missing key: 'source'" - ) - ): - mapper.get_metadata(context) - - -def test_mapped_missing_source_path(context): - mapper = MetadataMapper( - template={ - "foo": { - "bar": [ - "ignored", - "ignored", - { - "@mapped": { - "key": "does not exist", - } - } - ] - } - }, - source_provider=ConfigSourceProvider({}) - ) - - with pytest.raises( - MetadataMapperError, - match=( - r"failed to process template at \$\.foo\.bar\[2\]\.@mapped: " - "missing key: 'source'" - ) - ): - mapper.get_metadata(context) - - -def 
test_mapped_missing_source_and_key(context): - mapper = MetadataMapper( - template={ - "foo": { - "@mapped": {} - } - }, - source_provider=ConfigSourceProvider({}) - ) - - with pytest.raises( - MetadataMapperError, - match=( - r"failed to process template at \$\.foo\.@mapped: " - "missing keys: 'key', 'source'" - ) - ): - mapper.get_metadata(context) - - -def test_mapped_extra_parameter(context, fixed_name_file_config): - mapper = MetadataMapper( - template={ - "foo": { - "@mapped": { - "source": "fixed_name_file", - "key": "foo", - "does_not_exist": "does not exist", - "does_not_exist_2": "does not exist", - } - } - }, - source_provider=ConfigSourceProvider({ - "fixed_name_file": fixed_name_file_config - }) - ) - - mapper.get_metadata(context) == {"foo": "value for foo"} - - -def test_invalid_directive(context): - mapper = MetadataMapper( - template={ - "foo": { - "@does_not_exist": {} - } - }, - source_provider=ConfigSourceProvider({}) - ) - - with pytest.raises( - MetadataMapperError, - match=( - r"failed to process template at \$\.foo\.@does_not_exist: " - "invalid directive '@does_not_exist'" - ) - ): - mapper.get_metadata(context) - - -def test_multiple_directives(context): - mapper = MetadataMapper( - template={ - "foo": { - "@mapped": {}, - "@invalid": {} - } - }, - source_provider=ConfigSourceProvider({}) - ) - - with pytest.raises( - MetadataMapperError, - match=( - r"failed to process template at \$\.foo: " - "multiple directives found in config: '@mapped', '@invalid'" - ) - ): - mapper.get_metadata(context) - - -def test_reformatted_json_field_in_json(): - mapper = MetadataMapper( - template={ - "@reformatted": { - "format": "Json", - "value": { - "@mapped": { - "source": "file", - "key": "some-field" - }, - }, - "key": "foo" - }, - }, - source_provider=ConfigSourceProvider({ - "file": { - "storage": { - "class": "Dummy", - "data": br""" - { - "some-field": "{\"foo\": \"bar\"}" - } - """ - }, - "format": { - "class": "Json", - } - } - }) - ) - - context 
= Context() - - assert mapper.get_metadata(context) == "bar" - - -@pytest.mark.xml -def test_reformatted_json_field_in_xml(): - mapper = MetadataMapper( - template={ - "@reformatted": { - "format": "Json", - "value": { - "@mapped": { - "source": "file", - "key": "/root/json-field" - }, - }, - "key": "foo" - }, - }, - source_provider=ConfigSourceProvider({ - "file": { - "storage": { - "class": "Dummy", - "data": b""" - - {"foo": "bar"} - - """ - }, - "format": { - "class": "Xml", - } - } - }) - ) - - context = Context() - - assert mapper.get_metadata(context) == "bar" - - -@pytest.mark.xml -def test_reformatted_json_field_in_xml_get_entire_value(): - mapper = MetadataMapper( - template={ - "@reformatted": { - "format": "Json", - "value": { - "@mapped": { - "source": "file", - "key": "/root/json-field" - }, - }, - "key": "$", - }, - }, - source_provider=ConfigSourceProvider({ - "file": { - "storage": { - "class": "Dummy", - "data": b""" - - {"foo": "bar"} - - """ - }, - "format": { - "class": "Xml", - } - } - }) - ) - - context = Context() - - assert mapper.get_metadata(context) == {"foo": "bar"} - - -@pytest.mark.xml -def test_reformatted_xml_field_in_json(): - mapper = MetadataMapper( - template={ - "@reformatted": { - "format": "Xml", - "value": { - "@mapped": { - "source": "file", - "key": "foo" - }, - }, - "key": "/root/field" - }, - }, - source_provider=ConfigSourceProvider({ - "file": { - "storage": { - "class": "Dummy", - "data": b""" - { - "foo": "bar" - } - """ - }, - "format": { - "class": "Json", - } - } - }) - ) - - context = Context() - - assert mapper.get_metadata(context) == "bar" - - -def test_reformatted_bad_type(): - mapper = MetadataMapper( - template={ - "@reformatted": { - "format": "Json", - "value": { - "@mapped": { - "source": "file", - "key": "foo" - }, - }, - "key": "$" - }, - }, - source_provider=ConfigSourceProvider({ - "file": { - "storage": { - "class": "Dummy", - "data": b'{"foo": true}' - }, - "format": { - "class": "Json", - } - } - 
}) - ) - - context = Context() - - with pytest.raises(MetadataMapperError, match="but got 'bool'"): - mapper.get_metadata(context) - - -@pytest.mark.xml -def test_reformatted_nested(): - mapper = MetadataMapper( - template={ - "@reformatted": { - "format": "Xml", - "value": { - "@reformatted": { - "format": "Json", - "value": '{"foo": "bar"}', - "key": "foo" - }, - }, - "key": "/root/field" - }, - }, - source_provider=ConfigSourceProvider({ - "file": { - "storage": { - "class": "Dummy", - "data": b""" - { - "foo": "bar" - } - """ - }, - "format": { - "class": "Json", - } - } - }) - ) - - context = Context() - - assert mapper.get_metadata(context) == "bar" - - -def test_reformatted_nested_missing_parameter(): - mapper = MetadataMapper( - template={ - "@reformatted": { - "format": "Json", - "value": { - "@reformatted": { - "format": "Json", - "key": "foo" - }, - }, - "key": "/root/field" - }, - }, - source_provider=ConfigSourceProvider({}) - ) - - context = Context() - - with pytest.raises( - MetadataMapperError, - match=( - "failed to process template at " - r"\$\.@reformatted\.value\.@reformatted: " - "missing key: 'value'" - ) - ): - mapper.get_metadata(context) diff --git a/tox.ini b/tox.ini index b45a55d..2f03afd 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ requires = tox>4 isolated_build = true -env_list = py{38,39,310,311}-X{all,none}, py38-Xall-jp{14,15,16} +env_list = py{38,39,310,311,312}-X{all,none}, py38-Xall-jp{14,15,16} [gh-actions] python = @@ -10,6 +10,7 @@ python = 3.9: py39 3.10: py310 3.11: py311 + 3.12: py312 [testenv] allowlist_externals = poetry