_data_sentinel = object()


def impl2loader(
    primary: ruamel.yaml.YAML, *fallbacks: ruamel.yaml.YAML
) -> t.Callable[[t.BinaryIO], t.Any]:
    """Build a stream loader that tries several YAML implementations in order.

    The returned callable reads the stream once, then hands the same bytes to
    ``primary`` and, if that raises ``ruamel.yaml.YAMLError``, to each of
    ``fallbacks`` in turn. The first successful parse wins and is passed
    through ``_normalize``. If every implementation fails, the error raised by
    the last one tried is re-raised.
    """
    candidates = (primary, *fallbacks)

    def load(stream: t.BinaryIO) -> t.Any:
        # Read up front so every implementation parses identical input; a
        # stream could not be consumed a second time by a fallback.
        raw = stream.read()
        failure: ruamel.yaml.YAMLError | None = None
        parsed: t.Any = _data_sentinel
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ruamel.yaml.error.ReusedAnchorWarning)
            for candidate in candidates:
                try:
                    parsed = candidate.load(raw)
                except ruamel.yaml.YAMLError as err:
                    failure = err
                    continue
                break
        # The sentinel (rather than None) marks "nothing parsed", since a
        # document may legitimately load as None.
        if failure is not None and parsed is _data_sentinel:
            raise failure
        return _normalize(parsed)

    return load
def test_schemaloader_local_yaml_dup_anchor(tmp_path):
    """A local YAML schema that defines the same anchor name twice still loads."""
    schema_path = tmp_path / "schema.yaml"
    schema_path.write_text(
        """
---
"$schema": https://json-schema.org/draft/2020-12/schema
type: object
properties:
  a:
    type: object
    properties:
      b: &anchor
        type: array
        items:
          type: integer
      c: &anchor
        type: string
"""
    )
    loaded = SchemaLoader(str(schema_path)).get_schema()

    b_schema = {"type": "array", "items": {"type": "integer"}}
    c_schema = {"type": "string"}
    expected = {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "type": "object",
        "properties": {
            "a": {
                "type": "object",
                "properties": {"b": b_schema, "c": c_schema},
            },
        },
    }
    assert loaded == expected


def test_schemaloader_invalid_yaml_data(tmp_path):
    """Malformed YAML in a schema file is reported as SchemaParseError."""
    bad_file = tmp_path / "foo.yaml"
    bad_file.write_text("a: {b\n")
    loader = SchemaLoader(str(bad_file))
    with pytest.raises(SchemaParseError):
        loader.get_schema()