Skip to content

Commit

Permalink
Merge branch 'patch-1' from pull request #121
Browse files Browse the repository at this point in the history
Convert yaml parsing to use a "failover" approach, in which a primary
YAML instance is required, but if parsing fails, any number of
fallback parsers can be used. By setting the fallback to a parser with
`pure=True`, the patch from #121 to support duplicate anchors is
preserved.

Tests for both the InstanceLoader and SchemaLoader were included in
the original, and here are augmented with tests which ensure that
invalid YAML data produces the desired errors.
  • Loading branch information
sirosen committed Jul 4, 2022
2 parents 6ca999b + 69a8eca commit 8a03d5c
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 10 deletions.
4 changes: 3 additions & 1 deletion src/check_jsonschema/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ def __init__(
supported_formats: t.Sequence[str, ...] | None = None,
) -> None:
yaml_impl = yaml.construct_yaml_implementation()
failover_yaml_impl = yaml.construct_yaml_implementation(pure=True)
if modify_yaml_implementation:
modify_yaml_implementation(yaml_impl)
modify_yaml_implementation(failover_yaml_impl)
base_by_tag = {
"yaml": yaml.impl2loader(yaml_impl),
"yaml": yaml.impl2loader(yaml_impl, failover_yaml_impl),
**DEFAULT_LOAD_FUNC_BY_TAG,
}
if supported_formats is None:
Expand Down
32 changes: 25 additions & 7 deletions src/check_jsonschema/parsers/yaml.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from __future__ import annotations

import typing as t
import warnings

import ruamel.yaml


def construct_yaml_implementation() -> ruamel.yaml.YAML:
implementation = ruamel.yaml.YAML(typ="safe")
def construct_yaml_implementation(
typ: str = "safe", pure: bool = False
) -> ruamel.yaml.YAML:
implementation = ruamel.yaml.YAML(typ=typ, pure=pure)

# workaround global state
# see: https://sourceforge.net/p/ruamel-yaml/tickets/341/
Expand Down Expand Up @@ -43,12 +46,27 @@ def _normalize(data: t.Any) -> t.Any:
return data


# Unique marker meaning "no parser has produced a value yet".
# NOTE(review): presumably a sentinel is used instead of None because None can
# be a legitimate parse result -- confirm against ruamel.yaml's behavior.
_data_sentinel = object()


def impl2loader(
    primary: ruamel.yaml.YAML, *fallbacks: ruamel.yaml.YAML
) -> t.Callable[[t.BinaryIO], t.Any]:
    """Build a ``load(stream)`` callable which parses YAML with failover.

    The returned loader tries ``primary`` first and then each of
    ``fallbacks`` in order, stopping at the first parser which succeeds.

    :param primary: the YAML implementation which is always tried first
    :param fallbacks: zero or more implementations tried, in order, when
        every earlier one raised ``ruamel.yaml.YAMLError``
    :returns: a function taking a binary stream and returning the parsed
        data after it has been passed through ``_normalize``
    :raises ruamel.yaml.YAMLError: (from the returned loader) when every
        parser fails; the error from the last parser tried is raised
    """

    def load(stream: t.BinaryIO) -> t.Any:
        # read the stream exactly once so that every parser is handed the
        # same bytes; a stream consumed by a failed parse could not be reused
        stream_bytes = stream.read()
        lasterr: ruamel.yaml.YAMLError | None = None
        data: t.Any = _data_sentinel
        with warnings.catch_warnings():
            # reused anchors are not reported as warnings while loading
            warnings.simplefilter("ignore", ruamel.yaml.error.ReusedAnchorWarning)
            for impl in (primary, *fallbacks):
                try:
                    data = impl.load(stream_bytes)
                except ruamel.yaml.YAMLError as e:
                    lasterr = e
                else:
                    break
        # no parser succeeded: surface the most recent parse failure
        if data is _data_sentinel and lasterr is not None:
            raise lasterr
        return _normalize(data)

    return load


# Module-level default loader: a single implementation built with the default
# arguments of construct_yaml_implementation, with no fallback parsers supplied.
load = impl2loader(construct_yaml_implementation())
1 change: 0 additions & 1 deletion tests/unit/test_gitlab_data_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def test_can_parse_ok_gitlab_yaml_with_transform():

with pytest.raises(Exception):
data = impl.load(rawdata)
print(data)

GITLAB_TRANSFORM.modify_yaml_implementation(impl)
data = impl.load(rawdata)
Expand Down
29 changes: 29 additions & 0 deletions tests/unit/test_instance_loader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os

import pytest
import ruamel.yaml

from check_jsonschema.instance_loader import InstanceLoader
from check_jsonschema.parsers import BadFileTypeError
Expand Down Expand Up @@ -109,3 +110,31 @@ def test_instanceloader_optional_format_handling(
err = excinfo.value
# error message should be instructive
assert expect_error_message in str(err)


def test_instanceloader_yaml_dup_anchor(tmp_path):
    """An instance file which defines the same anchor name twice still loads.

    The YAML nests ``b`` and ``c`` under ``a`` and reuses ``&anchor`` for both
    values; the loaded data must match the plain nested structure.
    """
    f = tmp_path / "foo.yaml"
    # the indentation inside the literal is significant: it is what places
    # "b" and "c" inside the mapping under "a"
    f.write_text(
        """\
a:
  b: &anchor
    - 1
    - 2
  c: &anchor d
"""
    )
    loader = InstanceLoader([str(f)])
    data = list(loader.iter_files())
    assert data == [(str(f), {"a": {"b": [1, 2], "c": "d"}})]


def test_instanceloader_invalid_yaml_data(tmp_path):
    """Iterating an instance file with an unclosed ``{`` raises a YAMLError."""
    bad_yaml_path = tmp_path / "foo.yaml"
    bad_yaml_path.write_text(
        """\
a: {b
"""
    )
    instance_loader = InstanceLoader([str(bad_yaml_path)])
    # the failure is expected to surface while the files are being iterated
    with pytest.raises(ruamel.yaml.YAMLError):
        list(instance_loader.iter_files())
50 changes: 49 additions & 1 deletion tests/unit/test_schema_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

from check_jsonschema.schema_loader import SchemaLoader
from check_jsonschema.schema_loader import SchemaLoader, SchemaParseError
from check_jsonschema.schema_loader.readers import HttpSchemaReader, LocalSchemaReader


Expand Down Expand Up @@ -89,3 +89,51 @@ def test_schemaloader_remote_path(schemafile):
sl = SchemaLoader(schemafile)
assert isinstance(sl.reader, HttpSchemaReader)
assert sl.reader.url == schemafile


def test_schemaloader_local_yaml_dup_anchor(tmp_path):
    """A local YAML schema which defines the same anchor name twice still loads.

    Both ``b`` and ``c`` are tagged with ``&anchor``; the schema returned by
    ``get_schema`` must match the plain nested structure.
    """
    f = tmp_path / "schema.yaml"
    # the indentation inside the literal is significant: it defines the
    # nesting of "properties" which the expected dict below asserts
    f.write_text(
        """
---
"$schema": https://json-schema.org/draft/2020-12/schema
type: object
properties:
  a:
    type: object
    properties:
      b: &anchor
        type: array
        items:
          type: integer
      c: &anchor
        type: string
"""
    )
    sl = SchemaLoader(str(f))
    schema = sl.get_schema()
    assert schema == {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "type": "object",
        "properties": {
            "a": {
                "type": "object",
                "properties": {
                    "b": {"type": "array", "items": {"type": "integer"}},
                    "c": {"type": "string"},
                },
            },
        },
    }


def test_schemaloader_invalid_yaml_data(tmp_path):
    """Fetching a schema from a file with an unclosed ``{`` raises SchemaParseError."""
    bad_schema_path = tmp_path / "foo.yaml"
    bad_schema_path.write_text(
        """\
a: {b
"""
    )
    schema_loader = SchemaLoader(str(bad_schema_path))
    # the failure is expected when the schema is requested, not at construction
    with pytest.raises(SchemaParseError):
        schema_loader.get_schema()

0 comments on commit 8a03d5c

Please sign in to comment.