From b75532db2ace934686ac77287227f649ef277b8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Thu, 25 Apr 2024 21:42:04 -0600 Subject: [PATCH] feat: JSON schema keyword `allOf` is now supported (#2389) * feat: Support `allOf` in schemas * Add some tests --- singer_sdk/_singerlib/schema.py | 9 +- singer_sdk/helpers/_typing.py | 13 +++ singer_sdk/typing.py | 59 +++++++++- tests/_singerlib/test_schema.py | 18 +++ tests/core/test_jsonschema_helpers.py | 156 ++++++++++++++++++++++++++ 5 files changed, 253 insertions(+), 2 deletions(-) diff --git a/singer_sdk/_singerlib/schema.py b/singer_sdk/_singerlib/schema.py index d12d04561..2cdc04f29 100644 --- a/singer_sdk/_singerlib/schema.py +++ b/singer_sdk/_singerlib/schema.py @@ -36,6 +36,7 @@ "additionalProperties", "anyOf", "patternProperties", + "allOf", ] @@ -64,6 +65,7 @@ class Schema: maxLength: int | None = None # noqa: N815 minLength: int | None = None # noqa: N815 anyOf: t.Any | None = None # noqa: N815 + allOf: t.Any | None = None # noqa: N815 format: str | None = None additionalProperties: t.Any | None = None # noqa: N815 patternProperties: t.Any | None = None # noqa: N815 @@ -131,6 +133,7 @@ class _SchemaKey: properties = "properties" pattern_properties = "patternProperties" any_of = "anyOf" + all_of = "allOf" def resolve_schema_references( @@ -163,7 +166,7 @@ def resolve_schema_references( return _resolve_schema_references(schema, resolver) -def _resolve_schema_references( +def _resolve_schema_references( # noqa: C901 schema: dict[str, t.Any], resolver: Resolver, ) -> dict[str, t.Any]: @@ -194,4 +197,8 @@ def _resolve_schema_references( for i, element in enumerate(schema[_SchemaKey.any_of]): schema[_SchemaKey.any_of][i] = _resolve_schema_references(element, resolver) + if _SchemaKey.all_of in schema: + for i, element in enumerate(schema[_SchemaKey.all_of]): + schema[_SchemaKey.all_of][i] = 
_resolve_schema_references(element, resolver) + return schema diff --git a/singer_sdk/helpers/_typing.py b/singer_sdk/helpers/_typing.py index 74ed59cf5..a4d4f22d9 100644 --- a/singer_sdk/helpers/_typing.py +++ b/singer_sdk/helpers/_typing.py @@ -145,6 +145,8 @@ def is_datetime_type(type_dict: dict) -> bool: raise EmptySchemaTypeError if "anyOf" in type_dict: return any(is_datetime_type(type_dict) for type_dict in type_dict["anyOf"]) + if "allOf" in type_dict: + return all(is_datetime_type(type_dict) for type_dict in type_dict["allOf"]) if "type" in type_dict: return type_dict.get("format") == "date-time" msg = f"Could not detect type of replication key using schema '{type_dict}'" @@ -168,6 +170,11 @@ def is_date_or_datetime_type(type_dict: dict) -> bool: if "anyOf" in type_dict: return any(is_date_or_datetime_type(option) for option in type_dict["anyOf"]) + if "allOf" in type_dict: + return all( + is_date_or_datetime_type(type_dict) for type_dict in type_dict["allOf"] + ) + if "type" in type_dict: return type_dict.get("format") in {"date", "date-time"} @@ -233,6 +240,9 @@ def is_string_array_type(type_dict: dict) -> bool: if "anyOf" in type_dict: return any(is_string_array_type(t) for t in type_dict["anyOf"]) + if "allOf" in type_dict: + return all(is_string_array_type(t) for t in type_dict["allOf"]) + if "type" not in type_dict: msg = f"Could not detect type from schema '{type_dict}'" raise ValueError(msg) @@ -248,6 +258,9 @@ def is_array_type(type_dict: dict) -> bool: if "anyOf" in type_dict: return any(is_array_type(t) for t in type_dict["anyOf"]) + if "allOf" in type_dict: + return all(is_array_type(t) for t in type_dict["allOf"]) + if "type" not in type_dict: msg = f"Could not detect type from schema '{type_dict}'" raise ValueError(msg) diff --git a/singer_sdk/typing.py b/singer_sdk/typing.py index d4cc0011f..ca4d917df 100644 --- a/singer_sdk/typing.py +++ b/singer_sdk/typing.py @@ -834,7 +834,7 @@ def type_dict(self) -> dict: # type: ignore[override] return 
result -class OneOf(JSONPointerType): +class OneOf(JSONTypeHelper): """OneOf type. This type allows for a value to be one of a set of types. @@ -876,6 +876,63 @@ def type_dict(self) -> dict: # type: ignore[override] return {"oneOf": [t.type_dict for t in self.wrapped]} +class AllOf(JSONTypeHelper): + """AllOf type. + + This type requires a value to match all of the given types. + + Examples: + >>> t = AllOf( + ... ObjectType(Property("first_type", StringType)), + ... ObjectType(Property("second_type", IntegerType)), + ... ) + >>> print(t.to_json(indent=2)) + { + "allOf": [ + { + "type": "object", + "properties": { + "first_type": { + "type": [ + "string", + "null" + ] + } + } + }, + { + "type": "object", + "properties": { + "second_type": { + "type": [ + "integer", + "null" + ] + } + } + } + ] + } + """ + + def __init__(self, *types: W | type[W]) -> None: + """Initialize AllOf type. + + Args: + types: Types that a value must match all of. + """ + self.wrapped = types + + @property + def type_dict(self) -> dict: # type: ignore[override] + """Get type dictionary. + + Returns: + A dictionary describing the type. + """ + return {"allOf": [t.type_dict for t in self.wrapped]} + + class Constant(JSONTypeHelper): """A constant property. 
diff --git a/tests/_singerlib/test_schema.py b/tests/_singerlib/test_schema.py index 4fa72c5a8..2f0023347 100644 --- a/tests/_singerlib/test_schema.py +++ b/tests/_singerlib/test_schema.py @@ -245,6 +245,24 @@ def test_schema_from_dict(pydict, expected): {"anyOf": [{"type": "string"}, {"type": "integer"}]}, id="resolve_schema_any_of", ), + pytest.param( + { + "allOf": [ + {"$ref": "references.json#/definitions/first_type"}, + {"$ref": "references.json#/definitions/second_type"}, + ], + }, + { + "references.json": { + "definitions": { + "first_type": {"type": "string"}, + "second_type": {"type": "integer"}, + }, + }, + }, + {"allOf": [{"type": "string"}, {"type": "integer"}]}, + id="resolve_schema_all_of", + ), ], ) def test_resolve_schema_references(schema, refs, expected): diff --git a/tests/core/test_jsonschema_helpers.py b/tests/core/test_jsonschema_helpers.py index 8fb5e90a8..2c13f93ee 100644 --- a/tests/core/test_jsonschema_helpers.py +++ b/tests/core/test_jsonschema_helpers.py @@ -27,6 +27,7 @@ ) from singer_sdk.tap_base import Tap from singer_sdk.typing import ( + AllOf, AnyType, ArrayType, BooleanType, @@ -103,6 +104,13 @@ def test_to_json(): StringType, description="A test property", ), + Property( + "test_property_3", + AllOf( + ObjectType(Property("test_property_4", StringType)), + ObjectType(Property("test_property_5", StringType)), + ), + ), additional_properties=False, ) assert schema.to_json(indent=4) == dedent( @@ -122,6 +130,32 @@ def test_to_json(): "null" ], "description": "A test property" + }, + "test_property_3": { + "allOf": [ + { + "type": "object", + "properties": { + "test_property_4": { + "type": [ + "string", + "null" + ] + } + } + }, + { + "type": "object", + "properties": { + "test_property_5": { + "type": [ + "string", + "null" + ] + } + } + } + ] } }, "required": [ @@ -926,3 +960,125 @@ def test_discriminated_union(): "client_id": "123", }, ) + + +def test_is_datetime_type(): + assert is_datetime_type({"type": "string", "format": 
"date-time"}) + assert not is_datetime_type({"type": "string"}) + + assert is_datetime_type({"anyOf": [{"type": "string", "format": "date-time"}]}) + assert not is_datetime_type({"anyOf": [{"type": "string"}]}) + + assert is_datetime_type({"allOf": [{"type": "string", "format": "date-time"}]}) + assert not is_datetime_type({"allOf": [{"type": "string"}]}) + + +def test_is_date_or_datetime_type(): + assert is_date_or_datetime_type({"type": "string", "format": "date"}) + assert is_date_or_datetime_type({"type": "string", "format": "date-time"}) + assert not is_date_or_datetime_type({"type": "string"}) + + assert is_date_or_datetime_type( + {"anyOf": [{"type": "string", "format": "date-time"}]}, + ) + assert is_date_or_datetime_type({"anyOf": [{"type": "string", "format": "date"}]}) + assert not is_date_or_datetime_type({"anyOf": [{"type": "string"}]}) + + assert is_date_or_datetime_type( + {"allOf": [{"type": "string", "format": "date-time"}]}, + ) + assert is_date_or_datetime_type({"allOf": [{"type": "string", "format": "date"}]}) + assert not is_date_or_datetime_type({"allOf": [{"type": "string"}]}) + + +def test_is_string_array_type(): + assert is_string_array_type( + { + "type": "array", + "items": {"type": "string"}, + }, + ) + assert not is_string_array_type( + { + "type": "array", + "items": {"type": "integer"}, + }, + ) + + assert is_string_array_type( + { + "anyOf": [ + {"type": "array", "items": {"type": "string"}}, + {"type": "null"}, + ], + }, + ) + assert not is_string_array_type( + { + "anyOf": [ + {"type": "array", "items": {"type": "integer"}}, + {"type": "null"}, + ], + }, + ) + + assert is_string_array_type( + { + "allOf": [ + {"type": "array", "items": {"type": "string"}}, + ], + }, + ) + assert not is_string_array_type( + { + "allOf": [ + {"type": "array", "items": {"type": "integer"}}, + ], + }, + ) + + +def test_is_array_type(): + assert is_array_type( + { + "type": "array", + "items": {"type": "string"}, + }, + ) + assert not is_array_type( + { 
+ "type": "string", + }, + ) + + assert is_array_type( + { + "anyOf": [ + {"type": "array"}, + {"type": "null"}, + ], + }, + ) + assert not is_array_type( + { + "anyOf": [ + {"type": "string"}, + {"type": "null"}, + ], + }, + ) + + assert is_array_type( + { + "allOf": [ + {"type": "array"}, + ], + }, + ) + assert not is_array_type( + { + "allOf": [ + {"type": "string"}, + ], + }, + )