add Schema.infer method

This introduces the class method Schema.infer, to infer a Schema from concrete data. This will be useful for converting existing known-good data (e.g. API responses) into enforceable schemas.
alecthomas · Nov 7, 2017 · 786186f · 786186f
1 parent 95489bd
commit 786186f
Show file tree

Hide file tree

Showing 2 changed files with 115 additions and 0 deletions.
diff --git a/voluptuous/schema_builder.py b/voluptuous/schema_builder.py
@@ -197,6 +197,48 @@ def __init__(self, schema, required=False, extra=PREVENT_EXTRA):
         self.extra = int(extra)  # ensure the value is an integer
         self._compiled = self._compile(schema)
 
+    @classmethod
+    def infer(cls, data, **kwargs):
+        """Create a Schema from concrete data (e.g. an API response).
+
+        For example, this will take a dict like:
+
+        {
+            'foo': 1,
+            'bar': {
+                'a': True,
+                'b': False
+            },
+            'baz': ['purple', 'monkey', 'dishwasher']
+        }
+
+        And return a Schema:
+
+        {
+            'foo': int,
+            'bar': {
+                'a': bool,
+                'b': bool
+            },
+            'baz': [str]
+        }
+        """
+        def value_to_schema_type(value):
+            if isinstance(value, dict):
+                if len(value) == 0:
+                    return dict
+                return {k: value_to_schema_type(v)
+                        for k, v in iteritems(value)}
+            if isinstance(value, list):
+                if len(value) == 0:
+                    return list
+                else:
+                    return [value_to_schema_type(v)
+                            for v in value]
+            return type(value)
+
+        return cls(value_to_schema_type(data), **kwargs)
+
     def __eq__(self, other):
         if str(other) == str(self.schema):
             # Because repr is combination mixture of object and schema

diff --git a/voluptuous/tests/tests.py b/voluptuous/tests/tests.py
@@ -828,6 +828,79 @@ def test_marker_hashable():
     assert_equal(definition.get('j'), None)
 
 
+def test_schema_infer():
+    schema = Schema.infer({
+        'str': 'foo',
+        'bool': True,
+        'int': 42,
+        'float': 3.14
+    })
+    assert_equal(schema, Schema({
+        Required('str'): str,
+        Required('bool'): bool,
+        Required('int'): int,
+        Required('float'): float
+    }))
+
+
+def test_schema_infer_dict():
+    schema = Schema.infer({
+        'a': {
+            'b': {
+                'c': 'foo'
+            }
+        }
+    })
+
+    assert_equal(schema, Schema({
+        Required('a'): {
+            Required('b'): {
+                Required('c'): str
+            }
+        }
+    }))
+
+
+def test_schema_infer_list():
+    schema = Schema.infer({
+        'list': ['foo', True, 42, 3.14]
+    })
+
+    assert_equal(schema, Schema({
+        Required('list'): [str, bool, int, float]
+    }))
+
+
+def test_schema_infer_scalar():
+    assert_equal(Schema.infer('foo'), Schema(str))
+    assert_equal(Schema.infer(True), Schema(bool))
+    assert_equal(Schema.infer(42), Schema(int))
+    assert_equal(Schema.infer(3.14), Schema(float))
+    assert_equal(Schema.infer({}), Schema(dict))
+    assert_equal(Schema.infer([]), Schema(list))
+
+
+def test_schema_infer_accepts_kwargs():
+    schema = Schema.infer({
+        'str': 'foo',
+        'bool': True
+    }, required=False, extra=True)
+
+    # Subset of schema should be acceptable thanks to required=False.
+    schema({'bool': False})
+
+    # Keys that are in schema should still match required types.
+    try:
+        schema({'str': 42})
+    except Invalid:
+        pass
+    else:
+        assert False, 'Did not raise Invalid for Number'
+
+    # Extra fields should be acceptable thanks to extra=True.
+    schema({'str': 'bar', 'int': 42})
+
+
 def test_validation_performance():
     """
     This test comes to make sure the validation complexity of dictionaries is done in a linear time.