From 9d7a4276fbf392b4c2ca31701cb54f59e6450d5e Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Wed, 27 Jul 2022 15:59:23 -0700
Subject: [PATCH 01/18] Add key validation methods (the methods themselves
 still gotta be revised)

---
 sdv/metadata/single_table.py | 103 +++++++++++++++++++++++++++++++----
 1 file changed, 92 insertions(+), 11 deletions(-)

diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index ca3bec9f6..a7fe4a670 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -250,6 +250,22 @@ def _validate_datatype(id):
         """Check whether id is a string or a tuple of strings."""
         return isinstance(id, str) or isinstance(id, tuple) and all(isinstance(i, str) for i in id)
 
+    def _validate_primary_key(self, id):
+        if not self._validate_datatype(id):
+            raise ValueError("'primary_key' must be a string or tuple of strings.")
+
+        invalid_ids = {}
+        if isinstance(id, str) and id not in self._columns:
+            invalid_ids = {id}
+        elif isinstance(id, tuple):
+            invalid_ids = {invalid_id for invalid_id in id if invalid_id in self._columns}
+
+        if invalid_ids:
+            raise ValueError(
+                f'Unknown primary key values {invalid_ids}.'
+                ' Keys should be columns that exist in the table.'
+            )
+
     def set_primary_key(self, id):
         """Set the metadata primary key.
 
@@ -257,8 +273,7 @@ def set_primary_key(self, id):
             id (str, tuple):
                 Name (or tuple of names) of the primary key column(s).
         """
-        if not self._validate_datatype(id):
-            raise ValueError("'primary_key' must be a string or tuple of strings.")
+        self._validate_primary_key(id)
 
         if self._metadata['primary_key'] is not None:
             warnings.warn(
@@ -268,6 +283,25 @@ def set_primary_key(self, id):
 
         self._metadata['primary_key'] = id
 
+    def _validate_alternate_keys(self, ids):
+        if not isinstance(ids, list) or not all(self._validate_datatype(id) for id in ids):
+            raise ValueError(
+                "'alternate_keys' must be a list of strings or a list of tuples of strings."
+            )
+        
+        invalid_ids = {}
+        for id in ids:
+            if isinstance(id, str) and id not in self._columns:
+                invalid_ids.add(id)
+            elif isinstance(id, tuple):
+                invalid_ids.update({invalid_id for invalid_id in id if invalid_id in self._columns})
+
+        if invalid_ids:
+            raise ValueError(
+                f'Unknown alternate key values {invalid_ids}.'
+                ' Keys should be columns that exist in the table.'
+            )
+
     def set_alternate_keys(self, ids):
         """Set the metadata alternate keys.
 
@@ -275,13 +309,25 @@ def set_alternate_keys(self, ids):
             ids (list[str], list[tuple]):
                 List of names (or tuple of names) of the alternate key columns.
         """
-        if not isinstance(ids, list) or not all(self._validate_datatype(id) for id in ids):
+        self._validate_alternate_keys(ids)
+        self._metadata['alternate_keys'] = ids
+
+    def _validate_sequence_key(self, id):
+        if not self._validate_datatype(id):
+            raise ValueError("'sequence_key' must be a string or tuple of strings.")
+
+        invalid_ids = {}
+        if isinstance(id, str) and id not in self._columns:
+            invalid_ids = {id}
+        elif isinstance(id, tuple):
+            invalid_ids = {invalid_id for invalid_id in id if invalid_id in self._columns}
+
+        if invalid_ids:
             raise ValueError(
-                "'alternate_keys' must be a list of strings or a list of tuples of strings."
+                f'Unknown sequence key values {invalid_ids}.'
+                ' Keys should be columns that exist in the table.'
             )
 
-        self._metadata['alternate_keys'] = ids
-
     def set_sequence_key(self, id):
         """Set the metadata sequence key.
 
@@ -289,8 +335,7 @@ def set_sequence_key(self, id):
             id (str, tuple):
                 Name (or tuple of names) of the sequence key column(s).
         """
-        if not self._validate_datatype(id):
-            raise ValueError("'sequence_key' must be a string or tuple of strings.")
+        self._validate_sequence_key(id)
 
         if self._metadata['sequence_key'] is not None:
             warnings.warn(
@@ -300,6 +345,16 @@ def set_sequence_key(self, id):
 
         self._metadata['sequence_key'] = id
 
+    def _validate_sequence_index(self, column_name):
+        if not isinstance(column_name, str):
+            raise ValueError("'sequence_index' must be a string.")
+
+        if column_name not in self._columns:
+            raise ValueError(
+                f'Unknown sequence key value {column_name}.'
+                ' Keys should be columns that exist in the table.'
+            )
+
     def set_sequence_index(self, column_name):
         """Set the metadata sequence index.
 
@@ -307,11 +362,37 @@ def set_sequence_index(self, column_name):
             column_name (str):
                 Name of the sequence index column.
         """
-        if not isinstance(column_name, str):
-            raise ValueError("'sequence_index' must be a string.")
-
+        self._validate_sequence_index(column_name)
         self._metadata['sequence_index'] = column_name
 
+    def _validate_sequence_key_not_sequence_index(self):
+        """Check that ``_sequence_index`` and ``_sequence_key`` don't overlap."""
+        if (
+            (isinstance(self._sequence_key, tuple) and self._sequence_index in self._sequence_key)
+            or self._sequence_index == self._sequence_key
+        ):
+            raise ValueError(
+                f'sequence_index and sequence_key have the same value {self._sequence_index}.'
+                ' These columns must be different.'
+            )
+
+    def validate(self):
+        """Validate the metadata.
+        
+        Raises:
+            - ``InvalidMetadataError`` if the metadata is invalid.
+        """
+        # Validate keys
+        self._validate_primary_key(self._primary_key)
+        self._validate_alternate_keys(self._alternate_keys)
+        self._validate_sequence_key(self._sequence_key)
+        self._validate_sequence_index(self._sequence_index)
+        self._validate_sequence_key_not_sequence_index()
+
+        # Validate constraints
+
+        # Validate columns
+
     def to_dict(self):
         """Return a python ``dict`` representation of the ``SingleTableMetadata``."""
         metadata = {}

From b6da51431294714c5c31150b61ee6012c57663c4 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Thu, 28 Jul 2022 13:29:54 -0700
Subject: [PATCH 02/18] Combined errors implementation

---
 sdv/metadata/single_table.py             | 124 +++++++++++------------
 tests/unit/metadata/test_single_table.py |  16 ++-
 2 files changed, 75 insertions(+), 65 deletions(-)

diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index a7fe4a670..80a768c9a 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 
 import pandas as pd
+from sdv.constraints.errors import MultipleConstraintsErrors
 
 from sdv.constraints import Constraint
 from sdv.metadata.errors import MetadataError
@@ -107,6 +108,10 @@ def _validate_text(column_name, **kwargs):
     def __init__(self):
         self._columns = {}
         self._constraints = []
+        self._primary_key = None
+        self._alternate_keys = []
+        self._sequence_key = None
+        self._sequence_index = None
         self._version = self.SCHEMA_VERSION
         self._metadata = {
             'columns': self._columns,
@@ -250,22 +255,23 @@ def _validate_datatype(id):
         """Check whether id is a string or a tuple of strings."""
         return isinstance(id, str) or isinstance(id, tuple) and all(isinstance(i, str) for i in id)
 
-    def _validate_primary_key(self, id):
+    def _validate_key(self, id, key_type):
+        """Validate the primary and sequence keys."""
+        errors = []
         if not self._validate_datatype(id):
-            raise ValueError("'primary_key' must be a string or tuple of strings.")
-
-        invalid_ids = {}
-        if isinstance(id, str) and id not in self._columns:
-            invalid_ids = {id}
-        elif isinstance(id, tuple):
-            invalid_ids = {invalid_id for invalid_id in id if invalid_id in self._columns}
+            errors.append(f"'{key_type}_key' must be a string or tuple of strings.")
 
+        keys = {id} if isinstance(id, str) else set(id)
+        invalid_ids = keys - set(self._columns)
         if invalid_ids:
-            raise ValueError(
-                f'Unknown primary key values {invalid_ids}.'
+            errors.append(
+                f'Unknown {key_type} key values {invalid_ids}.'
                 ' Keys should be columns that exist in the table.'
             )
 
+        if errors:
+            raise MultipleConstraintsErrors(errors) 
+
     def set_primary_key(self, id):
         """Set the metadata primary key.
 
@@ -273,7 +279,7 @@ def set_primary_key(self, id):
             id (str, tuple):
                 Name (or tuple of names) of the primary key column(s).
         """
-        self._validate_primary_key(id)
+        self._validate_key(id, 'primary')
 
         if self._metadata['primary_key'] is not None:
             warnings.warn(
@@ -283,25 +289,44 @@ def set_primary_key(self, id):
 
         self._metadata['primary_key'] = id
 
+    def set_sequence_key(self, id):
+        """Set the metadata sequence key.
+
+        Args:
+            id (str, tuple):
+                Name (or tuple of names) of the sequence key column(s).
+        """
+        self._validate_key(id, 'sequence')
+
+        if self._metadata['sequence_key'] is not None:
+            warnings.warn(
+                f"There is an existing sequence key {self._metadata['sequence_key']}."
+                ' This key will be removed.'
+            )
+
+        self._metadata['sequence_key'] = id
+
     def _validate_alternate_keys(self, ids):
+        errors = []
         if not isinstance(ids, list) or not all(self._validate_datatype(id) for id in ids):
-            raise ValueError(
+            errors.append(
                 "'alternate_keys' must be a list of strings or a list of tuples of strings."
             )
-        
-        invalid_ids = {}
+
+        keys = set()
         for id in ids:
-            if isinstance(id, str) and id not in self._columns:
-                invalid_ids.add(id)
-            elif isinstance(id, tuple):
-                invalid_ids.update({invalid_id for invalid_id in id if invalid_id in self._columns})
+            keys.update({id} if isinstance(id, str) else set(id))
 
+        invalid_ids = keys - set(self._columns)
         if invalid_ids:
-            raise ValueError(
+            errors.append(
                 f'Unknown alternate key values {invalid_ids}.'
                 ' Keys should be columns that exist in the table.'
             )
 
+        if errors:
+            raise MultipleConstraintsErrors(errors)
+
     def set_alternate_keys(self, ids):
         """Set the metadata alternate keys.
 
@@ -312,49 +337,20 @@ def set_alternate_keys(self, ids):
         self._validate_alternate_keys(ids)
         self._metadata['alternate_keys'] = ids
 
-    def _validate_sequence_key(self, id):
-        if not self._validate_datatype(id):
-            raise ValueError("'sequence_key' must be a string or tuple of strings.")
-
-        invalid_ids = {}
-        if isinstance(id, str) and id not in self._columns:
-            invalid_ids = {id}
-        elif isinstance(id, tuple):
-            invalid_ids = {invalid_id for invalid_id in id if invalid_id in self._columns}
-
-        if invalid_ids:
-            raise ValueError(
-                f'Unknown sequence key values {invalid_ids}.'
-                ' Keys should be columns that exist in the table.'
-            )
-
-    def set_sequence_key(self, id):
-        """Set the metadata sequence key.
-
-        Args:
-            id (str, tuple):
-                Name (or tuple of names) of the sequence key column(s).
-        """
-        self._validate_sequence_key(id)
-
-        if self._metadata['sequence_key'] is not None:
-            warnings.warn(
-                f"There is an existing sequence key {self._metadata['sequence_key']}."
-                ' This key will be removed.'
-            )
-
-        self._metadata['sequence_key'] = id
-
     def _validate_sequence_index(self, column_name):
+        errors = []
         if not isinstance(column_name, str):
-            raise ValueError("'sequence_index' must be a string.")
+            errors.append("'sequence_index' must be a string.")
 
         if column_name not in self._columns:
-            raise ValueError(
+            errors.append(
                 f'Unknown sequence key value {column_name}.'
                 ' Keys should be columns that exist in the table.'
             )
 
+        if errors:
+            raise MultipleConstraintsErrors(errors)
+
     def set_sequence_index(self, column_name):
         """Set the metadata sequence index.
 
@@ -365,12 +361,11 @@ def set_sequence_index(self, column_name):
         self._validate_sequence_index(column_name)
         self._metadata['sequence_index'] = column_name
 
-    def _validate_sequence_key_not_sequence_index(self):
+    def _validate_sequence_index_not_in_sequence_key(self):
         """Check that ``_sequence_index`` and ``_sequence_key`` don't overlap."""
-        if (
-            (isinstance(self._sequence_key, tuple) and self._sequence_index in self._sequence_key)
-            or self._sequence_index == self._sequence_key
-        ):
+        sk = self._sequence_key
+        sequence_key = {sk} if isinstance(sk, str) else set(sk)
+        if self._sequence_index in sequence_key:
             raise ValueError(
                 f'sequence_index and sequence_key have the same value {self._sequence_index}.'
                 ' These columns must be different.'
@@ -383,15 +378,20 @@ def validate(self):
             - ``InvalidMetadataError`` if the metadata is invalid.
         """
         # Validate keys
-        self._validate_primary_key(self._primary_key)
+        self._validate_key(self._primary_key, 'primary')
         self._validate_alternate_keys(self._alternate_keys)
-        self._validate_sequence_key(self._sequence_key)
+        self._validate_key(self._sequence_key, 'sequence')
         self._validate_sequence_index(self._sequence_index)
-        self._validate_sequence_key_not_sequence_index()
+        self._validate_sequence_index_not_in_sequence_key()
 
         # Validate constraints
+        for constraint, args in self._constraints.items():
+            constraint._validate_metadata(self, **args)
 
         # Validate columns
+        for column, kwargs in self._columns:
+            sdtype = kwargs.get('sdtype')
+            self._validate_column(column, sdtype, **kwargs)
 
     def to_dict(self):
         """Return a python ``dict`` representation of the ``SingleTableMetadata``."""
diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index 221bc4252..40588ddfb 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -12,6 +12,8 @@
 
 from sdv.metadata.errors import MetadataError
 from sdv.metadata.single_table import SingleTableMetadata
+from sdv.constraints.errors import MultipleConstraintsErrors
+
 
 
 class TestSingleTableMetadata:
@@ -863,6 +865,9 @@ def test__validate_dataype_invalid_tuple(self):
     def test_set_primary_key_validation(self):
         """Test that ``set_primary_key`` crashes for invalid arguments.
 
+        Setup:
+            - A ``SingleTableMetadata`` instance with ``_columns`` set.
+
         Input:
             - A tuple with non-string values.
 
@@ -871,11 +876,16 @@ def test_set_primary_key_validation(self):
         """
         # Setup
         instance = SingleTableMetadata()
+        instance._columns = {'a', 'd'}
 
-        err_msg = "'primary_key' must be a string or tuple of strings."
+        err_msg = (
+            "'primary_key' must be a string or tuple of strings."
+            "\n\nUnknown primary key values {'b', 'c'}."
+            ' Keys should be columns that exist in the table.'
+        )
         # Run / Assert
-        with pytest.raises(ValueError, match=err_msg):
-            instance.set_primary_key(('1', 2, '3'))
+        with pytest.raises(MultipleConstraintsErrors, match=err_msg):
+            instance.set_primary_key(('a', 'b', 'd', 'c'))
 
     def test_set_primary_key(self):
         """Test that ``set_primary_key`` sets the ``_metadata['primary_key']`` value."""

From d0db252270249cc961322e69e71949643e7c5a61 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Fri, 29 Jul 2022 09:18:06 -0700
Subject: [PATCH 03/18] Finish constraints

---
 sdv/metadata/single_table.py             |  28 ++--
 tests/unit/metadata/test_single_table.py | 165 +++++++++++++++++++----
 2 files changed, 147 insertions(+), 46 deletions(-)

diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index 80a768c9a..4496ff91f 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -257,21 +257,17 @@ def _validate_datatype(id):
 
     def _validate_key(self, id, key_type):
         """Validate the primary and sequence keys."""
-        errors = []
         if not self._validate_datatype(id):
-            errors.append(f"'{key_type}_key' must be a string or tuple of strings.")
+            raise ValueError(f"'{key_type}_key' must be a string or tuple of strings.")
 
         keys = {id} if isinstance(id, str) else set(id)
         invalid_ids = keys - set(self._columns)
         if invalid_ids:
-            errors.append(
+            raise ValueError(
                 f'Unknown {key_type} key values {invalid_ids}.'
                 ' Keys should be columns that exist in the table.'
             )
 
-        if errors:
-            raise MultipleConstraintsErrors(errors) 
-
     def set_primary_key(self, id):
         """Set the metadata primary key.
 
@@ -307,9 +303,8 @@ def set_sequence_key(self, id):
         self._metadata['sequence_key'] = id
 
     def _validate_alternate_keys(self, ids):
-        errors = []
         if not isinstance(ids, list) or not all(self._validate_datatype(id) for id in ids):
-            errors.append(
+            raise ValueError(
                 "'alternate_keys' must be a list of strings or a list of tuples of strings."
             )
 
@@ -319,14 +314,11 @@ def _validate_alternate_keys(self, ids):
 
         invalid_ids = keys - set(self._columns)
         if invalid_ids:
-            errors.append(
+            raise ValueError(
                 f'Unknown alternate key values {invalid_ids}.'
                 ' Keys should be columns that exist in the table.'
             )
 
-        if errors:
-            raise MultipleConstraintsErrors(errors)
-
     def set_alternate_keys(self, ids):
         """Set the metadata alternate keys.
 
@@ -338,19 +330,16 @@ def set_alternate_keys(self, ids):
         self._metadata['alternate_keys'] = ids
 
     def _validate_sequence_index(self, column_name):
-        errors = []
         if not isinstance(column_name, str):
-            errors.append("'sequence_index' must be a string.")
+            raise ValueError("'sequence_index' must be a string.")
 
         if column_name not in self._columns:
-            errors.append(
+            column_name = {column_name}
+            raise ValueError(
                 f'Unknown sequence key value {column_name}.'
                 ' Keys should be columns that exist in the table.'
             )
 
-        if errors:
-            raise MultipleConstraintsErrors(errors)
-
     def set_sequence_index(self, column_name):
         """Set the metadata sequence index.
 
@@ -366,8 +355,9 @@ def _validate_sequence_index_not_in_sequence_key(self):
         sk = self._sequence_key
         sequence_key = {sk} if isinstance(sk, str) else set(sk)
         if self._sequence_index in sequence_key:
+            index = {self._sequence_index}
             raise ValueError(
-                f'sequence_index and sequence_key have the same value {self._sequence_index}.'
+                f"'sequence_index' and 'sequence_key' have the same value {index}."
                 ' These columns must be different.'
             )
 
diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index 40588ddfb..4d86e0db0 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -12,8 +12,6 @@
 
 from sdv.metadata.errors import MetadataError
 from sdv.metadata.single_table import SingleTableMetadata
-from sdv.constraints.errors import MultipleConstraintsErrors
-
 
 
 class TestSingleTableMetadata:
@@ -862,14 +860,33 @@ def test__validate_dataype_invalid_tuple(self):
         # Assert
         assert out is False
 
-    def test_set_primary_key_validation(self):
+    def test_set_primary_key_validation_dtype(self):
+        """Test that ``set_primary_key`` crashes for invalid arguments.
+
+        Input:
+            - A tuple with non-string values.
+
+        Side Effect:
+            - A ``ValueError`` should be raised.
+        """
+        # Setup
+        instance = SingleTableMetadata()
+
+        err_msg = (
+            "'primary_key' must be a string or tuple of strings."
+        )
+        # Run / Assert
+        with pytest.raises(ValueError, match=err_msg):
+            instance.set_primary_key(('1', 2, '3'))
+
+    def test_set_primary_key_validation_columns(self):
         """Test that ``set_primary_key`` crashes for invalid arguments.
 
         Setup:
             - A ``SingleTableMetadata`` instance with ``_columns`` set.
 
         Input:
-            - A tuple with non-string values.
+            - A tuple with columns not present in ``_columns``.
 
         Side Effect:
             - A ``ValueError`` should be raised.
@@ -879,18 +896,18 @@ def test_set_primary_key_validation(self):
         instance._columns = {'a', 'd'}
 
         err_msg = (
-            "'primary_key' must be a string or tuple of strings."
-            "\n\nUnknown primary key values {'b', 'c'}."
+            "Unknown primary key values {'b', 'c'}."
             ' Keys should be columns that exist in the table.'
         )
         # Run / Assert
-        with pytest.raises(MultipleConstraintsErrors, match=err_msg):
+        with pytest.raises(ValueError, match=err_msg):
             instance.set_primary_key(('a', 'b', 'd', 'c'))
 
     def test_set_primary_key(self):
         """Test that ``set_primary_key`` sets the ``_metadata['primary_key']`` value."""
         # Setup
         instance = SingleTableMetadata()
+        instance._columns = {'column'}
 
         # Run
         instance.set_primary_key('column')
@@ -902,6 +919,7 @@ def test_set_primary_key_tuple(self):
         """Test that ``set_primary_key`` sets the ``_metadata['primary_key']`` value for tuples."""
         # Setup
         instance = SingleTableMetadata()
+        instance._columns = {'column1', 'column2'}
 
         # Run
         instance.set_primary_key(('column1', 'column2'))
@@ -924,6 +942,7 @@ def test_set_primary_key_warning(self, warning_mock):
         """
         # Setup
         instance = SingleTableMetadata()
+        instance._columns = {'column1'}
         instance._metadata['primary_key'] = 'column0'
 
         # Run
@@ -934,11 +953,11 @@ def test_set_primary_key_warning(self, warning_mock):
         assert warning_mock.warn.called_once_with(warning_msg)
         assert instance._metadata['primary_key'] == 'column1'
 
-    def test_set_alternate_keys_validation(self):
-        """Test that ``set_alternate_keys`` crashes for invalid arguments.
+    def test_set_sequence_key_validation_dtype(self):
+        """Test that ``set_sequence_key`` crashes for invalid arguments.
 
         Input:
-            - A list with tuples with non-string values.
+            - A tuple with non-string values.
 
         Side Effect:
             - A ``ValueError`` should be raised.
@@ -946,43 +965,40 @@ def test_set_alternate_keys_validation(self):
         # Setup
         instance = SingleTableMetadata()
 
-        err_msg = "'alternate_keys' must be a list of strings or a list of tuples of strings."
+        err_msg = "'sequence_key' must be a string or tuple of strings."
         # Run / Assert
         with pytest.raises(ValueError, match=err_msg):
-            instance.set_alternate_keys(['col1', ('1', 2, '3'), 'col3'])
-
-    def test_set_alternate_keys(self):
-        """Test that ``set_alternate_keys`` sets the ``_metadata['alternate_keys']`` value."""
-        # Setup
-        instance = SingleTableMetadata()
-
-        # Run
-        instance.set_alternate_keys(['column1', ('column2', 'column3')])
-
-        # Assert
-        assert instance._metadata['alternate_keys'] == ['column1', ('column2', 'column3')]
+            instance.set_sequence_key(('1', 2, '3'))
 
-    def test_set_sequence_key_validation(self):
+    def test_set_sequence_key_validation_columns(self):
         """Test that ``set_sequence_key`` crashes for invalid arguments.
 
+        Setup:
+            - A ``SingleTableMetadata`` instance with ``_columns`` set.
+
         Input:
-            - A tuple with non-string values.
+            - A tuple with columns not present in ``_columns``.
 
         Side Effect:
             - A ``ValueError`` should be raised.
         """
         # Setup
         instance = SingleTableMetadata()
+        instance._columns = {'a', 'd'}
 
-        err_msg = "'sequence_key' must be a string or tuple of strings."
+        err_msg = (
+            "Unknown sequence key values {'b', 'c'}."
+            ' Keys should be columns that exist in the table.'
+        )
         # Run / Assert
         with pytest.raises(ValueError, match=err_msg):
-            instance.set_sequence_key(('1', 2, '3'))
+            instance.set_sequence_key(('a', 'b', 'd', 'c'))
 
     def test_set_sequence_key(self):
         """Test that ``set_sequence_key`` sets the ``_metadata['sequence_key']`` value."""
         # Setup
         instance = SingleTableMetadata()
+        instance._columns = {'column'}
 
         # Run
         instance.set_sequence_key('column')
@@ -994,6 +1010,7 @@ def test_set_sequence_key_tuple(self):
         """Test that ``set_sequence_key`` sets ``_metadata['sequence_key']`` for tuples."""
         # Setup
         instance = SingleTableMetadata()
+        instance._columns = {'column1', 'column2'}
 
         # Run
         instance.set_sequence_key(('column1', 'column2'))
@@ -1016,6 +1033,7 @@ def test_set_sequence_key_warning(self, warning_mock):
         """
         # Setup
         instance = SingleTableMetadata()
+        instance._columns = {'column1'}
         instance._metadata['sequence_key'] = 'column0'
 
         # Run
@@ -1026,6 +1044,59 @@ def test_set_sequence_key_warning(self, warning_mock):
         assert warning_mock.warn.called_once_with(warning_msg)
         assert instance._metadata['sequence_key'] == 'column1'
 
+    def test_set_alternate_keys_validation_dtype(self):
+        """Test that ``set_alternate_keys`` crashes for invalid arguments.
+
+        Input:
+            - A list with tuples with non-string values.
+
+        Side Effect:
+            - A ``ValueError`` should be raised.
+        """
+        # Setup
+        instance = SingleTableMetadata()
+
+        err_msg = "'alternate_keys' must be a list of strings or a list of tuples of strings."
+        # Run / Assert
+        with pytest.raises(ValueError, match=err_msg):
+            instance.set_alternate_keys(['col1', ('1', 2, '3'), 'col3'])
+
+    def test_set_alternate_keys_validation_columns(self):
+        """Test that ``set_alternate_keys`` crashes for invalid arguments.
+
+        Setup:
+            - A ``SingleTableMetadata`` instance with ``_columns`` set.
+
+        Input:
+            - A tuple with non-string values.
+
+        Side Effect:
+            - A ``ValueError`` should be raised.
+        """
+        # Setup
+        instance = SingleTableMetadata()
+        instance._columns = {'abc', '213', '312'}
+
+        err_msg = (
+            "Unknown alternate key values {'bca', '123'}."
+            ' Keys should be columns that exist in the table.'
+        )
+        # Run / Assert
+        with pytest.raises(ValueError, match=err_msg):
+            instance.set_alternate_keys(['abc', ('123', '213', '312'), 'bca'])
+
+    def test_set_alternate_keys(self):
+        """Test that ``set_alternate_keys`` sets the ``_metadata['alternate_keys']`` value."""
+        # Setup
+        instance = SingleTableMetadata()
+        instance._columns = {'column1', 'column2', 'column3'}
+
+        # Run
+        instance.set_alternate_keys(['column1', ('column2', 'column3')])
+
+        # Assert
+        assert instance._metadata['alternate_keys'] == ['column1', ('column2', 'column3')]
+
     def test_set_sequence_index_validation(self):
         """Test that ``set_sequence_index`` crashes for invalid arguments.
 
@@ -1043,16 +1114,56 @@ def test_set_sequence_index_validation(self):
         with pytest.raises(ValueError, match=err_msg):
             instance.set_sequence_index(('column1', 'column2'))
 
+    def test_set_sequence_index_validation_columns(self):
+        """Test that ``set_sequence_index`` crashes for invalid arguments.
+
+        Setup:
+            - A ``SingleTableMetadata`` instance with ``_columns`` set.
+
+        Input:
+            - A string not present in ``_columns``.
+
+        Side Effect:
+            - A ``ValueError`` should be raised.
+        """
+        # Setup
+        instance = SingleTableMetadata()
+        instance._columns = {'a', 'd'}
+
+        err_msg = (
+            "Unknown sequence key value {'column'}."
+            ' Keys should be columns that exist in the table.'
+        )
+        # Run / Assert
+        with pytest.raises(ValueError, match=err_msg):
+            instance.set_sequence_index('column')
+
     def test_set_sequence_index(self):
         """Test that ``set_sequence_index`` sets the ``_metadata['sequence_index']`` value."""
         # Setup
         instance = SingleTableMetadata()
+        instance._columns = {'column'}
 
         # Run
         instance.set_sequence_index('column')
 
         # Assert
         assert instance._metadata['sequence_index'] == 'column'
+    
+    def test_validate_sequence_index_not_in_sequence_key(self):
+        """Test the ``_validate_sequence_index_not_in_sequence_key`` method."""
+        # Setup
+        instance = SingleTableMetadata()
+        instance._sequence_key = ('abc', 'def')
+        instance._sequence_index = 'abc'
+
+        err_msg = (
+            "'sequence_index' and 'sequence_key' have the same value {'abc'}."
+            ' These columns must be different.'
+        )
+        # Run / Assert
+        with pytest.raises(ValueError, match=err_msg):
+            instance._validate_sequence_index_not_in_sequence_key()
 
     def test_to_dict(self):
         """Test the ``to_dict`` method from ``SingleTableMetadata``.

From 3d9a5f7c576f8ee50573f3eda9c1df3b928cd93d Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Mon, 1 Aug 2022 19:05:04 -0700
Subject: [PATCH 04/18] Add silly constraint solution

---
 sdv/metadata/single_table.py             | 16 +++++++++++-----
 tests/unit/metadata/test_single_table.py | 21 +++++++++++++++++++++
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index 4496ff91f..81211abc3 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -8,11 +8,12 @@
 from pathlib import Path
 
 import pandas as pd
-from sdv.constraints.errors import MultipleConstraintsErrors
 
 from sdv.constraints import Constraint
 from sdv.metadata.errors import MetadataError
 
+import inspect
+
 
 class SingleTableMetadata:
     """Single Table Metadata class."""
@@ -354,7 +355,7 @@ def _validate_sequence_index_not_in_sequence_key(self):
         """Check that ``_sequence_index`` and ``_sequence_key`` don't overlap."""
         sk = self._sequence_key
         sequence_key = {sk} if isinstance(sk, str) else set(sk)
-        if self._sequence_index in sequence_key:
+        if self._sequence_index in sequence_key or sk is None:
             index = {self._sequence_index}
             raise ValueError(
                 f"'sequence_index' and 'sequence_key' have the same value {index}."
@@ -363,7 +364,7 @@ def _validate_sequence_index_not_in_sequence_key(self):
 
     def validate(self):
         """Validate the metadata.
-        
+
         Raises:
             - ``InvalidMetadataError`` if the metadata is invalid.
         """
@@ -375,8 +376,13 @@ def validate(self):
         self._validate_sequence_index_not_in_sequence_key()
 
         # Validate constraints
-        for constraint, args in self._constraints.items():
-            constraint._validate_metadata(self, **args)
+        for constraint in self._constraints:
+            params = inspect.signature(constraint.__init__).parameters.keys()
+            params = ['_' + arg for arg in params]
+            kwargs = {arg[1:]: constraint.arg for arg in params}
+            constraint._validate_metadata(self, **kwargs)
+
+        assert 1 == 2
 
         # Validate columns
         for column, kwargs in self._columns:
diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index 4d86e0db0..63dac1e82 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -1164,6 +1164,27 @@ def test_validate_sequence_index_not_in_sequence_key(self):
         # Run / Assert
         with pytest.raises(ValueError, match=err_msg):
             instance._validate_sequence_index_not_in_sequence_key()
+    
+    def test_validate(self):
+        """Test ``validate``."""
+        # Setup
+        instance = SingleTableMetadata()
+        instance._columns = {'col1': {'sdtype': 'numerical'}, 'col2': {'sdtype': 'numerical'}}
+        instance._constraints = []
+        instance._primary_key = 'col1'
+        instance._alternate_keys = ['col2']
+        instance._sequence_key = 'col1'
+        instance._sequence_index = 'col2'
+
+        instance.add_constraint(
+            constraint_name='Inequality',
+            low_column_name='col1',
+            high_column_name='col2'
+        )
+
+        # Run
+        instance.validate()
+
 
     def test_to_dict(self):
         """Test the ``to_dict`` method from ``SingleTableMetadata``.

From 46117007c31a11cd0bb8da88f0d2481e957f7e03 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Tue, 2 Aug 2022 10:50:08 -0700
Subject: [PATCH 05/18] Add try/catches for validation + add unit and
 integration tests

---
 sdv/metadata/errors.py                        |  4 ++
 sdv/metadata/single_table.py                  | 70 +++++++++++-------
 .../integration/metadata/test_single_table.py | 72 +++++++++++++++++++
 tests/unit/metadata/test_single_table.py      | 51 ++++++++-----
 4 files changed, 154 insertions(+), 43 deletions(-)

diff --git a/sdv/metadata/errors.py b/sdv/metadata/errors.py
index 1ba8f3bd6..ceb7e1aa8 100644
--- a/sdv/metadata/errors.py
+++ b/sdv/metadata/errors.py
@@ -5,5 +5,9 @@ class MetadataError(Exception):
     """Error to raise when Metadata is not valid."""
 
 
+class InvalidMetadataError(Exception):
+    """Error to raise when Metadata is not valid."""
+
+
 class MetadataNotFittedError(MetadataError):
     """Error to raise when Metadata is used before fitting."""
diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index 81211abc3..325d20af3 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -10,9 +10,7 @@
 import pandas as pd
 
 from sdv.constraints import Constraint
-from sdv.metadata.errors import MetadataError
-
-import inspect
+from sdv.metadata.errors import InvalidMetadataError, MetadataError
 
 
 class SingleTableMetadata:
@@ -54,13 +52,16 @@ def _validate_numerical(self, column_name, **kwargs):
         representation = kwargs.get('representation')
         if representation and representation not in self._NUMERICAL_REPRESENTATIONS:
             raise ValueError(
-                f"Invalid value for 'representation' {representation} for column '{column_name}'.")
+                f"Invalid value for 'representation' '{representation}'"
+                f" for column '{column_name}'."
+            )
 
     @staticmethod
     def _validate_datetime(column_name, **kwargs):
         datetime_format = kwargs.get('datetime_format')
-        if datetime_format:
+        if datetime_format is not None:
             try:
+                # NOTE: I don't know if this ever crashes, it just returns the string as is
                 formated_date = datetime.now().strftime(datetime_format)
             except Exception as exception:
                 raise ValueError(
@@ -79,12 +80,12 @@ def _validate_datetime(column_name, **kwargs):
     def _validate_categorical(column_name, **kwargs):
         order = kwargs.get('order')
         order_by = kwargs.get('order_by')
-        if order and order_by:
+        if order is not None and order_by is not None:
             raise ValueError(
                 f"Categorical column '{column_name}' has both an 'order' and 'order_by' "
                 'attribute. Only 1 is allowed.'
             )
-        if order_by and order_by not in ('numerical_value', 'alphabetical'):
+        if order_by is not None and order_by not in ('numerical_value', 'alphabetical'):
             raise ValueError(
                 f"Unknown ordering method '{order_by}' provided for categorical column "
                 f"'{column_name}'. Ordering method must be 'numerical_value' or 'alphabetical'."
@@ -284,7 +285,7 @@ def set_primary_key(self, id):
                 ' This key will be removed.'
             )
 
-        self._metadata['primary_key'] = id
+        self._primary_key = id
 
     def set_sequence_key(self, id):
         """Set the metadata sequence key.
@@ -301,7 +302,7 @@ def set_sequence_key(self, id):
                 ' This key will be removed.'
             )
 
-        self._metadata['sequence_key'] = id
+        self._sequence_key = id
 
     def _validate_alternate_keys(self, ids):
         if not isinstance(ids, list) or not all(self._validate_datatype(id) for id in ids):
@@ -349,7 +350,7 @@ def set_sequence_index(self, column_name):
                 Name of the sequence index column.
         """
         self._validate_sequence_index(column_name)
-        self._metadata['sequence_index'] = column_name
+        self._sequence_index = column_name
 
     def _validate_sequence_index_not_in_sequence_key(self):
         """Check that ``_sequence_index`` and ``_sequence_key`` don't overlap."""
@@ -369,25 +370,44 @@ def validate(self):
             - ``InvalidMetadataError`` if the metadata is invalid.
         """
         # Validate keys
-        self._validate_key(self._primary_key, 'primary')
-        self._validate_alternate_keys(self._alternate_keys)
-        self._validate_key(self._sequence_key, 'sequence')
-        self._validate_sequence_index(self._sequence_index)
-        self._validate_sequence_index_not_in_sequence_key()
+        errors = []
+        try:
+            self._validate_key(self._primary_key, 'primary')
+        except ValueError as e:
+            errors.append(e)
 
-        # Validate constraints
-        for constraint in self._constraints:
-            params = inspect.signature(constraint.__init__).parameters.keys()
-            params = ['_' + arg for arg in params]
-            kwargs = {arg[1:]: constraint.arg for arg in params}
-            constraint._validate_metadata(self, **kwargs)
+        try:
+            self._validate_key(self._sequence_key, 'sequence')
+        except ValueError as e:
+            errors.append(e)
 
-        assert 1 == 2
+        try:
+            self._validate_alternate_keys(self._alternate_keys)
+        except ValueError as e:
+            errors.append(e)
+
+        try:
+            self._validate_sequence_index(self._sequence_index)
+        except ValueError as e:
+            errors.append(e)
+
+        try:
+            self._validate_sequence_index_not_in_sequence_key()
+        except ValueError as e:
+            errors.append(e)
 
         # Validate columns
-        for column, kwargs in self._columns:
-            sdtype = kwargs.get('sdtype')
-            self._validate_column(column, sdtype, **kwargs)
+        for column, kwargs in self._columns.items():
+            try:
+                self._validate_column(column, **kwargs)
+            except ValueError as e:
+                errors.append(e)
+
+        if errors:
+            raise InvalidMetadataError(
+                'The following errors were found in the metadata:\n\n'
+                + '\n'.join([str(e) for e in errors])
+            )
 
     def to_dict(self):
         """Return a python ``dict`` representation of the ``SingleTableMetadata``."""
diff --git a/tests/integration/metadata/test_single_table.py b/tests/integration/metadata/test_single_table.py
index af015b2a1..1be7b6bd4 100644
--- a/tests/integration/metadata/test_single_table.py
+++ b/tests/integration/metadata/test_single_table.py
@@ -1,6 +1,11 @@
 """Integration tests for Single Table Metadata."""
 
+import re
+
+import pytest
+
 from sdv.metadata import SingleTableMetadata
+from sdv.metadata.errors import InvalidMetadataError
 
 
 def test_single_table_metadata():
@@ -28,3 +33,70 @@ def test_single_table_metadata():
         'constraints': [],
         'SCHEMA_VERSION': 'SINGLE_TABLE_V1'
     }
+
+
+def test_validate():
+    """Test ``SingleTableMetadata.validate``.
+
+    Ensure the method doesn't crash for a valid metadata.
+    """
+    # Setup
+    instance = SingleTableMetadata()
+    instance.add_column('col1', sdtype='numerical')
+    instance.add_column('col2', sdtype='numerical')
+    instance.add_constraint(
+        constraint_name='Inequality',
+        low_column_name='col1',
+        high_column_name='col2'
+    )
+    instance.set_primary_key('col1')
+    instance.set_alternate_keys([('col1', 'col2')])
+    instance.set_sequence_index('col1')
+    instance.set_sequence_key('col2')
+
+    # Run
+    instance.validate()
+
+
+def test_validate_errors():
+    """Test Test ``SingleTableMetadata.validate`` raises the correct errors."""
+    # Setup
+    instance = SingleTableMetadata()
+    instance._columns = {
+        'col1': {'sdtype': 'numerical'},
+        'col2': {'sdtype': 'numerical'},
+        'col4': {'sdtype': 'categorical', 'invalid1': 'value'},
+        'col5': {'sdtype': 'categorical', 'order': ''},
+        'col6': {'sdtype': 'categorical', 'order_by': ''},
+        'col7': {'sdtype': 'categorical', 'order': '', 'order_by': ''},
+        'col8': {'sdtype': 'numerical', 'representation': 'value'},
+        'col9': {'sdtype': 'datetime', 'datetime_format': '%1-%Y-%m-%d-%'},
+        'col10': {'sdtype': 'text', 'regex_format': '[A-{6}'},
+    }
+    instance._primary_key = 10
+    instance._alternate_keys = 'col1'
+    instance._sequence_key = ('col3', 'col1')
+    instance._sequence_index = 'col3'
+
+    err_msg = re.escape(
+        'The following errors were found in the metadata:'
+        "\n\n'primary_key' must be a string or tuple of strings."
+        "\nUnknown sequence key values {'col3'}. Keys should be columns that exist in the table."
+        "\n'alternate_keys' must be a list of strings or a list of tuples of strings."
+        "\nUnknown sequence key value {'col3'}. Keys should be columns that exist in the table."
+        "\n'sequence_index' and 'sequence_key' have the same value {'col3'}."
+        ' These columns must be different.'
+        "\nInvalid values '(invalid1)' for categorical column 'col4'."
+        "\nInvalid order value provided for categorical column 'col5'."
+        " The 'order' must be a list with 1 or more elements."
+        "\nUnknown ordering method '' provided for categorical column 'col6'."
+        " Ordering method must be 'numerical_value' or 'alphabetical'."
+        "\nCategorical column 'col7' has both an 'order' and 'order_by' attribute."
+        ' Only 1 is allowed.'
+        "\nInvalid value for 'representation' 'value' for column 'col8'."
+        "\nInvalid datetime format string '%1-%Y-%m-%d-%' for datetime column 'col9'."
+        "\nInvalid regex format string '[A-{6}' for text column 'col10'."
+    )
+    # Run / Assert
+    with pytest.raises(InvalidMetadataError, match=err_msg):
+        instance.validate()
diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index 63dac1e82..2d9e66c0d 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -89,7 +89,7 @@ def test__validate_numerical_default_and_invalid(self):
         # Run / Assert
         instance._validate_numerical('age')
 
-        error_msg = re.escape("Invalid value for 'representation' 36 for column 'age'.")
+        error_msg = re.escape("Invalid value for 'representation' '36' for column 'age'.")
         with pytest.raises(ValueError, match=error_msg):
             instance._validate_numerical('age', representation=36)
 
@@ -913,7 +913,7 @@ def test_set_primary_key(self):
         instance.set_primary_key('column')
 
         # Assert
-        assert instance._metadata['primary_key'] == 'column'
+        assert instance._primary_key == 'column'
 
     def test_set_primary_key_tuple(self):
         """Test that ``set_primary_key`` sets the ``_metadata['primary_key']`` value for tuples."""
@@ -925,7 +925,7 @@ def test_set_primary_key_tuple(self):
         instance.set_primary_key(('column1', 'column2'))
 
         # Assert
-        assert instance._metadata['primary_key'] == ('column1', 'column2')
+        assert instance._primary_key == ('column1', 'column2')
 
     @patch('sdv.tabular.utils.warnings')
     def test_set_primary_key_warning(self, warning_mock):
@@ -943,7 +943,7 @@ def test_set_primary_key_warning(self, warning_mock):
         # Setup
         instance = SingleTableMetadata()
         instance._columns = {'column1'}
-        instance._metadata['primary_key'] = 'column0'
+        instance._primary_key = 'column0'
 
         # Run
         instance.set_primary_key('column1')
@@ -951,7 +951,7 @@ def test_set_primary_key_warning(self, warning_mock):
         # Assert
         warning_msg = "There is an existing primary key 'column0'. This key will be removed."
         assert warning_mock.warn.called_once_with(warning_msg)
-        assert instance._metadata['primary_key'] == 'column1'
+        assert instance._primary_key == 'column1'
 
     def test_set_sequence_key_validation_dtype(self):
         """Test that ``set_sequence_key`` crashes for invalid arguments.
@@ -1004,7 +1004,7 @@ def test_set_sequence_key(self):
         instance.set_sequence_key('column')
 
         # Assert
-        assert instance._metadata['sequence_key'] == 'column'
+        assert instance._sequence_key == 'column'
 
     def test_set_sequence_key_tuple(self):
         """Test that ``set_sequence_key`` sets ``_metadata['sequence_key']`` for tuples."""
@@ -1016,7 +1016,7 @@ def test_set_sequence_key_tuple(self):
         instance.set_sequence_key(('column1', 'column2'))
 
         # Assert
-        assert instance._metadata['sequence_key'] == ('column1', 'column2')
+        assert instance._sequence_key == ('column1', 'column2')
 
     @patch('sdv.tabular.utils.warnings')
     def test_set_sequence_key_warning(self, warning_mock):
@@ -1042,7 +1042,7 @@ def test_set_sequence_key_warning(self, warning_mock):
         # Assert
         warning_msg = "There is an existing sequence key 'column0'. This key will be removed."
         assert warning_mock.warn.called_once_with(warning_msg)
-        assert instance._metadata['sequence_key'] == 'column1'
+        assert instance._sequence_key == 'column1'
 
     def test_set_alternate_keys_validation_dtype(self):
         """Test that ``set_alternate_keys`` crashes for invalid arguments.
@@ -1148,8 +1148,8 @@ def test_set_sequence_index(self):
         instance.set_sequence_index('column')
 
         # Assert
-        assert instance._metadata['sequence_index'] == 'column'
-    
+        assert instance._sequence_index == 'column'
+
     def test_validate_sequence_index_not_in_sequence_key(self):
         """Test the ``_validate_sequence_index_not_in_sequence_key`` method."""
         # Setup
@@ -1164,9 +1164,19 @@ def test_validate_sequence_index_not_in_sequence_key(self):
         # Run / Assert
         with pytest.raises(ValueError, match=err_msg):
             instance._validate_sequence_index_not_in_sequence_key()
-    
+
     def test_validate(self):
-        """Test ``validate``."""
+        """Test the ``validate`` method.
+
+        Ensure the method calls the correct methods with the correct parameters.
+
+        Setup:
+            - A ``SingleTableMetadata`` instance with:
+                - ``_columns``, ``_constraints``, ``_primary_key``, ``_alternate_keys``,
+                  ``_sequence_key`` and ``_sequence_index`` defined.
+                - ``_validate_key``, ``_validate_alternate_keys``, ``_validate_sequence_index``
+                  and ``_validate_sequence_index_not_in_sequence_key`` mocked.
+        """
         # Setup
         instance = SingleTableMetadata()
         instance._columns = {'col1': {'sdtype': 'numerical'}, 'col2': {'sdtype': 'numerical'}}
@@ -1175,16 +1185,21 @@ def test_validate(self):
         instance._alternate_keys = ['col2']
         instance._sequence_key = 'col1'
         instance._sequence_index = 'col2'
-
-        instance.add_constraint(
-            constraint_name='Inequality',
-            low_column_name='col1',
-            high_column_name='col2'
-        )
+        instance._validate_key = Mock()
+        instance._validate_alternate_keys = Mock()
+        instance._validate_sequence_index = Mock()
+        instance._validate_sequence_index_not_in_sequence_key = Mock()
 
         # Run
         instance.validate()
 
+        # Assert
+        instance._validate_key.assert_has_calls(
+            [call(instance._primary_key, 'primary'), call(instance._sequence_key, 'sequence')]
+        )
+        instance._validate_alternate_keys.assert_called_once_with(instance._alternate_keys)
+        instance._validate_sequence_index.assert_called_once_with(instance._sequence_index)
+        instance._validate_sequence_index_not_in_sequence_key.assert_called_once()
 
     def test_to_dict(self):
         """Test the ``to_dict`` method from ``SingleTableMetadata``.

From 96a1c0edb8acf21ba91e44afd8f3964ac8fc82f8 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Tue, 2 Aug 2022 12:42:48 -0700
Subject: [PATCH 06/18] Change MetadataError to InvalidMetadataError

---
 sdv/metadata/__init__.py                 |  4 ++--
 sdv/metadata/dataset.py                  | 10 +++++-----
 sdv/metadata/errors.py                   |  6 +-----
 sdv/metadata/single_table.py             |  4 ++--
 sdv/metadata/table.py                    |  4 ++--
 tests/unit/metadata/test_single_table.py |  6 +++---
 6 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/sdv/metadata/__init__.py b/sdv/metadata/__init__.py
index f6f0e34df..27a6ff0f2 100644
--- a/sdv/metadata/__init__.py
+++ b/sdv/metadata/__init__.py
@@ -2,14 +2,14 @@
 
 from sdv.metadata import visualization
 from sdv.metadata.dataset import Metadata
-from sdv.metadata.errors import MetadataError, MetadataNotFittedError
+from sdv.metadata.errors import InvalidMetadataError, MetadataNotFittedError
 from sdv.metadata.multi_table import MultiTableMetadata
 from sdv.metadata.single_table import SingleTableMetadata
 from sdv.metadata.table import Table
 
 __all__ = (
     'Metadata',
-    'MetadataError',
+    'InvalidMetadataError',
     'MetadataNotFittedError',
     'MultiTableMetadata',
     'SingleTableMetadata',
diff --git a/sdv/metadata/dataset.py b/sdv/metadata/dataset.py
index bd77b95a5..3e6bb4dbb 100644
--- a/sdv/metadata/dataset.py
+++ b/sdv/metadata/dataset.py
@@ -12,7 +12,7 @@
 
 from sdv.constraints import Constraint
 from sdv.metadata import visualization
-from sdv.metadata.errors import MetadataError
+from sdv.metadata.errors import InvalidMetadataError
 
 LOGGER = logging.getLogger(__name__)
 
@@ -551,7 +551,7 @@ def _validate_table(self, table_name, table_meta, table_data=None, errors=None):
                 on the metadata.
 
         Raises:
-            MetadataError:
+            InvalidMetadataError:
                 If there is any error in the metadata or the data does not
                 match the metadata description.
         """
@@ -635,7 +635,7 @@ def validate(self, tables=None):
         """
         tables_meta = self._metadata.get('tables')
         if not tables_meta:
-            raise MetadataError('"tables" entry not found in Metadata.')
+            raise InvalidMetadataError('"tables" entry not found in Metadata.')
 
         if tables and not isinstance(tables, dict):
             tables = self.load_tables()
@@ -654,7 +654,7 @@ def validate(self, tables=None):
             self._validate_circular_relationships(table_name, errors=errors)
 
         if errors:
-            raise MetadataError('Invalid Metadata specification:\n - ' + '\n - '.join(errors))
+            raise InvalidMetadataError('Invalid Metadata specification:\n - ' + '\n - '.join(errors))
 
     def _check_field(self, table, field, exists=False):
         """Validate the existance of the table and existance (or not) of field."""
@@ -832,7 +832,7 @@ def add_relationship(self, parent, child, foreign_key=None, validate=True):
         if validate:
             try:
                 self.validate()
-            except MetadataError:
+            except InvalidMetadataError:
                 self._metadata = metadata_backup
                 raise
 
diff --git a/sdv/metadata/errors.py b/sdv/metadata/errors.py
index ceb7e1aa8..08f62ca8a 100644
--- a/sdv/metadata/errors.py
+++ b/sdv/metadata/errors.py
@@ -1,13 +1,9 @@
 """Metadata Exceptions."""
 
 
-class MetadataError(Exception):
-    """Error to raise when Metadata is not valid."""
-
-
 class InvalidMetadataError(Exception):
     """Error to raise when Metadata is not valid."""
 
 
-class MetadataNotFittedError(MetadataError):
+class MetadataNotFittedError(InvalidMetadataError):
     """Error to raise when Metadata is used before fitting."""
diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index 325d20af3..f1d7cd963 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -10,7 +10,7 @@
 import pandas as pd
 
 from sdv.constraints import Constraint
-from sdv.metadata.errors import InvalidMetadataError, MetadataError
+from sdv.metadata.errors import InvalidMetadataError
 
 
 class SingleTableMetadata:
@@ -457,7 +457,7 @@ def add_constraint(self, constraint_name, **kwargs):
         try:
             constraint_class = Constraint._get_class_from_dict(constraint_name)
         except KeyError:
-            raise MetadataError(f"Invalid constraint ('{constraint_name}').")
+            raise InvalidMetadataError(f"Invalid constraint ('{constraint_name}').")
 
         constraint_class._validate_metadata(self, **kwargs)
         constraint = constraint_class(**kwargs)
diff --git a/sdv/metadata/table.py b/sdv/metadata/table.py
index 6ef4751d2..aa67d1156 100644
--- a/sdv/metadata/table.py
+++ b/sdv/metadata/table.py
@@ -13,7 +13,7 @@
 from sdv.constraints import Constraint
 from sdv.constraints.errors import (
     FunctionError, MissingConstraintColumnError, MultipleConstraintsErrors)
-from sdv.metadata.errors import MetadataError, MetadataNotFittedError
+from sdv.metadata.errors import InvalidMetadataError, MetadataNotFittedError
 from sdv.metadata.utils import strings_from_regex
 
 LOGGER = logging.getLogger(__name__)
@@ -306,7 +306,7 @@ def _get_field_dtype(self, field_name, field_metadata):
         field_subtype = field_metadata.get('subtype')
         dtype = self._TYPES_TO_DTYPES.get((field_type, field_subtype))
         if not dtype:
-            raise MetadataError(
+            raise InvalidMetadataError(
                 'Invalid type and subtype combination for field {}: ({}, {})'.format(
                     field_name, field_type, field_subtype)
             )
diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index 2d9e66c0d..59621e4b3 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -10,7 +10,7 @@
 import pandas as pd
 import pytest
 
-from sdv.metadata.errors import MetadataError
+from sdv.metadata.errors import InvalidMetadataError
 from sdv.metadata.single_table import SingleTableMetadata
 
 
@@ -1549,12 +1549,12 @@ def test_add_constraint_bad_constraint(self):
             - Fakse constraint name
 
         Side effect:
-            - MetadataError should be raised
+            - InvalidMetadataError should be raised
         """
         # Setup
         metadata = SingleTableMetadata()
 
         # Run
         error_message = re.escape("Invalid constraint ('fake_constraint').")
-        with pytest.raises(MetadataError, match=error_message):
+        with pytest.raises(InvalidMetadataError, match=error_message):
             metadata.add_constraint(constraint_name='fake_constraint')

From a54f3570b199c822d86eddd27425fe6b5dc72e40 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Tue, 2 Aug 2022 12:52:24 -0700
Subject: [PATCH 07/18] Simplify tests to one element + fix lint

---
 sdv/metadata/dataset.py                  |  3 ++-
 sdv/metadata/single_table.py             |  1 -
 tests/unit/metadata/test_single_table.py | 15 +++++++++------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/sdv/metadata/dataset.py b/sdv/metadata/dataset.py
index 3e6bb4dbb..b9cacc14a 100644
--- a/sdv/metadata/dataset.py
+++ b/sdv/metadata/dataset.py
@@ -654,7 +654,8 @@ def validate(self, tables=None):
             self._validate_circular_relationships(table_name, errors=errors)
 
         if errors:
-            raise InvalidMetadataError('Invalid Metadata specification:\n - ' + '\n - '.join(errors))
+            raise InvalidMetadataError(
+                'Invalid Metadata specification:\n - ' + '\n - '.join(errors))
 
     def _check_field(self, table, field, exists=False):
         """Validate the existance of the table and existance (or not) of field."""
diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index f1d7cd963..f5bf4ea14 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -61,7 +61,6 @@ def _validate_datetime(column_name, **kwargs):
         datetime_format = kwargs.get('datetime_format')
         if datetime_format is not None:
             try:
-                # NOTE: I don't know if this ever crashes, it just returns the string as is
                 formated_date = datetime.now().strftime(datetime_format)
             except Exception as exception:
                 raise ValueError(
diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index 59621e4b3..800e0df2b 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -896,12 +896,13 @@ def test_set_primary_key_validation_columns(self):
         instance._columns = {'a', 'd'}
 
         err_msg = (
-            "Unknown primary key values {'b', 'c'}."
+            "Unknown primary key values {'b'}."
             ' Keys should be columns that exist in the table.'
         )
         # Run / Assert
         with pytest.raises(ValueError, match=err_msg):
-            instance.set_primary_key(('a', 'b', 'd', 'c'))
+            instance.set_primary_key(('a', 'b', 'd'))
+            # NOTE: used to be ('a', 'b', 'd', 'c')
 
     def test_set_primary_key(self):
         """Test that ``set_primary_key`` sets the ``_metadata['primary_key']`` value."""
@@ -987,12 +988,13 @@ def test_set_sequence_key_validation_columns(self):
         instance._columns = {'a', 'd'}
 
         err_msg = (
-            "Unknown sequence key values {'b', 'c'}."
+            "Unknown sequence key values {'b'}."
             ' Keys should be columns that exist in the table.'
         )
         # Run / Assert
         with pytest.raises(ValueError, match=err_msg):
-            instance.set_sequence_key(('a', 'b', 'd', 'c'))
+            instance.set_sequence_key(('a', 'b', 'd'))
+            # NOTE: used to be ('a', 'b', 'd', 'c')
 
     def test_set_sequence_key(self):
         """Test that ``set_sequence_key`` sets the ``_metadata['sequence_key']`` value."""
@@ -1078,12 +1080,13 @@ def test_set_alternate_keys_validation_columns(self):
         instance._columns = {'abc', '213', '312'}
 
         err_msg = (
-            "Unknown alternate key values {'bca', '123'}."
+            "Unknown alternate key values {'123'}."
             ' Keys should be columns that exist in the table.'
         )
         # Run / Assert
         with pytest.raises(ValueError, match=err_msg):
-            instance.set_alternate_keys(['abc', ('123', '213', '312'), 'bca'])
+            instance.set_alternate_keys(['abc', ('123', '213', '312')])
+            # NOTE: used to be ['abc', ('123', '213', '312'), 'bca']
 
     def test_set_alternate_keys(self):
         """Test that ``set_alternate_keys`` sets the ``_metadata['alternate_keys']`` value."""

From 9097c5e7814970ff6e9cd13e12619fa63f2b9ea2 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Tue, 2 Aug 2022 13:47:40 -0700
Subject: [PATCH 08/18] Remove _metadata

---
 sdv/metadata/single_table.py                  | 39 +++++-----
 .../integration/metadata/test_single_table.py | 13 +---
 tests/unit/metadata/test_single_table.py      | 76 ++++++++-----------
 3 files changed, 57 insertions(+), 71 deletions(-)

diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index f5bf4ea14..c8cce7c7e 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -114,15 +114,6 @@ def __init__(self):
         self._sequence_key = None
         self._sequence_index = None
         self._version = self.SCHEMA_VERSION
-        self._metadata = {
-            'columns': self._columns,
-            'primary_key': None,
-            'alternate_keys': [],
-            'sequence_key': None,
-            'sequence_index': None,
-            'constraints': self._constraints,
-            'SCHEMA_VERSION': self.SCHEMA_VERSION
-        }
 
     def _validate_unexpected_kwargs(self, column_name, sdtype, **kwargs):
         expected_kwargs = self._EXPECTED_KWARGS.get(sdtype, ['pii'])
@@ -278,9 +269,9 @@ def set_primary_key(self, id):
         """
         self._validate_key(id, 'primary')
 
-        if self._metadata['primary_key'] is not None:
+        if self._primary_key is not None:
             warnings.warn(
-                f"There is an existing primary key {self._metadata['primary_key']}."
+                f"There is an existing primary key {self._primary_key}."
                 ' This key will be removed.'
             )
 
@@ -295,9 +286,9 @@ def set_sequence_key(self, id):
         """
         self._validate_key(id, 'sequence')
 
-        if self._metadata['sequence_key'] is not None:
+        if self._sequence_key is not None:
             warnings.warn(
-                f"There is an existing sequence key {self._metadata['sequence_key']}."
+                f"There is an existing sequence key {self._sequence_key}."
                 ' This key will be removed.'
             )
 
@@ -328,7 +319,7 @@ def set_alternate_keys(self, ids):
                 List of names (or tuple of names) of the alternate key columns.
         """
         self._validate_alternate_keys(ids)
-        self._metadata['alternate_keys'] = ids
+        self._alternate_keys = ids
 
     def _validate_sequence_index(self, column_name):
         if not isinstance(column_name, str):
@@ -408,10 +399,21 @@ def validate(self):
                 + '\n'.join([str(e) for e in errors])
             )
 
+    def _metadata_dict(self):
+        return {
+            'columns': self._columns,
+            'constraints': self._constraints,
+            'primary_key': self._primary_key,
+            'alternate_keys': self._alternate_keys,
+            'sequence_key': self._sequence_key,
+            'sequence_index': self._sequence_index,
+            'SCHEMA_VERSION': self._version
+        }
+
     def to_dict(self):
         """Return a python ``dict`` representation of the ``SingleTableMetadata``."""
         metadata = {}
-        for key, value in self._metadata.items():
+        for key, value in self._metadata_dict().items():
             if key == 'constraints' and value:
                 constraints = []
                 for constraint in value:
@@ -427,8 +429,8 @@ def to_dict(self):
 
         return deepcopy(metadata)
 
-    def _set_metadata_dict(self, metadata):
-        """Set the ``metadata`` dictionary to the current instance.
+    def _set_metadata_attributes(self, metadata):
+        """Set the metadata attributes to the current instance.
 
         Args:
             metadata (dict):
@@ -440,7 +442,6 @@ def _set_metadata_dict(self, metadata):
                 value = [Constraint.from_dict(constraint_dict) for constraint_dict in value]
 
             if value:
-                self._metadata[key] = value
                 setattr(self, f'_{key}', value)
 
     def add_constraint(self, constraint_name, **kwargs):
@@ -474,7 +475,7 @@ def _load_from_dict(cls, metadata):
             Instance of ``SingleTableMetadata``.
         """
         instance = cls()
-        instance._set_metadata_dict(metadata)
+        instance._set_metadata_attributes(metadata)
         return instance
 
     @classmethod
diff --git a/tests/integration/metadata/test_single_table.py b/tests/integration/metadata/test_single_table.py
index 1be7b6bd4..291cee176 100644
--- a/tests/integration/metadata/test_single_table.py
+++ b/tests/integration/metadata/test_single_table.py
@@ -24,15 +24,10 @@ def test_single_table_metadata():
     assert instance._columns == {}
     assert instance._constraints == []
     assert instance._version == 'SINGLE_TABLE_V1'
-    assert instance._metadata == {
-        'columns': {},
-        'primary_key': None,
-        'alternate_keys': [],
-        'sequence_key': None,
-        'sequence_index': None,
-        'constraints': [],
-        'SCHEMA_VERSION': 'SINGLE_TABLE_V1'
-    }
+    assert instance._primary_key is None
+    assert instance._sequence_key is None
+    assert instance._alternate_keys == []
+    assert instance._sequence_index is None
 
 
 def test_validate():
diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index 800e0df2b..2b6df1126 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -55,17 +55,12 @@ def test___init__(self):
 
         # Assert
         assert instance._columns == {}
+        assert instance._primary_key is None
+        assert instance._sequence_key is None
+        assert instance._alternate_keys == []
+        assert instance._sequence_index is None
         assert instance._constraints == []
         assert instance._version == 'SINGLE_TABLE_V1'
-        assert instance._metadata == {
-            'columns': {},
-            'primary_key': None,
-            'alternate_keys': [],
-            'sequence_key': None,
-            'sequence_index': None,
-            'constraints': [],
-            'SCHEMA_VERSION': 'SINGLE_TABLE_V1'
-        }
 
     def test__validate_numerical_default_and_invalid(self):
         """Test the ``_validate_numerical`` method.
@@ -905,7 +900,7 @@ def test_set_primary_key_validation_columns(self):
             # NOTE: used to be ('a', 'b', 'd', 'c')
 
     def test_set_primary_key(self):
-        """Test that ``set_primary_key`` sets the ``_metadata['primary_key']`` value."""
+        """Test that ``set_primary_key`` sets the ``_primary_key`` value."""
         # Setup
         instance = SingleTableMetadata()
         instance._columns = {'column'}
@@ -917,7 +912,7 @@ def test_set_primary_key(self):
         assert instance._primary_key == 'column'
 
     def test_set_primary_key_tuple(self):
-        """Test that ``set_primary_key`` sets the ``_metadata['primary_key']`` value for tuples."""
+        """Test that ``set_primary_key`` sets the ``_primary_key`` value for tuples."""
         # Setup
         instance = SingleTableMetadata()
         instance._columns = {'column1', 'column2'}
@@ -933,7 +928,7 @@ def test_set_primary_key_warning(self, warning_mock):
         """Test that ``set_primary_key`` raises a warning when a primary key already exists.
 
         Setup:
-            - An instance of ``SingleTableMetadata`` with ``_metadata['primary_key']`` set.
+            - An instance of ``SingleTableMetadata`` with ``_primary_key`` set.
 
         Input:
             - String.
@@ -997,7 +992,7 @@ def test_set_sequence_key_validation_columns(self):
             # NOTE: used to be ('a', 'b', 'd', 'c')
 
     def test_set_sequence_key(self):
-        """Test that ``set_sequence_key`` sets the ``_metadata['sequence_key']`` value."""
+        """Test that ``set_sequence_key`` sets the ``_sequence_key`` value."""
         # Setup
         instance = SingleTableMetadata()
         instance._columns = {'column'}
@@ -1009,7 +1004,7 @@ def test_set_sequence_key(self):
         assert instance._sequence_key == 'column'
 
     def test_set_sequence_key_tuple(self):
-        """Test that ``set_sequence_key`` sets ``_metadata['sequence_key']`` for tuples."""
+        """Test that ``set_sequence_key`` sets ``_sequence_key`` for tuples."""
         # Setup
         instance = SingleTableMetadata()
         instance._columns = {'column1', 'column2'}
@@ -1025,7 +1020,7 @@ def test_set_sequence_key_warning(self, warning_mock):
         """Test that ``set_sequence_key`` raises a warning when a sequence key already exists.
 
         Setup:
-            - An instance of ``SingleTableMetadata`` with ``_metadata['sequence_key']`` set.
+            - An instance of ``SingleTableMetadata`` with ``_sequence_key`` set.
 
         Input:
             - String.
@@ -1036,7 +1031,7 @@ def test_set_sequence_key_warning(self, warning_mock):
         # Setup
         instance = SingleTableMetadata()
         instance._columns = {'column1'}
-        instance._metadata['sequence_key'] = 'column0'
+        instance._sequence_key = 'column0'
 
         # Run
         instance.set_sequence_key('column1')
@@ -1089,7 +1084,7 @@ def test_set_alternate_keys_validation_columns(self):
             # NOTE: used to be ['abc', ('123', '213', '312'), 'bca']
 
     def test_set_alternate_keys(self):
-        """Test that ``set_alternate_keys`` sets the ``_metadata['alternate_keys']`` value."""
+        """Test that ``set_alternate_keys`` sets the ``_alternate_keys`` value."""
         # Setup
         instance = SingleTableMetadata()
         instance._columns = {'column1', 'column2', 'column3'}
@@ -1098,7 +1093,7 @@ def test_set_alternate_keys(self):
         instance.set_alternate_keys(['column1', ('column2', 'column3')])
 
         # Assert
-        assert instance._metadata['alternate_keys'] == ['column1', ('column2', 'column3')]
+        assert instance._alternate_keys == ['column1', ('column2', 'column3')]
 
     def test_set_sequence_index_validation(self):
         """Test that ``set_sequence_index`` crashes for invalid arguments.
@@ -1142,7 +1137,7 @@ def test_set_sequence_index_validation_columns(self):
             instance.set_sequence_index('column')
 
     def test_set_sequence_index(self):
-        """Test that ``set_sequence_index`` sets the ``_metadata['sequence_index']`` value."""
+        """Test that ``set_sequence_index`` sets the ``_sequence_index`` value."""
         # Setup
         instance = SingleTableMetadata()
         instance._columns = {'column'}
@@ -1242,8 +1237,8 @@ def test_to_dict(self):
         result['columns']['my_column'] = 1
         assert instance._columns['my_column'] == 'value'
 
-    def test__set_metadata_dict(self):
-        """Test the ``_set_metadata_dict`` to a instance.
+    def test__set_metadata_attributes(self):
+        """Test the ``_set_metadata_attributes`` to a instance.
 
         Setup:
             - Instance of ``SingleTableMetadata``.
@@ -1260,20 +1255,16 @@ def test__set_metadata_dict(self):
         }
 
         # Run
-        instance._set_metadata_dict(metadata)
+        instance._set_metadata_attributes(metadata)
 
         # Assert
-        assert instance._metadata == {
-            'columns': {'my_column': 'value'},
-            'primary_key': None,
-            'alternate_keys': [],
-            'sequence_key': None,
-            'sequence_index': None,
-            'constraints': [],
-            'SCHEMA_VERSION': 'SINGLE_TABLE_V1'
-        }
-
         assert instance._columns == {'my_column': 'value'}
+        assert instance._primary_key is None
+        assert instance._sequence_key is None
+        assert instance._alternate_keys == []
+        assert instance._sequence_index is None
+        assert instance._constraints == []
+        assert instance._version == 'SINGLE_TABLE_V1'
 
     def test__load_from_dict(self):
         """Test that ``_load_from_dict`` returns a instance with the ``dict`` updated objects."""
@@ -1292,9 +1283,13 @@ def test__load_from_dict(self):
         instance = SingleTableMetadata._load_from_dict(my_metadata)
 
         # Assert
-        assert instance._metadata == my_metadata
         assert instance._columns == {'my_column': 'value'}
+        assert instance._primary_key == 'pk'
+        assert instance._sequence_key is None
+        assert instance._alternate_keys == []
+        assert instance._sequence_index is None
         assert instance._constraints == []
+        assert instance._version == 'SINGLE_TABLE_V1'
 
     @patch('sdv.metadata.single_table.Path')
     def test_load_from_json_path_does_not_exist(self, mock_path):
@@ -1408,18 +1403,13 @@ def test_load_from_json(self, mock_json, mock_path, mock_open, mock_constraint):
         instance = SingleTableMetadata.load_from_json('filepath.json')
 
         # Assert
-        expected_metadata = {
-            'columns': {'animals': {'type': 'categorical'}},
-            'primary_key': 'animals',
-            'alternate_keys': [],
-            'sequence_key': None,
-            'sequence_index': None,
-            'constraints': [{'my_constraint': 'my_params'}],
-            'SCHEMA_VERSION': 'SINGLE_TABLE_V1'
-        }
         assert instance._columns == {'animals': {'type': 'categorical'}}
+        assert instance._primary_key == 'animals'
+        assert instance._sequence_key is None
+        assert instance._alternate_keys == []
+        assert instance._sequence_index is None
         assert instance._constraints == [{'my_constraint': 'my_params'}]
-        assert instance._metadata == expected_metadata
+        assert instance._version == 'SINGLE_TABLE_V1'
         mock_constraint.from_dict.assert_called_once()
 
     @patch('sdv.metadata.single_table.Path')

From 544928f03ad8bbfd800b82814ec170c5d77a73e1 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Tue, 2 Aug 2022 13:57:49 -0700
Subject: [PATCH 09/18] Change way _metadata implemented in to_dict

---
 sdv/metadata/single_table.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index c8cce7c7e..635f5bdd6 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -413,7 +413,8 @@ def _metadata_dict(self):
     def to_dict(self):
         """Return a python ``dict`` representation of the ``SingleTableMetadata``."""
         metadata = {}
-        for key, value in self._metadata_dict().items():
+        for key in self._KEYS:
+            value = getattr(self, f'_{key}') if key != 'SCHEMA_VERSION' else self._version
             if key == 'constraints' and value:
                 constraints = []
                 for constraint in value:

From 73be37e60aac363450bbf4d676d8abc842066fe9 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Tue, 2 Aug 2022 15:08:22 -0700
Subject: [PATCH 10/18] Add constraint validation + integration test + change
 _constraints to be list of tuple

---
 sdv/constraints/base.py                       |  3 +-
 sdv/metadata/single_table.py                  | 44 ++++++++++---------
 .../integration/metadata/test_single_table.py |  6 +++
 tests/unit/metadata/test_single_table.py      | 16 ++++---
 4 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/sdv/constraints/base.py b/sdv/constraints/base.py
index f16facc12..459bad4cc 100644
--- a/sdv/constraints/base.py
+++ b/sdv/constraints/base.py
@@ -126,7 +126,8 @@ def _validate_inputs(cls, **kwargs):
                 f'Invalid values {invalid_vals} are present in {article} {constraint} constraint.'
             ))
 
-        raise MultipleConstraintsErrors(errors)
+        if errors:
+            raise MultipleConstraintsErrors(errors)
 
     @classmethod
     def _validate_metadata_columns(cls, metadata, **kwargs):
diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index 635f5bdd6..2f90dac60 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -353,14 +353,35 @@ def _validate_sequence_index_not_in_sequence_key(self):
                 ' These columns must be different.'
             )
 
+    def _validate_constraint(self, constraint_name, **kwargs):
+        """Validate a constraint against the single table metadata.
+
+        Args:
+            constraint_name (string):
+                Name of the constraint class.
+            **kwargs:
+                Any other arguments the constraint requires.
+        """
+        try:
+            constraint_class = Constraint._get_class_from_dict(constraint_name)
+        except KeyError:
+            raise InvalidMetadataError(f"Invalid constraint ('{constraint_name}').")
+
+        constraint_class._validate_metadata(self, **kwargs)
+
     def validate(self):
         """Validate the metadata.
 
         Raises:
             - ``InvalidMetadataError`` if the metadata is invalid.
         """
-        # Validate keys
+        # Validate constraints
         errors = []
+        for tuple in self._constraints:
+            constraint_name, kwargs = tuple
+            self._validate_constraint(constraint_name, **kwargs)
+
+        # Validate keys
         try:
             self._validate_key(self._primary_key, 'primary')
         except ValueError as e:
@@ -399,17 +420,6 @@ def validate(self):
                 + '\n'.join([str(e) for e in errors])
             )
 
-    def _metadata_dict(self):
-        return {
-            'columns': self._columns,
-            'constraints': self._constraints,
-            'primary_key': self._primary_key,
-            'alternate_keys': self._alternate_keys,
-            'sequence_key': self._sequence_key,
-            'sequence_index': self._sequence_index,
-            'SCHEMA_VERSION': self._version
-        }
-
     def to_dict(self):
         """Return a python ``dict`` representation of the ``SingleTableMetadata``."""
         metadata = {}
@@ -455,14 +465,8 @@ def add_constraint(self, constraint_name, **kwargs):
             **kwargs:
                 Any other arguments the constraint requires.
         """
-        try:
-            constraint_class = Constraint._get_class_from_dict(constraint_name)
-        except KeyError:
-            raise InvalidMetadataError(f"Invalid constraint ('{constraint_name}').")
-
-        constraint_class._validate_metadata(self, **kwargs)
-        constraint = constraint_class(**kwargs)
-        self._constraints.append(constraint)
+        self._validate_constraint(constraint_name, **kwargs)
+        self._constraints.append((constraint_name, kwargs))
 
     @classmethod
     def _load_from_dict(cls, metadata):
diff --git a/tests/integration/metadata/test_single_table.py b/tests/integration/metadata/test_single_table.py
index 291cee176..a4277b0f8 100644
--- a/tests/integration/metadata/test_single_table.py
+++ b/tests/integration/metadata/test_single_table.py
@@ -44,6 +44,12 @@ def test_validate():
         low_column_name='col1',
         high_column_name='col2'
     )
+    instance.add_constraint(
+        constraint_name='ScalarInequality',
+        column_name='col1',
+        relation='<',
+        value=10
+    )
     instance.set_primary_key('col1')
     instance.set_alternate_keys([('col1', 'col2')])
     instance.set_sequence_index('col1')
diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index 2b6df1126..cb08fbeb1 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -1495,8 +1495,8 @@ def test___repr__(self, mock_json):
     def test_add_constraint(self, constraint_mock):
         """Test the ``add_constraint`` method.
 
-        The method should create an instance of the specified constraint, run validation on the
-        constraint against the rest of the metadata and add the instance to the
+        The method should create an instance of the specified constraint, validate it
+        against the rest of the metadata and add ``{constraint_name: kwargs}`` to the
         ``self._constraints`` list.
 
         Setup:
@@ -1522,17 +1522,19 @@ def test_add_constraint(self, constraint_mock):
 
         # Assert
         constraint_mock._get_class_from_dict.assert_called_once_with('Inequality')
-        assert metadata._constraints == [dummy_constraint_class.return_value]
-        dummy_constraint_class.assert_called_once_with(
-            low_column_name='child_age',
-            high_column_name='start_date'
-        )
         dummy_constraint_class._validate_metadata.assert_called_once_with(
             metadata,
             low_column_name='child_age',
             high_column_name='start_date'
         )
 
+        assert metadata._constraints == [(
+            'Inequality', {
+                'low_column_name': 'child_age',
+                'high_column_name': 'start_date'
+            }
+        )]
+
     def test_add_constraint_bad_constraint(self):
         """Test the ``add_constraint`` method with a non-existent constraint.
 

From 82bbff0d6548b4353bb8b8b3ec61c7979fef0e46 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Tue, 2 Aug 2022 19:28:38 -0700
Subject: [PATCH 11/18] Fix error message for validate + add error integration
 test

---
 sdv/metadata/single_table.py                    |  7 ++++++-
 tests/integration/metadata/test_single_table.py | 13 ++++++++++++-
 tests/unit/constraints/test_tabular.py          |  2 +-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index 2f90dac60..9915f5116 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -10,6 +10,7 @@
 import pandas as pd
 
 from sdv.constraints import Constraint
+from sdv.constraints.errors import MultipleConstraintsErrors
 from sdv.metadata.errors import InvalidMetadataError
 
 
@@ -379,7 +380,11 @@ def validate(self):
         errors = []
         for tuple in self._constraints:
             constraint_name, kwargs = tuple
-            self._validate_constraint(constraint_name, **kwargs)
+            try:
+                self._validate_constraint(constraint_name, **kwargs)
+            except MultipleConstraintsErrors as e:
+                reformated_errors = '\n'.join(map(str, e.errors))
+                errors.append(reformated_errors)
 
         # Validate keys
         try:
diff --git a/tests/integration/metadata/test_single_table.py b/tests/integration/metadata/test_single_table.py
index a4277b0f8..931663d14 100644
--- a/tests/integration/metadata/test_single_table.py
+++ b/tests/integration/metadata/test_single_table.py
@@ -74,6 +74,10 @@ def test_validate_errors():
         'col9': {'sdtype': 'datetime', 'datetime_format': '%1-%Y-%m-%d-%'},
         'col10': {'sdtype': 'text', 'regex_format': '[A-{6}'},
     }
+    instance._constraints = [
+        ('Inequality', {'low_column_name': 'col1', 'wrong_arg': 'col2'}),
+        ('ScalarInequality', {'column_name': 'col1', 'relation': '<', 'value': 'string'})
+    ]
     instance._primary_key = 10
     instance._alternate_keys = 'col1'
     instance._sequence_key = ('col3', 'col1')
@@ -81,7 +85,14 @@ def test_validate_errors():
 
     err_msg = re.escape(
         'The following errors were found in the metadata:'
-        "\n\n'primary_key' must be a string or tuple of strings."
+        "\n\nMissing required values {'high_column_name'} in an Inequality constraint."
+        "\nInvalid values {'wrong_arg'} are present in an Inequality constraint."
+        "\nA Inequality constraint is being applied to invalid column names {None}."
+        ' The columns must exist in the table.'
+        "\nAn Inequality constraint is being applied to mismatched sdtypes [None, 'col1']."
+        ' Both columns must be either numerical or datetime.'
+        "\n'value' must be an int or float"
+        "\n'primary_key' must be a string or tuple of strings."
         "\nUnknown sequence key values {'col3'}. Keys should be columns that exist in the table."
         "\n'alternate_keys' must be a list of strings or a list of tuples of strings."
         "\nUnknown sequence key value {'col3'}. Keys should be columns that exist in the table."
diff --git a/tests/unit/constraints/test_tabular.py b/tests/unit/constraints/test_tabular.py
index a26e5734a..4cff6f574 100644
--- a/tests/unit/constraints/test_tabular.py
+++ b/tests/unit/constraints/test_tabular.py
@@ -1616,7 +1616,7 @@ def test__validate_metadata_specific_to_constraint_numerical_error(self):
         metadata._columns = {'a': {'sdtype': 'numerical'}}
 
         # Run
-        error_message = "'value' must be an int or float"
+        error_message = "'value' must be an int or float."
         with pytest.raises(ConstraintMetadataError, match=error_message):
             ScalarInequality._validate_metadata_specific_to_constraint(
                 metadata,

From 562ffd31e22dbd80d659a3f2144c3e1970a46b31 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Tue, 2 Aug 2022 20:37:02 -0700
Subject: [PATCH 12/18] Fix all constraint related error messages

---
 sdv/constraints/base.py                       | 10 ++--
 sdv/constraints/tabular.py                    | 46 +++++++++----------
 sdv/metadata/single_table.py                  |  4 +-
 .../integration/metadata/test_single_table.py |  8 ++--
 tests/unit/constraints/test_tabular.py        | 10 ++--
 tests/unit/metadata/test_single_table.py      |  2 +-
 6 files changed, 39 insertions(+), 41 deletions(-)

diff --git a/sdv/constraints/base.py b/sdv/constraints/base.py
index 459bad4cc..602a76f00 100644
--- a/sdv/constraints/base.py
+++ b/sdv/constraints/base.py
@@ -136,16 +136,16 @@ def _validate_metadata_columns(cls, metadata, **kwargs):
         else:
             column_names = kwargs.get('column_names')
 
-        missing_columns = set(column_names) - set(metadata._columns)
-
+        missing_columns = set(column_names) - set(metadata._columns) - {None}
         if missing_columns:
+            article = 'An' if cls.__name__ == 'Inequality' else 'A'
             raise ConstraintMetadataError(
-                f'A {cls.__name__} constraint is being applied to invalid column names '
+                f'{article} {cls.__name__} constraint is being applied to invalid column names '
                 f'{missing_columns}. The columns must exist in the table.'
             )
 
-    @classmethod
-    def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
+    @staticmethod
+    def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         pass
 
     @classmethod
diff --git a/sdv/constraints/tabular.py b/sdv/constraints/tabular.py
index b34ef3dfd..7379d9426 100644
--- a/sdv/constraints/tabular.py
+++ b/sdv/constraints/tabular.py
@@ -345,8 +345,8 @@ def _validate_metadata_columns(cls, metadata, **kwargs):
         kwargs['column_names'] = [kwargs.get('high_column_name'), kwargs.get('low_column_name')]
         super()._validate_metadata_columns(metadata, **kwargs)
 
-    @classmethod
-    def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
+    @staticmethod
+    def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         high = kwargs.get('high_column_name')
         low = kwargs.get('low_column_name')
         high_sdtype = metadata._columns.get(high, {}).get('sdtype')
@@ -355,8 +355,8 @@ def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
         both_numerical = high_sdtype == low_sdtype == 'numerical'
         if not (both_datetime or both_numerical):
             raise ConstraintMetadataError(
-                f'An {cls.__name__} constraint is being applied to mismatched sdtypes '
-                f'{[high, low]}. Both columns must be either numerical or datetime.'
+                f'An Inequality constraint is being applied to mismatched sdtype columns'
+                f' {[high, low]}. Both columns must be either numerical or datetime.'
             )
 
     def __init__(self, low_column_name, high_column_name, strict_boundaries=False):
@@ -504,14 +504,14 @@ def _validate_inputs(cls, **kwargs):
         if errors:
             raise MultipleConstraintsErrors(errors)
 
-    @classmethod
-    def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
+    @staticmethod
+    def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         column_name = kwargs.get('column_name')
         sdtype = metadata._columns.get(column_name, {}).get('sdtype')
         val = kwargs.get('value')
         if sdtype == 'numerical':
             if not isinstance(val, (int, float)):
-                raise ConstraintMetadataError("'value' must be an int or float")
+                raise ConstraintMetadataError("'value' must be an int or float.")
 
         elif sdtype == 'datetime':
             datetime_format = metadata._columns.get(column_name).get('datetime_format')
@@ -523,7 +523,7 @@ def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
 
         else:
             raise ConstraintMetadataError(
-                f'A {cls.__name__} constraint is being applied to mismatched sdtypes. '
+                f'A ScalarInequality constraint is being applied to mismatched sdtypes. '
                 'Numerical columns must be compared to integer or float values. '
                 'Datetimes column must be compared to datetime strings.'
             )
@@ -672,13 +672,13 @@ class Positive(ScalarInequality):
             zero ``>`` or include it ``>=``.
     """
 
-    @classmethod
-    def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
+    @staticmethod
+    def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         column_name = kwargs.get('column_name')
         sdtype = metadata._columns.get(column_name, {}).get('sdtype')
         if sdtype != 'numerical':
             raise ConstraintMetadataError(
-                f'A {cls.__name__} constraint is being applied to an invalid column '
+                f'A Positive constraint is being applied to an invalid column '
                 f"'{column_name}'. This constraint is only defined for numerical columns."
             )
 
@@ -700,13 +700,13 @@ class Negative(ScalarInequality):
             zero ``<`` or include it ``<=``.
     """
 
-    @classmethod
-    def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
+    @staticmethod
+    def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         column_name = kwargs.get('column_name')
         sdtype = metadata._columns.get(column_name, {}).get('sdtype')
         if sdtype != 'numerical':
             raise ConstraintMetadataError(
-                f'A {cls.__name__} constraint is being applied to an invalid column '
+                f'A Negative constraint is being applied to an invalid column '
                 f"'{column_name}'. This constraint is only defined for numerical columns."
             )
 
@@ -742,8 +742,8 @@ def _validate_metadata_columns(cls, metadata, **kwargs):
         kwargs['column_names'] = [high, low, middle]
         super()._validate_metadata_columns(metadata, **kwargs)
 
-    @classmethod
-    def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
+    @staticmethod
+    def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         high = kwargs.get('high_column_name')
         low = kwargs.get('low_column_name')
         middle = kwargs.get('middle_column_name')
@@ -754,7 +754,7 @@ def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
         all_numerical = high_sdtype == low_sdtype == middle_sdtype == 'numerical'
         if not (all_datetime or all_numerical):
             raise ConstraintMetadataError(
-                f'A {cls.__name__} constraint is being applied to mismatched sdtypes '
+                f'A Range constraint is being applied to mismatched sdtype columns '
                 f'{[high, middle, low]}. All columns must be either numerical or datetime.'
             )
 
@@ -926,12 +926,12 @@ def _validate_init_inputs(low_value, high_value):
                 'represents a datetime.'
             )
 
-    @classmethod
-    def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
+    @staticmethod
+    def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         column_name = kwargs.get('column_name')
         if column_name not in metadata._columns:
             raise ConstraintMetadataError(
-                f'A {cls.__name__} constraint is being applied to invalid column names '
+                f'A ScalarRange constraint is being applied to invalid column names '
                 f'({column_name}). The columns must exist in the table.'
             )
         sdtype = metadata._columns.get(column_name).get('sdtype')
@@ -955,7 +955,7 @@ def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
 
         else:
             raise ConstraintMetadataError(
-                f'A {cls.__name__} constraint is being applied to mismatched sdtypes. '
+                f'A ScalarRange constraint is being applied to mismatched sdtypes. '
                 'Numerical columns must be compared to integer or float values. '
                 'Datetimes column must be compared to datetime strings.'
             )
@@ -1263,8 +1263,8 @@ def __init__(self, column_names):
         self.column_names = column_names
         self.constraint_columns = tuple(self.column_names)
 
-    @classmethod
-    def _validate_metadata_specific_to_constraint(cls, metadata, **kwargs):
+    @staticmethod
+    def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         column_names = kwargs.get('column_names')
         keys = set()
         if isinstance(metadata._primary_key, tuple):
diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index 9915f5116..7e43d00c3 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -10,8 +10,8 @@
 import pandas as pd
 
 from sdv.constraints import Constraint
-from sdv.constraints.errors import MultipleConstraintsErrors
 from sdv.metadata.errors import InvalidMetadataError
+from sdv.constraints.errors import MultipleConstraintsErrors
 
 
 class SingleTableMetadata:
@@ -329,7 +329,7 @@ def _validate_sequence_index(self, column_name):
         if column_name not in self._columns:
             column_name = {column_name}
             raise ValueError(
-                f'Unknown sequence key value {column_name}.'
+                f'Unknown sequence index value {column_name}.'
                 ' Keys should be columns that exist in the table.'
             )
 
diff --git a/tests/integration/metadata/test_single_table.py b/tests/integration/metadata/test_single_table.py
index 931663d14..4eeac88bd 100644
--- a/tests/integration/metadata/test_single_table.py
+++ b/tests/integration/metadata/test_single_table.py
@@ -87,15 +87,13 @@ def test_validate_errors():
         'The following errors were found in the metadata:'
         "\n\nMissing required values {'high_column_name'} in an Inequality constraint."
         "\nInvalid values {'wrong_arg'} are present in an Inequality constraint."
-        "\nA Inequality constraint is being applied to invalid column names {None}."
-        ' The columns must exist in the table.'
-        "\nAn Inequality constraint is being applied to mismatched sdtypes [None, 'col1']."
+        "\nAn Inequality constraint is being applied to mismatched sdtype columns [None, 'col1']."
         ' Both columns must be either numerical or datetime.'
-        "\n'value' must be an int or float"
+        "\n'value' must be an int or float."
         "\n'primary_key' must be a string or tuple of strings."
         "\nUnknown sequence key values {'col3'}. Keys should be columns that exist in the table."
         "\n'alternate_keys' must be a list of strings or a list of tuples of strings."
-        "\nUnknown sequence key value {'col3'}. Keys should be columns that exist in the table."
+        "\nUnknown sequence index value {'col3'}. Keys should be columns that exist in the table."
         "\n'sequence_index' and 'sequence_key' have the same value {'col3'}."
         ' These columns must be different.'
         "\nInvalid values '(invalid1)' for categorical column 'col4'."
diff --git a/tests/unit/constraints/test_tabular.py b/tests/unit/constraints/test_tabular.py
index 4cff6f574..ed00a215f 100644
--- a/tests/unit/constraints/test_tabular.py
+++ b/tests/unit/constraints/test_tabular.py
@@ -948,7 +948,7 @@ def test__validate_metadata_columns_raises_error(self):
 
         # Run
         error_message = re.escape(
-            'A Inequality constraint is being applied to invalid column names '
+            'An Inequality constraint is being applied to invalid column names '
             "{'c'}. The columns must exist in the table."
         )
         with pytest.raises(ConstraintMetadataError, match=error_message):
@@ -990,7 +990,7 @@ def test__validate_metadata_specific_to_constraint_datetime_error(self):
 
         # Run
         error_message = re.escape(
-            'An Inequality constraint is being applied to mismatched sdtypes '
+            'An Inequality constraint is being applied to mismatched sdtype columns '
             "['a', 'b']. Both columns must be either numerical or datetime."
         )
         with pytest.raises(ConstraintMetadataError, match=error_message):
@@ -1035,7 +1035,7 @@ def test__validate_metadata_specific_to_constraint_numerical_error(self):
 
         # Run
         error_message = re.escape(
-            'An Inequality constraint is being applied to mismatched sdtypes '
+            'An Inequality constraint is being applied to mismatched sdtype columns '
             "['a', 'b']. Both columns must be either numerical or datetime."
         )
         with pytest.raises(ConstraintMetadataError, match=error_message):
@@ -2581,7 +2581,7 @@ def test__validate_metadata_specific_to_constraint_datetime_error(self):
 
         # Run
         error_message = re.escape(
-            'A Range constraint is being applied to mismatched sdtypes '
+            'A Range constraint is being applied to mismatched sdtype columns '
             "['a', 'c', 'b']. All columns must be either numerical or datetime."
         )
         with pytest.raises(ConstraintMetadataError, match=error_message):
@@ -2637,7 +2637,7 @@ def test__validate_metadata_specific_to_constraint_numerical_error(self):
 
         # Run
         error_message = re.escape(
-            'A Range constraint is being applied to mismatched sdtypes '
+            'A Range constraint is being applied to mismatched sdtype columns '
             "['a', 'c', 'b']. All columns must be either numerical or datetime."
         )
         with pytest.raises(ConstraintMetadataError, match=error_message):
diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index cb08fbeb1..f4adc9d3f 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -1129,7 +1129,7 @@ def test_set_sequence_index_validation_columns(self):
         instance._columns = {'a', 'd'}
 
         err_msg = (
-            "Unknown sequence key value {'column'}."
+            "Unknown sequence index value {'column'}."
             ' Keys should be columns that exist in the table.'
         )
         # Run / Assert

From 4cda5b45d27bfe188946fcaec65a713433fb6c0a Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Wed, 3 Aug 2022 08:49:15 -0700
Subject: [PATCH 13/18] Add helper _append_error to validate + fix constraint
 related errors

---
 sdv/constraints/tabular.py                    | 11 ++---
 sdv/metadata/single_table.py                  | 43 ++++++-------------
 .../integration/metadata/test_single_table.py |  2 -
 3 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/sdv/constraints/tabular.py b/sdv/constraints/tabular.py
index 7379d9426..b713c49b4 100644
--- a/sdv/constraints/tabular.py
+++ b/sdv/constraints/tabular.py
@@ -353,7 +353,7 @@ def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         low_sdtype = metadata._columns.get(low, {}).get('sdtype')
         both_datetime = high_sdtype == low_sdtype == 'datetime'
         both_numerical = high_sdtype == low_sdtype == 'numerical'
-        if not (both_datetime or both_numerical):
+        if not (both_datetime or both_numerical) and not (high is None or low is None):
             raise ConstraintMetadataError(
                 f'An Inequality constraint is being applied to mismatched sdtype columns'
                 f' {[high, low]}. Both columns must be either numerical or datetime.'
@@ -518,12 +518,12 @@ def _validate_metadata_specific_to_constraint(metadata, **kwargs):
             matches_format = matches_datetime_format(val, datetime_format)
             if not matches_format:
                 raise ConstraintMetadataError(
-                    "'value' must be a datetime string of the right format"
+                    "'value' must be a datetime string of the right format."
                 )
 
         else:
             raise ConstraintMetadataError(
-                f'A ScalarInequality constraint is being applied to mismatched sdtypes. '
+                'A ScalarInequality constraint is being applied to mismatched sdtypes. '
                 'Numerical columns must be compared to integer or float values. '
                 'Datetimes column must be compared to datetime strings.'
             )
@@ -752,7 +752,8 @@ def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         middle_sdtype = metadata._columns.get(middle, {}).get('sdtype')
         all_datetime = high_sdtype == low_sdtype == middle_sdtype == 'datetime'
         all_numerical = high_sdtype == low_sdtype == middle_sdtype == 'numerical'
-        if not (all_datetime or all_numerical):
+        if not (all_datetime or all_numerical) and \
+           not (high is None or low is None or middle is None):
             raise ConstraintMetadataError(
                 f'A Range constraint is being applied to mismatched sdtype columns '
                 f'{[high, middle, low]}. All columns must be either numerical or datetime.'
@@ -955,7 +956,7 @@ def _validate_metadata_specific_to_constraint(metadata, **kwargs):
 
         else:
             raise ConstraintMetadataError(
-                f'A ScalarRange constraint is being applied to mismatched sdtypes. '
+                'A ScalarRange constraint is being applied to mismatched sdtypes. '
                 'Numerical columns must be compared to integer or float values. '
                 'Datetimes column must be compared to datetime strings.'
             )
diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index 7e43d00c3..5db66cfdf 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -10,8 +10,8 @@
 import pandas as pd
 
 from sdv.constraints import Constraint
-from sdv.metadata.errors import InvalidMetadataError
 from sdv.constraints.errors import MultipleConstraintsErrors
+from sdv.metadata.errors import InvalidMetadataError
 
 
 class SingleTableMetadata:
@@ -370,6 +370,13 @@ def _validate_constraint(self, constraint_name, **kwargs):
 
         constraint_class._validate_metadata(self, **kwargs)
 
+    def _append_error(self, errors, method, *args, **kwargs):
+        """Inplace, append the produced error to the passed ``errors`` list."""
+        try:
+            method(*args, **kwargs)
+        except ValueError as e:
+            errors.append(e)
+
     def validate(self):
         """Validate the metadata.
 
@@ -387,37 +394,15 @@ def validate(self):
                 errors.append(reformated_errors)
 
         # Validate keys
-        try:
-            self._validate_key(self._primary_key, 'primary')
-        except ValueError as e:
-            errors.append(e)
-
-        try:
-            self._validate_key(self._sequence_key, 'sequence')
-        except ValueError as e:
-            errors.append(e)
-
-        try:
-            self._validate_alternate_keys(self._alternate_keys)
-        except ValueError as e:
-            errors.append(e)
-
-        try:
-            self._validate_sequence_index(self._sequence_index)
-        except ValueError as e:
-            errors.append(e)
-
-        try:
-            self._validate_sequence_index_not_in_sequence_key()
-        except ValueError as e:
-            errors.append(e)
+        self._append_error(errors, self._validate_key, self._primary_key, 'primary')
+        self._append_error(errors, self._validate_key, self._sequence_key, 'sequence')
+        self._append_error(errors, self._validate_alternate_keys, self._alternate_keys)
+        self._append_error(errors, self._validate_sequence_index, self._sequence_index)
+        self._append_error(errors, self._validate_sequence_index_not_in_sequence_key)
 
         # Validate columns
         for column, kwargs in self._columns.items():
-            try:
-                self._validate_column(column, **kwargs)
-            except ValueError as e:
-                errors.append(e)
+            self._append_error(errors, self._validate_column, column, **kwargs)
 
         if errors:
             raise InvalidMetadataError(
diff --git a/tests/integration/metadata/test_single_table.py b/tests/integration/metadata/test_single_table.py
index 4eeac88bd..8f615865b 100644
--- a/tests/integration/metadata/test_single_table.py
+++ b/tests/integration/metadata/test_single_table.py
@@ -87,8 +87,6 @@ def test_validate_errors():
         'The following errors were found in the metadata:'
         "\n\nMissing required values {'high_column_name'} in an Inequality constraint."
         "\nInvalid values {'wrong_arg'} are present in an Inequality constraint."
-        "\nAn Inequality constraint is being applied to mismatched sdtype columns [None, 'col1']."
-        ' Both columns must be either numerical or datetime.'
         "\n'value' must be an int or float."
         "\n'primary_key' must be a string or tuple of strings."
         "\nUnknown sequence key values {'col3'}. Keys should be columns that exist in the table."

From 4a58faaf4f29617f7cc7998f6b66676549a369f6 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Wed, 3 Aug 2022 09:12:30 -0700
Subject: [PATCH 14/18] Add constraint unit test

---
 tests/unit/metadata/test_single_table.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index f4adc9d3f..e956eae85 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -1178,11 +1178,15 @@ def test_validate(self):
         # Setup
         instance = SingleTableMetadata()
         instance._columns = {'col1': {'sdtype': 'numerical'}, 'col2': {'sdtype': 'numerical'}}
-        instance._constraints = []
+        instance._constraints = [
+            ('Inequality', {'low_column_name': 'col1', 'high_column_name': 'col2'}),
+            ('ScalarInequality', {'column_name': 'col1', 'relation': '<', 'value': 10})
+        ]
         instance._primary_key = 'col1'
         instance._alternate_keys = ['col2']
         instance._sequence_key = 'col1'
         instance._sequence_index = 'col2'
+        instance._validate_constraint = Mock()
         instance._validate_key = Mock()
         instance._validate_alternate_keys = Mock()
         instance._validate_sequence_index = Mock()
@@ -1192,6 +1196,10 @@ def test_validate(self):
         instance.validate()
 
         # Assert
+        instance._validate_constraint.assert_has_calls([
+            call('Inequality', low_column_name='col1', high_column_name='col2'),
+            call('ScalarInequality', column_name='col1', relation='<', value=10)
+        ])
         instance._validate_key.assert_has_calls(
             [call(instance._primary_key, 'primary'), call(instance._sequence_key, 'sequence')]
         )

From c56355654048f9d032204518bec206a4eec26073 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Wed, 3 Aug 2022 09:36:29 -0700
Subject: [PATCH 15/18] Fix lint

---
 tests/integration/metadata/test_single_table.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/metadata/test_single_table.py b/tests/integration/metadata/test_single_table.py
index 8f615865b..e456e2886 100644
--- a/tests/integration/metadata/test_single_table.py
+++ b/tests/integration/metadata/test_single_table.py
@@ -60,7 +60,7 @@ def test_validate():
 
 
 def test_validate_errors():
-    """Test Test ``SingleTableMetadata.validate`` raises the correct errors."""
+    """Test ``SingleTableMetadata.validate`` raises the correct errors."""
     # Setup
     instance = SingleTableMetadata()
     instance._columns = {

From adfd39168e1df49ff76e4a982fbef860b7ec50ce Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Wed, 3 Aug 2022 10:14:57 -0700
Subject: [PATCH 16/18] change _metadata[pk] to pk

---
 sdv/metadata/multi_table.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdv/metadata/multi_table.py b/sdv/metadata/multi_table.py
index 5c66b1465..b0cd157a0 100644
--- a/sdv/metadata/multi_table.py
+++ b/sdv/metadata/multi_table.py
@@ -62,7 +62,7 @@ def visualize(self, show_table_details=True, show_relationship_labels=True,
                 columns = [f"{name} : {meta.get('sdtype')}" for name, meta in column_dict]
                 nodes[table_name] = {
                     'columns': r'\l'.join(columns),
-                    'primary_key': f"Primary key: {table_meta._metadata['primary_key']}"
+                    'primary_key': f'Primary key: {table_meta._primary_key}'
                 }
 
         else:
@@ -72,7 +72,7 @@ def visualize(self, show_table_details=True, show_relationship_labels=True,
             parent = relationship.get('parent_table_name')
             child = relationship.get('child_table_name')
             foreign_key = relationship.get('child_foreign_key')
-            primary_key = self._tables.get(parent)._metadata.get('primary_key')
+            primary_key = self._tables.get(parent)._primary_key
             edge_label = f'  {foreign_key} → {primary_key}' if show_relationship_labels else ''
             edges.append((parent, child, edge_label))
 

From df76a4b700feed73373c9f7e91f86474ea28e1b1 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Wed, 3 Aug 2022 11:50:41 -0700
Subject: [PATCH 17/18] Add tests requested + addresses feedback

---
 sdv/constraints/tabular.py               |  9 +++++----
 tests/unit/constraints/test_base.py      | 15 +++++++++++++++
 tests/unit/constraints/test_tabular.py   | 12 ++++++------
 tests/unit/metadata/test_single_table.py | 19 ++++++++++++++++---
 4 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/sdv/constraints/tabular.py b/sdv/constraints/tabular.py
index b713c49b4..51fd04f75 100644
--- a/sdv/constraints/tabular.py
+++ b/sdv/constraints/tabular.py
@@ -355,7 +355,7 @@ def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         both_numerical = high_sdtype == low_sdtype == 'numerical'
         if not (both_datetime or both_numerical) and not (high is None or low is None):
             raise ConstraintMetadataError(
-                f'An Inequality constraint is being applied to mismatched sdtype columns'
+                'An Inequality constraint is being applied to columns with mismatched sdtypes'
                 f' {[high, low]}. Both columns must be either numerical or datetime.'
             )
 
@@ -523,7 +523,8 @@ def _validate_metadata_specific_to_constraint(metadata, **kwargs):
 
         else:
             raise ConstraintMetadataError(
-                'A ScalarInequality constraint is being applied to mismatched sdtypes. '
+                'A ScalarInequality constraint is being applied '
+                'to columns with mismatched sdtypes. '
                 'Numerical columns must be compared to integer or float values. '
                 'Datetimes column must be compared to datetime strings.'
             )
@@ -755,7 +756,7 @@ def _validate_metadata_specific_to_constraint(metadata, **kwargs):
         if not (all_datetime or all_numerical) and \
            not (high is None or low is None or middle is None):
             raise ConstraintMetadataError(
-                f'A Range constraint is being applied to mismatched sdtype columns '
+                'A Range constraint is being applied to columns with mismatched sdtypes '
                 f'{[high, middle, low]}. All columns must be either numerical or datetime.'
             )
 
@@ -956,7 +957,7 @@ def _validate_metadata_specific_to_constraint(metadata, **kwargs):
 
         else:
             raise ConstraintMetadataError(
-                'A ScalarRange constraint is being applied to mismatched sdtypes. '
+                'A ScalarRange constraint is being applied to columns with mismatched sdtypes. '
                 'Numerical columns must be compared to integer or float values. '
                 'Datetimes column must be compared to datetime strings.'
             )
diff --git a/tests/unit/constraints/test_base.py b/tests/unit/constraints/test_base.py
index dbb1849af..5f8718604 100644
--- a/tests/unit/constraints/test_base.py
+++ b/tests/unit/constraints/test_base.py
@@ -159,6 +159,21 @@ def test_import_object_function():
 
 class TestConstraint():
 
+    def test__validate_inputs(self):
+        """Test the ``_validate_inputs`` method.
+
+        The method should only raise errors if the input paramaters are invalid.
+
+        Raise:
+            - ``MultipleConstraintsErrors`` if errors were found.
+        """
+        # Run
+        Constraint._validate_inputs(args='value', kwargs='value')
+
+        # Run / Assert
+        with pytest.raises(MultipleConstraintsErrors):
+            Constraint._validate_inputs(wrong_args='value')
+
     @patch('sdv.constraints.base.Constraint._validate_inputs')
     @patch('sdv.constraints.base.Constraint._validate_metadata_columns')
     @patch('sdv.constraints.base.Constraint._validate_metadata_specific_to_constraint')
diff --git a/tests/unit/constraints/test_tabular.py b/tests/unit/constraints/test_tabular.py
index ed00a215f..91bb57e90 100644
--- a/tests/unit/constraints/test_tabular.py
+++ b/tests/unit/constraints/test_tabular.py
@@ -990,7 +990,7 @@ def test__validate_metadata_specific_to_constraint_datetime_error(self):
 
         # Run
         error_message = re.escape(
-            'An Inequality constraint is being applied to mismatched sdtype columns '
+            'An Inequality constraint is being applied to columns with mismatched sdtypes '
             "['a', 'b']. Both columns must be either numerical or datetime."
         )
         with pytest.raises(ConstraintMetadataError, match=error_message):
@@ -1035,7 +1035,7 @@ def test__validate_metadata_specific_to_constraint_numerical_error(self):
 
         # Run
         error_message = re.escape(
-            'An Inequality constraint is being applied to mismatched sdtype columns '
+            'An Inequality constraint is being applied to columns with mismatched sdtypes '
             "['a', 'b']. Both columns must be either numerical or datetime."
         )
         with pytest.raises(ConstraintMetadataError, match=error_message):
@@ -1705,7 +1705,7 @@ def test__validate_metadata_specific_to_constraint_bad_type(self):
 
         # Run
         error_message = (
-            'A ScalarInequality constraint is being applied to mismatched sdtypes. '
+            'A ScalarInequality constraint is being applied to columns with mismatched sdtypes. '
             'Numerical columns must be compared to integer or float values. '
             'Datetimes column must be compared to datetime strings.'
         )
@@ -2581,7 +2581,7 @@ def test__validate_metadata_specific_to_constraint_datetime_error(self):
 
         # Run
         error_message = re.escape(
-            'A Range constraint is being applied to mismatched sdtype columns '
+            'A Range constraint is being applied to columns with mismatched sdtypes '
             "['a', 'c', 'b']. All columns must be either numerical or datetime."
         )
         with pytest.raises(ConstraintMetadataError, match=error_message):
@@ -2637,7 +2637,7 @@ def test__validate_metadata_specific_to_constraint_numerical_error(self):
 
         # Run
         error_message = re.escape(
-            'A Range constraint is being applied to mismatched sdtype columns '
+            'A Range constraint is being applied to columns with mismatched sdtypes '
             "['a', 'c', 'b']. All columns must be either numerical or datetime."
         )
         with pytest.raises(ConstraintMetadataError, match=error_message):
@@ -3326,7 +3326,7 @@ def test__validate_metadata_specific_to_constraint_bad_type(self):
 
         # Run
         error_message = (
-            'A ScalarRange constraint is being applied to mismatched sdtypes. '
+            'A ScalarRange constraint is being applied to columns with mismatched sdtypes. '
             'Numerical columns must be compared to integer or float values. '
             'Datetimes column must be compared to datetime strings.'
         )
diff --git a/tests/unit/metadata/test_single_table.py b/tests/unit/metadata/test_single_table.py
index e956eae85..af691f468 100644
--- a/tests/unit/metadata/test_single_table.py
+++ b/tests/unit/metadata/test_single_table.py
@@ -10,6 +10,7 @@
 import pandas as pd
 import pytest
 
+from sdv.constraints.errors import MultipleConstraintsErrors
 from sdv.metadata.errors import InvalidMetadataError
 from sdv.metadata.single_table import SingleTableMetadata
 
@@ -1186,14 +1187,23 @@ def test_validate(self):
         instance._alternate_keys = ['col2']
         instance._sequence_key = 'col1'
         instance._sequence_index = 'col2'
-        instance._validate_constraint = Mock()
+        instance._validate_constraint = Mock(side_effect=MultipleConstraintsErrors(['cnt_error']))
         instance._validate_key = Mock()
         instance._validate_alternate_keys = Mock()
         instance._validate_sequence_index = Mock()
         instance._validate_sequence_index_not_in_sequence_key = Mock()
-
+        instance._validate_column = Mock(side_effect=ValueError('column_error'))
+
+        err_msg = re.escape(
+            'The following errors were found in the metadata:'
+            '\n\ncnt_error'
+            '\ncnt_error'
+            '\ncolumn_error'
+            '\ncolumn_error'
+        )
         # Run
-        instance.validate()
+        with pytest.raises(InvalidMetadataError, match=err_msg):
+            instance.validate()
 
         # Assert
         instance._validate_constraint.assert_has_calls([
@@ -1203,6 +1213,9 @@ def test_validate(self):
         instance._validate_key.assert_has_calls(
             [call(instance._primary_key, 'primary'), call(instance._sequence_key, 'sequence')]
         )
+        instance._validate_column.assert_has_calls(
+            [call('col1', sdtype='numerical'), call('col2', sdtype='numerical')]
+        )
         instance._validate_alternate_keys.assert_called_once_with(instance._alternate_keys)
         instance._validate_sequence_index.assert_called_once_with(instance._sequence_index)
         instance._validate_sequence_index_not_in_sequence_key.assert_called_once()

From af34779180105400c4984c0690f4f3c83f7dbdb9 Mon Sep 17 00:00:00 2001
From: Felipe Hofmann <fealho@gmail.com>
Date: Wed, 3 Aug 2022 13:29:12 -0700
Subject: [PATCH 18/18] Address minor feedback

---
 sdv/metadata/single_table.py        | 6 +++---
 tests/unit/constraints/test_base.py | 5 +++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/sdv/metadata/single_table.py b/sdv/metadata/single_table.py
index 5db66cfdf..b8c8bad38 100644
--- a/sdv/metadata/single_table.py
+++ b/sdv/metadata/single_table.py
@@ -345,9 +345,9 @@ def set_sequence_index(self, column_name):
 
     def _validate_sequence_index_not_in_sequence_key(self):
         """Check that ``_sequence_index`` and ``_sequence_key`` don't overlap."""
-        sk = self._sequence_key
-        sequence_key = {sk} if isinstance(sk, str) else set(sk)
-        if self._sequence_index in sequence_key or sk is None:
+        seq_key = self._sequence_key
+        sequence_key = {seq_key} if isinstance(seq_key, str) else set(seq_key)
+        if self._sequence_index in sequence_key or seq_key is None:
             index = {self._sequence_index}
             raise ValueError(
                 f"'sequence_index' and 'sequence_key' have the same value {index}."
diff --git a/tests/unit/constraints/test_base.py b/tests/unit/constraints/test_base.py
index 5f8718604..7fb2dd242 100644
--- a/tests/unit/constraints/test_base.py
+++ b/tests/unit/constraints/test_base.py
@@ -171,8 +171,9 @@ def test__validate_inputs(self):
         Constraint._validate_inputs(args='value', kwargs='value')
 
         # Run / Assert
-        with pytest.raises(MultipleConstraintsErrors):
-            Constraint._validate_inputs(wrong_args='value')
+        err_msg = "Invalid values {'wrong_args'} are present in a Constraint constraint."
+        with pytest.raises(MultipleConstraintsErrors, match=err_msg):
+            Constraint._validate_inputs(args='value', kwargs='value', wrong_args='value')
 
     @patch('sdv.constraints.base.Constraint._validate_inputs')
     @patch('sdv.constraints.base.Constraint._validate_metadata_columns')