Skip to content

Commit

Permalink
KeyUniqueness metric should only be applied to primary and alternate keys (#511)
Browse files Browse the repository at this point in the history
  • Loading branch information
R-Palazzo committed Nov 14, 2023
1 parent 4633847 commit 5a378e4
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ class DataValidity(BaseSingleTableProperty):
'datetime': BoundaryAdherence,
'categorical': CategoryAdherence,
'boolean': CategoryAdherence,
'id': KeyUniqueness,
}

def _generate_details(self, real_data, synthetic_data, metadata, progress_bar=None):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_end_to_end(self):
result = column_shapes.get_score(real_data, synthetic_data, metadata)

# Assert
assert result == 0.9444444444444445
assert result == 0.9636363636363637

def test_with_progress_bar(self):
"""Test that the progress bar is correctly updated."""
Expand All @@ -35,5 +35,5 @@ def test_with_progress_bar(self):
result = column_shapes.get_score(real_data, synthetic_data, metadata, progress_bar)

# Assert
assert result == 0.9444444444444445
assert result == 0.9636363636363637
assert mock_update.call_count == num_columns
40 changes: 19 additions & 21 deletions tests/integration/reports/multi_table/test_diagnostic_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_end_to_end(self):
results = report.get_score()

# Assert
assert results == 0.9814814814814815
assert results == 0.9878787878787879

def test_end_to_end_with_object_datetimes(self):
"""Test the ``DiagnosticReport`` report with object datetimes."""
Expand All @@ -38,9 +38,9 @@ def test_end_to_end_with_object_datetimes(self):
# Assert
expected_dataframe = pd.DataFrame({
'Property': ['Data Validity', 'Data Structure', 'Relationship Validity'],
'Score': [0.9444444444444445, 1.0, 1.0]
'Score': [0.9636363636363637, 1.0, 1.0]
})
assert results == 0.9814814814814815
assert results == 0.9878787878787879
pd.testing.assert_frame_equal(properties, expected_dataframe)

def test_end_to_end_with_metrics_failing(self):
Expand All @@ -62,37 +62,36 @@ def test_end_to_end_with_metrics_failing(self):
# Assert
expected_properties = pd.DataFrame({
'Property': ['Data Validity', 'Data Structure', 'Relationship Validity'],
'Score': [0.9677777777777777, 0.7833333333333333, 1.0]
'Score': [1.0, 0.7833333333333333, 1.0]
})
expected_details = pd.DataFrame({
'Table': [
'users', 'users', 'users', 'users', 'sessions', 'sessions', 'sessions',
'sessions', 'transactions', 'transactions', 'transactions', 'transactions',
'transactions'
'transactions', 'transactions', 'transactions', 'transactions',
],
'Column': [
'user_id', 'country', 'gender', 'age', 'session_id', 'user_id', 'device',
'os', 'transaction_id', 'session_id', 'timestamp', 'amount', 'approved'
'user_id', 'country', 'gender', 'age', 'session_id', 'device',
'os', 'transaction_id', 'timestamp', 'amount', 'approved'
],
'Metric': [
'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence', 'BoundaryAdherence',
'KeyUniqueness', 'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence',
'KeyUniqueness', 'KeyUniqueness', 'BoundaryAdherence', 'BoundaryAdherence',
'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence',
'KeyUniqueness', 'BoundaryAdherence', 'BoundaryAdherence',
'CategoryAdherence'
],
'Score': [
1.0, 1.0, 1.0, np.nan, 1.0, 0.7777777777777778, 1.0, 1.0, 1.0, 0.9,
1.0, 1.0, 1.0, np.nan, 1.0, 1.0, 1.0, 1.0,
np.nan, np.nan, 1.0
],
'Error': [
None, None, None,
"TypeError: '<=' not supported between instances of 'str' and 'int'",
None, None, None, None, None, None,
None, None, None, None,
"TypeError: '<=' not supported between instances of 'str' and 'Timestamp'",
"TypeError: '<=' not supported between instances of 'str' and 'float'", None
]
})
assert results == 0.9170370370370371
assert results == 0.9277777777777777
pd.testing.assert_frame_equal(
report.get_properties(), expected_properties, check_exact=False, atol=2e-2
)
Expand All @@ -111,7 +110,7 @@ def test_get_properties(self):
# Assert
expected_dataframe = pd.DataFrame({
'Property': ['Data Validity', 'Data Structure', 'Relationship Validity'],
'Score': [0.9444444444444445, 1.0, 1.0]
'Score': [0.9636363636363637, 1.0, 1.0]
})

pd.testing.assert_frame_equal(properties, expected_dataframe)
Expand All @@ -130,21 +129,20 @@ def test_get_details(self):
expected_dataframe = pd.DataFrame({
'Table': [
'users', 'users', 'users', 'users', 'sessions', 'sessions', 'sessions',
'sessions', 'transactions', 'transactions', 'transactions', 'transactions',
'transactions'
'transactions', 'transactions', 'transactions', 'transactions',
],
'Column': [
'user_id', 'country', 'gender', 'age', 'session_id', 'user_id', 'device',
'os', 'transaction_id', 'session_id', 'timestamp', 'amount', 'approved'
'user_id', 'country', 'gender', 'age', 'session_id', 'device',
'os', 'transaction_id', 'timestamp', 'amount', 'approved'
],
'Metric': [
'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence', 'BoundaryAdherence',
'KeyUniqueness', 'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence',
'KeyUniqueness', 'KeyUniqueness', 'BoundaryAdherence', 'BoundaryAdherence',
'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence',
'KeyUniqueness', 'BoundaryAdherence', 'BoundaryAdherence',
'CategoryAdherence'
],
'Score': [
1.0, 1.0, 1.0, 0.9, 1.0, 0.7777777777777778, 1.0, 1.0, 1.0, 0.9, 0.9,
1.0, 1.0, 1.0, 0.9, 1.0, 1.0, 1.0, 1.0, 0.9,
0.8, 1.0
]
})
Expand Down

0 comments on commit 5a378e4

Please sign in to comment.