Skip to content

Commit

Permalink
Create example test
Browse files Browse the repository at this point in the history
  • Loading branch information
multimeric committed Dec 12, 2016
1 parent 49be882 commit dfa02a6
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 24 deletions.
6 changes: 3 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ Now you want to be able to ensure that the data in your CSV is in the correct fo
])
test_data = pd.read_csv(StringIO('''
Gerald,Hampton,82,Male,2582GABK
Yuuwa,Miyake,27,Male,7951WVLW
Edyta,Majewska,50,Female,7758NSID
Gerald ,Hampton,82,Male,2582GABK
Yuuwa,Miyake,270,male,7951WVLW
Edyta,Majewska ,50,Female,775ANSID
'''))
errors = schema.validate(test_data)
Expand Down
2 changes: 1 addition & 1 deletion schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,4 @@ def validate(self, df: pd.DataFrame) -> typing.List[ValidationError]:
for series, column in column_pairs:
errors += column.validate(series)

return errors
return sorted(errors, key=lambda e: e.row)
33 changes: 33 additions & 0 deletions test/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import unittest
import sys
import pathlib

# Directory containing this test module.
here = pathlib.Path(__file__).parent
# Put the directory two levels above this file on the import path.
# NOTE(review): presumably this is what makes `pandas_schema` importable
# when the tests run from a source checkout -- confirm against the repo layout.
sys.path.append(str(here.joinpath('..', '..').resolve()))

class Example(unittest.TestCase):
def test_example(self):

import pandas as pd
from io import StringIO
from pandas_schema import Column, Schema
from pandas_schema.validation import LeadingWhitespaceValidation, TrailingWhitespaceValidation, CanConvertValidation, MatchesRegexValidation, InRangeValidation, InListValidation

schema = Schema([
Column('Given Name', [LeadingWhitespaceValidation(), TrailingWhitespaceValidation()]),
Column('Family Name', [LeadingWhitespaceValidation(), TrailingWhitespaceValidation()]),
Column('Age', [InRangeValidation(0, 120)]),
Column('Sex', [InListValidation(['Male', 'Female', 'Other'])]),
Column('Customer ID', [MatchesRegexValidation(r'\d{4}[A-Z]{4}')])
])

test_data = pd.read_csv(StringIO('''
Given Name,Family Name,Age,Sex,Customer ID
Gerald ,Hampton,82,Male,2582GABK
Yuuwa,Miyake,270,male,7951WVLW
Edyta,Majewska ,50,Female,775ANSID
'''.strip()), sep=',', dtype=str)

errors = schema.validate(test_data)
for error in errors:
print(error)
34 changes: 17 additions & 17 deletions validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class ElementValidation(BaseValidation):
__metaclass__ = abc.ABCMeta

@abc.abstractmethod
def get_message(self, value: any) -> str:
def get_message(self) -> str:
"""
Create a message to be displayed whenever this validation fails
:param value: The value of the failing object (Series, or single value)
Expand Down Expand Up @@ -68,7 +68,8 @@ def get_errors(self, series: pd.Series, column: 'column.Column'):
for i in indices:
element = series[i]
errors.append(ValidationError(
message=self.get_message(element),
message=self.get_message(),
value=element,
row=i,
column=series.name
))
Expand All @@ -92,7 +93,7 @@ def __init__(self, validation: typing.Callable[[pd.Series], pd.Series], message:
self._validation = validation
super().__init__()

def get_message(self, value):
def get_message(self):
return self._message

def validate(self, series: pd.Series) -> pd.Series:
Expand All @@ -108,8 +109,8 @@ def __init__(self, min=-math.inf, max=math.inf):
self.min = min
self.max = max

def get_message(self, value: any):
return '{} was not in the range [{}, {})'.format(value, self.min, self.max)
def get_message(self):
return 'was not in the range [{}, {})'.format(self.min, self.max)

def validate(self, series: pd.Series) -> pd.Series:
series = pd.to_numeric(series)
Expand Down Expand Up @@ -145,7 +146,7 @@ def __init__(self, func):
raise PanSchArgumentError('The object "{}" passed to CanCallValidation is not callable!'.format(type))
super().__init__()

def get_message(self, value):
def get_message(self):
return 'raised an exception when the callable {} was called on it'.format(self.callable)

def can_call(self, var):
Expand Down Expand Up @@ -175,11 +176,11 @@ def __init__(self, _type):
else:
raise PanSchArgumentError('{} is not a valid type'.format(_type))

def get_message(self, value):
def get_message(self):
return 'cannot be converted to type {}'.format(self.callable)


class MatchesRegexValidation(BaseValidation):
class MatchesRegexValidation(ElementValidation):
"""
Validates that a regular expression can match somewhere in each element in this column
"""
Expand All @@ -191,8 +192,8 @@ def __init__(self, regex: typing.re.Pattern):

self.pattern = regex

def get_message(self, value):
return 'does not match the regex {}'.format(self.pattern)
def get_message(self):
return 'does not match the regex "{}"'.format(self.pattern)

def validate(self, series: pd.Series) -> pd.Series:
return series.astype(str).str.contains(self.pattern)
Expand All @@ -206,7 +207,7 @@ class TrailingWhitespaceValidation(ElementValidation):
def __init__(self):
pass

def get_message(self, value):
def get_message(self):
return 'contains trailing whitespace'

def validate(self, series: pd.Series) -> pd.Series:
Expand All @@ -221,7 +222,7 @@ class LeadingWhitespaceValidation(ElementValidation):
def __init__(self):
pass

def get_message(self, value):
def get_message(self):
return 'contains leading whitespace'

def validate(self, series: pd.Series) -> pd.Series:
Expand All @@ -236,9 +237,8 @@ class InListValidation(ElementValidation):
def __init__(self, options: typing.Iterable):
self.options = options

def get_message(self, value):
return 'has a value of "{}" which is not in the list of legal options ("{}")'.format(value,
','.join(self.options))
def get_message(self):
return 'is not in the list of legal options ({})'.format(', '.join(self.options))

def validate(self, series: pd.Series) -> pd.Series:
return series.isin(self.options)
Expand All @@ -252,8 +252,8 @@ class DateFormatValidation(ElementValidation):
def __init__(self, date_format: str):
self.date_format = date_format

def get_message(self, value):
return 'has a value of "{}", which does not match the date format string "{}"'.format(value, self.date_format)
def get_message(self):
return 'does not match the date format string "{}"'.format(self.date_format)

def valid_date(self, val):
try:
Expand Down
7 changes: 4 additions & 3 deletions validation_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ class ValidationError:
Represents a difference between the schema and data frame, found during the validation of the data frame
"""

def __init__(self, message: str, row: int = None, column: str = None):
def __init__(self, message: str, value: str = None, row: int = None, column: str = None):
self.message = message
self.value = value
self.row = row
self.column = column

def __str__(self):
if self.row and self.column:
return '{{row: {}, column: "{}"}}: {}'.format(self.row, self.column, self.message)
if self.row is not None and self.column is not None and self.value is not None:
return '{{row: {}, column: "{}"}}: "{}" {}'.format(self.row, self.column, self.value, self.message)
else:
return self.message

0 comments on commit dfa02a6

Please sign in to comment.