diff --git a/tests/library/test_collections_operators.py b/tests/library/test_collections_operators.py index 742f8b106..a2964e2df 100644 --- a/tests/library/test_collections_operators.py +++ b/tests/library/test_collections_operators.py @@ -7,7 +7,13 @@ Slice, Wrap, ) -from unitxt.processors import AddPrefix, GetSQL +from unitxt.processors import ( + AddPrefix, + FixWhiteSpace, + GetSQL, + RemoveArticles, + RemovePunctuations, +) from unitxt.test_utils.operators import check_operator from tests.utils import UnitxtTestCase @@ -204,3 +210,67 @@ def test_get_sql_with_with_clause(self): } ] check_operator(operator=operator, inputs=inputs, targets=targets, tester=self) + + def test_remove_articles_with_empty_input(self): + operator = RemoveArticles(field="text") + inputs = [{"text": ""}] + targets = [{"text": ""}] + check_operator(operator=operator, inputs=inputs, targets=targets, tester=self) + + def test_remove_articles_with_no_articles(self): + operator = RemoveArticles(field="text") + inputs = [{"text": "Hello world!"}] + targets = [{"text": "Hello world!"}] + check_operator(operator=operator, inputs=inputs, targets=targets, tester=self) + + def test_remove_punctuations(self): + operator = RemovePunctuations(field="text") + inputs = [ + {"text": "Hello, world!"}, + {"text": "This is a sentence with punctuation: .,;!?"}, + {"text": "No punctuation here"}, + ] + targets = [ + {"text": "Hello world"}, + {"text": "This is a sentence with punctuation "}, + {"text": "No punctuation here"}, + ] + check_operator(operator=operator, inputs=inputs, targets=targets, tester=self) + + def test_remove_punctuations_with_empty_input(self): + operator = RemovePunctuations(field="text") + inputs = [{"text": ""}] + targets = [{"text": ""}] + check_operator(operator=operator, inputs=inputs, targets=targets, tester=self) + + def test_remove_punctuations_with_only_punctuations(self): + operator = RemovePunctuations(field="text") + inputs = [{"text": ".,;!?"}] + targets = [{"text": ""}] + check_operator(operator=operator, inputs=inputs, targets=targets, tester=self) + + def test_fix_white_space(self): + operator = FixWhiteSpace(field="text") + inputs = [ + {"text": " This is a test "}, + {"text": "NoExtraSpacesHere"}, + {"text": " "}, + ] + targets = [ + {"text": "This is a test"}, + {"text": "NoExtraSpacesHere"}, + {"text": ""}, + ] + check_operator(operator=operator, inputs=inputs, targets=targets, tester=self) + + def test_fix_white_space_with_empty_input(self): + operator = FixWhiteSpace(field="text") + inputs = [{"text": ""}] + targets = [{"text": ""}] + check_operator(operator=operator, inputs=inputs, targets=targets, tester=self) + + def test_fix_white_space_with_newline_and_tabs(self): + operator = FixWhiteSpace(field="text") + inputs = [{"text": " \tThis is a\n test with \t\nspaces."}] + targets = [{"text": "This is a test with spaces."}] + check_operator(operator=operator, inputs=inputs, targets=targets, tester=self)