From d7bc8dad6eed2dec43caceb816fa7daa4f9ce052 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?=
Date: Thu, 17 Feb 2022 18:34:17 +0100
Subject: [PATCH] Add stub for new method API and an initial test suite

---
 .../0.0.0-dev/src/Data/Text/Extensions.enso   | 25 +++++++++--
 .../lib/Standard/Base/0.0.0-dev/src/Main.enso |  3 +-
 test/Tests/src/Data/Text_Spec.enso            | 45 +++++++++++++++++++
 3 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso
index 36ad54bdeb14..01c2c0e37f37 100644
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso
@@ -753,7 +753,13 @@ Text.ends_with suffix = Text_Utils.ends_with this suffix
    Checks whether `this` contains `sequence` as its substring.

    Arguments:
-   - sequence: The text to see if it is contained in `this`.
+   - term: The term to find.
+   - matcher: If a `Text_Matcher`, the text is compared using case-sensitivity
+     rules specified in the matcher.
+     If a `Regex_Matcher`, the term is used as a regular expression and matched
+     using the associated options.
+
+   Returns: `True` if term is found within `this`. `False` otherwise.

    ! Unicode Equality
      The definition of equality includes Unicode canonicalization. I.e. two
@@ -765,8 +771,21 @@ Text.ends_with suffix = Text_Utils.ends_with this suffix
      See if the text "Hello" contains the text "ell".

          "Hello".contains "ell"
-Text.contains : Text -> Boolean
-Text.contains sequence = Text_Utils.contains this sequence
+
+   > Example
+     See if the text "Cześć" contains the text 's\u{301}' (which folds to 'ś').
+
+         "Cześć".contains 's\u{301}'
+
+   > Example
+     See if the text "Hello!" contains the text 'LO', ignoring case sensitivity.
+
+         "Hello!".contains 'LO' (Text_Matcher Case_Insensitive.new)
+Text.contains : Text -> (Text_Matcher | Regex_Matcher) -> Boolean
+Text.contains term="" matcher=Text_Matcher.new =
+    # TODO
+    _ = matcher
+    Text_Utils.contains this term

 ## Text to JSON conversion.

diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso
index 9ab6aadc7103..459a28a04f71 100644
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso
@@ -13,6 +13,7 @@ import project.Data.Ordering.Sort_Order
 import project.Data.Pair
 import project.Data.Range
 import project.Data.Text.Extensions
+import project.Data.Text.Matching
 import project.Data.Vector
 import project.Error.Common
 import project.Error.Extensions
@@ -47,7 +48,7 @@ from project.Data.Noise export all hiding Noise
 from project.Data.Pair export Pair
 from project.Data.Range export Range
 from project.Data.Text.Extensions export Text, Split_Kind, Line_Ending_Style
-from project.Data.Text.Matching export Text, Split_Kind, Line_Ending_Style
+from project.Data.Text.Matching export Case_Insensitive, Text_Matcher, Regex_Matcher
 from project.Error.Common export all
 from project.Error.Extensions export all
 from project.Meta.Enso_Project export all
diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso
index da088b5e30dd..4bce3ceeb1d8 100644
--- a/test/Tests/src/Data/Text_Spec.enso
+++ b/test/Tests/src/Data/Text_Spec.enso
@@ -204,6 +204,51 @@ spec =
             'a'.each builder2.append
             builder2.to_vector . should_equal ['a']

+        Test.specify "should check for contains using Unicode normalization" <|
+            "Hello".contains "ell" . should_be_true
+            "Cześć".contains 's\u{301}' . should_be_true
+            "Cześć".contains 'ść' . should_be_true
+            "Czes\u{301}c\u{301}".contains 'ść' . should_be_true
+            "Cześć".contains 'sc' . should_be_false
+            "Czes\u{301}c\u{301}".contains 'sc' . should_be_false
+
+            "ABC" . contains "a" . should_be_false
+            "" . contains "foo" . should_be_false
+            "abc" . contains "" . should_be_true
+            "" . contains "" . should_be_true
+            "foo foo foo" . contains "foo" . should_be_true
+
+            "Hello!".contains "lo" . should_be_true
+            "Hello!".contains "Lo" . should_be_false
+
+        Test.specify "should allow for case-insensitive contains checks" <|
+            "Hello!".contains 'LO' (Text_Matcher Case_Insensitive.new) . should_be_true
+            "FoObar" . contains "foo" (Text_Matcher Case_Insensitive.new) . should_be_true
+            "aaaIAAA" . contains "i" (Text_Matcher Case_Insensitive.new) . should_be_true
+            "Foo" . contains "bar" (Text_Matcher Case_Insensitive.new) . should_be_false
+
+        Test.specify "should allow for Regex contains checks" <|
+            "Hello!".contains "[a-z]" Regex_Matcher.new . should_be_true
+            "foobar" . contains "b.." Regex_Matcher.new . should_be_true
+            "foob" . contains "b.." Regex_Matcher.new . should_be_false
+
+            "123 meters and 4 centimeters" . contains "[0-9]+" Regex_Matcher.new . should_be_true
+            "foo" . contains "[0-9]+" Regex_Matcher.new . should_be_false
+
+            "Cześć" . contains "ś" Regex_Matcher.new . should_be_true
+            "Cześć" . contains 's\u{301}' Regex_Matcher.new . should_be_true
+            'Czes\u{301}c\u{301}' . contains 's\u{301}' Regex_Matcher.new . should_be_true
+
+            # TODO maybe more checks for all the options
+            "fooBar" . contains "b.." (Regex_Matcher.new case_sensitive=Case_Insensitive.new) . should_be_true
+            "foar" . contains "b.." (Regex_Matcher.new case_sensitive=Case_Insensitive.new) . should_be_false
+
+            long_text = """
+                Hello from a long text. EOL
+                SOL Hmm...
+            long_text . contains "EOL.SOL" (Regex_Matcher.new dot_matches_newline=True) . should_be_true
+            long_text . contains "EOL.SOL" (Regex_Matcher.new dot_matches_newline=False) . should_be_false
+
     Test.group "Regex matching" <|
         Test.specify "should be possible on text" <|
             match = "My Text: Goes Here".match "^My Text: (.+)$" mode=Regex_Mode.First