Fix tests, implement exact matching

enso-org · Feb 18, 2022 · 03e6e12 · 03e6e12
1 parent d7bc8da
commit 03e6e12
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 7 deletions.
diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso
@@ -780,12 +780,20 @@ Text.ends_with suffix = Text_Utils.ends_with this suffix
    > Example
      See if the text "Hello!" contains the text "LO", ignoring case.
 
-         "Hello!".contains 'LO' (Text_Matcher Case_Insensitive.new)
+         "Hello!".contains "LO" (Text_Matcher Case_Insensitive.new)
+
+   > Example
+     See if the text "Hello!" contains any lowercase letters, using a regex.
+
+         "Hello!".contains "[a-z]" Regex_Matcher.new
 Text.contains : Text -> (Text_Matcher | Regex_Matcher) -> Boolean
-Text.contains term="" matcher=Text_Matcher.new =
-    # TODO
-    _ = matcher
-    Text_Utils.contains this term
+Text.contains term="" matcher=Text_Matcher.new = case matcher of
+    Text_Matcher case_sensitivity -> case case_sensitivity of
+        True -> Text_Utils.contains this term
+        Case_Insensitive locale ->
+            Text_Utils.contains (this.to_lower_case locale) (term.to_lower_case locale)
+    Regex_Matcher case_sensitive multiline match_ascii dot_matches_newline comments ->
+        Error.throw "TODO"
 
 ## Text to JSON conversion.
 

diff --git a/std-bits/base/src/main/java/org/enso/base/Text_Utils.java b/std-bits/base/src/main/java/org/enso/base/Text_Utils.java
@@ -1,6 +1,7 @@
 package org.enso.base;
 
 import com.ibm.icu.text.Normalizer;
+import com.ibm.icu.text.Normalizer2;
 import java.nio.charset.StandardCharsets;
 import java.util.regex.Pattern;
 
@@ -210,6 +211,9 @@ public static int compare_normalized(String a, String b) {
    * @return whether {@code substring} is a substring of {@code string}.
    */
   public static boolean contains(String string, String substring) {
+    Normalizer2 normalizer = Normalizer2.getNFDInstance();
+    string = normalizer.normalize(string);
+    substring = normalizer.normalize(substring);
     return string.contains(substring);
   }
 

diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso
@@ -206,11 +206,15 @@ spec =
 
         Test.specify "should check for contains using Unicode normalization" <|
             "Hello".contains "ell" . should_be_true
+
             "Cześć".contains 's\u{301}' . should_be_true
+            "Cześć".contains 'c\u{301}' . should_be_true
             "Cześć".contains 'ść' . should_be_true
-            "Czes\u{301}c\u{301}".contains 'ść' . should_be_true
+            'Czes\u{301}c\u{301}'.contains 'ś' . should_be_true
+            'Czes\u{301}c\u{301}'.contains 'ć' . should_be_true
+            'Czes\u{301}c\u{301}'.contains 'ść' . should_be_true
             "Cześć".contains 'sc' . should_be_false
-            "Czes\u{301}c\u{301}".contains 'sc' . should_be_false
+            'Czes\u{301}c\u{301}'.contains 'sc' . should_be_false
 
             "ABC" . contains "a" . should_be_false
             "" . contains "foo" . should_be_false