Skip to content

Commit

Permalink
Fix tests, implement exact matching
Browse files Browse the repository at this point in the history
  • Loading branch information
radeusgd committed Feb 18, 2022
1 parent d7bc8da commit 03e6e12
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -780,12 +780,20 @@ Text.ends_with suffix = Text_Utils.ends_with this suffix
> Example
See if the text "Hello!" contains the text 'LO', ignoring case.

"Hello!".contains 'LO' (Text_Matcher Case_Insensitive.new)
"Hello!".contains "LO" (Text_Matcher Case_Insensitive.new)

> Example
See if the text "Hello!" contains any lowercase letters, using a regex.

"Hello!".contains "[a-z]" Regex_Matcher.new
Text.contains : Text -> (Text_Matcher | Regex_Matcher) -> Boolean
# NOTE(review): this diff view lists two bodies for `Text.contains`. The one
# directly below discards `matcher` and always calls `Text_Utils.contains`; it
# looks like the pre-commit version that the `case matcher of` definition
# further down replaces - confirm against the actual file.
Text.contains term="" matcher=Text_Matcher.new =
# TODO
_ = matcher
Text_Utils.contains this term
# Dispatches on the kind of matcher that was passed in.
Text.contains term="" matcher=Text_Matcher.new = case matcher of
# Plain-text matching: the matcher carries a case-sensitivity setting.
Text_Matcher case_sensitivity -> case case_sensitivity of
# `True` presumably means "case sensitive": delegate to `Text_Utils.contains`
# with the texts unchanged.
True -> Text_Utils.contains this term
# Case-insensitive: lower-case both texts using the locale carried by the
# setting, then run the same `Text_Utils.contains` check.
Case_Insensitive locale ->
Text_Utils.contains (this.to_lower_case locale) (term.to_lower_case locale)
# Regex matching is not implemented yet: the matcher's fields are bound but
# unused, and the branch just calls `Error.throw` with the placeholder "TODO".
Regex_Matcher case_sensitive multiline match_ascii dot_matches_newline comments ->
Error.throw "TODO"

## Text to JSON conversion.

Expand Down
4 changes: 4 additions & 0 deletions std-bits/base/src/main/java/org/enso/base/Text_Utils.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.enso.base;

import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.Normalizer2;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -210,6 +211,9 @@ public static int compare_normalized(String a, String b) {
* @return whether {@code substring} is a substring of {@code string}.
*/
public static boolean contains(String string, String substring) {
  // Put both operands into the same normalization form (NFD) before searching, so
  // that differently encoded spellings of the same text compare equal in the
  // plain String.contains check below.
  Normalizer2 nfd = Normalizer2.getNFDInstance();
  String haystack = nfd.normalize(string);
  String needle = nfd.normalize(substring);
  return haystack.contains(needle);
}

Expand Down
8 changes: 6 additions & 2 deletions test/Tests/src/Data/Text_Spec.enso
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,15 @@ spec =

Test.specify "should check for contains using Unicode normalization" <|
"Hello".contains "ell" . should_be_true

"Cześć".contains 's\u{301}' . should_be_true
"Cześć".contains 'c\u{301}' . should_be_true
"Cześć".contains 'ść' . should_be_true
"Czes\u{301}c\u{301}".contains 'ść' . should_be_true
'Czes\u{301}c\u{301}'.contains 'ś' . should_be_true
'Czes\u{301}c\u{301}'.contains 'ć' . should_be_true
'Czes\u{301}c\u{301}'.contains 'ść' . should_be_true
"Cześć".contains 'sc' . should_be_false
"Czes\u{301}c\u{301}".contains 'sc' . should_be_false
'Czes\u{301}c\u{301}'.contains 'sc' . should_be_false

"ABC" . contains "a" . should_be_false
"" . contains "foo" . should_be_false
Expand Down

0 comments on commit 03e6e12

Please sign in to comment.