From 2edcb6e7d4ab51d76f3b430289d62f8978d00c7f Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Mon, 23 Jan 2023 17:29:35 +0000 Subject: [PATCH 01/14] Update Starts_With. Fix error handling on Data.fetch. --- .../0.0.0-dev/src/Data/Text/Extensions.enso | 31 ++----- .../0.0.0-dev/src/Data/Table_Conversions.enso | 15 +++ test/Tests/src/Data/Text_Spec.enso | 92 ++++--------------- 3 files changed, 40 insertions(+), 98 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index 0d063b890408..90eee4be6e6b 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -716,10 +716,8 @@ Text.from_codepoints codepoints = Text_Utils.from_codepoints codepoints.to_array Arguments: - prefix: The prefix to see if `self` starts with. - - matcher: If a `Text_Matcher`, the text is compared using case-sensitivity - rules specified in the matcher. - If a `Regex_Matcher`, the term is used as a regular expression and matched - using the associated options. + - case_sensitivity: Specifies if the text values should be compared case + sensitively. ! Unicode Equality The definition of equality includes Unicode canonicalization. I.e. two @@ -727,37 +725,22 @@ Text.from_codepoints codepoints = Text_Utils.from_codepoints codepoints.to_array ensures that different ways of expressing the same character in the underlying binary representation are considered equal. - This however is not always well handled by the regex engine. The behaviour - is as follows: - 'ś' . starts_with 's' == False 's\u{301}' . starts_with 's' == False 's\u{301}' . starts_with 'ś' == True 'ś' . starts_with 's\u{301}' == True - 'ś' . starts_with 's' Regex_Matcher == True - 's\u{301}' . starts_with 's' Regex_Matcher == True - 's\u{301}' . starts_with 'ś' Regex_Matcher == True - 'ś' . 
starts_with 's\u{301}' Regex_Matcher == True - > Example See if the text "Hello!" starts with the specified prefix. "Hello!".starts_with "Hello" == True "Hello!".starts_with "hello" == False - "Hello!".starts_with "hello" (Text_Matcher Case_Insensitive) == True - "Hello!".starts_with "[a-z]" Regex_Matcher == False - "Hello!".starts_with "[A-Z]" Regex_Matcher == True -Text.starts_with : Text -> (Text_Matcher | Regex_Matcher) -> Boolean -Text.starts_with self prefix matcher=Text_Matcher.Case_Sensitive = case matcher of - Text_Matcher.Case_Sensitive -> Text_Utils.starts_with self prefix - Text_Matcher.Case_Insensitive locale -> + "Hello!".starts_with "hello" Case_Sensitivity.Insensitive == True +Text.starts_with : Text -> Case_Sensitivity -> Boolean +Text.starts_with self prefix case_sensitivity=Case_Sensitivity.Sensitive = case case_sensitivity of + Case_Sensitivity.Sensitive -> Text_Utils.starts_with self prefix + Case_Sensitivity.Insensitive locale -> self.take (Index_Sub_Range.First prefix.length) . equals_ignore_case prefix locale=locale - _ : Regex_Matcher -> - preprocessed_pattern = "\A(?:" + prefix + ")" - compiled_pattern = matcher.compile preprocessed_pattern - match = compiled_pattern.match self Matching_Mode.First - match.is_nothing.not ## ALIAS Check Suffix diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table_Conversions.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table_Conversions.enso index 74355acf2d32..ce5799f5ea48 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table_Conversions.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table_Conversions.enso @@ -89,3 +89,18 @@ Table.from_objects value fields=Nothing = Table.new (used_fields.zip used_values) _ : Array -> Table.from_objects (Vector.from_polyglot_array value) fields _ -> Error.throw (Illegal_Argument.Error "Invalid value for Table.from_objects. 
Currently must be one of JS_Object, Vector, Array, Number, Boolean, Text and Nothing are supported (got "+(Meta.get_simple_type_name value)+").") + +## Converts a Text into a Table using a regular expression pattern +Text.parse_to_table : Text -> Text -> Boolean -> Table +Text.parse_to_table self pattern row_delimiter='\n' parse_value=False = + lines = self.split row_delimiter + + regex = Regex.compile pattern + first_match = regex.match lines.first . first + + no_groups = first_match.groups.length == 1 + filter = if no_groups then (v-> v.first) else (v-> v.drop 1) + rows = lines.map l-> filter (regex.match l . first . groups) + header = if no_groups then ["Value"] else 0.up_to no_groups.length i-> "Column "+(i+1).to_text + + Table.from_rows header rows diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index 4c771778ecad..59f3e4e1639e 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -884,82 +884,26 @@ spec = Test.specify "starts_with should work as shown in the examples" <| "Hello!".starts_with "Hello" . should_be_true "Hello!".starts_with "hello" . should_be_false - "Hello!".starts_with "hello" Text_Matcher.Case_Insensitive . should_be_true - "Hello!".starts_with "[a-z]" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - "Hello!".starts_with "[A-Z]" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true + "Hello!".starts_with "hello" Case_Sensitivity.Insensitive . should_be_true Test.specify "should allow for case-insensitive starts_with checks" <| - "Hello".starts_with "he" Text_Matcher.Case_Insensitive . should_be_true - - "Ściana".starts_with 's\u{301}' Text_Matcher.Case_Insensitive . should_be_true - "Ściana".starts_with 's' Text_Matcher.Case_Insensitive . should_be_false - 'S\u{301}ciana'.starts_with 'ś' Text_Matcher.Case_Insensitive . 
should_be_true - 'S\u{301}ciana'.starts_with 's\u{301}' Text_Matcher.Case_Insensitive . should_be_true - 'S\u{301}ciana'.starts_with 's' Text_Matcher.Case_Insensitive . should_be_false - - "ABC" . starts_with "A" Text_Matcher.Case_Insensitive . should_be_true - "ABC" . starts_with "a" Text_Matcher.Case_Insensitive . should_be_true - "ABC" . starts_with "C" Text_Matcher.Case_Insensitive . should_be_false - "" . starts_with "foo" Text_Matcher.Case_Insensitive . should_be_false - "abc" . starts_with "" Text_Matcher.Case_Insensitive . should_be_true - "" . starts_with "" Text_Matcher.Case_Insensitive . should_be_true - "fOo FOO foo" . starts_with "FoO" Text_Matcher.Case_Insensitive . should_be_true - - "Hello!".starts_with "he" Text_Matcher.Case_Insensitive . should_be_true - - Test.specify "should allow for Regex starts_with checks" <| - "Hello!".starts_with "[A-Z]" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "foobar" . starts_with ".o." (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "foob" . starts_with ".f." (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - - "123 meters and 4 centimeters" . starts_with "[0-9]+" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "foo 123" . starts_with "[0-9]+" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - - # Correct non-regex behaviour for reference. - 'ś' . starts_with 's' == False - 's\u{301}' . starts_with 's' == False - 's\u{301}' . starts_with 'ś' == True - 'ś' . starts_with 's\u{301}' == True - - # These two behave as expected. - 's\u{301}' . starts_with 'ś' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) == True - 'ś' . 
starts_with 's\u{301}' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) == True - - ## These two are included to document the current behaviour - (even though ideally, we would want them to return False). - 'ś' . starts_with 's' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) == True - 's\u{301}' . starts_with 's' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) == True - - "ściana" . starts_with "ś" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "ściana" . starts_with 's\u{301}' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - 's\u{301}ciana' . starts_with 's\u{301}' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - 's\u{301}ciana' . starts_with 'ś' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - - ## These two tests below are disabled due to how regex is handling - letters with accents. See the tests above for explanation. - #"ściana" . starts_with "s" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - # 's\u{301}ciana' . starts_with 's' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - - "fOOBar" . starts_with ".o." (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) . should_be_true - "faaaar" . starts_with ".o." (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) . should_be_false - - long_text = """ - EOL - SOL Hmm... - long_text . starts_with "EOL.SOL" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=True) . should_be_true - long_text . starts_with "EOL.SOL" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=False) . should_be_false - - "aaazzz" . starts_with "a|b" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "bbbzzz" . 
starts_with "a|b" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "zzzaaa" . starts_with "a|b" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - "zzzbbb" . starts_with "a|b" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - "aaazzz" . starts_with "(a|b){2}" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "bbbzzz" . starts_with "(a|b){2}" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "zzzaaa" . starts_with "(a|b){2}" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - "ABC" . starts_with "\AA" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "ABC" . starts_with "\AA\z" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - "foobar" . starts_with "" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "" . starts_with "" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true + "Hello".starts_with "he" Case_Sensitivity.Insensitive . should_be_true + + "Ściana".starts_with 's\u{301}' Case_Sensitivity.Insensitive . should_be_true + "Ściana".starts_with 's' Case_Sensitivity.Insensitive . should_be_false + 'S\u{301}ciana'.starts_with 'ś' Case_Sensitivity.Insensitive . should_be_true + 'S\u{301}ciana'.starts_with 's\u{301}' Case_Sensitivity.Insensitive . should_be_true + 'S\u{301}ciana'.starts_with 's' Case_Sensitivity.Insensitive . should_be_false + + "ABC" . starts_with "A" Case_Sensitivity.Insensitive . should_be_true + "ABC" . starts_with "a" Case_Sensitivity.Insensitive . should_be_true + "ABC" . starts_with "C" Case_Sensitivity.Insensitive . should_be_false + "" . starts_with "foo" Case_Sensitivity.Insensitive . should_be_false + "abc" . starts_with "" Case_Sensitivity.Insensitive . should_be_true + "" . 
starts_with "" Case_Sensitivity.Insensitive . should_be_true + "fOo FOO foo" . starts_with "FoO" Case_Sensitivity.Insensitive . should_be_true + + "Hello!".starts_with "he" Case_Sensitivity.Insensitive . should_be_true Test.specify "should check for ends_with using Unicode normalization" <| "Hello".ends_with "lo" . should_be_true From be71e7d43e02c0939b4bb0a6ddb28caad5956418 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 10:11:36 +0000 Subject: [PATCH 02/14] Update Ends_With. --- .../0.0.0-dev/src/Data/Text/Extensions.enso | 22 ++---- test/Tests/src/Data/Text_Spec.enso | 72 +++++-------------- 2 files changed, 23 insertions(+), 71 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index 90eee4be6e6b..61d5d2a570ca 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -748,10 +748,8 @@ Text.starts_with self prefix case_sensitivity=Case_Sensitivity.Sensitive = case Arguments: - suffix: The suffix to see if `self` ends with. - - matcher: If a `Text_Matcher`, the text is compared using case-sensitivity - rules specified in the matcher. - If a `Regex_Matcher`, the term is used as a regular expression and matched - using the associated options. + - case_sensitivity: Specifies if the text values should be compared case + sensitively. ! Unicode Equality The definition of equality includes Unicode canonicalization. I.e. 
two @@ -764,18 +762,12 @@ Text.starts_with self prefix case_sensitivity=Case_Sensitivity.Sensitive = case "Hello World".ends_with "World" == True "Hello World".ends_with "world" == False - "Hello World".ends_with "world" (Text_Matcher Case_Insensitive) == True - "Hello World".ends_with "[A-Z][a-z]{4}" Regex_Matcher == True -Text.ends_with : Text -> (Text_Matcher | Regex_Matcher) -> Boolean -Text.ends_with self suffix matcher=Text_Matcher.Case_Sensitive = case matcher of - Text_Matcher.Case_Sensitive -> Text_Utils.ends_with self suffix - Text_Matcher.Case_Insensitive locale -> + "Hello World".ends_with "world" Case_Sensitivity.Insensitive == True +Text.ends_with : Text -> Case_Sensitivity -> Boolean +Text.ends_with self suffix case_sensitivity=Case_Sensitivity.Sensitive = case case_sensitivity of + Case_Sensitivity.Sensitive -> Text_Utils.ends_with self suffix + Case_Sensitivity.Insensitive locale -> self.take (Index_Sub_Range.Last suffix.length) . equals_ignore_case suffix locale=locale - _ : Regex_Matcher -> - preprocessed_pattern = "(?:" + suffix + ")\z" - compiled_pattern = matcher.compile preprocessed_pattern - match = compiled_pattern.match self Matching_Mode.First - match.is_nothing.not ## ALIAS Contains diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index 59f3e4e1639e..5e158448f56c 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -926,64 +926,24 @@ spec = Test.specify "ends_with should work as shown in the examples" <| "Hello World".ends_with "World" . should_be_true "Hello World".ends_with "world" . should_be_false - "Hello World".ends_with "world" Text_Matcher.Case_Insensitive . should_be_true - "Hello World".ends_with "[A-Z][a-z]{4}" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true + "Hello World".ends_with "world" Case_Sensitivity.Insensitive . 
should_be_true Test.specify "should allow for case-insensitive ends_with checks" <| - "Hello".ends_with "LO" Text_Matcher.Case_Insensitive . should_be_true - - "rzeczywistość".ends_with 'C\u{301}' Text_Matcher.Case_Insensitive . should_be_true - "rzeczywistość".ends_with 'C' Text_Matcher.Case_Insensitive . should_be_false - 'rzeczywistos\u{301}c\u{301}'.ends_with 'Ć' Text_Matcher.Case_Insensitive . should_be_true - 'rzeczywistos\u{301}c\u{301}'.ends_with 'C\u{301}' Text_Matcher.Case_Insensitive . should_be_true - 'rzeczywistos\u{301}c\u{301}'.ends_with 'C' Text_Matcher.Case_Insensitive . should_be_false - - "ABC" . ends_with "C" Text_Matcher.Case_Insensitive . should_be_true - "ABC" . ends_with "c" Text_Matcher.Case_Insensitive . should_be_true - "ABC" . ends_with "A" Text_Matcher.Case_Insensitive . should_be_false - "" . ends_with "foo" Text_Matcher.Case_Insensitive . should_be_false - "abc" . ends_with "" Text_Matcher.Case_Insensitive . should_be_true - "" . ends_with "" Text_Matcher.Case_Insensitive . should_be_true - "fOo FOO fOo" . ends_with "FoO" Text_Matcher.Case_Insensitive . should_be_true - - Test.specify "should allow for Regex ends_with checks" <| - "Hello".ends_with "[a-z]" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "Hello!".ends_with "[a-z]" Regex_Matcher.Value . should_be_false - - "foobar" . ends_with ".o." Regex_Matcher.Value . should_be_false - "foobar" . ends_with ".a." Regex_Matcher.Value . should_be_true - - "123 meters and 4 centimeters" . ends_with "[0-9]+" Regex_Matcher.Value . should_be_false - "foo 123" . ends_with "[0-9]+" Regex_Matcher.Value . should_be_true - - "rzeczywistość" . ends_with "ć" Regex_Matcher.Value . should_be_true - "rzeczywistość" . ends_with 'c\u{301}' Regex_Matcher.Value . should_be_true - 'rzeczywistos\u{301}c\u{301}' . ends_with 'c\u{301}' Regex_Matcher.Value . should_be_true - 'rzeczywistos\u{301}c\u{301}' . ends_with 'ć' Regex_Matcher.Value . 
should_be_true - "rzeczywistość" . ends_with "c" Regex_Matcher.Value . should_be_false - 'rzeczywistos\u{301}c\u{301}' . ends_with 'c' Regex_Matcher.Value . should_be_false - - 'rzeczywistos\u{301}c\u{301}' . ends_with 'Ć' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) . should_be_true - "fOOBar" . ends_with ".A." (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) . should_be_true - "faaaar" . ends_with ".o." (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) . should_be_false - - long_text = """ - Hnnnn EOL - SOL - long_text . ends_with "EOL.SOL" (Regex_Matcher.Value dot_matches_newline=True) . should_be_true - long_text . ends_with "EOL.SOL" (Regex_Matcher.Value dot_matches_newline=False) . should_be_false - - "zzzaaa" . ends_with "a|b" Regex_Matcher.Value . should_be_true - "zzzbbb" . ends_with "a|b" Regex_Matcher.Value . should_be_true - "aaazzz" . ends_with "a|b" Regex_Matcher.Value . should_be_false - "bbbzzz" . ends_with "a|b" Regex_Matcher.Value . should_be_false - "zzzaaa" . ends_with "(a|b){2}" Regex_Matcher.Value . should_be_true - "zzzbbb" . ends_with "(a|b){2}" Regex_Matcher.Value . should_be_true - "aaazzz" . ends_with "(a|b){2}" Regex_Matcher.Value . should_be_false - "ABC" . ends_with "C\z" Regex_Matcher.Value . should_be_true - "ABC" . ends_with "\AC\z" Regex_Matcher.Value . should_be_false - "foobar" . ends_with "" Regex_Matcher.Value . should_be_true - "" . ends_with "" Regex_Matcher.Value . should_be_true + "Hello".ends_with "LO" Case_Sensitivity.Insensitive . should_be_true + + "rzeczywistość".ends_with 'C\u{301}' Case_Sensitivity.Insensitive . should_be_true + "rzeczywistość".ends_with 'C' Case_Sensitivity.Insensitive . should_be_false + 'rzeczywistos\u{301}c\u{301}'.ends_with 'Ć' Case_Sensitivity.Insensitive . should_be_true + 'rzeczywistos\u{301}c\u{301}'.ends_with 'C\u{301}' Case_Sensitivity.Insensitive . 
should_be_true + 'rzeczywistos\u{301}c\u{301}'.ends_with 'C' Case_Sensitivity.Insensitive . should_be_false + + "ABC" . ends_with "C" Case_Sensitivity.Insensitive . should_be_true + "ABC" . ends_with "c" Case_Sensitivity.Insensitive . should_be_true + "ABC" . ends_with "A" Case_Sensitivity.Insensitive . should_be_false + "" . ends_with "foo" Case_Sensitivity.Insensitive . should_be_false + "abc" . ends_with "" Case_Sensitivity.Insensitive . should_be_true + "" . ends_with "" Case_Sensitivity.Insensitive . should_be_true + "fOo FOO fOo" . ends_with "FoO" Case_Sensitivity.Insensitive . should_be_true Test.specify "should allow to pad a text" <| "Hello World!".pad 15 . should_equal "Hello World! " From e986a7ebad2f9515c6af54772696423bd7b87aa0 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 10:16:04 +0000 Subject: [PATCH 03/14] Missing import. --- .../lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso | 1 + 1 file changed, 1 insertion(+) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index 61d5d2a570ca..f9f845b09163 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -8,6 +8,7 @@ import project.Data.Numbers.Integer import project.Data.Range.Extensions import project.Data.Range.Range import project.Data.Text.Case.Case +import project.Data.Text.Case_Sensitivity.Case_Sensitivity import project.Data.Text.Encoding.Encoding import project.Data.Text.Location import project.Data.Text.Matching_Mode From 168d6ad24f95b704f7cd1c748a6513251beedfbc Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 10:50:31 +0000 Subject: [PATCH 04/14] Update `contains`. 
--- .../0.0.0-dev/src/Data/Text/Extensions.enso | 36 +++-------- .../Database/0.0.0-dev/src/Data/SQL_Type.enso | 2 +- .../0.0.0-dev/src/Data/Table_Conversions.enso | 15 ----- test/Tests/src/Data/Text_Spec.enso | 59 ++++--------------- 4 files changed, 21 insertions(+), 91 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index f9f845b09163..252f628e9dce 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -773,15 +773,12 @@ Text.ends_with self suffix case_sensitivity=Case_Sensitivity.Sensitive = case ca ## ALIAS Contains Checks whether `self` contains `sequence` as its substring. + Returns: `True` if term is found within `self`. `False` otherwise. Arguments: - term: The term to find. - - matcher: If a `Text_Matcher`, the text is compared using case-sensitivity - rules specified in the matcher. - If a `Regex_Matcher`, the term is used as a regular expression and matched - using the associated options. - - Returns: `True` if term is found within `self`. `False` otherwise. + - case_sensitivity: Specifies if the text values should be compared case + sensitively. ! Unicode Equality The definition of equality includes Unicode canonicalization. I.e. two @@ -789,19 +786,11 @@ Text.ends_with self suffix case_sensitivity=Case_Sensitivity.Sensitive = case ca ensures that different ways of expressing the same character in the underlying binary representation are considered equal. - This however is not always well handled by the regex engine. The behaviour - is as follows: - 'ś' . contains 's' == False 's\u{301}' . contains 's' == False 's\u{301}' . contains 'ś' == True 'ś' . contains 's\u{301}' == True - 'ś' . contains 's' Regex_Matcher == True - 's\u{301}' . contains 's' Regex_Matcher == True - 's\u{301}' . contains 'ś' Regex_Matcher == True - 'ś' . 
contains 's\u{301}' Regex_Matcher == True - > Example See if the text "Hello" contains the text "ell". @@ -815,21 +804,12 @@ Text.ends_with self suffix case_sensitivity=Case_Sensitivity.Sensitive = case ca > Example See if the text "Hello!" contains the text 'LO', ignoring case. - "Hello!".contains "LO" (Text_Matcher Case_Insensitive) - - > Example - See if the text "Hello!" contains any lowercase letters, using a regex. - - "Hello!".contains "[a-z]" Regex_Matcher -Text.contains : Text -> (Text_Matcher | Regex_Matcher) -> Boolean -Text.contains self term="" matcher=Text_Matcher.Case_Sensitive = case matcher of - Text_Matcher.Case_Sensitive -> Text_Utils.contains self term - Text_Matcher.Case_Insensitive locale -> + "Hello!".contains "LO" Case_Sensitivity.Insensitive +Text.contains : Text -> Case_Sensitivity -> Boolean +Text.contains self term="" case_sensitivity=Case_Sensitivity.Sensitive = case case_sensitivity of + Case_Sensitivity.Sensitive -> Text_Utils.contains self term + Case_Sensitivity.Insensitive locale -> Text_Utils.contains_case_insensitive self term locale.java_locale - _ : Regex_Matcher -> - compiled_pattern = matcher.compile term - match = compiled_pattern.match self Matching_Mode.First - match.is_nothing.not ## Takes an integer and returns a new text, consisting of `count` concatenated copies of `self`. diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso index 8d3a940c09f9..2f5a7efb20e8 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso @@ -140,4 +140,4 @@ type SQL_Type match more possible types. 
is_likely_text : Boolean is_likely_text self = - self.is_definitely_text || self.name.contains "text" Text_Matcher.Case_Insensitive + self.is_definitely_text || self.name.contains "text" Case_Sensitivity.Insensitive diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table_Conversions.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table_Conversions.enso index ce5799f5ea48..74355acf2d32 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table_Conversions.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table_Conversions.enso @@ -89,18 +89,3 @@ Table.from_objects value fields=Nothing = Table.new (used_fields.zip used_values) _ : Array -> Table.from_objects (Vector.from_polyglot_array value) fields _ -> Error.throw (Illegal_Argument.Error "Invalid value for Table.from_objects. Currently must be one of JS_Object, Vector, Array, Number, Boolean, Text and Nothing are supported (got "+(Meta.get_simple_type_name value)+").") - -## Converts a Text into a Table using a regular expression pattern -Text.parse_to_table : Text -> Text -> Boolean -> Table -Text.parse_to_table self pattern row_delimiter='\n' parse_value=False = - lines = self.split row_delimiter - - regex = Regex.compile pattern - first_match = regex.match lines.first . first - - no_groups = first_match.groups.length == 1 - filter = if no_groups then (v-> v.first) else (v-> v.drop 1) - rows = lines.map l-> filter (regex.match l . first . groups) - header = if no_groups then ["Value"] else 0.up_to no_groups.length i-> "Column "+(i+1).to_text - - Table.from_rows header rows diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index 5e158448f56c..8034ea10b723 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -809,58 +809,23 @@ spec = "Hello!".contains "lo" . should_be_true "Hello!".contains "Lo" . 
should_be_false - Test.specify "should allow for case-insensitive contains checks" <| - "Hello!".contains 'LO' Text_Matcher.Case_Insensitive . should_be_true - "FoObar" . contains "foo" Text_Matcher.Case_Insensitive . should_be_true - "aaaIAAA" . contains "i" Text_Matcher.Case_Insensitive . should_be_true - "Foo" . contains "bar" Text_Matcher.Case_Insensitive . should_be_false - "Ściana" . contains "ś" Text_Matcher.Case_Insensitive . should_be_true - "Ściana" . contains "s" Text_Matcher.Case_Insensitive . should_be_false - - "Straße" . contains "ss" . should_be_false - "Strasse" . contains "ß" . should_be_false - "Straße" . contains "ss" Text_Matcher.Case_Insensitive . should_be_true - "Strasse" . contains "ß" Text_Matcher.Case_Insensitive . should_be_true - - Test.specify "should allow for Regex contains checks" <| - "Hello!".contains "[a-z]" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "foobar" . contains "b.." (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "foob" . contains "b.." (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - - "123 meters and 4 centimeters" . contains "[0-9]+" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "foo" . contains "[0-9]+" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - 'ś' . contains 's' . should_be_false 's\u{301}' . contains 's' . should_be_false 's\u{301}' . contains 'ś' . should_be_true 'ś' . contains 's\u{301}' . should_be_true - ## These first two cases are not really desirable, but we are - documenting here what is the current behaviour. - ## This shows what regex is doing by default and we cannot easily fix - that. - 's\u{301}' . contains 's' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - 'ś' . contains 's' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . 
should_be_false - 's\u{301}' . contains 'ś' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - 'ś' . contains 's\u{301}' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - - "Cześć" . contains "ś" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - "Cześć" . contains 's\u{301}' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - 'Czes\u{301}c\u{301}' . contains 's\u{301}' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - 'Czes\u{301}c\u{301}' . contains 'ś' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true - ## These two tests below are disabled due to how regex is handling - letters with accents. See the tests above for explanation. - #"Cześć" . contains "s" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - #'Czes\u{301}c\u{301}' . contains 's' (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive) . should_be_false - - "fooBar" . contains "b.." (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) . should_be_true - "foar" . contains "b.." (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) . should_be_false - - long_text = """ - Hello from a long text. EOL - SOL Hmm... - long_text . contains "EOL.SOL" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=True) . should_be_true - long_text . contains "EOL.SOL" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=False) . should_be_false + Test.specify "should allow for case-insensitive contains checks" <| + "Hello!".contains 'LO' Case_Sensitivity.Insensitive . should_be_true + "FoObar" . contains "foo" Case_Sensitivity.Insensitive . should_be_true + "aaaIAAA" . contains "i" Case_Sensitivity.Insensitive . should_be_true + "Foo" . contains "bar" Case_Sensitivity.Insensitive . 
should_be_false + "Ściana" . contains "ś" Case_Sensitivity.Insensitive . should_be_true + "Ściana" . contains "s" Case_Sensitivity.Insensitive . should_be_false + + "Straße" . contains "ss" . should_be_false + "Strasse" . contains "ß" . should_be_false + "Straße" . contains "ss" Case_Sensitivity.Insensitive . should_be_true + "Strasse" . contains "ß" Case_Sensitivity.Insensitive . should_be_true Test.specify "should check for starts_with using Unicode normalization" <| "Hello".starts_with "He" . should_be_true From 0cd626947b45753141a8507e8614333f00b2ccb2 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 11:34:38 +0000 Subject: [PATCH 05/14] Add Case_Sensitivity.Default. --- .../Base/0.0.0-dev/src/Data/Ordering/Comparator.enso | 3 ++- .../Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso | 3 ++- .../Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso | 6 ++++++ .../Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso | 3 +++ .../Base/0.0.0-dev/src/Data/Text/Regex_Matcher.enso | 1 + .../Base/0.0.0-dev/src/Data/Text/Text_Ordering.enso | 6 ++---- .../lib/Standard/Database/0.0.0-dev/src/Data/Table.enso | 2 ++ .../0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso | 2 +- 8 files changed, 19 insertions(+), 7 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso index 08e8bedd91d3..81044e459658 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso @@ -33,12 +33,13 @@ new custom_comparator=Nothing = Specifies how to compare Text values within the Comparator.
for_text_ordering : Text_Ordering -> ObjectComparator for_text_ordering text_ordering = - case_sensitivity = text_ordering.case_sensitivity.if_nothing Case_Sensitivity.Sensitive + case_sensitivity = text_ordering.case_sensitivity case text_ordering.sort_digits_as_numbers of True -> txt_cmp a b = Natural_Order.compare a b case_sensitivity . to_sign new.withCustomTextComparator txt_cmp False -> case case_sensitivity of + Case_Sensitivity.Default -> new Case_Sensitivity.Sensitive -> new Case_Sensitivity.Insensitive locale -> new.withCaseInsensitivity locale.java_locale diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso index 90eb6f1f4c62..341e10df0840 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Natural_Order.enso @@ -23,8 +23,9 @@ polyglot java import com.ibm.icu.text.BreakIterator compare : Text -> Text -> Case_Sensitivity -> Ordering compare text1 text2 case_sensitivity=Case_Sensitivity.Sensitive = compare_text = case case_sensitivity of - Case_Sensitivity.Insensitive locale -> a -> b -> a.compare_to_ignore_case b locale + Case_Sensitivity.Default -> _.compare_to _ Case_Sensitivity.Sensitive -> _.compare_to _ + Case_Sensitivity.Insensitive locale -> a -> b -> a.compare_to_ignore_case b locale iter1 = BreakIterator.getCharacterInstance iter1.setText text1 diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso index 9469c88be9e7..7da05971b54c 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso @@ -3,6 +3,11 @@ import project.Data.Locale.Locale polyglot java import org.enso.base.text.TextFoldingStrategy type 
Case_Sensitivity + ## Use the default case-sensitivity for the environment. + For in-memory operations, will be case sensitive. + For database operations, will follow the collation default. + Default + ## Represents a case-sensitive comparison mode. Sensitive @@ -16,6 +21,7 @@ type Case_Sensitivity Creates a Java `TextFoldingStrategy` from the case sensitivity setting. folding_strategy : Case_Sensitivity -> TextFoldingStrategy folding_strategy case_sensitivity = case case_sensitivity of + Case_Sensitivity.Default -> TextFoldingStrategy.unicodeNormalizedFold Case_Sensitivity.Sensitive -> TextFoldingStrategy.unicodeNormalizedFold Case_Sensitivity.Insensitive locale -> TextFoldingStrategy.caseInsensitiveFold locale.java_locale diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index 252f628e9dce..6a8611a45e57 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -739,6 +739,7 @@ Text.from_codepoints codepoints = Text_Utils.from_codepoints codepoints.to_array "Hello!".starts_with "hello" Case_Sensitivity.Insensitive == True Text.starts_with : Text -> Case_Sensitivity -> Boolean Text.starts_with self prefix case_sensitivity=Case_Sensitivity.Sensitive = case case_sensitivity of + Case_Sensitivity.Default -> self.starts_with prefix Case_Sensitivity.Sensitive Case_Sensitivity.Sensitive -> Text_Utils.starts_with self prefix Case_Sensitivity.Insensitive locale -> self.take (Index_Sub_Range.First prefix.length) . 
equals_ignore_case prefix locale=locale @@ -766,6 +767,7 @@ Text.starts_with self prefix case_sensitivity=Case_Sensitivity.Sensitive = case "Hello World".ends_with "world" Case_Sensitivity.Insensitive == True Text.ends_with : Text -> Case_Sensitivity -> Boolean Text.ends_with self suffix case_sensitivity=Case_Sensitivity.Sensitive = case case_sensitivity of + Case_Sensitivity.Default -> self.ends_with suffix Case_Sensitivity.Sensitive Case_Sensitivity.Sensitive -> Text_Utils.ends_with self suffix Case_Sensitivity.Insensitive locale -> self.take (Index_Sub_Range.Last suffix.length) . equals_ignore_case suffix locale=locale @@ -807,6 +809,7 @@ Text.ends_with self suffix case_sensitivity=Case_Sensitivity.Sensitive = case ca "Hello!".contains "LO" Case_Sensitivity.Insensitive Text.contains : Text -> Case_Sensitivity -> Boolean Text.contains self term="" case_sensitivity=Case_Sensitivity.Sensitive = case case_sensitivity of + Case_Sensitivity.Default -> self.contains term Case_Sensitivity.Sensitive Case_Sensitivity.Sensitive -> Text_Utils.contains self term Case_Sensitivity.Insensitive locale -> Text_Utils.contains_case_insensitive self term locale.java_locale diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex_Matcher.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex_Matcher.enso index b4968e8797f6..fe430e2a7657 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex_Matcher.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex_Matcher.enso @@ -44,6 +44,7 @@ type Regex_Matcher compile : Text -> Pattern compile self pattern = case_insensitive = case self.case_sensitivity of + Case_Sensitivity.Default -> False Case_Sensitivity.Sensitive -> False ## TODO [RW] Currently locale is not supported in case-insensitive Regex matching. 
There are plans to revisit it: diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Text_Ordering.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Text_Ordering.enso index 07099d37597e..37dd7e4a123e 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Text_Ordering.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Text_Ordering.enso @@ -40,11 +40,9 @@ type Text_Ordering Case_Insensitive (locale:Locale=Locale.default) (sort_digits_as_numbers:Boolean=False) ## PRIVATE - Returns this ordering's case sensitivity setting. It will return - `Nothing` for the `Default` ordering, meaning that the case sensitivity - is to be determined by the backend. + Returns this ordering's case sensitivity setting. case_sensitivity : Case_Sensitivity case_sensitivity self = case self of - Text_Ordering.Default _ -> Nothing + Text_Ordering.Default _ -> Case_Sensitivity.Default Text_Ordering.Case_Sensitive _ -> Case_Sensitivity.Sensitive Text_Ordering.Case_Insensitive locale _ -> Case_Sensitivity.Insensitive locale diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 5525f1f262a5..5d01396da1c4 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -676,7 +676,9 @@ type Table _ -> Nothing key_columns = Warning.map_warnings_and_errors warning_mapper <| self.columns_helper.select_columns selector=columns reorder=True on_problems=on_problems + ## ToDo: This should be DB default text_case_insensitive = case case_sensitivity of + Case_Sensitivity.Default -> False Case_Sensitivity.Sensitive -> False Case_Sensitivity.Insensitive locale -> Helpers.assume_default_locale locale <| diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso 
b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index 983948c48f7b..306890e0ac97 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -267,7 +267,7 @@ make_order_descriptor internal_column sort_direction text_ordering = ## In the future we can modify this error to suggest using a custom defined collation. if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation.Error "Natural ordering is currently not supported. You may need to materialize the Table to perform this operation.") else case text_ordering.case_sensitivity of - Nothing -> + Case_Sensitivity.Default -> Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation=Nothing Case_Sensitivity.Sensitive -> Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation="ucs_basic" From e9121f4d32815c733249f244ce446d967023d7aa Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 11:56:38 +0000 Subject: [PATCH 06/14] Fix issues with Case_Sensitivity.Default. 
--- .../Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso | 2 +- .../Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso index 6bd2f5f86fba..ac583c0150b3 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso @@ -70,7 +70,7 @@ type SQLite_Dialect True -> if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation.Error "Natural ordering is not supported by the SQLite backend. You may need to materialize the Table to perform this operation.") else case text_ordering.case_sensitivity of - Nothing -> + Case_Sensitivity.Default -> Order_Descriptor.Value internal_column.expression sort_direction collation=Nothing Case_Sensitivity.Sensitive -> Order_Descriptor.Value internal_column.expression sort_direction collation="BINARY" diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso index 990cd8b30764..b83be0d5b2c4 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso @@ -306,6 +306,7 @@ sort_columns : Vector -> Sort_Direction -> Text_Ordering -> Vector sort_columns internal_columns order text_ordering = case_sensitivity = text_ordering.case_sensitivity.if_nothing Case_Sensitivity.Sensitive mapper = case case_sensitivity of + Case_Sensitivity.Default -> _.name Case_Sensitivity.Sensitive -> _.name Case_Sensitivity.Insensitive locale -> col -> col.name.to_case_insensitive_key locale=locale From f5f6ed3a6405304a4e3734b791a421c8354d7871 Mon Sep 17 
00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 12:30:32 +0000 Subject: [PATCH 07/14] Add tests for Case_Sensitivity.Default on Text. --- test/Tests/src/Data/Text_Spec.enso | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index 8034ea10b723..0a0b04875c1a 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -787,6 +787,9 @@ spec = Test.specify "should check for contains using Unicode normalization" <| "Hello".contains "ell" . should_be_true + "Hello".contains "eLl" . should_be_false + "Hello".contains "ell" Case_Sensitivity.Default . should_be_true + "Hello".contains "eLl" Case_Sensitivity.Default . should_be_false "Cześć".contains 's\u{301}' . should_be_true "Cześć".contains 'c\u{301}' . should_be_true @@ -806,6 +809,12 @@ spec = "" . contains "" . should_be_true "foo foo foo" . contains "foo" . should_be_true + "ABC" . contains "a" Case_Sensitivity.Default . should_be_false + "" . contains "foo" Case_Sensitivity.Default . should_be_false + "abc" . contains "" Case_Sensitivity.Default . should_be_true + "" . contains "" Case_Sensitivity.Default . should_be_true + "foo foo foo" . contains "foo" Case_Sensitivity.Default . should_be_true + "Hello!".contains "lo" . should_be_true "Hello!".contains "Lo" . should_be_false @@ -829,6 +838,9 @@ spec = Test.specify "should check for starts_with using Unicode normalization" <| "Hello".starts_with "He" . should_be_true + "Hello".starts_with "he" . should_be_false + "Hello".starts_with "He" Case_Sensitivity.Default . should_be_true + "Hello".starts_with "he" Case_Sensitivity.Default . should_be_false "Ściana".starts_with 'S\u{301}' . should_be_true "Ściana".starts_with 'Ś' . should_be_true @@ -839,6 +851,8 @@ spec = "ABC" . starts_with "A" . should_be_true "ABC" . starts_with "a" . should_be_false + "ABC" . starts_with "A" Case_Sensitivity.Default . should_be_true + "ABC" . 
starts_with "a" Case_Sensitivity.Default . should_be_false "" . starts_with "foo" . should_be_false "abc" . starts_with "" . should_be_true "" . starts_with "" . should_be_true @@ -873,6 +887,8 @@ spec = Test.specify "should check for ends_with using Unicode normalization" <| "Hello".ends_with "lo" . should_be_true "Hello".ends_with "LO" . should_be_false + "Hello".ends_with "lo" Case_Sensitivity.Default . should_be_true + "Hello".ends_with "LO" Case_Sensitivity.Default . should_be_false "rzeczywistość".ends_with 'c\u{301}' . should_be_true "rzeczywistość".ends_with 'ć' . should_be_true From ace611083eda7e487a0549814020f86aa571ff53 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 13:30:27 +0000 Subject: [PATCH 08/14] Adjust distinct to support Case_Sensitivity. Add SQLite full support. --- .../Database/0.0.0-dev/src/Data/Dialect.enso | 2 +- .../Database/0.0.0-dev/src/Data/Table.enso | 14 +++++++------- .../0.0.0-dev/src/Internal/Base_Generator.enso | 6 ++++-- .../Common/Database_Distinct_Helper.enso | 10 +++++----- .../src/Internal/Postgres/Postgres_Dialect.enso | 6 +++--- .../src/Internal/SQLite/SQLite_Dialect.enso | 16 ++++++++++++---- .../Standard/Table/0.0.0-dev/src/Data/Table.enso | 2 +- 7 files changed, 33 insertions(+), 23 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso index 33278f9b364c..1c8a3c1157c3 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso @@ -59,7 +59,7 @@ type Dialect ## PRIVATE Prepares a distinct operation. - prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table + prepare_distinct : Table -> Vector -> Case_Sensitivity -> Problem_Builder -> Table prepare_distinct self = Unimplemented.throw "This is an interface only." 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 5d01396da1c4..0369125b0627 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -669,21 +669,21 @@ type Table - If floating points values are present in the distinct columns, a `Floating_Point_Grouping` warning. distinct : Vector Text | Column_Selector -> Case_Sensitivity -> Problem_Behavior -> Table - distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Sensitive on_problems=Report_Warning = + distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Default on_problems=Report_Warning = problem_builder = Problem_Builder.new warning_mapper error = case error of No_Output_Columns -> Maybe.Some No_Input_Columns_Selected _ -> Nothing key_columns = Warning.map_warnings_and_errors warning_mapper <| self.columns_helper.select_columns selector=columns reorder=True on_problems=on_problems - ## ToDo: This should be DB default - text_case_insensitive = case case_sensitivity of - Case_Sensitivity.Default -> False - Case_Sensitivity.Sensitive -> False + + text_case_sensitivity = case case_sensitivity of Case_Sensitivity.Insensitive locale -> Helpers.assume_default_locale locale <| - True - new_table = self.connection.dialect.prepare_distinct self key_columns text_case_insensitive problem_builder + case_sensitivity + _ -> case_sensitivity + + new_table = self.connection.dialect.prepare_distinct self key_columns text_case_sensitivity problem_builder problem_builder.attach_problems_before on_problems new_table ## Joins two tables according to the specified join conditions. 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso index 3d6a3e74c09f..d7d40cd6a3bb 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso @@ -178,7 +178,7 @@ base_dialect = functions = [["COALESCE", make_function "COALESCE"], ["ROW_MIN", make_function "MIN"], ["ROW_MAX", make_function "MAX"]] agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"] counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]] - text = [is_empty, bin "LIKE", simple_equals_ignore_case, fold_case] + text = [is_empty, bin "LIKE", simple_equals_ignore_case, fold_case, make_case_sensitive] nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]] contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]] base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains) @@ -293,11 +293,13 @@ generate_from_part dialect from_spec = case from_spec of sub = generate_query dialect (Query.Select columns context) sub.paren ++ alias dialect as_name - ## PRIVATE fold_case = lift_unary_op "FOLD_CASE" arg-> code "LOWER(UPPER(" ++ arg ++ "))" +## PRIVATE +make_case_sensitive = lift_unary_op "MAKE_CASE_SENSITIVE" arg->arg + ## PRIVATE simple_equals_ignore_case = Base_Generator.lift_binary_op "equals_ignore_case" a-> b-> code "LOWER(UPPER(" ++ a ++ ")) = LOWER(UPPER(" ++ b ++ "))" diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso index b28a79be20f1..2cc6e096517b 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso +++ 
b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso @@ -5,13 +5,13 @@ from Standard.Table.Errors import Floating_Point_Grouping import project.Internal.IR.SQL_Expression.SQL_Expression ## PRIVATE -make_distinct_expression text_case_insensitive problem_builder key_column = +make_distinct_expression text_case_sensitivity problem_builder key_column = if key_column.sql_type.is_definitely_double then problem_builder.report_other_warning (Floating_Point_Grouping.Error key_column.name) expr = key_column.expression - needs_case_fold = text_case_insensitive && key_column.sql_type.is_definitely_text - case needs_case_fold of - True -> SQL_Expression.Operation "FOLD_CASE" [expr] - False -> expr + if key_column.sql_type.is_definitely_text.not then expr else case text_case_sensitivity of + Case_Sensitivity.Insensitive _ -> SQL_Expression.Operation "FOLD_CASE" [expr] + Case_Sensitivity.Sensitive -> SQL_Expression.Operation "MAKE_CASE_SENSITIVE" [expr] + Case_Sensitivity.Default -> expr diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index 306890e0ac97..5749904586c1 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -81,13 +81,13 @@ type Postgres_Dialect ## PRIVATE Prepares a distinct operation. 
- prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table - prepare_distinct self table key_columns text_case_insensitive problem_builder = + prepare_distinct : Table -> Vector -> Case_Sensitivity -> Problem_Builder -> Table + prepare_distinct self table key_columns text_case_sensitivity problem_builder = setup = table.context.as_subquery table.name+"_inner" [table.internal_columns] new_columns = setup.new_columns.first column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c] new_key_columns = key_columns.map c-> column_mapping.at c.name - distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_insensitive problem_builder) + distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_sensitivity problem_builder) new_context = Context.for_subquery setup.subquery . set_distinct_on distinct_expressions table.updated_context_and_columns new_context new_columns subquery=True diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso index ac583c0150b3..b9158dd19928 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso @@ -100,19 +100,19 @@ type SQLite_Dialect ## PRIVATE Prepares a distinct operation. 
- prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table - prepare_distinct self table key_columns text_case_insensitive problem_builder = + prepare_distinct : Table -> Vector -> Case_Sensitivity -> Problem_Builder -> Table + prepare_distinct self table key_columns text_case_sensitivity problem_builder = setup = table.context.as_subquery table.name+"_inner" [table.internal_columns] new_columns = setup.new_columns.first column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c] new_key_columns = key_columns.map c-> column_mapping.at c.name - distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_insensitive problem_builder) + distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_sensitivity problem_builder) new_context = Context.for_subquery setup.subquery . set_groups distinct_expressions table.updated_context_and_columns new_context new_columns subquery=True ## PRIVATE make_internal_generator_dialect = - text = [starts_with, contains, ends_with]+concat_ops + text = [starts_with, contains, ends_with, fold_case, make_case_sensitive]+concat_ops counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]] stats = [agg_stddev_pop, agg_stddev_samp] arith_extensions = [decimal_div] @@ -235,6 +235,14 @@ ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub-> res = str ++ " LIKE ('%' || " ++ sub ++ ")" res.paren +## PRIVATE +fold_case = Base_Generator.lift_unary_op "FOLD_CASE" arg-> + code "((" ++ arg ++ ") COLLATE NOCASE)" + +## PRIVATE +make_case_sensitive = Base_Generator.lift_unary_op "MAKE_CASE_SENSITIVE" arg-> + code "((" ++ arg ++ ") COLLATE BINARY)" + ## PRIVATE make_contains_expr expr substring = code "instr(" ++ expr ++ ", " ++ substring ++ ") > 0" diff --git 
a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index dcd1bf062d9c..dacf8c9ef914 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -675,7 +675,7 @@ type Table - If floating points values are present in the distinct columns, a `Floating_Point_Grouping` warning. distinct : Vector Text | Column_Selector -> Case_Sensitivity -> Problem_Behavior -> Table - distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Sensitive on_problems=Report_Warning = + distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Default on_problems=Report_Warning = warning_mapper error = case error of No_Output_Columns -> Maybe.Some No_Input_Columns_Selected _ -> Nothing From 03efd67fee83eaf07d66d9e45599b8d4cb054e0f Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 14:06:42 +0000 Subject: [PATCH 09/14] Tidy up. --- .../src/Data/Ordering/Comparator.enso | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso index 81044e459658..4dc584aaabd0 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Ordering/Comparator.enso @@ -32,14 +32,12 @@ new custom_comparator=Nothing = - text_ordering: Specifies how to compare Text values within the Comparator. for_text_ordering : Text_Ordering -> ObjectComparator -for_text_ordering text_ordering = - case_sensitivity = text_ordering.case_sensitivity - case text_ordering.sort_digits_as_numbers of - True -> - txt_cmp a b = Natural_Order.compare a b case_sensitivity . 
to_sign - new.withCustomTextComparator txt_cmp - False -> case case_sensitivity of - Case_Sensitivity.Default -> new - Case_Sensitivity.Sensitive -> new - Case_Sensitivity.Insensitive locale -> - new.withCaseInsensitivity locale.java_locale +for_text_ordering text_ordering = case text_ordering.sort_digits_as_numbers of + True -> + txt_cmp a b = Natural_Order.compare a b text_ordering.case_sensitivity . to_sign + new.withCustomTextComparator txt_cmp + False -> case text_ordering.case_sensitivity of + Case_Sensitivity.Default -> new + Case_Sensitivity.Sensitive -> new + Case_Sensitivity.Insensitive locale -> + new.withCaseInsensitivity locale.java_locale From fb5d9163607aa0c70bc041f3bd5fa25db61ff269 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 14:08:50 +0000 Subject: [PATCH 10/14] Changelog. --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fe03969fbc3..afb7f8b12ba2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -282,6 +282,8 @@ - [Implemented `Table.union` for the in-memory backend.][4052] - [Implemented `Table.cross_join` and `Table.zip` for the in-memory backend.][4063] +- [Updated `Text.starts_with`, `Text.ends_with` and `Text.contains` to new + simpler API.][4078] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -441,6 +443,7 @@ [4044]: https://github.com/enso-org/enso/pull/4044 [4052]: https://github.com/enso-org/enso/pull/4052 [4063]: https://github.com/enso-org/enso/pull/4063 +[4078]: https://github.com/enso-org/enso/pull/4078 #### Enso Compiler From 9720eeec4841fc81693b3a6d5ea9a8e3e2c84a81 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 14:46:44 +0000 Subject: [PATCH 11/14] Fix benchmark. 
--- test/Benchmarks/src/Text/Contains.enso | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/Benchmarks/src/Text/Contains.enso b/test/Benchmarks/src/Text/Contains.enso index e9bd99365969..6e13fd1b3596 100644 --- a/test/Benchmarks/src/Text/Contains.enso +++ b/test/Benchmarks/src/Text/Contains.enso @@ -20,12 +20,8 @@ bench = big_template = make_alpha_template 10000 big_random = Vector.new 200 _-> faker.string_value big_template - Bench.measure (check_all big_random ["AAAAAA"] Text_Matcher.Case_Sensitive) suite_prefix+" exact" 10 10 - Bench.measure (check_all big_random ["AAAAAA"] Text_Matcher.Case_Insensitive) suite_prefix+" case-insensitive" 10 10 - Bench.measure (check_all big_random ["AAAAAA"] Regex_Matcher.Value) suite_prefix+" exact regex" 10 10 - Bench.measure (check_all big_random ["AAAAAA"] (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive)) suite_prefix+" case-insensitive regex" 10 10 - Bench.measure (check_all big_random ["A.{5000}A"] Regex_Matcher.Value) suite_prefix+" const-width regex" 10 10 - Bench.measure (check_all big_random ["AAAAA.*AAAAA"] Regex_Matcher.Value) suite_prefix+" wildcard regex" 10 10 + Bench.measure (check_all big_random ["AAAAAA"] Case_Sensitivity.Sensitive) suite_prefix+" exact" 10 10 + Bench.measure (check_all big_random ["AAAAAA"] Case_Sensitivity.Insensitive) suite_prefix+" case-insensitive" 10 10 bench_contains "Text.contains " (Faker.upper_case_letters + Faker.lower_case_letters + 'ąę\u{301}\u{302}\u{303}\u{321}'.char_vector) From 240d1f32c6555f817ff3313fdd8c905ad394efe9 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 24 Jan 2023 18:12:09 +0000 Subject: [PATCH 12/14] PR comments. 
--- .../Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso | 4 ++-- .../lib/Standard/Database/0.0.0-dev/src/Data/Table.enso | 8 +------- .../Database/0.0.0-dev/src/Internal/Base_Generator.enso | 3 ++- .../src/Internal/Common/Database_Distinct_Helper.enso | 5 ++++- .../0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso | 4 ++-- .../0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso | 4 ++-- 6 files changed, 13 insertions(+), 15 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso index 7da05971b54c..43d30d9f05fe 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso @@ -4,8 +4,8 @@ polyglot java import org.enso.base.text.TextFoldingStrategy type Case_Sensitivity ## Use the default case-sensitivity for the environment. - For in-memory operations, will be case sensitive. - For database operations, will follow the collation default. + For in-memory operations, it will be case sensitive. + For database operations, it will follow the collation default. Default ## Represents a case-sensitive comparison mode. 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 0369125b0627..d4eb9ddfa174 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -677,13 +677,7 @@ type Table key_columns = Warning.map_warnings_and_errors warning_mapper <| self.columns_helper.select_columns selector=columns reorder=True on_problems=on_problems - text_case_sensitivity = case case_sensitivity of - Case_Sensitivity.Insensitive locale -> - Helpers.assume_default_locale locale <| - case_sensitivity - _ -> case_sensitivity - - new_table = self.connection.dialect.prepare_distinct self key_columns text_case_sensitivity problem_builder + new_table = self.connection.dialect.prepare_distinct self key_columns case_sensitivity problem_builder problem_builder.attach_problems_before on_problems new_table ## Joins two tables according to the specified join conditions. 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso index d7d40cd6a3bb..af9d432a11bd 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso @@ -298,7 +298,8 @@ fold_case = lift_unary_op "FOLD_CASE" arg-> code "LOWER(UPPER(" ++ arg ++ "))" ## PRIVATE -make_case_sensitive = lift_unary_op "MAKE_CASE_SENSITIVE" arg->arg +make_case_sensitive = lift_unary_op "MAKE_CASE_SENSITIVE" arg-> + Error.throw <| Unsupported_Database_Operation.Error ("Case sensitive operations are not currently supported by this connection.") ## PRIVATE simple_equals_ignore_case = Base_Generator.lift_binary_op "equals_ignore_case" a-> b-> diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso index 2cc6e096517b..66a0c3724f3e 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso @@ -2,6 +2,7 @@ from Standard.Base import all from Standard.Table.Errors import Floating_Point_Grouping +import project.Internal.Helpers import project.Internal.IR.SQL_Expression.SQL_Expression ## PRIVATE @@ -12,6 +13,8 @@ make_distinct_expression text_case_sensitivity problem_builder key_column = expr = key_column.expression if key_column.sql_type.is_definitely_text.not then expr else case text_case_sensitivity of - Case_Sensitivity.Insensitive _ -> SQL_Expression.Operation "FOLD_CASE" [expr] + Case_Sensitivity.Insensitive locale -> + Helpers.assume_default_locale locale <| + SQL_Expression.Operation "FOLD_CASE" [expr] Case_Sensitivity.Sensitive -> SQL_Expression.Operation 
"MAKE_CASE_SENSITIVE" [expr] Case_Sensitivity.Default -> expr diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index 5749904586c1..98562d4c30ec 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -82,12 +82,12 @@ type Postgres_Dialect ## PRIVATE Prepares a distinct operation. prepare_distinct : Table -> Vector -> Case_Sensitivity -> Problem_Builder -> Table - prepare_distinct self table key_columns text_case_sensitivity problem_builder = + prepare_distinct self table key_columns case_sensitivity problem_builder = setup = table.context.as_subquery table.name+"_inner" [table.internal_columns] new_columns = setup.new_columns.first column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c] new_key_columns = key_columns.map c-> column_mapping.at c.name - distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_sensitivity problem_builder) + distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression case_sensitivity problem_builder) new_context = Context.for_subquery setup.subquery . set_distinct_on distinct_expressions table.updated_context_and_columns new_context new_columns subquery=True diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso index b9158dd19928..464d6e0bfed5 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso @@ -101,12 +101,12 @@ type SQLite_Dialect ## PRIVATE Prepares a distinct operation. 
prepare_distinct : Table -> Vector -> Case_Sensitivity -> Problem_Builder -> Table - prepare_distinct self table key_columns text_case_sensitivity problem_builder = + prepare_distinct self table key_columns case_sensitivity problem_builder = setup = table.context.as_subquery table.name+"_inner" [table.internal_columns] new_columns = setup.new_columns.first column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c] new_key_columns = key_columns.map c-> column_mapping.at c.name - distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_sensitivity problem_builder) + distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression case_sensitivity problem_builder) new_context = Context.for_subquery setup.subquery . set_groups distinct_expressions table.updated_context_and_columns new_context new_columns subquery=True From 4262c5ffed027c273626d39853cbd36f23c0644a Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Wed, 25 Jan 2023 08:36:35 +0000 Subject: [PATCH 13/14] Trailing space... --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 216cc71b751a..a334fe0f4163 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -287,7 +287,7 @@ - [Implemented `Table.union` for the in-memory backend.][4052] - [Implemented `Table.cross_join` and `Table.zip` for the in-memory backend.][4063] -- [Updated `Text.starts_with`, `Text.ends_with` and `Text.contains` to new +- [Updated `Text.starts_with`, `Text.ends_with` and `Text.contains` to new simpler API.][4078] [debug-shortcuts]: From 09578e01a8b845c6bd370221ceda5a101f64abfc Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Wed, 25 Jan 2023 11:03:22 +0000 Subject: [PATCH 14/14] Add Case Sensitive support to Postgres. Sort default on Table.distinct. 
--- .../lib/Standard/Database/0.0.0-dev/src/Data/Table.enso | 2 +- .../Database/0.0.0-dev/src/Internal/Base_Generator.enso | 2 +- .../0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso | 4 ++++ distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 3e6e4ef4ab6e..8c1f0161b178 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -673,7 +673,7 @@ type Table - If floating points values are present in the distinct columns, a `Floating_Point_Grouping` warning. distinct : Vector Text | Column_Selector -> Case_Sensitivity -> Problem_Behavior -> Table - distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Sensitive on_problems=Report_Warning = + distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Default on_problems=Report_Warning = key_columns = self.columns_helper.select_columns selector=columns reorder=True on_problems=Problem_Behavior.Report_Error . 
catch No_Output_Columns _-> Error.throw No_Input_Columns_Selected problem_builder = Problem_Builder.new diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso index af9d432a11bd..44c6c6b11ffc 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso @@ -298,7 +298,7 @@ fold_case = lift_unary_op "FOLD_CASE" arg-> code "LOWER(UPPER(" ++ arg ++ "))" ## PRIVATE -make_case_sensitive = lift_unary_op "MAKE_CASE_SENSITIVE" arg-> +make_case_sensitive = lift_unary_op "MAKE_CASE_SENSITIVE" _-> Error.throw <| Unsupported_Database_Operation.Error ("Case sensitive operations are not currently supported by this connection.") ## PRIVATE diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index 98562d4c30ec..2adb785cf6a9 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -250,6 +250,10 @@ ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub-> res = str ++ " LIKE CONCAT('%', " ++ sub ++ ")" res.paren +## PRIVATE +make_case_sensitive = Base_Generator.lift_unary_op "MAKE_CASE_SENSITIVE" arg-> + code "((" ++ arg ++ ') COLLATE "C.utf8")' + ## PRIVATE make_contains_expr expr substring = code "position(" ++ substring ++ " in " ++ expr ++ ") > 0" diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 14f5290270ed..cd77dc63529c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -680,7 
+680,7 @@ type Table - If floating points values are present in the distinct columns, a `Floating_Point_Grouping` warning. distinct : Vector Text | Column_Selector -> Case_Sensitivity -> Problem_Behavior -> Table - distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Sensitive on_problems=Report_Warning = + distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Default on_problems=Report_Warning = key_columns = self.columns_helper.select_columns selector=columns reorder=True on_problems=Problem_Behavior.Report_Error . catch No_Output_Columns _-> Error.throw No_Input_Columns_Selected java_columns = key_columns.map .java_column