From 7b283f33bb4d7da05269a7870a973df7a4005fad Mon Sep 17 00:00:00 2001 From: Nikita Pekin Date: Mon, 4 Apr 2022 12:19:17 -0400 Subject: [PATCH 1/3] feat: Add `Text.reverse` extension for string reversal This commit implements `Text.reverse` as an extension on `Text`. `Text.reverse` reverses strings. For example: `"Hello World!".reverse` results in `"!dlroW olleH"`. Strings are reversed by their Extended Grapheme Clusters not by their characters. This has some performance implications because we need to find these grapheme cluster boundaries when iterating. To do so, `BreakIterator.getCharacterInstance` is used. Implements: https://www.pivotaltracker.com/n/projects/2539304/stories/181265419 --- CHANGELOG.md | 2 ++ .../0.0.0-dev/src/Data/Text/Extensions.enso | 23 +++++++++++++++++++ test/Tests/src/Data/Text_Spec.enso | 11 +++++++++ 3 files changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c8d5d350b1f..23b6d6233ce7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ - [Added warning handling to `Table.aggregate`][3349] - [Improved performance of `Table.aggregate` and full warnings implementation] [3364] +- [Implemented `Text.reverse`][3377] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -140,6 +141,7 @@ [3349]: https://github.com/enso-org/enso/pull/3349 [3361]: https://github.com/enso-org/enso/pull/3361 [3364]: https://github.com/enso-org/enso/pull/3364 +[3377]: https://github.com/enso-org/enso/pull/3377 #### Enso Compiler diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index 1dfb3fea5f87..c025dda80dcd 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -25,6 +25,7 @@ export Standard.Base.Data.Text.Line_Ending_Style polyglot java import com.ibm.icu.lang.UCharacter polyglot java import com.ibm.icu.text.BreakIterator +polyglot java import java.lang.StringBuilder polyglot java import org.enso.base.Text_Utils ## UNSTABLE @@ -58,6 +59,28 @@ Text.length = @Tail_Call count (accum + 1) iterator.next count 0 iterator.next +## Returns a new `Text` object with the characters in the reverse order of the input. + + ! What is a Character? + A character is defined as an Extended Grapheme Cluster, see Unicode + Standard Annex 29. This is the smallest unit that still has semantic + meaning in most text-processing applications. + + > Example + Reverse the text "Hello, world!". + + "Hello, world!".reverse +Text.reverse : Text +Text.reverse = + reverseStringBuilder = StringBuilder.new this.length + iterator = BreakIterator.getCharacterInstance + iterator.setText this + + iterate prev next = if next == -1 then reverseStringBuilder.toString else + reverseStringBuilder.append (Text_Utils.substring this next prev) + @Tail_Call iterate next iterator.previous + iterate iterator.last iterator.previous + ## Applies the provided `function` to each character in `this`. Arguments: diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index 72d2831f9f5b..19a3ee6d7802 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -496,6 +496,17 @@ spec = str.at -6 . should_fail_with Index_Out_Of_Bounds_Error str.at 5 . should_fail_with Index_Out_Of_Bounds_Error + Test.specify "should be able to reverse characters" <| + "Hello World!".reverse . should_equal "!dlroW olleH" + + "".reverse . should_equal "" + 'e\u{301}'.reverse . should_equal 'e\u{301}' + 'e\u{301}\u00E9'.reverse . should_equal '\u00E9e\u{301}' + 'e\u{321}\u{360}'.reverse . should_equal 'e\u{321}\u{360}' + 'Iñtërnâtiônàlizætiøn☃💩'.reverse . should_equal '💩☃nøitæzilànôitânrëtñI' + 'ほげほげ'.reverse . should_equal 'げほげほ' + '\u{10000}'.reverse . should_equal '\u{10000}' + Test.specify "should allow to iterate over characters" <| str = kshi + accent_1 + accent_2 + 'abc' builder = Vector.new_builder From 221a58818b3daeef487f9df4bd4fdfd0b08eb55b Mon Sep 17 00:00:00 2001 From: Nikita Pekin Date: Tue, 5 Apr 2022 09:00:16 -0400 Subject: [PATCH 2/3] style: Add missing whitespace to documentation example for `Text.reverse` --- .../lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index c025dda80dcd..e62757460502 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -67,7 +67,7 @@ Text.length = meaning in most text-processing applications. > Example - Reverse the text "Hello, world!". + Reverse the text "Hello, world!". "Hello, world!".reverse Text.reverse : Text From 2b9425ff64b94a35f72b576a39a2f8de47411b57 Mon Sep 17 00:00:00 2001 From: Nikita Pekin Date: Tue, 5 Apr 2022 10:05:07 -0400 Subject: [PATCH 3/3] style: Indent code block to 4 spaces relative to caption --- .../lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index e62757460502..0dd6d8c7bc8b 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -69,7 +69,7 @@ Text.length = > Example Reverse the text "Hello, world!". - "Hello, world!".reverse + "Hello, world!".reverse Text.reverse : Text Text.reverse = reverseStringBuilder = StringBuilder.new this.length