From 7b283f33bb4d7da05269a7870a973df7a4005fad Mon Sep 17 00:00:00 2001 From: Nikita Pekin Date: Mon, 4 Apr 2022 12:19:17 -0400 Subject: [PATCH] feat: Add `Text.reverse` extension for string reversal This commit implements `Text.reverse` as an extension on `Text`. `Text.reverse` reverses strings. For example: `"Hello World!".reverse` results in `"!dlroW olleH"`. Strings are reversed by their Extended Grapheme Clusters, not by their individual code units or code points. This has some performance implications because we need to find these grapheme cluster boundaries when iterating. To do so, `BreakIterator.getCharacterInstance` is used. Implements: https://www.pivotaltracker.com/n/projects/2539304/stories/181265419 --- CHANGELOG.md | 2 ++ .../0.0.0-dev/src/Data/Text/Extensions.enso | 23 +++++++++++++++++++ test/Tests/src/Data/Text_Spec.enso | 11 +++++++++ 3 files changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c8d5d350b1f..23b6d6233ce7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ - [Added warning handling to `Table.aggregate`][3349] - [Improved performance of `Table.aggregate` and full warnings implementation] [3364] +- [Implemented `Text.reverse`][3377] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -140,6 +141,7 @@ [3349]: https://github.com/enso-org/enso/pull/3349 [3361]: https://github.com/enso-org/enso/pull/3361 [3364]: https://github.com/enso-org/enso/pull/3364 +[3377]: https://github.com/enso-org/enso/pull/3377 #### Enso Compiler diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index 1dfb3fea5f87..c025dda80dcd 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -25,6 +25,7 @@ export Standard.Base.Data.Text.Line_Ending_Style polyglot java import 
com.ibm.icu.lang.UCharacter polyglot java import com.ibm.icu.text.BreakIterator +polyglot java import java.lang.StringBuilder polyglot java import org.enso.base.Text_Utils ## UNSTABLE @@ -58,6 +59,28 @@ Text.length = @Tail_Call count (accum + 1) iterator.next count 0 iterator.next +## Returns a new `Text` object with the characters in the reverse order of the input. + + ! What is a Character? + A character is defined as an Extended Grapheme Cluster, see Unicode + Standard Annex 29. This is the smallest unit that still has semantic + meaning in most text-processing applications. + + > Example + Reverse the text "Hello, world!". + + "Hello, world!".reverse +Text.reverse : Text +Text.reverse = + reverseStringBuilder = StringBuilder.new this.length + iterator = BreakIterator.getCharacterInstance + iterator.setText this + + iterate prev next = if next == -1 then reverseStringBuilder.toString else + reverseStringBuilder.append (Text_Utils.substring this next prev) + @Tail_Call iterate next iterator.previous + iterate iterator.last iterator.previous + ## Applies the provided `function` to each character in `this`. Arguments: diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index 72d2831f9f5b..19a3ee6d7802 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -496,6 +496,17 @@ spec = str.at -6 . should_fail_with Index_Out_Of_Bounds_Error str.at 5 . should_fail_with Index_Out_Of_Bounds_Error + Test.specify "should be able to reverse characters" <| + "Hello World!".reverse . should_equal "!dlroW olleH" + + "".reverse . should_equal "" + 'e\u{301}'.reverse . should_equal 'e\u{301}' + 'e\u{301}\u00E9'.reverse . should_equal '\u00E9e\u{301}' + 'e\u{321}\u{360}'.reverse . should_equal 'e\u{321}\u{360}' + 'Iñtërnâtiônàlizætiøn☃💩'.reverse . should_equal '💩☃nøitæzilànôitânrëtñI' + 'ほげほげ'.reverse . should_equal 'げほげほ' + '\u{10000}'.reverse . 
should_equal '\u{10000}' + Test.specify "should allow to iterate over characters" <| str = kshi + accent_1 + accent_2 + 'abc' builder = Vector.new_builder