-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improvements to grapheme truncation for Utf32Str(ing)
This commit corrects the internal handling of grapheme truncation. Most notably, it fixes two bugs in the previous implementation of Utf32Str(ing):
1. Fixes a bug where an ASCII variant could have been returned even though the original string was not ASCII. (The converse, where a Unicode variant consists only of ASCII, is totally fine.)
2. Fixes the handling of Windows-style newlines (i.e. `\r\n`), since each of these is a single grapheme. Moreover, the `\r\n` grapheme is now mapped to `\n` rather than `\r`.
In particular, Utf32Str(ing)s constructed from text containing Windows-style newlines will result in Unicode variants, even if the string is entirely valid ASCII.
- Loading branch information
Showing
3 changed files
with
154 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
use crate::{Utf32Str, Utf32String}; | ||
|
||
#[test]
fn test_utf32str_ascii() {
    /// Asserts that `src` yields an ASCII (or non-ASCII) representation through
    /// each construction path exercised here: `Utf32Str::new` with a scratch
    /// buffer, `Utf32String::from(&str)`, and `Utf32String::from(String)`.
    ///
    /// `#[track_caller]` makes a failure point at the calling line below, and
    /// each assertion names the constructor and the offending input.
    #[track_caller]
    fn expect_ascii(src: &str, is_ascii: bool) {
        let mut buffer = Vec::new();
        assert_eq!(
            Utf32Str::new(src, &mut buffer).is_ascii(),
            is_ascii,
            "Utf32Str::new({:?})",
            src
        );
        assert_eq!(
            Utf32String::from(src).slice(..).is_ascii(),
            is_ascii,
            "Utf32String::from(&str) for {:?}",
            src
        );
        assert_eq!(
            Utf32String::from(src.to_owned()).slice(..).is_ascii(),
            is_ascii,
            "Utf32String::from(String) for {:?}",
            src
        );
    }

    // ascii
    expect_ascii("", true);
    expect_ascii("a", true);
    expect_ascii("a\nb", true);
    // `\n` followed by `\r` is expected to stay ASCII, unlike `\r\n` below.
    expect_ascii("\n\r", true);

    // not ascii
    expect_ascii("aü", false);
    expect_ascii("au\u{0308}", false);

    // windows-style newline: expected to produce a non-ASCII representation
    // even when every byte of the input is ASCII
    expect_ascii("a\r\nb", false);
    expect_ascii("ü\r\n", false);
    expect_ascii("\r\n", false);
}
|
||
#[test]
fn test_grapheme_truncation() {
    // Looks up the character stored at position `idx` of the full slice.
    let char_at = |text: &Utf32String, idx| text.slice(..).get(idx);

    // Plain ASCII input: every character is kept as-is.
    let plain = Utf32String::from("ab");
    assert_eq!(char_at(&plain, 0), 'a');
    assert_eq!(char_at(&plain, 1), 'b');

    // A windows-style newline collapses to a single '\n'.
    let crlf = Utf32String::from("\r\n");
    assert_eq!(char_at(&crlf, 0), '\n');

    // A multi-codepoint grapheme (`u` + U+0308) keeps only its first
    // character; the trailing "\r\n" again becomes '\n'.
    let mixed = Utf32String::from("u\u{0308}\r\n");
    assert_eq!(char_at(&mixed, 0), 'u');
    assert_eq!(char_at(&mixed, 1), '\n');
}