From d83fff3b3b9dd0fd6eef862e97f883d171367041 Mon Sep 17 00:00:00 2001 From: Ulrik Sverdrup Date: Tue, 29 Nov 2016 04:11:12 +0100 Subject: [PATCH] Use more specific panic message for &str slicing errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate out of bounds errors from character boundary errors, and print more details for character boundary errors. Example: &"abcαβγ"[..4] thread 'str::test_slice_fail_boundary_1' panicked at 'byte index 4 is not a char boundary; it is inside `α` (bytes 3..5) of `abcαβγ`' --- src/doc/book/strings.md | 4 ++-- src/libcollectionstest/str.rs | 16 ++++++++++++++-- src/libcore/str/mod.rs | 26 ++++++++++++++++++++++---- 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/doc/book/strings.md b/src/doc/book/strings.md index 6af15d8768363..a2146b669e3c6 100644 --- a/src/doc/book/strings.md +++ b/src/doc/book/strings.md @@ -163,8 +163,8 @@ let hachi = &dog[0..2]; with this error: ```text -thread 'main' panicked at 'index 0 and/or 2 in `忠犬ハチ公` do not lie on -character boundary' +thread 'main' panicked at 'byte index 2 is not a char boundary; it is inside '忠' +(bytes 0..3) of `忠犬ハチ公`' ``` ## Concatenation diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 14a0819d381bc..9c3c3740aa96f 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -383,17 +383,29 @@ tempus vel, gravida nec quam."; // check the panic includes the prefix of the sliced string #[test] -#[should_panic(expected="Lorem ipsum dolor sit amet")] +#[should_panic(expected="byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")] fn test_slice_fail_truncated_1() { &LOREM_PARAGRAPH[..1024]; } // check the truncation in the panic message #[test] -#[should_panic(expected="luctus, im`[...] do not lie on character boundary")] +#[should_panic(expected="luctus, im`[...]")] fn test_slice_fail_truncated_2() { &LOREM_PARAGRAPH[..1024]; } +#[test] +#[should_panic(expected="byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of")] +fn test_slice_fail_boundary_1() { + &"abcαβγ"[4..]; +} + +#[test] +#[should_panic(expected="byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of")] +fn test_slice_fail_boundary_2() { + &"abcαβγ"[2..6]; +} + #[test] fn test_slice_from() { assert_eq!(&"abcd"[0..], "abcd"); diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index b4cd52e59f658..7081c3ebf43a0 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -1741,13 +1741,31 @@ fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) { #[cold] fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! { const MAX_DISPLAY_LENGTH: usize = 256; - let (truncated, s) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH); + let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH); let ellipsis = if truncated { "[...]" } else { "" }; + // 1. out of bounds + if begin > s.len() || end > s.len() { + let oob_index = if begin > s.len() { begin } else { end }; + panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis); + } + + // 2. begin <= end assert!(begin <= end, "begin <= end ({} <= {}) when slicing `{}`{}", - begin, end, s, ellipsis); - panic!("index {} and/or {} in `{}`{} do not lie on character boundary", - begin, end, s, ellipsis); + begin, end, s_trunc, ellipsis); + + // 3. character boundary + let index = if !s.is_char_boundary(begin) { begin } else { end }; + // find the character + let mut char_start = index; + while !s.is_char_boundary(char_start) { + char_start -= 1; + } + // `char_start` must be less than len and a char boundary + let ch = s[char_start..].chars().next().unwrap(); + let char_range = char_start .. char_start + ch.len_utf8(); + panic!("byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}", + index, ch, char_range, s_trunc, ellipsis); } #[stable(feature = "core", since = "1.6.0")]