Skip to content

Commit

Permalink
rust-lang#66219 documented unsafe in core::str
Browse files Browse the repository at this point in the history
  • Loading branch information
foeb committed Nov 9, 2019
1 parent 4679271 commit 26ef195
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 13 deletions.
6 changes: 4 additions & 2 deletions src/libcore/str/lossy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ use crate::str as core_str;
use crate::fmt::{self, Write};
use crate::mem;

// ignore-tidy-undocumented-unsafe

/// Lossy UTF-8 string.
#[unstable(feature = "str_internals", issue = "0")]
pub struct Utf8Lossy {
Expand All @@ -17,6 +15,7 @@ impl Utf8Lossy {
}

/// Reinterprets `bytes` as a `Utf8Lossy` without copying or validating the contents.
pub fn from_bytes(bytes: &[u8]) -> &Utf8Lossy {
// SAFETY: both use the same memory layout, and utf8 correctness isn't required
unsafe { mem::transmute(bytes) }
}

Expand Down Expand Up @@ -61,6 +60,7 @@ impl<'a> Iterator for Utf8LossyChunksIter<'a> {
while i < self.source.len() {
let i_ = i;

// SAFETY: 0 <= i < self.source.len()
let byte = unsafe { *self.source.get_unchecked(i) };
i += 1;

Expand All @@ -70,6 +70,7 @@ impl<'a> Iterator for Utf8LossyChunksIter<'a> {
let w = core_str::utf8_char_width(byte);

macro_rules! error { () => ({
// SAFETY: we have checked up to i that source is valid utf8
unsafe {
let r = Utf8LossyChunk {
valid: core_str::from_utf8_unchecked(&self.source[0..i_]),
Expand Down Expand Up @@ -130,6 +131,7 @@ impl<'a> Iterator for Utf8LossyChunksIter<'a> {
}

let r = Utf8LossyChunk {
// SAFETY: we have checked that the entire source is valid utf8
valid: unsafe { core_str::from_utf8_unchecked(self.source) },
broken: &[],
};
Expand Down
44 changes: 35 additions & 9 deletions src/libcore/str/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// ignore-tidy-filelength
// ignore-tidy-undocumented-unsafe

//! String manipulation.
//!
Expand Down Expand Up @@ -337,6 +336,7 @@ impl Utf8Error {
#[stable(feature = "rust1", since = "1.0.0")]
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
    match run_utf8_validation(v) {
        // SAFETY: validation just succeeded, so `v` holds well-formed UTF-8.
        Ok(()) => Ok(unsafe { from_utf8_unchecked(v) }),
        // Hand the validation error back to the caller unchanged.
        Err(error) => Err(error),
    }
}

Expand Down Expand Up @@ -375,6 +375,7 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
#[stable(feature = "str_mut_extras", since = "1.20.0")]
pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
    match run_utf8_validation(v) {
        // SAFETY: validation just succeeded, so `v` holds well-formed UTF-8.
        Ok(()) => Ok(unsafe { from_utf8_unchecked_mut(v) }),
        // Hand the validation error back to the caller unchanged.
        Err(error) => Err(error),
    }
}

Expand Down Expand Up @@ -567,7 +568,7 @@ impl<'a> Iterator for Chars<'a> {
#[inline]
fn next(&mut self) -> Option<char> {
next_code_point(&mut self.iter).map(|ch| {
// str invariant says `ch` is a valid Unicode Scalar Value
// SAFETY: str invariant says `ch` is a valid Unicode Scalar Value
unsafe {
char::from_u32_unchecked(ch)
}
Expand Down Expand Up @@ -616,7 +617,7 @@ impl<'a> DoubleEndedIterator for Chars<'a> {
#[inline]
fn next_back(&mut self) -> Option<char> {
next_code_point_reverse(&mut self.iter).map(|ch| {
// str invariant says `ch` is a valid Unicode Scalar Value
// SAFETY: str invariant says `ch` is a valid Unicode Scalar Value
unsafe {
char::from_u32_unchecked(ch)
}
Expand Down Expand Up @@ -648,6 +649,7 @@ impl<'a> Chars<'a> {
#[stable(feature = "iter_to_slice", since = "1.4.0")]
#[inline]
pub fn as_str(&self) -> &'a str {
    // Bytes the iterator has not yet consumed.
    let remaining = self.iter.as_slice();
    // SAFETY: `Chars` is only created from a `str`, so the bytes left in the
    // underlying iterator are valid UTF-8.
    unsafe { from_utf8_unchecked(remaining) }
}
}
Expand Down Expand Up @@ -1080,6 +1082,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
fn get_end(&mut self) -> Option<&'a str> {
if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
self.finished = true;
// SAFETY: self.start and self.end always lie on unicode boundaries
unsafe {
let string = self.matcher.haystack().get_unchecked(self.start..self.end);
Some(string)
Expand All @@ -1095,6 +1098,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {

let haystack = self.matcher.haystack();
match self.matcher.next_match() {
// SAFETY: Searcher guarantees that a and b lie on unicode boundaries
Some((a, b)) => unsafe {
let elt = haystack.get_unchecked(self.start..a);
self.start = b;
Expand All @@ -1120,11 +1124,13 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {

let haystack = self.matcher.haystack();
match self.matcher.next_match_back() {
// SAFETY: Searcher guarantees that a and b lie on unicode boundaries
Some((a, b)) => unsafe {
let elt = haystack.get_unchecked(b..self.end);
self.end = a;
Some(elt)
},
// SAFETY: self.start and self.end always lie on unicode boundaries
None => unsafe {
self.finished = true;
Some(haystack.get_unchecked(self.start..self.end))
Expand Down Expand Up @@ -1253,6 +1259,7 @@ where
impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
#[inline]
fn next(&mut self) -> Option<(usize, &'a str)> {
// SAFETY: Searcher guarantees that start and end lie on unicode boundaries
self.0.next_match().map(|(start, end)| unsafe {
(start, self.0.haystack().get_unchecked(start..end))
})
Expand All @@ -1262,6 +1269,7 @@ impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
fn next_back(&mut self) -> Option<(usize, &'a str)>
where P::Searcher: ReverseSearcher<'a>
{
// SAFETY: Searcher guarantees that start and end lie on unicode boundaries
self.0.next_match_back().map(|(start, end)| unsafe {
(start, self.0.haystack().get_unchecked(start..end))
})
Expand Down Expand Up @@ -1307,6 +1315,7 @@ where
impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
#[inline]
fn next(&mut self) -> Option<&'a str> {
// SAFETY: Searcher guarantees that start and end lie on unicode boundaries
self.0.next_match().map(|(a, b)| unsafe {
// Indices are known to be on utf8 boundaries
self.0.haystack().get_unchecked(a..b)
Expand All @@ -1317,6 +1326,7 @@ impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
fn next_back(&mut self) -> Option<&'a str>
where P::Searcher: ReverseSearcher<'a>
{
// SAFETY: Searcher guarantees that start and end lie on unicode boundaries
self.0.next_match_back().map(|(a, b)| unsafe {
// Indices are known to be on utf8 boundaries
self.0.haystack().get_unchecked(a..b)
Expand Down Expand Up @@ -1538,6 +1548,9 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
if align != usize::max_value() && align.wrapping_sub(index) % usize_bytes == 0 {
let ptr = v.as_ptr();
while index < blocks_end {
// SAFETY: since align - index and ascii_block_size are multiples of
// usize_bytes, ptr.add(index) is always aligned with a usize so we may cast
// directly to a const pointer.
unsafe {
let block = ptr.add(index) as *const usize;
// break if there is a nonascii byte
Expand Down Expand Up @@ -1760,6 +1773,7 @@ mod traits {
if self.start <= self.end &&
slice.is_char_boundary(self.start) &&
slice.is_char_boundary(self.end) {
// SAFETY: just checked that start and end are on a char boundary
Some(unsafe { self.get_unchecked(slice) })
} else {
None
Expand All @@ -1770,6 +1784,7 @@ mod traits {
if self.start <= self.end &&
slice.is_char_boundary(self.start) &&
slice.is_char_boundary(self.end) {
// SAFETY: just checked that start and end are on a char boundary
Some(unsafe { self.get_unchecked_mut(slice) })
} else {
None
Expand Down Expand Up @@ -1799,6 +1814,7 @@ mod traits {
if self.start <= self.end &&
slice.is_char_boundary(self.start) &&
slice.is_char_boundary(self.end) {
// SAFETY: just checked that start and end are on a char boundary
unsafe { self.get_unchecked_mut(slice) }
} else {
super::slice_error_fail(slice, self.start, self.end)
Expand Down Expand Up @@ -1827,6 +1843,7 @@ mod traits {
#[inline]
fn get(self, slice: &str) -> Option<&Self::Output> {
if slice.is_char_boundary(self.end) {
// SAFETY: just checked that end is on a char boundary
Some(unsafe { self.get_unchecked(slice) })
} else {
None
Expand All @@ -1835,6 +1852,7 @@ mod traits {
#[inline]
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
if slice.is_char_boundary(self.end) {
// SAFETY: just checked that end is on a char boundary
Some(unsafe { self.get_unchecked_mut(slice) })
} else {
None
Expand All @@ -1857,8 +1875,8 @@ mod traits {
}
#[inline]
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
// is_char_boundary checks that the index is in [0, .len()]
if slice.is_char_boundary(self.end) {
// SAFETY: just checked that end is on a char boundary
unsafe { self.get_unchecked_mut(slice) }
} else {
super::slice_error_fail(slice, 0, self.end)
Expand Down Expand Up @@ -1888,6 +1906,7 @@ mod traits {
#[inline]
fn get(self, slice: &str) -> Option<&Self::Output> {
if slice.is_char_boundary(self.start) {
// SAFETY: just checked that start is on a char boundary
Some(unsafe { self.get_unchecked(slice) })
} else {
None
Expand All @@ -1896,6 +1915,7 @@ mod traits {
#[inline]
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
if slice.is_char_boundary(self.start) {
// SAFETY: just checked that start is on a char boundary
Some(unsafe { self.get_unchecked_mut(slice) })
} else {
None
Expand All @@ -1920,8 +1940,8 @@ mod traits {
}
#[inline]
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
// is_char_boundary checks that the index is in [0, .len()]
if slice.is_char_boundary(self.start) {
// SAFETY: just checked that start is on a char boundary
unsafe { self.get_unchecked_mut(slice) }
} else {
super::slice_error_fail(slice, self.start, slice.len())
Expand Down Expand Up @@ -2167,7 +2187,6 @@ impl str {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[inline(always)]
// SAFETY: const sound because we transmute two types with the same layout
#[allow(unused_attributes)]
#[allow_internal_unstable(const_fn_union)]
pub const fn as_bytes(&self) -> &[u8] {
Expand All @@ -2176,6 +2195,7 @@ impl str {
str: &'a str,
slice: &'a [u8],
}
// SAFETY: const sound because we transmute two types with the same layout
unsafe { Slices { str: self }.slice }
}

Expand Down Expand Up @@ -2501,6 +2521,7 @@ impl str {
pub fn split_at(&self, mid: usize) -> (&str, &str) {
// is_char_boundary checks that the index is in [0, .len()]
if self.is_char_boundary(mid) {
// SAFETY: just checked that mid is on a char boundary
unsafe {
(self.get_unchecked(0..mid),
self.get_unchecked(mid..self.len()))
Expand Down Expand Up @@ -2548,6 +2569,7 @@ impl str {
if self.is_char_boundary(mid) {
let len = self.len();
let ptr = self.as_mut_ptr();
// SAFETY: just checked that mid is on a char boundary
unsafe {
(from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)),
from_utf8_unchecked_mut(slice::from_raw_parts_mut(
Expand Down Expand Up @@ -3746,8 +3768,8 @@ impl str {
if let Some((_, b)) = matcher.next_reject_back() {
j = b;
}
// SAFETY: Searcher is known to return valid indices
unsafe {
// Searcher is known to return valid indices
self.get_unchecked(i..j)
}
}
Expand Down Expand Up @@ -3785,8 +3807,8 @@ impl str {
if let Some((a, _)) = matcher.next_reject() {
i = a;
}
// SAFETY: Searcher is known to return valid indices
unsafe {
// Searcher is known to return valid indices
self.get_unchecked(i..self.len())
}
}
Expand Down Expand Up @@ -3833,8 +3855,8 @@ impl str {
if let Some((_, b)) = matcher.next_reject_back() {
j = b;
}
// SAFETY: Searcher is known to return valid indices
unsafe {
// Searcher is known to return valid indices
self.get_unchecked(0..j)
}
}
Expand Down Expand Up @@ -4029,6 +4051,7 @@ impl str {
/// ```
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
pub fn make_ascii_uppercase(&mut self) {
    // SAFETY: `str` and `[u8]` share the same layout, and ASCII case conversion
    // only rewrites bytes in the ASCII range, so the contents stay valid UTF-8.
    let bytes = unsafe { self.as_bytes_mut() };
    bytes.make_ascii_uppercase()
}
Expand All @@ -4054,6 +4077,7 @@ impl str {
/// ```
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
pub fn make_ascii_lowercase(&mut self) {
    // SAFETY: `str` and `[u8]` share the same layout, and ASCII case conversion
    // only rewrites bytes in the ASCII range, so the contents stay valid UTF-8.
    let bytes = unsafe { self.as_bytes_mut() };
    bytes.make_ascii_lowercase()
}
Expand Down Expand Up @@ -4216,6 +4240,7 @@ impl Default for &str {
#[stable(feature = "default_mut_str", since = "1.28.0")]
impl Default for &mut str {
    /// Creates an empty mutable str
    fn default() -> Self {
        // SAFETY: an empty byte slice is trivially valid UTF-8.
        unsafe { from_utf8_unchecked_mut(&mut []) }
    }
}

Expand Down Expand Up @@ -4270,6 +4295,7 @@ impl_fn_for_zst! {

#[derive(Clone)]
struct UnsafeBytesToStr impl<'a> Fn = |bytes: &'a [u8]| -> &'a str {
// SAFETY: not safe
unsafe { from_utf8_unchecked(bytes) }
};
}
Expand Down
18 changes: 16 additions & 2 deletions src/libcore/str/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
//! For more details, see the traits [`Pattern`], [`Searcher`],
//! [`ReverseSearcher`], and [`DoubleEndedSearcher`].

// ignore-tidy-undocumented-unsafe

#![unstable(feature = "pattern",
reason = "API not fully fleshed out and ready to be stabilized",
issue = "27721")]
Expand Down Expand Up @@ -276,6 +274,13 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
#[inline]
fn next(&mut self) -> SearchStep {
let old_finger = self.finger;
// 1. self.finger and self.finger_back are kept on unicode boundaries (this is invariant)
// 2. self.finger >= 0 since it starts at 0 and only increases
// 3. self.finger < self.finger_back because otherwise the char iter would return
// SearchStep::Done
// 4. self.finger comes before the end of the haystack because self.finger_back starts at
// the end and only decreases
// SAFETY: 1-4 guarantee safety of get_unchecked
let slice = unsafe { self.haystack.get_unchecked(old_finger..self.finger_back) };
let mut iter = slice.chars();
let old_len = iter.iter.len();
Expand Down Expand Up @@ -303,6 +308,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
return None;
};
// the last byte of the utf8 encoded needle
// SAFETY: we have an invariant that utf8_size < 5
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
if let Some(index) = memchr::memchr(last_byte, bytes) {
// The new finger is the index of the byte we found,
Expand Down Expand Up @@ -346,6 +352,13 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
#[inline]
fn next_back(&mut self) -> SearchStep {
let old_finger = self.finger_back;
// 1. self.finger and self.finger_back are kept on unicode boundaries (this is invariant)
// 2. self.finger >= 0 since it starts at 0 and only increases
// 3. self.finger < self.finger_back because otherwise the char iter would return
// SearchStep::Done
// 4. self.finger comes before the end of the haystack because self.finger_back starts at
// the end and only decreases
// SAFETY: 1-4 guarantee safety of get_unchecked
let slice = unsafe { self.haystack.get_unchecked(self.finger..old_finger) };
let mut iter = slice.chars();
let old_len = iter.iter.len();
Expand Down Expand Up @@ -373,6 +386,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
return None;
};
// the last byte of the utf8 encoded needle
// SAFETY: we have an invariant that utf8_size < 5
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
if let Some(index) = memchr::memrchr(last_byte, bytes) {
// we searched a slice that was offset by self.finger,
Expand Down

0 comments on commit 26ef195

Please sign in to comment.