diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index da7753216d063..f60b604f1b167 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -240,6 +240,7 @@ #![feature(exhaustive_patterns)] #![feature(intra_doc_pointers)] #![feature(lang_items)] +#![feature(let_chains)] #![feature(linkage)] #![feature(min_specialization)] #![feature(must_not_suspend)] diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 8ecea8ce07f6b..c03d197e0194c 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -168,8 +168,8 @@ pub enum Prefix<'a> { /// Device namespace prefix, e.g., `\\.\COM42`. /// - /// Device namespace prefixes consist of `\\.\` immediately followed by the - /// device name. + /// Device namespace prefixes consist of `\\.\` (possibly using `/` + /// instead of `\`), immediately followed by the device name. #[stable(feature = "rust1", since = "1.0.0")] DeviceNS(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr), @@ -193,7 +193,7 @@ impl<'a> Prefix<'a> { fn len(&self) -> usize { use self::Prefix::*; fn os_str_len(s: &OsStr) -> usize { - os_str_as_u8_slice(s).len() + s.bytes().len() } match *self { Verbatim(x) => 4 + os_str_len(x), @@ -299,19 +299,17 @@ where } } -// See note at the top of this module to understand why these are used: -// -// These casts are safe as OsStr is internally a wrapper around [u8] on all -// platforms. -// -// Note that currently this relies on the special knowledge that libstd has; -// these types are single-element structs but are not marked repr(transparent) -// or repr(C) which would make these casts allowable outside std. 
-fn os_str_as_u8_slice(s: &OsStr) -> &[u8] { - unsafe { &*(s as *const OsStr as *const [u8]) } -} unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr { - // SAFETY: see the comment of `os_str_as_u8_slice` + // SAFETY: See note at the top of this module to understand why this and + // `OsStr::bytes` are used: + // + // These casts are safe as OsStr is internally a wrapper around [u8] on all + // platforms. + // + // Note that currently this relies on the special knowledge that libstd has; + // these types are single-element structs but are not marked + // repr(transparent) or repr(C) which would make these casts not allowable + // outside std. unsafe { &*(s as *const [u8] as *const OsStr) } } @@ -332,7 +330,7 @@ fn has_physical_root(s: &[u8], prefix: Option>) -> bool { // basic workhorse for splitting stem and extension fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) { - if os_str_as_u8_slice(file) == b".." { + if file.bytes() == b".." { return (Some(file), None); } @@ -340,7 +338,7 @@ fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) { // and back. This is safe to do because (1) we only look at ASCII // contents of the encoding and (2) new &OsStr values are produced // only from ASCII-bounded slices of existing &OsStr values. - let mut iter = os_str_as_u8_slice(file).rsplitn(2, |b| *b == b'.'); + let mut iter = file.bytes().rsplitn(2, |b| *b == b'.'); let after = iter.next(); let before = iter.next(); if before == Some(b"") { @@ -351,7 +349,7 @@ fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) { } fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) { - let slice = os_str_as_u8_slice(file); + let slice = file.bytes(); if slice == b".." 
{ return (file, None); } @@ -1445,17 +1443,17 @@ impl PathBuf { fn _set_extension(&mut self, extension: &OsStr) -> bool { let file_stem = match self.file_stem() { None => return false, - Some(f) => os_str_as_u8_slice(f), + Some(f) => f.bytes(), }; // truncate until right after the file stem let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr(); - let start = os_str_as_u8_slice(&self.inner).as_ptr().addr(); + let start = self.inner.bytes().as_ptr().addr(); let v = self.as_mut_vec(); v.truncate(end_file_stem.wrapping_sub(start)); // add the new extension, if any - let new = os_str_as_u8_slice(extension); + let new = extension.bytes(); if !new.is_empty() { v.reserve_exact(new.len() + 1); v.push(b'.'); @@ -1948,7 +1946,7 @@ impl Path { } // The following (private!) function reveals the byte encoding used for OsStr. fn as_u8_slice(&self) -> &[u8] { - os_str_as_u8_slice(&self.inner) + self.inner.bytes() } /// Directly wraps a string slice as a `Path` slice. diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs index d1f59d2786e91..0d8ea29c2be75 100644 --- a/library/std/src/path/tests.rs +++ b/library/std/src/path/tests.rs @@ -971,15 +971,15 @@ pub fn test_decompositions_windows() { file_prefix: None ); - t!("\\\\?\\C:/foo", - iter: ["\\\\?\\C:/foo"], + t!("\\\\?\\C:/foo/bar", + iter: ["\\\\?\\C:", "\\", "foo/bar"], has_root: true, is_absolute: true, - parent: None, - file_name: None, - file_stem: None, + parent: Some("\\\\?\\C:/"), + file_name: Some("foo/bar"), + file_stem: Some("foo/bar"), extension: None, - file_prefix: None + file_prefix: Some("foo/bar") ); t!("\\\\.\\foo\\bar", diff --git a/library/std/src/sys/windows/mod.rs b/library/std/src/sys/windows/mod.rs index 31c7208bbf1ee..2c832aa75fd18 100644 --- a/library/std/src/sys/windows/mod.rs +++ b/library/std/src/sys/windows/mod.rs @@ -190,6 +190,10 @@ where { // Start off with a stack buf but then spill over to the heap if we end up // needing more space. 
+ // + // This initial size also works around `GetFullPathNameW` returning + // incorrect size hints for some short paths: + // https://github.com/dylni/normpath/issues/5 let mut stack_buf = [0u16; 512]; let mut heap_buf = Vec::new(); unsafe { diff --git a/library/std/src/sys/windows/path.rs b/library/std/src/sys/windows/path.rs index e54fcaed4957d..a0f822070992f 100644 --- a/library/std/src/sys/windows/path.rs +++ b/library/std/src/sys/windows/path.rs @@ -50,37 +50,101 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf { path.into() } +struct PrefixParser<'a, const LEN: usize> { + path: &'a OsStr, + prefix: [u8; LEN], +} + +impl<'a, const LEN: usize> PrefixParser<'a, LEN> { + #[inline] + fn get_prefix(path: &OsStr) -> [u8; LEN] { + let mut prefix = [0; LEN]; + // SAFETY: Only ASCII characters are modified. + for (i, &ch) in path.bytes().iter().take(LEN).enumerate() { + prefix[i] = if ch == b'/' { b'\\' } else { ch }; + } + prefix + } + + fn new(path: &'a OsStr) -> Self { + Self { path, prefix: Self::get_prefix(path) } + } + + fn as_slice(&self) -> PrefixParserSlice<'a, '_> { + PrefixParserSlice { + path: self.path, + prefix: &self.prefix[..LEN.min(self.path.len())], + index: 0, + } + } +} + +struct PrefixParserSlice<'a, 'b> { + path: &'a OsStr, + prefix: &'b [u8], + index: usize, +} + +impl<'a> PrefixParserSlice<'a, '_> { + fn strip_prefix(&self, prefix: &str) -> Option { + self.prefix[self.index..] + .starts_with(prefix.as_bytes()) + .then(|| Self { index: self.index + prefix.len(), ..*self }) + } + + fn prefix_bytes(&self) -> &'a [u8] { + &self.path.bytes()[..self.index] + } + + fn finish(self) -> &'a OsStr { + // SAFETY: The unsafety here stems from converting between &OsStr and + // &[u8] and back. This is safe to do because (1) we only look at ASCII + // contents of the encoding and (2) new &OsStr values are produced only + // from ASCII-bounded slices of existing &OsStr values. 
+ unsafe { bytes_as_os_str(&self.path.bytes()[self.index..]) } + } +} + pub fn parse_prefix(path: &OsStr) -> Option> { use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC}; - if let Some(path) = strip_prefix(path, r"\\") { + let parser = PrefixParser::<8>::new(path); + let parser = parser.as_slice(); + if let Some(parser) = parser.strip_prefix(r"\\") { // \\ - if let Some(path) = strip_prefix(path, r"?\") { + + // The meaning of verbatim paths can change when they use a different + // separator. + if let Some(parser) = parser.strip_prefix(r"?\") && !parser.prefix_bytes().iter().any(|&x| x == b'/') { // \\?\ - if let Some(path) = strip_prefix(path, r"UNC\") { + if let Some(parser) = parser.strip_prefix(r"UNC\") { // \\?\UNC\server\share + let path = parser.finish(); let (server, path) = parse_next_component(path, true); let (share, _) = parse_next_component(path, true); Some(VerbatimUNC(server, share)) } else { - let (prefix, _) = parse_next_component(path, true); + let path = parser.finish(); // in verbatim paths only recognize an exact drive prefix - if let Some(drive) = parse_drive_exact(prefix) { + if let Some(drive) = parse_drive_exact(path) { // \\?\C: Some(VerbatimDisk(drive)) } else { // \\?\prefix + let (prefix, _) = parse_next_component(path, true); Some(Verbatim(prefix)) } } - } else if let Some(path) = strip_prefix(path, r".\") { + } else if let Some(parser) = parser.strip_prefix(r".\") { // \\.\COM42 + let path = parser.finish(); let (prefix, _) = parse_next_component(path, false); Some(DeviceNS(prefix)) } else { + let path = parser.finish(); let (server, path) = parse_next_component(path, false); let (share, _) = parse_next_component(path, false); @@ -102,31 +166,26 @@ pub fn parse_prefix(path: &OsStr) -> Option> { } // Parses a drive prefix, e.g. "C:" and "C:\whatever" -fn parse_drive(prefix: &OsStr) -> Option { +fn parse_drive(path: &OsStr) -> Option { // In most DOS systems, it is not possible to have more than 26 drive letters. 
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>. fn is_valid_drive_letter(drive: &u8) -> bool { drive.is_ascii_alphabetic() } - match prefix.bytes() { + match path.bytes() { [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), _ => None, } } // Parses a drive prefix exactly, e.g. "C:" -fn parse_drive_exact(prefix: &OsStr) -> Option { +fn parse_drive_exact(path: &OsStr) -> Option { // only parse two bytes: the drive letter and the drive separator - if prefix.len() == 2 { parse_drive(prefix) } else { None } -} - -fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> { - // `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]` - // is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice. - match path.bytes().strip_prefix(prefix.as_bytes()) { - Some(path) => unsafe { Some(bytes_as_os_str(path)) }, - None => None, + if path.bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { + parse_drive(path) + } else { + None } } @@ -219,15 +278,7 @@ pub(crate) fn maybe_verbatim(path: &Path) -> io::Result> { // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid. // `lpfilename` is a pointer to a null terminated string that is not // invalidated until after `GetFullPathNameW` returns successfully. - |buffer, size| unsafe { - // While the docs for `GetFullPathNameW` have the standard note - // about needing a `\\?\` path for a long lpfilename, this does not - // appear to be true in practice. 
- // See: - // https://stackoverflow.com/questions/38036943/getfullpathnamew-and-long-windows-file-paths - // https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html - c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) - }, + |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) }, |mut absolute| { path.clear(); @@ -263,9 +314,20 @@ pub(crate) fn maybe_verbatim(path: &Path) -> io::Result> { /// Make a Windows path absolute. pub(crate) fn absolute(path: &Path) -> io::Result { - if path.as_os_str().bytes().starts_with(br"\\?\") { - return Ok(path.into()); + let path = path.as_os_str(); + let prefix = parse_prefix(path); + // Verbatim paths should not be modified. + if prefix.map(|x| x.is_verbatim()).unwrap_or(false) { + // NULs in verbatim paths are rejected for consistency. + if path.bytes().contains(&0) { + return Err(io::const_io_error!( + io::ErrorKind::InvalidInput, + "strings passed to WinAPI cannot contain NULs", + )); + } + return Ok(path.to_owned().into()); } + let path = to_u16s(path)?; let lpfilename = path.as_ptr(); fill_utf16_buf( diff --git a/library/std/src/sys/windows/path/tests.rs b/library/std/src/sys/windows/path/tests.rs index 425c2011b3239..8656b04e4f410 100644 --- a/library/std/src/sys/windows/path/tests.rs +++ b/library/std/src/sys/windows/path/tests.rs @@ -94,3 +94,23 @@ fn verbatim() { // A path that contains null is not a valid path. 
assert!(maybe_verbatim(Path::new("\0")).is_err()); } + +fn parse_prefix(path: &str) -> Option> { + super::parse_prefix(OsStr::new(path)) +} + +#[test] +fn test_parse_prefix_verbatim() { + let prefix = Some(Prefix::VerbatimDisk(b'C')); + assert_eq!(prefix, parse_prefix(r"\\?\C:/windows/system32/notepad.exe")); + assert_eq!(prefix, parse_prefix(r"\\?\C:\windows\system32\notepad.exe")); +} + +#[test] +fn test_parse_prefix_verbatim_device() { + let prefix = Some(Prefix::UNC(OsStr::new("?"), OsStr::new("C:"))); + assert_eq!(prefix, parse_prefix(r"//?/C:/windows/system32/notepad.exe")); + assert_eq!(prefix, parse_prefix(r"//?/C:\windows\system32\notepad.exe")); + assert_eq!(prefix, parse_prefix(r"/\?\C:\windows\system32\notepad.exe")); + assert_eq!(prefix, parse_prefix(r"\\?/C:\windows\system32\notepad.exe")); +}