forked from rust-lang/rust
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rollup merge of rust-lang#123769 - dtolnay:literal, r=fee1-dead

Improve escaping of byte, byte str, and c str proc-macro literals

This PR changes the behavior of `proc_macro::Literal::byte_character` (rust-lang#115268), `byte_string`, and `c_string` (rust-lang#119750) to improve their choice of escape sequences. 3 categories of changes are made:

1. Never use `\x00`. Always prefer `\0`, which is supported in all the same places.
2. Never escape `\'` inside double quotes and `\"` inside single quotes.
3. Never use `\x` for valid UTF-8 in literals that permit `\u`.

The second commit adds tests covering these cases, asserting the **old** behavior.

The third commit implements the behavior change and simultaneously updates the tests to assert the **new** behavior.
- Loading branch information
Showing
5 changed files
with
179 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/// Switches that select which escape sequences the literal-escaping helpers
/// in this file emit.
#[derive(Clone, Copy)]
pub(crate) struct EscapeOptions {
    /// Write a single quote as `\'`.
    pub escape_single_quote: bool,
    /// Write a double quote as `\"`.
    pub escape_double_quote: bool,
    /// Emit `\x` escapes for non-ASCII input, and prefer `\x` over `\u` when
    /// escaping ASCII control characters.
    pub escape_nonascii: bool,
}
|
||
pub(crate) fn escape_bytes(bytes: &[u8], opt: EscapeOptions) -> String { | ||
let mut repr = String::new(); | ||
|
||
if opt.escape_nonascii { | ||
for &byte in bytes { | ||
escape_single_byte(byte, opt, &mut repr); | ||
} | ||
} else { | ||
let mut chunks = bytes.utf8_chunks(); | ||
while let Some(chunk) = chunks.next() { | ||
for ch in chunk.valid().chars() { | ||
escape_single_char(ch, opt, &mut repr); | ||
} | ||
for &byte in chunk.invalid() { | ||
escape_single_byte(byte, opt, &mut repr); | ||
} | ||
} | ||
} | ||
|
||
repr | ||
} | ||
|
||
fn escape_single_byte(byte: u8, opt: EscapeOptions, repr: &mut String) { | ||
if byte == b'\0' { | ||
repr.push_str("\\0"); | ||
} else if (byte == b'\'' && !opt.escape_single_quote) | ||
|| (byte == b'"' && !opt.escape_double_quote) | ||
{ | ||
repr.push(byte as char); | ||
} else { | ||
// Escapes \t, \r, \n, \\, \', \", and uses \x## for non-ASCII and | ||
// for ASCII control characters. | ||
repr.extend(byte.escape_ascii().map(char::from)); | ||
} | ||
} | ||
|
||
fn escape_single_char(ch: char, opt: EscapeOptions, repr: &mut String) { | ||
if (ch == '\'' && !opt.escape_single_quote) || (ch == '"' && !opt.escape_double_quote) { | ||
repr.push(ch); | ||
} else { | ||
// Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for | ||
// non-printable characters and for Grapheme_Extend characters, which | ||
// includes things like U+0300 "Combining Grave Accent". | ||
repr.extend(ch.escape_debug()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
// ignore-tidy-linelength | ||
|
||
use proc_macro::Literal; | ||
|
||
/// Runs the literal checks in this file: first the `to_string` rendering | ||
/// assertions, then the string-to-`Literal` parsing assertions. | ||
pub fn test() { | ||
test_display_literal(); | ||
test_parse_literal(); | ||
} | ||
|
||
/// Asserts the exact `to_string` output of literals built through the | ||
/// `Literal` constructors, with a focus on which escape sequence is chosen. | ||
/// | ||
/// Expected strings are written as raw strings padded with spaces and then | ||
/// `.trim()`med, so that quote characters can sit next to the raw-string | ||
/// delimiters. | ||
fn test_display_literal() { | ||
// Numeric literals: negative values, suffixed and unsuffixed, plus a very | ||
// large float that is expected to print in full decimal form. | ||
assert_eq!(Literal::isize_unsuffixed(-10).to_string(), "-10"); | ||
assert_eq!(Literal::isize_suffixed(-10).to_string(), "-10isize"); | ||
assert_eq!(Literal::f32_unsuffixed(-10.0).to_string(), "-10.0"); | ||
assert_eq!(Literal::f32_suffixed(-10.0).to_string(), "-10f32"); | ||
assert_eq!(Literal::f64_unsuffixed(-10.0).to_string(), "-10.0"); | ||
assert_eq!(Literal::f64_suffixed(-10.0).to_string(), "-10f64"); | ||
assert_eq!( | ||
Literal::f64_unsuffixed(1e100).to_string(), | ||
"10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0", | ||
); | ||
|
||
// String literals: `'` stays unescaped inside double quotes, `"` is | ||
// escaped, NUL prints as `\0`, and other control characters use `\u{...}`. | ||
assert_eq!(Literal::string("aA").to_string(), r#" "aA" "#.trim()); | ||
assert_eq!(Literal::string("\t").to_string(), r#" "\t" "#.trim()); | ||
assert_eq!(Literal::string("❤").to_string(), r#" "❤" "#.trim()); | ||
assert_eq!(Literal::string("'").to_string(), r#" "'" "#.trim()); | ||
assert_eq!(Literal::string("\"").to_string(), r#" "\"" "#.trim()); | ||
assert_eq!(Literal::string("\0").to_string(), r#" "\0" "#.trim()); | ||
assert_eq!(Literal::string("\u{1}").to_string(), r#" "\u{1}" "#.trim()); | ||
|
||
// Byte string literals: same quote handling as strings; NUL prints as | ||
// `\0` while other control bytes use `\x##`. | ||
assert_eq!(Literal::byte_string(b"aA").to_string(), r#" b"aA" "#.trim()); | ||
assert_eq!(Literal::byte_string(b"\t").to_string(), r#" b"\t" "#.trim()); | ||
assert_eq!(Literal::byte_string(b"'").to_string(), r#" b"'" "#.trim()); | ||
assert_eq!(Literal::byte_string(b"\"").to_string(), r#" b"\"" "#.trim()); | ||
assert_eq!(Literal::byte_string(b"\0").to_string(), r#" b"\0" "#.trim()); | ||
assert_eq!(Literal::byte_string(b"\x01").to_string(), r#" b"\x01" "#.trim()); | ||
|
||
// C string literals: valid UTF-8 is kept or written with `\u{...}`, and | ||
// only bytes that are not valid UTF-8 fall back to `\x##`. | ||
assert_eq!(Literal::c_string(c"aA").to_string(), r#" c"aA" "#.trim()); | ||
assert_eq!(Literal::c_string(c"\t").to_string(), r#" c"\t" "#.trim()); | ||
assert_eq!(Literal::c_string(c"❤").to_string(), r#" c"❤" "#.trim()); | ||
assert_eq!(Literal::c_string(c"\'").to_string(), r#" c"'" "#.trim()); | ||
assert_eq!(Literal::c_string(c"\"").to_string(), r#" c"\"" "#.trim()); | ||
assert_eq!(Literal::c_string(c"\x7f\xff\xfe\u{333}").to_string(), r#" c"\u{7f}\xff\xfe\u{333}" "#.trim()); | ||
|
||
// Character literals: inside single quotes the roles flip, so `'` is | ||
// escaped and `"` is left as-is. | ||
assert_eq!(Literal::character('a').to_string(), r#" 'a' "#.trim()); | ||
assert_eq!(Literal::character('\t').to_string(), r#" '\t' "#.trim()); | ||
assert_eq!(Literal::character('❤').to_string(), r#" '❤' "#.trim()); | ||
assert_eq!(Literal::character('\'').to_string(), r#" '\'' "#.trim()); | ||
assert_eq!(Literal::character('"').to_string(), r#" '"' "#.trim()); | ||
assert_eq!(Literal::character('\0').to_string(), r#" '\0' "#.trim()); | ||
assert_eq!(Literal::character('\u{1}').to_string(), r#" '\u{1}' "#.trim()); | ||
|
||
// Byte character literals: same quote handling as characters; NUL prints | ||
// as `\0` and other control bytes use `\x##`. | ||
assert_eq!(Literal::byte_character(b'a').to_string(), r#" b'a' "#.trim()); | ||
assert_eq!(Literal::byte_character(b'\t').to_string(), r#" b'\t' "#.trim()); | ||
assert_eq!(Literal::byte_character(b'\'').to_string(), r#" b'\'' "#.trim()); | ||
assert_eq!(Literal::byte_character(b'"').to_string(), r#" b'"' "#.trim()); | ||
assert_eq!(Literal::byte_character(0).to_string(), r#" b'\0' "#.trim()); | ||
assert_eq!(Literal::byte_character(1).to_string(), r#" b'\x01' "#.trim()); | ||
} | ||
|
||
/// Asserts which strings parse into a `Literal` and which are rejected. | ||
fn test_parse_literal() { | ||
// Accepted inputs must print back exactly as written. This covers raw | ||
// strings, an arbitrary suffix (`ulong`), and a leading minus sign | ||
// directly attached to a number. | ||
assert_eq!("1".parse::<Literal>().unwrap().to_string(), "1"); | ||
assert_eq!("1.0".parse::<Literal>().unwrap().to_string(), "1.0"); | ||
assert_eq!("'a'".parse::<Literal>().unwrap().to_string(), "'a'"); | ||
assert_eq!("b'a'".parse::<Literal>().unwrap().to_string(), "b'a'"); | ||
assert_eq!("\"\n\"".parse::<Literal>().unwrap().to_string(), "\"\n\""); | ||
assert_eq!("b\"\"".parse::<Literal>().unwrap().to_string(), "b\"\""); | ||
assert_eq!("c\"\"".parse::<Literal>().unwrap().to_string(), "c\"\""); | ||
assert_eq!("r##\"\"##".parse::<Literal>().unwrap().to_string(), "r##\"\"##"); | ||
assert_eq!("10ulong".parse::<Literal>().unwrap().to_string(), "10ulong"); | ||
assert_eq!("-10ulong".parse::<Literal>().unwrap().to_string(), "-10ulong"); | ||
|
||
// Rejected inputs: `true`, a float without a leading digit, more than one | ||
// token, an unterminated character, surrounding whitespace or comments, a | ||
// minus separated from its number by a space, and a minus applied to a | ||
// character literal. | ||
assert!("true".parse::<Literal>().is_err()); | ||
assert!(".8".parse::<Literal>().is_err()); | ||
assert!("0 1".parse::<Literal>().is_err()); | ||
assert!("'a".parse::<Literal>().is_err()); | ||
assert!(" 0".parse::<Literal>().is_err()); | ||
assert!("0 ".parse::<Literal>().is_err()); | ||
assert!("/* comment */0".parse::<Literal>().is_err()); | ||
assert!("0/* comment */".parse::<Literal>().is_err()); | ||
assert!("0// comment".parse::<Literal>().is_err()); | ||
assert!("- 10".parse::<Literal>().is_err()); | ||
assert!("-'x'".parse::<Literal>().is_err()); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.