Skip to content

Commit

Permalink
tr: process octal escape sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
drocco007 committed Mar 17, 2021
1 parent 618d4a4 commit 1915237
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 12 deletions.
51 changes: 39 additions & 12 deletions src/uu/tr/src/expand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,43 @@ use std::cmp::min;
use std::iter::Peekable;
use std::ops::RangeInclusive;

/// Parse a backslash escape sequence to the corresponding character. Assumes
/// the string starts from the character _after_ the `\` and is not empty.
///
/// Returns a tuple containing the character and the number of characters
/// consumed from the input. The alphabetic escape sequences consume 1
/// character; octal escape sequences consume 1 to 3 octal digits.
#[inline]
fn unescape_char(c: char) -> char {
match c {
'a' => 0x07u8 as char,
'b' => 0x08u8 as char,
'f' => 0x0cu8 as char,
'v' => 0x0bu8 as char,
'n' => '\n',
'r' => '\r',
't' => '\t',
_ => c,
/// Decode the escape sequence that follows a `\`.
///
/// `s` is the text immediately after the backslash and must not be empty.
///
/// Returns the decoded character and the number of bytes of `s` the sequence
/// occupies, so the caller can step past it:
///
/// * An octal escape uses 1 to 3 octal digits (`0`-`7`). When three digits
///   would encode a value larger than one byte (above `\377`), only the first
///   two digits are used and the third is left in the input as an ordinary
///   character, matching GNU `tr`.
/// * `a`, `b`, `f`, `v`, `n`, `r` and `t` decode to the corresponding control
///   characters.
/// * Any other character stands for itself.
///
/// # Panics
///
/// Panics if `s` is empty.
fn parse_sequence(s: &str) -> (char, usize) {
    let c = s.chars().next().expect("invalid escape: empty string");

    if !c.is_digit(8) {
        let unescaped = match c {
            'a' => '\x07',
            'b' => '\x08',
            'f' => '\x0c',
            'v' => '\x0b',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            other => other,
        };
        // Report the encoded length, not 1: the caller adds this to a byte
        // offset, and an unrecognized escape may be a multi-byte character.
        return (unescaped, c.len_utf8());
    }

    // Octal digits are ASCII, so a count of bytes is also a count of chars
    // and is a valid slice boundary. `c` is a digit, hence `len >= 1`.
    let mut len = s
        .bytes()
        .take(3)
        .take_while(|b| (b'0'..=b'7').contains(b))
        .count();

    let mut value = s[..len]
        .bytes()
        .fold(0u32, |acc, b| acc * 8 + u32::from(b - b'0'));

    // Only a three-digit escape can exceed one byte (e.g. `\400`). Drop the
    // last digit instead of panicking; it will be read as a literal next.
    if value > u32::from(u8::MAX) {
        len -= 1;
        value /= 8;
    }

    // `value` is now at most 0xFF, so the narrowing cast cannot truncate.
    (value as u8 as char, len)
}

Expand Down Expand Up @@ -52,8 +78,9 @@ impl<'a> Iterator for Unescape<'a> {
'\\' if self.string.len() > 1 => {
// yes---it's \ and it's not the last char in a string
// we know that \ is 1 byte long so we can index into the string safely
let c = self.string[1..].chars().next().unwrap();
(Some(unescape_char(c)), 1 + c.len_utf8())
let (c, consumed) = parse_sequence(&self.string[1..]);

(Some(c), 1 + consumed)
}
c => (Some(c), c.len_utf8()), // not an escape char
};
Expand Down
63 changes: 63 additions & 0 deletions tests/by-util/test_tr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,66 @@ fn missing_required_second_arg_fails() {
assert!(!result.success);
assert!(result.stderr.contains("missing operand after"));
}

#[test]
fn test_interpret_backslash_escapes() {
    // Each letter of SET1 is translated to the control character that the
    // same-letter escape in SET2 names.
    let set1 = "abfnrtv";
    let set2 = r"\a\b\f\n\r\t\v";
    let expected = "\u{7}\u{8}\u{c}\n\r\t\u{b}";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(set1)
        .succeeds()
        .stdout_is(expected);
}

#[test]
fn test_interpret_unrecognized_backslash_escape_as_character() {
    // A backslash before a character with no special meaning yields that
    // character, so SET2 equals SET1 and the input passes through unchanged.
    let plain = "qcz+=~-";
    let escaped = r"\q\c\z\+\=\~\-";

    new_ucmd!()
        .args(&[plain, escaped])
        .pipe_in(plain)
        .succeeds()
        .stdout_is(plain);
}

#[test]
fn test_interpret_single_octal_escape() {
    // Octal 015 is carriage return.
    let set1 = "X";
    let set2 = r"\015";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(set1)
        .succeeds()
        .stdout_is("\r");
}

#[test]
fn test_interpret_one_and_two_digit_octal_escape() {
    // \0 -> NUL, \11 -> tab, \77 -> '?'.
    let set1 = "XYZ";
    let set2 = r"\0\11\77";
    let expected = "\0\t?";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(set1)
        .succeeds()
        .stdout_is(expected);
}

#[test]
fn test_octal_escape_is_at_most_three_digits() {
    // Only the first three digits form the escape (\015); the trailing `6`
    // is an ordinary character.
    let set1 = "XY";
    let set2 = r"\0156";
    let expected = "\r6";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(set1)
        .succeeds()
        .stdout_is(expected);
}

#[test]
fn test_non_octal_digit_ends_escape() {
    // `8` and `9` are not octal digits, so they terminate the escapes:
    // SET2 reads as NUL, '8', tab (\11), '9', ...
    let set1 = "rust";
    let set2 = r"\08\11956";
    let expected = "\08\t9";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(set1)
        .succeeds()
        .stdout_is(expected);
}

#[test]
fn test_interpret_backslash_at_eol_literally() {
    // A lone trailing backslash has nothing to escape and is kept as-is.
    let set1 = "X";
    let set2 = r"\";
    let expected = "\\";

    new_ucmd!()
        .args(&[set1, set2])
        .pipe_in(set1)
        .succeeds()
        .stdout_is(expected);
}

0 comments on commit 1915237

Please sign in to comment.