Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Lexer::clone leak and UB + tests #390

Merged
merged 1 commit into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ where
fn clone(&self) -> Self {
Lexer {
extras: self.extras.clone(),
token: self.token.clone(),
token: ManuallyDrop::new(None),
..*self
}
}
Expand Down
62 changes: 62 additions & 0 deletions tests/tests/clone.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use std::cell::Cell;

use logos::Logos as _;
use logos_derive::Logos;

#[derive(Logos, Clone, Debug, PartialEq)]
pub enum Token {
#[token("a", |_| Evil::default())]
Evil(Evil),
}

#[derive(Debug, Default, PartialEq)]
pub struct Evil(Box<Cell<u8>>);

impl Clone for Evil {
fn clone(&self) -> Self {
self.0.set(self.0.get() + 1);
Self::default()
}
}

#[test]
fn test_it_works_without_cloning() {
let mut lexer = Token::lexer("aaa");
assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default()))));
assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default()))));
assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default()))));
assert_eq!(lexer.next(), None);
}

#[test]
fn test_clone_ub() {
let mut lexer = Token::lexer("a");
assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default()))));

// In logos 0.14.0, this causes use-after-free (UB),
// because `Clone` dereferences the value returned by the last call to `lexer.next()`,
// which got deallocated.
// A real-life example where this could happen is with `Rc`.
// Note that it may still pass `cargo test`, it will fail with Miri.
let mut lexer2 = lexer.clone();

assert_eq!(lexer2.next(), None);
}

#[test]
fn test_clone_leak() {
let mut lexer = Token::lexer("a");
let Some(Ok(Token::Evil(evil))) = lexer.next() else {
panic!("Expected Token::Evil");
};
assert_eq!(evil.0.get(), 0);

// In logos 0.14.0, this causes a memory leak because `evil` is cloned with `lexer`.
// This produces `evil.0.get() == 1`. It will fail even on `cargo test`.
let mut lexer2 = lexer.clone();
assert_eq!(evil.0.get(), 0);

assert_eq!(lexer2.next(), None);
let _ = evil.clone();
assert_eq!(evil.0.get(), 1);
}
118 changes: 118 additions & 0 deletions tests/tests/string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
use logos::{Lexer, Logos as _};
use logos_derive::Logos;

#[derive(Logos, Clone, Debug, PartialEq)]
#[logos(skip " ")]
pub enum Token {
#[regex(r#""([^"\\]+|\\.)*""#, lex_single_line_string)]
String(String),
}

#[test]
fn test_it_works_without_cloning() {
let mut lexer = Token::lexer(r#""Hello, world!" "foo😀bar\nbaz \x3F\u{1234}""#);
assert_eq!(
lexer.next(),
Some(Ok(Token::String("Hello, world!".to_string())))
);
assert_eq!(
lexer.next(),
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
);
assert_eq!(lexer.next(), None);
}

#[test]
fn test_it_works_with_cloning() {
let mut lexer = Token::lexer(r#""Hello, world!" "foo😀bar\nbaz \x3F\u{1234}""#);
let mut lexer2 = lexer.clone();
assert_eq!(
lexer2.next(),
Some(Ok(Token::String("Hello, world!".to_string())))
);
let mut lexer3 = lexer2.clone();
let mut lexer4 = lexer3.clone();
assert_eq!(
lexer3.next(),
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
);
assert_eq!(
lexer4.next(),
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
);
assert_eq!(lexer4.next(), None);
let mut lexer5 = lexer.clone();
assert_eq!(
lexer5.next(),
Some(Ok(Token::String("Hello, world!".to_string())))
);
assert_eq!(
lexer5.next(),
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
);
assert_eq!(
lexer.next(),
Some(Ok(Token::String("Hello, world!".to_string())))
);
assert_eq!(lexer5.next(), None);
assert_eq!(
lexer2.next(),
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
);
assert_eq!(lexer2.next(), None);
assert_eq!(lexer3.next(), None);
assert_eq!(
lexer.next(),
Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
);
assert_eq!(lexer.next(), None);
}

// Not important
pub fn lex_single_line_string(lexer: &mut Lexer<Token>) -> Result<String, ()> {
let mut string = String::new();
let mut chars = lexer.slice()[1..lexer.slice().len() - 1].chars();
while let Some(c) = chars.next() {
match c {
'\\' => {
let c = chars.next().ok_or(())?;
match c {
'\n' => {}
'n' => string.push('\n'),
'r' => string.push('\r'),
't' => string.push('\t'),
'0' => string.push('\0'),
'\'' | '"' | '\\' => string.push(c),
'x' => {
let mut hex = String::new();
hex.push(chars.next().ok_or(())?);
hex.push(chars.next().ok_or(())?);
let code = u8::from_str_radix(&hex, 16).map_err(|_| ())?;
if code > 0x7F {
return Err(());
}
string.push(code as char);
}
'u' => {
if chars.next() != Some('{') {
return Err(());
}
let mut hex = String::new();
for _ in 0..6 {
let c = chars.next().ok_or(())?;
if c == '}' {
break;
}
hex.push(c);
}
let code = u32::from_str_radix(&hex, 16).map_err(|_| ())?;
string.push(char::from_u32(code).ok_or(())?);
}
_ => return Err(()),
}
}
_ => string.push(c),
}
}
Ok(string)
}
Loading