Skip to content

Commit

Permalink
Merge #222
Browse files Browse the repository at this point in the history
222: Validate string literals r=aochagavia a=aochagavia

Related: #6 (some validators are still missing), fixes #27

Co-authored-by: Adolfo Ochagavía <[email protected]>
  • Loading branch information
bors[bot] and aochagavia committed Nov 10, 2018
2 parents 5a9150d + 3b4c02c commit 477de79
Show file tree
Hide file tree
Showing 9 changed files with 621 additions and 274 deletions.
37 changes: 37 additions & 0 deletions crates/ra_syntax/src/ast/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3236,6 +3236,43 @@ impl<'a> AstNode<'a> for Stmt<'a> {

impl<'a> Stmt<'a> {}

// String
/// Typed wrapper around a `SyntaxNode`, generic over the tree root `R`
/// (`OwnedRoot` unless another root type is specified).
#[derive(Debug, Clone, Copy,)]
pub struct StringNode<R: TreeRoot<RaTypes> = OwnedRoot> {
/// The wrapped syntax node.
pub(crate) syntax: SyntaxNode<R>,
}
/// A `StringNode` whose root is a `RefRoot<'a>`.
pub type String<'a> = StringNode<RefRoot<'a>>;

/// Two `StringNode`s, possibly over different root types, compare equal when
/// their wrapped `syntax` nodes compare equal.
impl<R1: TreeRoot<RaTypes>, R2: TreeRoot<RaTypes>> PartialEq<StringNode<R1>> for StringNode<R2> {
fn eq(&self, other: &StringNode<R1>) -> bool { self.syntax == other.syntax }
}
impl<R: TreeRoot<RaTypes>> Eq for StringNode<R> {}
/// Hashing delegates to the wrapped `syntax` node, the same field that `eq` compares.
impl<R: TreeRoot<RaTypes>> Hash for StringNode<R> {
fn hash<H: Hasher>(&self, state: &mut H) { self.syntax.hash(state) }
}

impl<'a> AstNode<'a> for String<'a> {
    /// Wraps `syntax` in a `String` when its kind is `STRING`; any other kind yields `None`.
    fn cast(syntax: SyntaxNodeRef<'a>) -> Option<Self> {
        if let STRING = syntax.kind() {
            Some(String { syntax })
        } else {
            None
        }
    }

    /// Returns the wrapped syntax node.
    fn syntax(self) -> SyntaxNodeRef<'a> {
        self.syntax
    }
}

impl<R: TreeRoot<RaTypes>> StringNode<R> {
    /// Builds a `String` from the result of calling `borrowed()` on the wrapped node.
    pub fn borrowed(&self) -> String {
        let syntax = self.syntax.borrowed();
        StringNode { syntax }
    }

    /// Builds a `StringNode` from the result of calling `owned()` on the wrapped node.
    pub fn owned(&self) -> StringNode {
        let syntax = self.syntax.owned();
        StringNode { syntax }
    }
}


// No inherent methods are defined for `String` in this block.
impl<'a> String<'a> {}

// StructDef
#[derive(Debug, Clone, Copy,)]
pub struct StructDefNode<R: TreeRoot<RaTypes> = OwnedRoot> {
Expand Down
9 changes: 8 additions & 1 deletion crates/ra_syntax/src/ast/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod generated;

use std::marker::PhantomData;
use std::string::String as RustString;

use itertools::Itertools;

Expand Down Expand Up @@ -76,7 +77,7 @@ pub trait DocCommentsOwner<'a>: AstNode<'a> {

/// Returns the textual content of a doc comment block as a single string.
/// That is, it strips the comment prefix (such as `///`) from each comment and joins the lines.
fn doc_comment_text(self) -> String {
fn doc_comment_text(self) -> RustString {
self.doc_comments()
.map(|comment| {
let prefix = comment.prefix();
Expand Down Expand Up @@ -133,6 +134,12 @@ impl<'a> Char<'a> {
}
}

impl<'a> String<'a> {
    /// Returns the leaf text of the syntax node backing this string literal.
    ///
    /// # Panics
    ///
    /// Panics if the underlying syntax node has no leaf text.
    pub fn text(&self) -> &SmolStr {
        // `leaf_text()` already yields a reference once unwrapped, so no extra
        // borrow is needed; this matches `Comment::text`.
        self.syntax().leaf_text().unwrap()
    }
}

impl<'a> Comment<'a> {
pub fn text(&self) -> &SmolStr {
self.syntax().leaf_text().unwrap()
Expand Down
1 change: 1 addition & 0 deletions crates/ra_syntax/src/grammar.ron
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ Grammar(
"PrefixExpr": (),
"RangeExpr": (),
"BinExpr": (),
"String": (),
"Char": (),
"Literal": (),

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,68 @@
use self::CharComponentKind::*;
use rowan::{TextRange, TextUnit};

/// Creates an iterator over the components of the string literal `src`.
///
/// The iterator starts with `has_closing_quote` set to `false`.
pub fn parse_string_literal(src: &str) -> StringComponentIterator {
    let parser = Parser::new(src);
    StringComponentIterator {
        parser,
        has_closing_quote: false,
    }
}

/// One parsed piece of a string literal: the `range` it covers and its `kind`.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct StringComponent {
/// Range covered by this component, built from parser positions.
pub range: TextRange,
/// What this component represents.
pub kind: StringComponentKind,
}

impl StringComponent {
fn new(range: TextRange, kind: StringComponentKind) -> StringComponent {
StringComponent { range, kind }
}
}

/// The kinds of component a string literal can be split into.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum StringComponentKind {
/// A `\` directly followed by a newline, together with the whitespace skipped after it.
IgnoreNewline,
/// A character component, classified by `CharComponentKind`.
Char(CharComponentKind),
}

/// Iterator yielding the `StringComponent`s of a string literal, driven by a `Parser`.
pub struct StringComponentIterator<'a> {
// Parser positioned within the literal's source text.
parser: Parser<'a>,
/// Set to `true` once the iterator has consumed a closing `"`;
/// it is `false` when created by `parse_string_literal`.
pub has_closing_quote: bool,
}

impl<'a> Iterator for StringComponentIterator<'a> {
    type Item = StringComponent;

    /// Yields the next component of the string literal.
    ///
    /// When no component is left, an optional closing `"` is consumed (recording
    /// it in `has_closing_quote`) and `None` is returned.
    ///
    /// # Panics
    ///
    /// Panics if the literal does not start with `"`, or if input remains
    /// after the components and the optional closing quote.
    fn next(&mut self) -> Option<StringComponent> {
        // The opening quote is consumed while the parser is still at the start.
        if self.parser.pos == 0 {
            let opening = self.parser.advance();
            assert!(
                opening == '"',
                "string literal should start with double quotes"
            );
        }

        match self.parser.parse_string_component() {
            Some(component) => Some(component),
            None => {
                // No components left: consume the closing quote if it is there.
                if let Some('"') = self.parser.peek() {
                    self.parser.advance();
                    self.has_closing_quote = true;
                }

                assert!(
                    self.parser.peek().is_none(),
                    "string literal should leave no unparsed input: src = {}, pos = {}, length = {}",
                    self.parser.src,
                    self.parser.pos,
                    self.parser.src.len()
                );

                None
            }
        }
    }
}

pub fn parse_char_literal(src: &str) -> CharComponentIterator {
CharComponentIterator {
parser: Parser::new(src),
Expand Down Expand Up @@ -93,6 +155,12 @@ impl<'a> Parser<'a> {
next
}

/// Advances the parser past every consecutive whitespace character at the
/// current position; stops at the first non-whitespace character or at the
/// end of input.
pub fn skip_whitespace(&mut self) {
    while let Some(c) = self.peek() {
        if !c.is_whitespace() {
            break;
        }
        self.advance();
    }
}

/// Returns the parser's current position converted to a `TextUnit`.
pub fn get_pos(&self) -> TextUnit {
    let offset = self.pos as u32;
    offset.into()
}
Expand Down Expand Up @@ -172,6 +240,51 @@ impl<'a> Parser<'a> {
))
}
}

/// Tries to parse an ignored-newline component beginning at `start`.
///
/// In string literals, a `\` placed immediately before a newline causes the `\`,
/// the newline, and all whitespace at the start of the following line to be
/// ignored. Returns `None` (consuming nothing) when the next character is
/// neither `\n` nor `\r`.
pub fn parse_ignore_newline(&mut self, start: TextUnit) -> Option<StringComponent> {
    let at_line_break = match self.peek() {
        Some('\n') | Some('\r') => true,
        _ => false,
    };
    if !at_line_break {
        return None;
    }

    self.skip_whitespace();
    let range = TextRange::from_to(start, self.get_pos());
    Some(StringComponent::new(range, StringComponentKind::IgnoreNewline))
}

/// Parses the next component of a string literal.
///
/// Returns `None` without consuming anything when the input is exhausted or
/// the next character is the closing `"`.
pub fn parse_string_component(&mut self) -> Option<StringComponent> {
    let first = match self.peek() {
        // End of input, or the closing quote, which is left for the caller
        None | Some('"') => return None,
        Some(c) => c,
    };

    let start = self.get_pos();
    self.advance();

    // Anything other than a backslash is a plain code point
    if first != '\\' {
        let range = TextRange::from_to(start, self.get_pos());
        return Some(StringComponent::new(
            range,
            StringComponentKind::Char(CodePoint),
        ));
    }

    // A backslash may introduce an ignored newline; that is tried first,
    // and only then do we fall back to a regular char escape
    if let Some(ignored) = self.parse_ignore_newline(start) {
        return Some(ignored);
    }

    let escape = self.parse_escape(start);
    Some(StringComponent::new(
        escape.range,
        StringComponentKind::Char(escape.kind),
    ))
}
}

#[cfg(test)]
Expand Down
Loading

0 comments on commit 477de79

Please sign in to comment.