Skip to content

Commit

Permalink
auto merge of #5559 : jbclements/rust/change-to-tt-based-parsing, r=j…
Browse files Browse the repository at this point in the history
…bclements

Changes the parser to parse all streams into token-trees before hitting the parser proper, in preparation for hygiene. As an added bonus, it appears to speed up the parser (albeit by a totally imperceptible 1%).

Also adds many comments to the parser.
Also renames fields in token-trees (readme->forest, cur->stack).
  • Loading branch information
bors committed Apr 3, 2013
2 parents 260d74d + f2e47cd commit 6153aae
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 34 deletions.
2 changes: 1 addition & 1 deletion src/librustc/driver/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ pub fn parse_input(sess: Session, +cfg: ast::crate_cfg, input: input)
-> @ast::crate {
match input {
file_input(ref file) => {
parse::parse_crate_from_file(&(*file), cfg, sess.parse_sess)
parse::parse_crate_from_file_using_tts(&(*file), cfg, sess.parse_sess)
}
str_input(ref src) => {
// FIXME (#2319): Don't really want to box the source string
Expand Down
57 changes: 29 additions & 28 deletions src/libsyntax/ext/tt/transcribe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use core::vec;
`~` */
///an unzipping of `token_tree`s
struct TtFrame {
readme: @mut ~[ast::token_tree],
forest: @mut ~[ast::token_tree],
idx: uint,
dotdotdoted: bool,
sep: Option<Token>,
Expand All @@ -37,7 +37,7 @@ pub struct TtReader {
sp_diag: @span_handler,
interner: @ident_interner,
// the unzipped tree:
cur: @mut TtFrame,
stack: @mut TtFrame,
/* for MBE-style macro transcription */
interpolations: LinearMap<ident, @named_match>,
repeat_idx: ~[uint],
Expand All @@ -58,8 +58,8 @@ pub fn new_tt_reader(sp_diag: @span_handler,
let r = @mut TtReader {
sp_diag: sp_diag,
interner: itr,
cur: @mut TtFrame {
readme: @mut src,
stack: @mut TtFrame {
forest: @mut src,
idx: 0u,
dotdotdoted: false,
sep: None,
Expand All @@ -81,7 +81,7 @@ pub fn new_tt_reader(sp_diag: @span_handler,

fn dup_tt_frame(f: @mut TtFrame) -> @mut TtFrame {
@mut TtFrame {
readme: @mut (copy *f.readme),
forest: @mut (copy *f.forest),
idx: f.idx,
dotdotdoted: f.dotdotdoted,
sep: copy f.sep,
Expand All @@ -96,7 +96,7 @@ pub fn dup_tt_reader(r: @mut TtReader) -> @mut TtReader {
@mut TtReader {
sp_diag: r.sp_diag,
interner: r.interner,
cur: dup_tt_frame(r.cur),
stack: dup_tt_frame(r.stack),
interpolations: r.interpolations,
repeat_idx: copy r.repeat_idx,
repeat_len: copy r.repeat_len,
Expand Down Expand Up @@ -167,45 +167,46 @@ fn lockstep_iter_size(t: token_tree, r: &mut TtReader) -> lis {
}
}


// return the next token from the TtReader.
// EFFECT: advances the reader's token field
pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
let ret_val = TokenAndSpan {
tok: copy r.cur_tok,
sp: r.cur_span,
};
loop {
{
let cur = &mut *r.cur;
let readme = &mut *cur.readme;
if cur.idx < readme.len() {
let stack = &mut *r.stack;
let forest = &mut *stack.forest;
if stack.idx < forest.len() {
break;
}
}

/* done with this set; pop or repeat? */
if ! r.cur.dotdotdoted
if ! r.stack.dotdotdoted
|| { *r.repeat_idx.last() == *r.repeat_len.last() - 1 } {

match r.cur.up {
match r.stack.up {
None => {
r.cur_tok = EOF;
return ret_val;
}
Some(tt_f) => {
if r.cur.dotdotdoted {
if r.stack.dotdotdoted {
r.repeat_idx.pop();
r.repeat_len.pop();
}

r.cur = tt_f;
r.cur.idx += 1u;
r.stack = tt_f;
r.stack.idx += 1u;
}
}

} else { /* repeat */
r.cur.idx = 0u;
r.stack.idx = 0u;
r.repeat_idx[r.repeat_idx.len() - 1u] += 1u;
match r.cur.sep {
match r.stack.sep {
Some(copy tk) => {
r.cur_tok = tk; /* repeat same span, I guess */
return ret_val;
Expand All @@ -216,21 +217,21 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
}
loop { /* because it's easiest, this handles `tt_delim` not starting
with a `tt_tok`, even though it won't happen */
match r.cur.readme[r.cur.idx] {
match r.stack.forest[r.stack.idx] {
tt_delim(copy tts) => {
r.cur = @mut TtFrame {
readme: @mut tts,
r.stack = @mut TtFrame {
forest: @mut tts,
idx: 0u,
dotdotdoted: false,
sep: None,
up: option::Some(r.cur)
up: option::Some(r.stack)
};
// if this could be 0-length, we'd need to potentially recur here
}
tt_tok(sp, copy tok) => {
r.cur_span = sp;
r.cur_tok = tok;
r.cur.idx += 1u;
r.stack.idx += 1u;
return ret_val;
}
tt_seq(sp, copy tts, copy sep, zerok) => {
Expand All @@ -256,17 +257,17 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
once");
}

r.cur.idx += 1u;
r.stack.idx += 1u;
return tt_next_token(r);
} else {
r.repeat_len.push(len);
r.repeat_idx.push(0u);
r.cur = @mut TtFrame {
readme: @mut tts,
r.stack = @mut TtFrame {
forest: @mut tts,
idx: 0u,
dotdotdoted: true,
sep: sep,
up: Some(r.cur)
up: Some(r.stack)
};
}
}
Expand All @@ -280,13 +281,13 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
(b) we actually can, since it's a token. */
matched_nonterminal(nt_ident(sn,b)) => {
r.cur_span = sp; r.cur_tok = IDENT(sn,b);
r.cur.idx += 1u;
r.stack.idx += 1u;
return ret_val;
}
matched_nonterminal(ref other_whole_nt) => {
r.cur_span = sp;
r.cur_tok = INTERPOLATED(copy *other_whole_nt);
r.cur.idx += 1u;
r.stack.idx += 1u;
return ret_val;
}
matched_seq(*) => {
Expand Down
15 changes: 14 additions & 1 deletion src/libsyntax/parse/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ pub impl Parser {
}
}

// if the given word is not a keyword, signal an error.
// if the next token is the given keyword, eat it and return
// true. Otherwise, return false.
fn eat_keyword(&self, word: &~str) -> bool {
self.require_keyword(word);
let is_kw = match *self.token {
Expand All @@ -169,6 +172,9 @@ pub impl Parser {
is_kw
}

// if the given word is not a keyword, signal an error.
// if the next token is not the given word, signal an error.
// otherwise, eat it.
fn expect_keyword(&self, word: &~str) {
self.require_keyword(word);
if !self.eat_keyword(word) {
Expand All @@ -182,10 +188,12 @@ pub impl Parser {
}
}

// return true if the given string is a strict keyword, i.e. if
// it is contained in this parser's `strict_keywords` collection.
fn is_strict_keyword(&self, word: &~str) -> bool {
self.strict_keywords.contains(word)
}

// signal an error if the current token is a strict keyword
fn check_strict_keywords(&self) {
match *self.token {
token::IDENT(_, false) => {
Expand All @@ -196,16 +204,19 @@ pub impl Parser {
}
}

// signal a fatal error ("found `<w>` in ident position") if the
// given string is a strict keyword; otherwise do nothing.
fn check_strict_keywords_(&self, w: &~str) {
if self.is_strict_keyword(w) {
self.fatal(fmt!("found `%s` in ident position", *w));
}
}

// return true if the given string is a reserved keyword, i.e. if
// it is contained in this parser's `reserved_keywords` collection.
fn is_reserved_keyword(&self, word: &~str) -> bool {
self.reserved_keywords.contains(word)
}

// signal an error if the current token is a reserved keyword
fn check_reserved_keywords(&self) {
match *self.token {
token::IDENT(_, false) => {
Expand All @@ -216,14 +227,16 @@ pub impl Parser {
}
}

// signal a fatal error ("`<w>` is a reserved keyword") if the
// given string is a reserved keyword; otherwise do nothing.
fn check_reserved_keywords_(&self, w: &~str) {
if self.is_reserved_keyword(w) {
self.fatal(fmt!("`%s` is a reserved keyword", *w));
}
}

// expect and consume a GT. if a >> is seen, replace it
// with a single > and continue.
// with a single > and continue. If a GT is not seen,
// signal an error.
fn expect_gt(&self) {
if *self.token == token::GT {
self.bump();
Expand Down
7 changes: 6 additions & 1 deletion src/libsyntax/parse/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ pub fn new_low_level_string_reader(span_diagnostic: @span_handler,
last_pos: filemap.start_pos,
col: CharPos(0),
curr: initial_char,
filemap: filemap, interner: itr,
filemap: filemap,
interner: itr,
/* dummy values; not read */
peek_tok: token::EOF,
peek_span: codemap::dummy_sp()
Expand Down Expand Up @@ -150,6 +151,7 @@ impl reader for TtReader {
}

// EFFECT: advance peek_tok and peek_span to refer to the next token.
// EFFECT: update the interner, maybe.
fn string_advance_token(r: @mut StringReader) {
match (consume_whitespace_and_comments(r)) {
Some(comment) => {
Expand Down Expand Up @@ -539,6 +541,9 @@ fn ident_continue(c: char) -> bool {
|| (c > 'z' && char::is_XID_continue(c))
}

// return the next token from the string
// EFFECT: advances the input past that token
// EFFECT: updates the interner
fn next_token_inner(rdr: @mut StringReader) -> token::Token {
let mut accum_str = ~"";
let mut c = rdr.curr;
Expand Down
52 changes: 49 additions & 3 deletions src/libsyntax/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,14 @@ pub mod classify;
/// Reporting obsolete syntax
pub mod obsolete;

// info about a parsing session.
// This structure and the reader both have
// an interner associated with them. If they're
// not the same, bad things can happen.
pub struct ParseSess {
cm: @codemap::CodeMap,
cm: @codemap::CodeMap, // better be the same as the one in the reader!
next_id: node_id,
span_diagnostic: @span_handler,
span_diagnostic: @span_handler, // better be the same as the one in the reader!
interner: @ident_interner,
}

Expand Down Expand Up @@ -90,6 +94,19 @@ pub fn parse_crate_from_file(
// why is there no p.abort_if_errors here?
}

// parse the crate in the file `input` by way of token trees:
// one parser is created over the file and asked for its token trees
// (presumably all of them, per the name `parse_all_token_trees`),
// then a second parser is created over those token trees and used
// to parse the crate module. Both parsers are given the same `sess`.
pub fn parse_crate_from_file_using_tts(
input: &Path,
cfg: ast::crate_cfg,
sess: @mut ParseSess
) -> @ast::crate {
// first pass: file -> token trees
let p = new_parser_from_file(sess, /*bad*/ copy cfg, input);
let tts = p.parse_all_token_trees();
// second pass: token trees -> crate
new_parser_from_tts(sess,cfg,tts).parse_crate_mod(/*bad*/ copy cfg)
// why is there no p.abort_if_errors here?
}



pub fn parse_crate_from_source_str(
name: ~str,
source: @~str,
Expand Down Expand Up @@ -313,17 +330,46 @@ mod test {
use std;
use core::io;
use core::option::None;
use ast;

// helper: encode `val` with a `std::json::Encoder` that writes into the
// writer supplied by `io::with_str_writer`, and return the string that
// `io::with_str_writer` produces.
// This is not a test case: it is generic, takes an argument and returns
// a value, so the test harness can never run it. It is therefore gated
// with `#[cfg(test)]` rather than being marked `#[test]`.
#[cfg(test)] fn to_json_str<E : Encodable<std::json::Encoder>>(val: @E) -> ~str {
do io::with_str_writer |writer| {
val.encode(~std::json::Encoder(writer));
}
}

// test helper: parse `source_str` directly into a crate via
// `parse_crate_from_source_str`, using the dummy name "bogofile",
// an empty vector for the third argument (presumably the cfg -- TODO
// confirm against the callee's signature) and a fresh parse session.
fn string_to_crate (source_str : @~str) -> @ast::crate {
parse_crate_from_source_str(
~"bogofile",
source_str,
~[],
new_parse_sess(None))
}

// test helper: parse `source_str` into a crate in two steps -- first
// into token trees with `parse_tts_from_source_str`, then from those
// token trees into a crate with a parser built by `new_parser_from_tts`.
// NOTE(review): the two steps each get their own `new_parse_sess(None)`,
// whereas `parse_crate_from_file_using_tts` passes one session to both
// parsers. If separate sessions can carry different interners, the
// tokens produced in step one may not match the session used in step
// two -- confirm that this is safe.
fn string_to_tt_to_crate (source_str : @~str) -> @ast::crate {
let tts = parse_tts_from_source_str(
~"bogofile",
source_str,
~[],
new_parse_sess(None));
new_parser_from_tts(new_parse_sess(None),~[],tts)
.parse_crate_mod(~[])
}

// make sure that parsing from TTs produces the same result
// as parsing from strings: the same source text is run through
// `string_to_tt_to_crate` and `string_to_crate`, and the two
// resulting crates are compared with `assert_eq!`.
#[test] fn tts_produce_the_same_result () {
let source_str = @~"fn foo (x : int) { x; }";
assert_eq!(string_to_tt_to_crate(source_str),
string_to_crate(source_str));
}

// check the contents of the tt manually:
#[test] fn alltts () {
let source_str = @~"fn foo (x : int) { x; }";
let tts = parse_tts_from_source_str(
~"bogofile",
@~"fn foo (x : int) { x; }",
source_str,
~[],
new_parse_sess(None));
assert_eq!(
Expand Down
Loading

0 comments on commit 6153aae

Please sign in to comment.