Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change to token-tree-based parsing #5559

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/librustc/driver/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ pub fn parse_input(sess: Session, +cfg: ast::crate_cfg, input: input)
-> @ast::crate {
match input {
file_input(ref file) => {
parse::parse_crate_from_file(&(*file), cfg, sess.parse_sess)
parse::parse_crate_from_file_using_tts(&(*file), cfg, sess.parse_sess)
}
str_input(ref src) => {
// FIXME (#2319): Don't really want to box the source string
Expand Down
57 changes: 29 additions & 28 deletions src/libsyntax/ext/tt/transcribe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use core::vec;
`~` */
///an unzipping of `token_tree`s
struct TtFrame {
readme: @mut ~[ast::token_tree],
forest: @mut ~[ast::token_tree],
idx: uint,
dotdotdoted: bool,
sep: Option<Token>,
Expand All @@ -37,7 +37,7 @@ pub struct TtReader {
sp_diag: @span_handler,
interner: @ident_interner,
// the unzipped tree:
cur: @mut TtFrame,
stack: @mut TtFrame,
/* for MBE-style macro transcription */
interpolations: LinearMap<ident, @named_match>,
repeat_idx: ~[uint],
Expand All @@ -58,8 +58,8 @@ pub fn new_tt_reader(sp_diag: @span_handler,
let r = @mut TtReader {
sp_diag: sp_diag,
interner: itr,
cur: @mut TtFrame {
readme: @mut src,
stack: @mut TtFrame {
forest: @mut src,
idx: 0u,
dotdotdoted: false,
sep: None,
Expand All @@ -81,7 +81,7 @@ pub fn new_tt_reader(sp_diag: @span_handler,

fn dup_tt_frame(f: @mut TtFrame) -> @mut TtFrame {
@mut TtFrame {
readme: @mut (copy *f.readme),
forest: @mut (copy *f.forest),
idx: f.idx,
dotdotdoted: f.dotdotdoted,
sep: copy f.sep,
Expand All @@ -96,7 +96,7 @@ pub fn dup_tt_reader(r: @mut TtReader) -> @mut TtReader {
@mut TtReader {
sp_diag: r.sp_diag,
interner: r.interner,
cur: dup_tt_frame(r.cur),
stack: dup_tt_frame(r.stack),
interpolations: r.interpolations,
repeat_idx: copy r.repeat_idx,
repeat_len: copy r.repeat_len,
Expand Down Expand Up @@ -167,45 +167,46 @@ fn lockstep_iter_size(t: token_tree, r: &mut TtReader) -> lis {
}
}


// return the next token from the TtReader.
// EFFECT: advances the reader's token field
pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
let ret_val = TokenAndSpan {
tok: copy r.cur_tok,
sp: r.cur_span,
};
loop {
{
let cur = &mut *r.cur;
let readme = &mut *cur.readme;
if cur.idx < readme.len() {
let stack = &mut *r.stack;
let forest = &mut *stack.forest;
if stack.idx < forest.len() {
break;
}
}

/* done with this set; pop or repeat? */
if ! r.cur.dotdotdoted
if ! r.stack.dotdotdoted
|| { *r.repeat_idx.last() == *r.repeat_len.last() - 1 } {

match r.cur.up {
match r.stack.up {
None => {
r.cur_tok = EOF;
return ret_val;
}
Some(tt_f) => {
if r.cur.dotdotdoted {
if r.stack.dotdotdoted {
r.repeat_idx.pop();
r.repeat_len.pop();
}

r.cur = tt_f;
r.cur.idx += 1u;
r.stack = tt_f;
r.stack.idx += 1u;
}
}

} else { /* repeat */
r.cur.idx = 0u;
r.stack.idx = 0u;
r.repeat_idx[r.repeat_idx.len() - 1u] += 1u;
match r.cur.sep {
match r.stack.sep {
Some(copy tk) => {
r.cur_tok = tk; /* repeat same span, I guess */
return ret_val;
Expand All @@ -216,21 +217,21 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
}
loop { /* because it's easiest, this handles `tt_delim` not starting
with a `tt_tok`, even though it won't happen */
match r.cur.readme[r.cur.idx] {
match r.stack.forest[r.stack.idx] {
tt_delim(copy tts) => {
r.cur = @mut TtFrame {
readme: @mut tts,
r.stack = @mut TtFrame {
forest: @mut tts,
idx: 0u,
dotdotdoted: false,
sep: None,
up: option::Some(r.cur)
up: option::Some(r.stack)
};
// if this could be 0-length, we'd need to potentially recur here
}
tt_tok(sp, copy tok) => {
r.cur_span = sp;
r.cur_tok = tok;
r.cur.idx += 1u;
r.stack.idx += 1u;
return ret_val;
}
tt_seq(sp, copy tts, copy sep, zerok) => {
Expand All @@ -256,17 +257,17 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
once");
}

r.cur.idx += 1u;
r.stack.idx += 1u;
return tt_next_token(r);
} else {
r.repeat_len.push(len);
r.repeat_idx.push(0u);
r.cur = @mut TtFrame {
readme: @mut tts,
r.stack = @mut TtFrame {
forest: @mut tts,
idx: 0u,
dotdotdoted: true,
sep: sep,
up: Some(r.cur)
up: Some(r.stack)
};
}
}
Expand All @@ -280,13 +281,13 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
(b) we actually can, since it's a token. */
matched_nonterminal(nt_ident(sn,b)) => {
r.cur_span = sp; r.cur_tok = IDENT(sn,b);
r.cur.idx += 1u;
r.stack.idx += 1u;
return ret_val;
}
matched_nonterminal(ref other_whole_nt) => {
r.cur_span = sp;
r.cur_tok = INTERPOLATED(copy *other_whole_nt);
r.cur.idx += 1u;
r.stack.idx += 1u;
return ret_val;
}
matched_seq(*) => {
Expand Down
15 changes: 14 additions & 1 deletion src/libsyntax/parse/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ pub impl Parser {
}
}

// if the given word is not a keyword, signal an error.
// if the next token is the given keyword, eat it and return
// true. Otherwise, return false.
fn eat_keyword(&self, word: &~str) -> bool {
self.require_keyword(word);
let is_kw = match *self.token {
Expand All @@ -169,6 +172,9 @@ pub impl Parser {
is_kw
}

// if the given word is not a keyword, signal an error.
// if the next token is not the given word, signal an error.
// otherwise, eat it.
fn expect_keyword(&self, word: &~str) {
self.require_keyword(word);
if !self.eat_keyword(word) {
Expand All @@ -182,10 +188,12 @@ pub impl Parser {
}
}

// return true if the given string is a strict keyword
fn is_strict_keyword(&self, word: &~str) -> bool {
self.strict_keywords.contains(word)
}

// signal an error if the current token is a strict keyword
fn check_strict_keywords(&self) {
match *self.token {
token::IDENT(_, false) => {
Expand All @@ -196,16 +204,19 @@ pub impl Parser {
}
}

// signal an error if the given string is a strict keyword
fn check_strict_keywords_(&self, w: &~str) {
if self.is_strict_keyword(w) {
self.fatal(fmt!("found `%s` in ident position", *w));
}
}

// return true if this is a reserved keyword
fn is_reserved_keyword(&self, word: &~str) -> bool {
self.reserved_keywords.contains(word)
}

// signal an error if the current token is a reserved keyword
fn check_reserved_keywords(&self) {
match *self.token {
token::IDENT(_, false) => {
Expand All @@ -216,14 +227,16 @@ pub impl Parser {
}
}

// signal an error if the given string is a reserved keyword
fn check_reserved_keywords_(&self, w: &~str) {
if self.is_reserved_keyword(w) {
self.fatal(fmt!("`%s` is a reserved keyword", *w));
}
}

// expect and consume a GT. if a >> is seen, replace it
// with a single > and continue.
// with a single > and continue. If a GT is not seen,
// signal an error.
fn expect_gt(&self) {
if *self.token == token::GT {
self.bump();
Expand Down
7 changes: 6 additions & 1 deletion src/libsyntax/parse/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ pub fn new_low_level_string_reader(span_diagnostic: @span_handler,
last_pos: filemap.start_pos,
col: CharPos(0),
curr: initial_char,
filemap: filemap, interner: itr,
filemap: filemap,
interner: itr,
/* dummy values; not read */
peek_tok: token::EOF,
peek_span: codemap::dummy_sp()
Expand Down Expand Up @@ -150,6 +151,7 @@ impl reader for TtReader {
}

// EFFECT: advance peek_tok and peek_span to refer to the next token.
// EFFECT: update the interner, maybe.
fn string_advance_token(r: @mut StringReader) {
match (consume_whitespace_and_comments(r)) {
Some(comment) => {
Expand Down Expand Up @@ -539,6 +541,9 @@ fn ident_continue(c: char) -> bool {
|| (c > 'z' && char::is_XID_continue(c))
}

// return the next token from the string
// EFFECT: advances the input past that token
// EFFECT: updates the interner
fn next_token_inner(rdr: @mut StringReader) -> token::Token {
let mut accum_str = ~"";
let mut c = rdr.curr;
Expand Down
52 changes: 49 additions & 3 deletions src/libsyntax/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,14 @@ pub mod classify;
/// Reporting obsolete syntax
pub mod obsolete;

// info about a parsing session.
// This structure and the reader both have
// an interner associated with them. If they're
// not the same, bad things can happen.
pub struct ParseSess {
cm: @codemap::CodeMap,
cm: @codemap::CodeMap, // better be the same as the one in the reader!
next_id: node_id,
span_diagnostic: @span_handler,
span_diagnostic: @span_handler, // better be the same as the one in the reader!
interner: @ident_interner,
}

Expand Down Expand Up @@ -90,6 +94,19 @@ pub fn parse_crate_from_file(
// why is there no p.abort_if_errors here?
}

// parse a crate from the named file by first lexing the whole file
// into a token-tree stream, then running the parser over that stream.
// Exercises the token-tree-based parsing path end to end (compare
// parse_crate_from_file, which parses the character stream directly).
pub fn parse_crate_from_file_using_tts(
input: &Path,
cfg: ast::crate_cfg,
sess: @mut ParseSess
) -> @ast::crate {
// first pass: read the file and collect every top-level token tree
let p = new_parser_from_file(sess, /*bad*/ copy cfg, input);
let tts = p.parse_all_token_trees();
// second pass: parse the crate out of the collected token trees,
// reusing the same session so both passes share one interner
new_parser_from_tts(sess,cfg,tts).parse_crate_mod(/*bad*/ copy cfg)
// why is there no p.abort_if_errors here?
}



pub fn parse_crate_from_source_str(
name: ~str,
source: @~str,
Expand Down Expand Up @@ -313,17 +330,46 @@ mod test {
use std;
use core::io;
use core::option::None;
use ast;

// Helper (not itself a test): serialize any Encodable value to a JSON
// string so parse results can be compared/inspected in the tests below.
// The original carried a `#[test]` attribute, but a generic function
// that takes an argument and returns `~str` cannot be invoked by the
// test harness — it is a plain helper, so the attribute is dropped.
fn to_json_str<E : Encodable<std::json::Encoder>>(val: @E) -> ~str {
    do io::with_str_writer |writer| {
        val.encode(~std::json::Encoder(writer));
    }
}

// parse the given source string directly (character-stream path)
// into a crate, using a fresh parse session and an empty cfg
fn string_to_crate (source_str : @~str) -> @ast::crate {
parse_crate_from_source_str(
~"bogofile",
source_str,
~[],
new_parse_sess(None))
}

// parse the given source string into token trees, then parse those
// token trees into a crate (the tt-based path), for comparison with
// string_to_crate.
fn string_to_tt_to_crate (source_str : @~str) -> @ast::crate {
    // Use a single parse session for both phases. The original built
    // two separate sessions (two interners), but the ParseSess docs in
    // this module warn that the session and the reader must share one
    // interner or "bad things can happen" — idents interned during
    // tt-lexing must resolve identically during parsing.
    let sess = new_parse_sess(None);
    let tts = parse_tts_from_source_str(
        ~"bogofile",
        source_str,
        ~[],
        sess);
    new_parser_from_tts(sess, ~[], tts)
        .parse_crate_mod(~[])
}

// make sure that parsing from TTs produces the same result
// as parsing from strings
// NOTE(review): relies on structural equality of @ast::crate values;
// spans/node-ids must agree between the two paths for this to pass —
// confirm that both parsers assign them identically
#[test] fn tts_produce_the_same_result () {
let source_str = @~"fn foo (x : int) { x; }";
assert_eq!(string_to_tt_to_crate(source_str),
string_to_crate(source_str));
}

// check the contents of the tt manually:
#[test] fn alltts () {
let source_str = @~"fn foo (x : int) { x; }";
let tts = parse_tts_from_source_str(
~"bogofile",
@~"fn foo (x : int) { x; }",
source_str,
~[],
new_parse_sess(None));
assert_eq!(
Expand Down
Loading