From 568874bc1007162276243e613324c710c72ec932 Mon Sep 17 00:00:00 2001 From: Mark-Simulacrum Date: Sun, 6 Nov 2016 19:37:56 -0700 Subject: [PATCH 01/10] Cleanup macro_parser::parse, removing a few clones. --- src/libsyntax/ext/tt/macro_parser.rs | 86 +++++++++++----------------- 1 file changed, 35 insertions(+), 51 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index 1066646aa8e8a..da9692391dd87 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -287,15 +287,8 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes let mut next_eis = Vec::new(); // or proceed normally let mut eof_eis = Vec::new(); - let (sp, tok) = (parser.span, parser.token.clone()); - - /* we append new items to this while we go */ - loop { - let mut ei = match cur_eis.pop() { - None => break, /* for each Earley Item */ - Some(ei) => ei, - }; - + // for each Earley item + while let Some(mut ei) = cur_eis.pop() { // When unzipped trees end, remove them while ei.idx >= ei.top_elts.len() { match ei.stack.pop() { @@ -317,7 +310,6 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes // hack: a matcher sequence is repeating iff it has a // parent (the top level is just a container) - // disregard separator, try to go up // (remove this condition to make trailing seps ok) if idx == len { @@ -334,10 +326,10 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes // Only touch the binders we have actually bound for idx in ei.match_lo..ei.match_hi { - let sub = (ei.matches[idx]).clone(); - (&mut new_pos.matches[idx]) + let sub = ei.matches[idx].clone(); + new_pos.matches[idx] .push(Rc::new(MatchedSeq(sub, mk_sp(ei.sp_lo, - sp.hi)))); + parser.span.hi)))); } new_pos.match_cur = ei.match_hi; @@ -347,25 +339,21 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes // can we go around again? - // the *_t vars are workarounds for the lack of unary move - match ei.sep { - Some(ref t) if idx == len => { // we need a separator - // i'm conflicted about whether this should be hygienic.... - // though in this case, if the separators are never legal - // idents, it shouldn't matter. - if token_name_eq(&tok, t) { //pass the separator - let mut ei_t = ei.clone(); - // ei_t.match_cur = ei_t.match_lo; - ei_t.idx += 1; - next_eis.push(ei_t); - } - } - _ => { // we don't need a separator - let mut ei_t = ei; - ei_t.match_cur = ei_t.match_lo; - ei_t.idx = 0; - cur_eis.push(ei_t); + // Check if we need a separator + if idx == len && ei.sep.is_some() { + if ei.sep.as_ref().map(|ref sep| token_name_eq(&parser.token, sep)) + .unwrap_or(false) { + // i'm conflicted about whether this should be hygienic.... though in + // this case, if the separators are never legal idents, it shouldn't + // matter. + // ei.match_cur = ei.match_lo; + ei.idx += 1; + next_eis.push(ei); } + } else { // we don't need a separator + ei.match_cur = ei.match_lo; + ei.idx = 0; + cur_eis.push(ei); } } else { eof_eis.push(ei); @@ -380,7 +368,7 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes new_ei.idx += 1; //we specifically matched zero repeats. 
for idx in ei.match_cur..ei.match_cur + seq.num_captures { - (&mut new_ei.matches[idx]).push(Rc::new(MatchedSeq(vec![], sp))); + new_ei.matches[idx].push(Rc::new(MatchedSeq(vec![], sp))); } cur_eis.push(new_ei); @@ -388,16 +376,15 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes let matches: Vec<_> = (0..ei.matches.len()) .map(|_| Vec::new()).collect(); - let ei_t = ei; cur_eis.push(Box::new(MatcherPos { stack: vec![], sep: seq.separator.clone(), idx: 0, matches: matches, - match_lo: ei_t.match_cur, - match_cur: ei_t.match_cur, - match_hi: ei_t.match_cur + seq.num_captures, - up: Some(ei_t), + match_lo: ei.match_cur, + match_cur: ei.match_cur, + match_hi: ei.match_cur + seq.num_captures, + up: Some(ei), sp_lo: sp.lo, top_elts: Tt(TokenTree::Sequence(sp, seq)), })); @@ -405,7 +392,7 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes TokenTree::Token(_, MatchNt(..)) => { // Built-in nonterminals never start with these tokens, // so we can eliminate them from consideration. - match tok { + match parser.token { token::CloseDelim(_) => {}, _ => bb_eis.push(ei), } @@ -424,10 +411,9 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes cur_eis.push(ei); } TokenTree::Token(_, ref t) => { - if token_name_eq(t,&tok) { - let mut ei_t = ei.clone(); - ei_t.idx += 1; - next_eis.push(ei_t); + if token_name_eq(t, &parser.token) { + ei.idx += 1; + next_eis.push(ei); } } } @@ -435,17 +421,15 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes } /* error messages here could be improved with links to orig. rules */ - if token_name_eq(&tok, &token::Eof) { + if token_name_eq(&parser.token, &token::Eof) { if eof_eis.len() == 1 { - let mut v = Vec::new(); - for dv in &mut (&mut eof_eis[0]).matches { - v.push(dv.pop().unwrap()); - } + let v = eof_eis[0].matches.iter_mut() + .map(|dv| dv.pop().unwrap()).collect::>(); return nameize(sess, ms, &v[..]); } else if eof_eis.len() > 1 { - return Error(sp, "ambiguity: multiple successful parses".to_string()); + return Error(parser.span, "ambiguity: multiple successful parses".to_string()); } else { - return Failure(sp, token::Eof); + return Failure(parser.span, token::Eof); } } else { if (!bb_eis.is_empty() && !next_eis.is_empty()) @@ -457,7 +441,7 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes _ => panic!() }).collect::>().join(" or "); - return Error(sp, format!( + return Error(parser.span, format!( "local ambiguity: multiple parsing options: {}", match next_eis.len() { 0 => format!("built-in NTs {}.", nts), @@ -466,7 +450,7 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes } )) } else if bb_eis.is_empty() && next_eis.is_empty() { - return Failure(sp, tok); + return Failure(parser.span, parser.token); } else if !next_eis.is_empty() { /* Now process the next token */ while !next_eis.is_empty() { From 7221b07a075aacad2b0d5962568000ba7defe5f9 Mon Sep 17 00:00:00 2001 From: Mark-Simulacrum Date: Mon, 7 Nov 2016 19:16:14 -0700 Subject: [PATCH 02/10] Remove unused argument from nameize. Also makes nameize non-public since it's only locally used. 
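The tail end of PATCH 01 above also replaces a hand-written push loop over the matches with an iterator chain collected into a Vec. A minimal self-contained sketch of that pattern, using made-up vectors of numbers rather than the parser's match stacks:

    fn main() {
        let mut stacks = vec![vec![1, 2], vec![3, 4], vec![5, 6]];

        // Hand-written form: pop the last element of each inner vector and push it.
        let mut v = Vec::new();
        for dv in &mut stacks {
            v.push(dv.pop().unwrap());
        }

        // Iterator form, as in the patch: same result, no explicit loop or pushes.
        let mut stacks2 = vec![vec![1, 2], vec![3, 4], vec![5, 6]];
        let v2: Vec<_> = stacks2.iter_mut().map(|dv| dv.pop().unwrap()).collect();

        assert_eq!(v, v2);
    }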
--- src/libsyntax/ext/tt/macro_parser.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index da9692391dd87..42c71d93a2849 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -200,20 +200,20 @@ pub enum NamedMatch { MatchedNonterminal(Rc) } -pub fn nameize(p_s: &ParseSess, ms: &[TokenTree], res: &[Rc]) +fn nameize(ms: &[TokenTree], res: &[Rc]) -> ParseResult>> { - fn n_rec(p_s: &ParseSess, m: &TokenTree, res: &[Rc], + fn n_rec(m: &TokenTree, res: &[Rc], ret_val: &mut HashMap>, idx: &mut usize) -> Result<(), (syntax_pos::Span, String)> { match *m { TokenTree::Sequence(_, ref seq) => { for next_m in &seq.tts { - n_rec(p_s, next_m, res, ret_val, idx)? + n_rec(next_m, res, ret_val, idx)? } } TokenTree::Delimited(_, ref delim) => { for next_m in &delim.tts { - n_rec(p_s, next_m, res, ret_val, idx)?; + n_rec(next_m, res, ret_val, idx)?; } } TokenTree::Token(sp, MatchNt(bind_name, _)) => { @@ -239,7 +239,7 @@ pub fn nameize(p_s: &ParseSess, ms: &[TokenTree], res: &[Rc]) let mut ret_val = HashMap::new(); let mut idx = 0; for m in ms { - match n_rec(p_s, m, res, &mut ret_val, &mut idx) { + match n_rec(m, res, &mut ret_val, &mut idx) { Ok(_) => {}, Err((sp, msg)) => return Error(sp, msg), } @@ -425,7 +425,7 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes if eof_eis.len() == 1 { let v = eof_eis[0].matches.iter_mut() .map(|dv| dv.pop().unwrap()).collect::>(); - return nameize(sess, ms, &v[..]); + return nameize(ms, &v[..]); } else if eof_eis.len() > 1 { return Error(parser.span, "ambiguity: multiple successful parses".to_string()); } else { From c9e6089d29cd3263897d91e12519b79ba8721f85 Mon Sep 17 00:00:00 2001 From: Mark-Simulacrum Date: Mon, 7 Nov 2016 19:17:17 -0700 Subject: [PATCH 03/10] Refactor to extending from a drain instead of while looping. --- src/libsyntax/ext/tt/macro_parser.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index 42c71d93a2849..5f976336ccd37 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -453,9 +453,7 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes return Failure(parser.span, parser.token); } else if !next_eis.is_empty() { /* Now process the next token */ - while !next_eis.is_empty() { - cur_eis.push(next_eis.pop().unwrap()); - } + cur_eis.extend(next_eis.drain(..)); parser.bump(); } else /* bb_eis.len() == 1 */ { let mut ei = bb_eis.pop().unwrap(); From eef10d0b5b9e0788442fc6c8ecce57ae7f5a4047 Mon Sep 17 00:00:00 2001 From: Mark-Simulacrum Date: Mon, 7 Nov 2016 19:17:45 -0700 Subject: [PATCH 04/10] Clean up extraneous &mut. 
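The extraneous-&mut cleanup in PATCH 04 relies on the fact that indexing a field such as ei.matches already produces a place expression you can call &mut self methods on, so the explicit (&mut ...) borrow adds nothing. A small sketch of the same point on a plain Vec<Vec<u32>> (hypothetical data, not the parser's types):

    fn main() {
        let mut matches: Vec<Vec<u32>> = vec![Vec::new(); 3];

        // Explicitly re-borrowing before the method call works...
        (&mut matches[0]).push(1);
        // ...but auto-ref on the method call already takes the &mut borrow.
        matches[0].push(2);

        assert_eq!(matches[0], [1, 2]);
    }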
---
 src/libsyntax/ext/tt/macro_parser.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index 5f976336ccd37..9f055f33c3d07 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -459,7 +459,7 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes
             let mut ei = bb_eis.pop().unwrap();
             if let TokenTree::Token(span, MatchNt(_, ident)) = ei.top_elts.get_tt(ei.idx) {
                 let match_cur = ei.match_cur;
-                (&mut ei.matches[match_cur]).push(Rc::new(MatchedNonterminal(
+                ei.matches[match_cur].push(Rc::new(MatchedNonterminal(
                     Rc::new(parse_nt(&mut parser, span, &ident.name.as_str())))));
                 ei.idx += 1;
                 ei.match_cur += 1;

From 68abb24e8d99f0fb7175c2102da3638814b6b2c7 Mon Sep 17 00:00:00 2001
From: Mark-Simulacrum
Date: Mon, 7 Nov 2016 19:40:00 -0700
Subject: [PATCH 05/10] Factor out NamedParseResult.

---
 src/libsyntax/ext/tt/macro_parser.rs | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index 9f055f33c3d07..29e1ea1d1edd7 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -143,6 +143,8 @@ pub struct MatcherPos {
     sp_lo: BytePos,
 }
 
+pub type NamedParseResult = ParseResult<HashMap<Ident, Rc<NamedMatch>>>;
+
 pub fn count_names(ms: &[TokenTree]) -> usize {
     ms.iter().fold(0, |count, elt| {
         count + match *elt {
@@ -200,8 +202,7 @@ pub enum NamedMatch {
     MatchedNonterminal(Rc<Nonterminal>)
 }
 
-fn nameize(ms: &[TokenTree], res: &[Rc<NamedMatch>])
-           -> ParseResult<HashMap<Ident, Rc<NamedMatch>>> {
+fn nameize(ms: &[TokenTree], res: &[Rc<NamedMatch>]) -> NamedParseResult {
     fn n_rec(m: &TokenTree, res: &[Rc<NamedMatch>],
              ret_val: &mut HashMap<Ident, Rc<NamedMatch>>, idx: &mut usize)
              -> Result<(), (syntax_pos::Span, String)> {
@@ -265,8 +266,6 @@ pub fn parse_failure_msg(tok: Token) -> String {
     }
 }
 
-pub type NamedParseResult = ParseResult<HashMap<Ident, Rc<NamedMatch>>>;
-
 /// Perform a token equality check, ignoring syntax context (that is, an
 /// unhygienic comparison)
 pub fn token_name_eq(t1 : &Token, t2 : &Token) -> bool {

From 27c09864bd04bc3f65e8ce5721eaa5621ee9ac6a Mon Sep 17 00:00:00 2001
From: Mark-Simulacrum
Date: Thu, 10 Nov 2016 17:30:01 -0700
Subject: [PATCH 06/10] Refactor parse_nt.

---
 src/libsyntax/ext/tt/macro_parser.rs | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index 29e1ea1d1edd7..64acce19c1cc4 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -479,14 +479,19 @@ pub fn parse_nt<'a>(p: &mut Parser<'a>, sp: Span, name: &str) -> Nonterminal {
             p.quote_depth += 1; //but in theory, non-quoted tts might be useful
             let mut tt = panictry!(p.parse_token_tree());
             p.quote_depth -= 1;
-            loop {
-                let nt = match tt {
-                    TokenTree::Token(_, token::Interpolated(ref nt)) => nt.clone(),
-                    _ => break,
-                };
-                match *nt {
-                    token::NtTT(ref sub_tt) => tt = sub_tt.clone(),
-                    _ => break,
+            while let TokenTree::Token(sp, token::Interpolated(nt)) = tt {
+                if let token::NtTT(..) = *nt {
+                    match Rc::try_unwrap(nt) {
+                        Ok(token::NtTT(sub_tt)) => tt = sub_tt,
+                        Ok(_) => unreachable!(),
+                        Err(nt_rc) => match *nt_rc {
+                            token::NtTT(ref sub_tt) => tt = sub_tt.clone(),
+                            _ => unreachable!(),
+                        },
+                    }
+                } else {
+                    tt = TokenTree::Token(sp, token::Interpolated(nt.clone()));
+                    break
+                }
             }
             return token::NtTT(tt);

From b8d6686ef3c2998d29c7ef531895ee05305cfef1 Mon Sep 17 00:00:00 2001
From: Mark-Simulacrum
Date: Fri, 11 Nov 2016 16:28:47 -0700
Subject: [PATCH 07/10] Factor out inner current Earley item loop.

Change multiple functions to be non-public.
Change nameize to accept an iterator so as to avoid an allocation.
---
 src/libsyntax/ext/tt/macro_parser.rs | 299 ++++++++++++++-------------
 1 file changed, 153 insertions(+), 146 deletions(-)

diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index 64acce19c1cc4..3c57f7a05c29d 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -130,7 +130,7 @@ struct MatcherTtFrame {
 }
 
 #[derive(Clone)]
-pub struct MatcherPos {
+struct MatcherPos {
     stack: Vec<MatcherTtFrame>,
     top_elts: TokenTreeOrTokenTreeVec,
     sep: Option<Token>,
@@ -162,14 +162,13 @@ pub fn count_names(ms: &[TokenTree]) -> usize {
     })
 }
 
-pub fn initial_matcher_pos(ms: Vec<TokenTree>, sep: Option<Token>, lo: BytePos)
-                           -> Box<MatcherPos> {
+fn initial_matcher_pos(ms: Vec<TokenTree>, lo: BytePos) -> Box<MatcherPos> {
     let match_idx_hi = count_names(&ms[..]);
-    let matches: Vec<_> = (0..match_idx_hi).map(|_| Vec::new()).collect();
+    let matches = create_matches(match_idx_hi);
     Box::new(MatcherPos {
         stack: vec![],
         top_elts: TtSeq(ms),
-        sep: sep,
+        sep: None,
         idx: 0,
         up: None,
         matches: matches,
@@ -202,26 +201,25 @@ pub enum NamedMatch {
     MatchedNonterminal(Rc<Nonterminal>)
 }
 
-fn nameize(ms: &[TokenTree], res: &[Rc<NamedMatch>]) -> NamedParseResult {
-    fn n_rec(m: &TokenTree, res: &[Rc<NamedMatch>],
-             ret_val: &mut HashMap<Ident, Rc<NamedMatch>>, idx: &mut usize)
+fn nameize<I: Iterator<Item=Rc<NamedMatch>>>(ms: &[TokenTree], mut res: I) -> NamedParseResult {
+    fn n_rec<I: Iterator<Item=Rc<NamedMatch>>>(m: &TokenTree, mut res: &mut I,
+             ret_val: &mut HashMap<Ident, Rc<NamedMatch>>)
              -> Result<(), (syntax_pos::Span, String)> {
         match *m {
             TokenTree::Sequence(_, ref seq) => {
                 for next_m in &seq.tts {
-                    n_rec(next_m, res, ret_val, idx)?
+                    n_rec(next_m, res.by_ref(), ret_val)?
                 }
             }
             TokenTree::Delimited(_, ref delim) => {
                 for next_m in &delim.tts {
-                    n_rec(next_m, res, ret_val, idx)?;
+                    n_rec(next_m, res.by_ref(), ret_val)?;
                 }
             }
             TokenTree::Token(sp, MatchNt(bind_name, _)) => {
                 match ret_val.entry(bind_name) {
                     Vacant(spot) => {
-                        spot.insert(res[*idx].clone());
-                        *idx += 1;
+                        spot.insert(res.next().unwrap());
                     }
                     Occupied(..)
=> { return Err((sp, format!("duplicated bind name: {}", bind_name))) @@ -238,9 +236,8 @@ fn nameize(ms: &[TokenTree], res: &[Rc]) -> NamedParseResult { } let mut ret_val = HashMap::new(); - let mut idx = 0; for m in ms { - match n_rec(m, res, &mut ret_val, &mut idx) { + match n_rec(m, res.by_ref(), &mut ret_val) { Ok(_) => {}, Err((sp, msg)) => return Error(sp, msg), } @@ -266,9 +263,8 @@ pub fn parse_failure_msg(tok: Token) -> String { } } -/// Perform a token equality check, ignoring syntax context (that is, an -/// unhygienic comparison) -pub fn token_name_eq(t1 : &Token, t2 : &Token) -> bool { +/// Perform a token equality check, ignoring syntax context (that is, an unhygienic comparison) +fn token_name_eq(t1 : &Token, t2 : &Token) -> bool { match (t1,t2) { (&token::Ident(id1),&token::Ident(id2)) | (&token::Lifetime(id1),&token::Lifetime(id2)) => @@ -277,154 +273,165 @@ pub fn token_name_eq(t1 : &Token, t2 : &Token) -> bool { } } -pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseResult { - let mut parser = Parser::new_with_doc_flag(sess, Box::new(rdr), true); - let mut cur_eis = SmallVector::one(initial_matcher_pos(ms.to_owned(), None, parser.span.lo)); - - loop { - let mut bb_eis = Vec::new(); // black-box parsed by parser.rs - let mut next_eis = Vec::new(); // or proceed normally - let mut eof_eis = Vec::new(); +fn create_matches(len: usize) -> Vec>> { + (0..len).into_iter().map(|_| Vec::new()).collect() +} - // for each Earley item - while let Some(mut ei) = cur_eis.pop() { - // When unzipped trees end, remove them - while ei.idx >= ei.top_elts.len() { - match ei.stack.pop() { - Some(MatcherTtFrame { elts, idx }) => { - ei.top_elts = elts; - ei.idx = idx + 1; - } - None => break +fn inner_parse_loop(cur_eis: &mut SmallVector>, + next_eis: &mut Vec>, + eof_eis: &mut Vec>, + bb_eis: &mut Vec>, + token: &Token, span: &syntax_pos::Span) -> ParseResult<()> { + while let Some(mut ei) = cur_eis.pop() { + // When unzipped trees end, remove them + while ei.idx >= ei.top_elts.len() { + match ei.stack.pop() { + Some(MatcherTtFrame { elts, idx }) => { + ei.top_elts = elts; + ei.idx = idx + 1; } + None => break } + } - let idx = ei.idx; - let len = ei.top_elts.len(); - - /* at end of sequence */ - if idx >= len { - // can't move out of `match`es, so: - if ei.up.is_some() { - // hack: a matcher sequence is repeating iff it has a - // parent (the top level is just a container) - - // disregard separator, try to go up - // (remove this condition to make trailing seps ok) - if idx == len { - // pop from the matcher position - - let mut new_pos = ei.up.clone().unwrap(); - - // update matches (the MBE "parse tree") by appending - // each tree as a subtree. - - // I bet this is a perf problem: we're preemptively - // doing a lot of array work that will get thrown away - // most of the time. - - // Only touch the binders we have actually bound - for idx in ei.match_lo..ei.match_hi { - let sub = ei.matches[idx].clone(); - new_pos.matches[idx] - .push(Rc::new(MatchedSeq(sub, mk_sp(ei.sp_lo, - parser.span.hi)))); - } - - new_pos.match_cur = ei.match_hi; - new_pos.idx += 1; - cur_eis.push(new_pos); + let idx = ei.idx; + let len = ei.top_elts.len(); + + // at end of sequence + if idx >= len { + // We are repeating iff there is a parent + if ei.up.is_some() { + // Disregarding the separator, add the "up" case to the tokens that should be + // examined. 
+ // (remove this condition to make trailing seps ok) + if idx == len { + let mut new_pos = ei.up.clone().unwrap(); + + // update matches (the MBE "parse tree") by appending + // each tree as a subtree. + + // I bet this is a perf problem: we're preemptively + // doing a lot of array work that will get thrown away + // most of the time. + + // Only touch the binders we have actually bound + for idx in ei.match_lo..ei.match_hi { + let sub = ei.matches[idx].clone(); + new_pos.matches[idx] + .push(Rc::new(MatchedSeq(sub, mk_sp(ei.sp_lo, + span.hi)))); } - // can we go around again? - - // Check if we need a separator - if idx == len && ei.sep.is_some() { - if ei.sep.as_ref().map(|ref sep| token_name_eq(&parser.token, sep)) - .unwrap_or(false) { - // i'm conflicted about whether this should be hygienic.... though in - // this case, if the separators are never legal idents, it shouldn't - // matter. - // ei.match_cur = ei.match_lo; - ei.idx += 1; - next_eis.push(ei); - } - } else { // we don't need a separator - ei.match_cur = ei.match_lo; - ei.idx = 0; - cur_eis.push(ei); - } - } else { - eof_eis.push(ei); + new_pos.match_cur = ei.match_hi; + new_pos.idx += 1; + cur_eis.push(new_pos); } - } else { - match ei.top_elts.get_tt(idx) { - /* need to descend into sequence */ - TokenTree::Sequence(sp, seq) => { - if seq.op == tokenstream::KleeneOp::ZeroOrMore { - let mut new_ei = ei.clone(); - new_ei.match_cur += seq.num_captures; - new_ei.idx += 1; - //we specifically matched zero repeats. - for idx in ei.match_cur..ei.match_cur + seq.num_captures { - new_ei.matches[idx].push(Rc::new(MatchedSeq(vec![], sp))); - } - - cur_eis.push(new_ei); - } - let matches: Vec<_> = (0..ei.matches.len()) - .map(|_| Vec::new()).collect(); - cur_eis.push(Box::new(MatcherPos { - stack: vec![], - sep: seq.separator.clone(), - idx: 0, - matches: matches, - match_lo: ei.match_cur, - match_cur: ei.match_cur, - match_hi: ei.match_cur + seq.num_captures, - up: Some(ei), - sp_lo: sp.lo, - top_elts: Tt(TokenTree::Sequence(sp, seq)), - })); + // Check if we need a separator + if idx == len && ei.sep.is_some() { + // We have a separator, and it is the current token. + if ei.sep.as_ref().map(|ref sep| token_name_eq(&token, sep)).unwrap_or(false) { + ei.idx += 1; + next_eis.push(ei); } - TokenTree::Token(_, MatchNt(..)) => { - // Built-in nonterminals never start with these tokens, - // so we can eliminate them from consideration. - match parser.token { - token::CloseDelim(_) => {}, - _ => bb_eis.push(ei), + } else { // we don't need a separator + ei.match_cur = ei.match_lo; + ei.idx = 0; + cur_eis.push(ei); + } + } else { + // We aren't repeating, so we must be potentially at the end of the input. + eof_eis.push(ei); + } + } else { + match ei.top_elts.get_tt(idx) { + /* need to descend into sequence */ + TokenTree::Sequence(sp, seq) => { + if seq.op == tokenstream::KleeneOp::ZeroOrMore { + // Examine the case where there are 0 matches of this sequence + let mut new_ei = ei.clone(); + new_ei.match_cur += seq.num_captures; + new_ei.idx += 1; + for idx in ei.match_cur..ei.match_cur + seq.num_captures { + new_ei.matches[idx].push(Rc::new(MatchedSeq(vec![], sp))); } + cur_eis.push(new_ei); } - TokenTree::Token(sp, SubstNt(..)) => { - return Error(sp, "missing fragment specifier".to_string()) - } - seq @ TokenTree::Delimited(..) 
| seq @ TokenTree::Token(_, DocComment(..)) => { - let lower_elts = mem::replace(&mut ei.top_elts, Tt(seq)); - let idx = ei.idx; - ei.stack.push(MatcherTtFrame { - elts: lower_elts, - idx: idx, - }); - ei.idx = 0; - cur_eis.push(ei); + + // Examine the case where there is at least one match of this sequence + let matches = create_matches(ei.matches.len()); + cur_eis.push(Box::new(MatcherPos { + stack: vec![], + sep: seq.separator.clone(), + idx: 0, + matches: matches, + match_lo: ei.match_cur, + match_cur: ei.match_cur, + match_hi: ei.match_cur + seq.num_captures, + up: Some(ei), + sp_lo: sp.lo, + top_elts: Tt(TokenTree::Sequence(sp, seq)), + })); + } + TokenTree::Token(_, MatchNt(..)) => { + // Built-in nonterminals never start with these tokens, + // so we can eliminate them from consideration. + match *token { + token::CloseDelim(_) => {}, + _ => bb_eis.push(ei), } - TokenTree::Token(_, ref t) => { - if token_name_eq(t, &parser.token) { - ei.idx += 1; - next_eis.push(ei); - } + } + TokenTree::Token(sp, SubstNt(..)) => { + return Error(sp, "missing fragment specifier".to_string()) + } + seq @ TokenTree::Delimited(..) | seq @ TokenTree::Token(_, DocComment(..)) => { + let lower_elts = mem::replace(&mut ei.top_elts, Tt(seq)); + let idx = ei.idx; + ei.stack.push(MatcherTtFrame { + elts: lower_elts, + idx: idx, + }); + ei.idx = 0; + cur_eis.push(ei); + } + TokenTree::Token(_, ref t) => { + if token_name_eq(t, &token) { + ei.idx += 1; + next_eis.push(ei); } } } } + } + + Success(()) +} + +pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseResult { + let mut parser = Parser::new_with_doc_flag(sess, Box::new(rdr), true); + let mut cur_eis = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo)); + + loop { + let mut bb_eis = Vec::new(); // black-box parsed by parser.rs + let mut next_eis = Vec::new(); // or proceed normally + + // FIXME: Use SmallVector since in the successful case we will only have one + let mut eof_eis = Vec::new(); + + match inner_parse_loop(&mut cur_eis, &mut next_eis, &mut eof_eis, &mut bb_eis, + &parser.token, &parser.span) { + Success(_) => {}, + Failure(sp, tok) => return Failure(sp, tok), + Error(sp, msg) => return Error(sp, msg), + } + + // inner parse loop handled all cur_eis, so it's empty + assert!(cur_eis.is_empty()); /* error messages here could be improved with links to orig. rules */ if token_name_eq(&parser.token, &token::Eof) { if eof_eis.len() == 1 { - let v = eof_eis[0].matches.iter_mut() - .map(|dv| dv.pop().unwrap()).collect::>(); - return nameize(ms, &v[..]); + return nameize(ms, eof_eis[0].matches.iter_mut().map(|mut dv| dv.pop().unwrap())); } else if eof_eis.len() > 1 { return Error(parser.span, "ambiguity: multiple successful parses".to_string()); } else { @@ -473,7 +480,7 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes } } -pub fn parse_nt<'a>(p: &mut Parser<'a>, sp: Span, name: &str) -> Nonterminal { +fn parse_nt<'a>(p: &mut Parser<'a>, sp: Span, name: &str) -> Nonterminal { match name { "tt" => { p.quote_depth += 1; //but in theory, non-quoted tts might be useful From 6046595e34737d7bec851bfd1352a01e58fe99e9 Mon Sep 17 00:00:00 2001 From: Mark-Simulacrum Date: Fri, 11 Nov 2016 16:37:08 -0700 Subject: [PATCH 08/10] Use SmallVector for eof and bb eis. 
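PATCH 08 moves eof_eis and bb_eis to SmallVector, whose point is to keep the common zero- and one-element cases inline instead of heap-allocating a Vec. A toy illustration of that idea using a hypothetical TinyVec type (not rustc's actual SmallVector API):

    use std::mem;

    // Sketch of the inline/heap split behind a small-vector type.
    enum TinyVec<T> {
        Zero,
        One(T),
        Many(Vec<T>),
    }

    impl<T> TinyVec<T> {
        fn new() -> Self {
            TinyVec::Zero
        }

        fn push(&mut self, item: T) {
            match mem::replace(self, TinyVec::Zero) {
                TinyVec::Zero => *self = TinyVec::One(item),
                TinyVec::One(first) => *self = TinyVec::Many(vec![first, item]),
                TinyVec::Many(mut v) => {
                    v.push(item);
                    *self = TinyVec::Many(v);
                }
            }
        }

        fn len(&self) -> usize {
            match *self {
                TinyVec::Zero => 0,
                TinyVec::One(_) => 1,
                TinyVec::Many(ref v) => v.len(),
            }
        }
    }

    fn main() {
        let mut eof_eis = TinyVec::new();
        eof_eis.push("single successful parse");
        // The expected one-element case is stored inline, with no Vec allocation.
        assert_eq!(eof_eis.len(), 1);
    }

In the successful-parse path there is exactly one eof Earley item, so this representation avoids a heap allocation per parse.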
---
 src/libsyntax/ext/tt/macro_parser.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index 3c57f7a05c29d..a072f2ba948c7 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -279,8 +279,8 @@ fn create_matches(len: usize) -> Vec<Vec<Rc<NamedMatch>>> {
 
 fn inner_parse_loop(cur_eis: &mut SmallVector<Box<MatcherPos>>,
                     next_eis: &mut Vec<Box<MatcherPos>>,
-                    eof_eis: &mut Vec<Box<MatcherPos>>,
-                    bb_eis: &mut Vec<Box<MatcherPos>>,
+                    eof_eis: &mut SmallVector<Box<MatcherPos>>,
+                    bb_eis: &mut SmallVector<Box<MatcherPos>>,
                     token: &Token, span: &syntax_pos::Span) -> ParseResult<()> {
     while let Some(mut ei) = cur_eis.pop() {
         // When unzipped trees end, remove them
@@ -412,12 +412,10 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes
     let mut cur_eis = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo));
 
     loop {
-        let mut bb_eis = Vec::new(); // black-box parsed by parser.rs
+        let mut bb_eis = SmallVector::new(); // black-box parsed by parser.rs
+        let mut eof_eis = SmallVector::new();
         let mut next_eis = Vec::new(); // or proceed normally
 
-        // FIXME: Use SmallVector since in the successful case we will only have one
-        let mut eof_eis = Vec::new();
-
         match inner_parse_loop(&mut cur_eis, &mut next_eis, &mut eof_eis, &mut bb_eis,
                                &parser.token, &parser.span) {
             Success(_) => {},

From 38912ee3d43868651e8ac6fc3da1153f45ba5cf4 Mon Sep 17 00:00:00 2001
From: Mark-Simulacrum
Date: Sat, 12 Nov 2016 07:41:26 -0700
Subject: [PATCH 09/10] Move next_eis out of main loop to avoid re-allocating and dropping it.

---
 src/libsyntax/ext/tt/macro_parser.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index a072f2ba948c7..d43718305622c 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -410,11 +410,12 @@ fn inner_parse_loop(cur_eis: &mut SmallVector<Box<MatcherPos>>,
 pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseResult {
     let mut parser = Parser::new_with_doc_flag(sess, Box::new(rdr), true);
     let mut cur_eis = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo));
+    let mut next_eis = Vec::new(); // or proceed normally
 
     loop {
         let mut bb_eis = SmallVector::new(); // black-box parsed by parser.rs
         let mut eof_eis = SmallVector::new();
-        let mut next_eis = Vec::new(); // or proceed normally
+        assert!(next_eis.is_empty());
 
         match inner_parse_loop(&mut cur_eis, &mut next_eis, &mut eof_eis, &mut bb_eis,
                                &parser.token, &parser.span) {
             Success(_) => {},
             Failure(sp, tok) => return Failure(sp, tok),
             Error(sp, msg) => return Error(sp, msg),
         }

From 2189f573caf93e389a56aefe0aeaa027feafd281 Mon Sep 17 00:00:00 2001
From: Mark-Simulacrum
Date: Sat, 12 Nov 2016 07:41:47 -0700
Subject: [PATCH 10/10] Remove extra level of nesting.

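PATCH 09 just above hoists next_eis out of the main loop so that a single allocation is reused across iterations, and PATCH 03 earlier moves its contents with extend(next_eis.drain(..)). A small standalone sketch of both idioms, with throwaway numbers instead of Earley items:

    fn main() {
        let batches = vec![vec![1, 2, 3], vec![4, 5], vec![6]];

        // One scratch buffer, allocated once outside the loop and reused;
        // drain(..) empties it each iteration but keeps its capacity.
        let mut scratch: Vec<i32> = Vec::new();
        let mut flat = Vec::new();
        for batch in &batches {
            assert!(scratch.is_empty());
            scratch.extend(batch.iter().map(|x| x * 10));
            flat.extend(scratch.drain(..));
        }

        assert_eq!(flat, [10, 20, 30, 40, 50, 60]);
        assert!(scratch.is_empty());
    }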
--- src/libsyntax/ext/tt/macro_parser.rs | 67 +++++++++++++--------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index d43718305622c..39ffab4dc17a7 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -436,43 +436,40 @@ pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree]) -> NamedParseRes } else { return Failure(parser.span, token::Eof); } - } else { - if (!bb_eis.is_empty() && !next_eis.is_empty()) - || bb_eis.len() > 1 { - let nts = bb_eis.iter().map(|ei| match ei.top_elts.get_tt(ei.idx) { - TokenTree::Token(_, MatchNt(bind, name)) => { - format!("{} ('{}')", name, bind) - } - _ => panic!() - }).collect::>().join(" or "); - - return Error(parser.span, format!( - "local ambiguity: multiple parsing options: {}", - match next_eis.len() { - 0 => format!("built-in NTs {}.", nts), - 1 => format!("built-in NTs {} or 1 other option.", nts), - n => format!("built-in NTs {} or {} other options.", nts, n), - } - )) - } else if bb_eis.is_empty() && next_eis.is_empty() { - return Failure(parser.span, parser.token); - } else if !next_eis.is_empty() { - /* Now process the next token */ - cur_eis.extend(next_eis.drain(..)); - parser.bump(); - } else /* bb_eis.len() == 1 */ { - let mut ei = bb_eis.pop().unwrap(); - if let TokenTree::Token(span, MatchNt(_, ident)) = ei.top_elts.get_tt(ei.idx) { - let match_cur = ei.match_cur; - ei.matches[match_cur].push(Rc::new(MatchedNonterminal( - Rc::new(parse_nt(&mut parser, span, &ident.name.as_str()))))); - ei.idx += 1; - ei.match_cur += 1; - } else { - unreachable!() + } else if (!bb_eis.is_empty() && !next_eis.is_empty()) || bb_eis.len() > 1 { + let nts = bb_eis.iter().map(|ei| match ei.top_elts.get_tt(ei.idx) { + TokenTree::Token(_, MatchNt(bind, name)) => { + format!("{} ('{}')", name, bind) + } + _ => panic!() + }).collect::>().join(" or "); + + return Error(parser.span, format!( + "local ambiguity: multiple parsing options: {}", + match next_eis.len() { + 0 => format!("built-in NTs {}.", nts), + 1 => format!("built-in NTs {} or 1 other option.", nts), + n => format!("built-in NTs {} or {} other options.", nts, n), } - cur_eis.push(ei); + )); + } else if bb_eis.is_empty() && next_eis.is_empty() { + return Failure(parser.span, parser.token); + } else if !next_eis.is_empty() { + /* Now process the next token */ + cur_eis.extend(next_eis.drain(..)); + parser.bump(); + } else /* bb_eis.len() == 1 */ { + let mut ei = bb_eis.pop().unwrap(); + if let TokenTree::Token(span, MatchNt(_, ident)) = ei.top_elts.get_tt(ei.idx) { + let match_cur = ei.match_cur; + ei.matches[match_cur].push(Rc::new(MatchedNonterminal( + Rc::new(parse_nt(&mut parser, span, &ident.name.as_str()))))); + ei.idx += 1; + ei.match_cur += 1; + } else { + unreachable!() } + cur_eis.push(ei); } assert!(!cur_eis.is_empty());
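For reference, the parse_nt change in PATCH 06 hinges on Rc::try_unwrap: when the Rc is the sole owner the value is moved out without cloning, and only a shared Rc falls back to a clone. A minimal sketch of that pattern with plain Strings rather than the real Nonterminal type:

    use std::rc::Rc;

    fn take_or_clone(rc: Rc<String>) -> String {
        match Rc::try_unwrap(rc) {
            // Sole owner: move the value out, no clone needed.
            Ok(s) => s,
            // Still shared elsewhere: fall back to cloning the inner value.
            Err(shared) => (*shared).clone(),
        }
    }

    fn main() {
        let unique = Rc::new(String::from("token tree"));
        assert_eq!(take_or_clone(unique), "token tree");

        let shared = Rc::new(String::from("interpolated"));
        let _other_handle = shared.clone();
        assert_eq!(take_or_clone(shared), "interpolated");
    }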