lalrpop-util-0.20.0/.cargo_vcs_info.json0000644000000001520000000000100135350ustar { "git": { "sha1": "1584ddb243726195b540fdd2b3ccf693876288e0" }, "path_in_vcs": "lalrpop-util" }lalrpop-util-0.20.0/Cargo.toml0000644000000017550000000000100115450ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.64" name = "lalrpop-util" version = "0.20.0" authors = ["Niko Matsakis "] description = "Runtime library for parsers generated by LALRPOP" license = "Apache-2.0 OR MIT" repository = "https://github.com/lalrpop/lalrpop" [package.metadata.docs.rs] features = ["lexer"] [dependencies.regex] version = "1.3" features = ["std"] optional = true default_features = false [features] default = ["std"] lexer = [ "regex/std", "std", ] std = [] unicode = ["regex?/unicode"] lalrpop-util-0.20.0/Cargo.toml.orig000064400000000000000000000007411046102023000152200ustar 00000000000000[package] name = "lalrpop-util" description = "Runtime library for parsers generated by LALRPOP" workspace = ".." repository.workspace = true edition.workspace = true authors.workspace = true license.workspace = true version.workspace = true rust-version.workspace = true [dependencies] regex = { workspace = true, optional = true } [features] lexer = ["regex/std", "std"] unicode = ["regex?/unicode"] std = [] default = ["std"] [package.metadata.docs.rs] features = ["lexer"] lalrpop-util-0.20.0/src/lexer.rs000064400000000000000000000076661046102023000146220ustar 00000000000000use std::{fmt, marker::PhantomData}; use crate::ParseError; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Token<'input>(pub usize, pub &'input str); impl<'a> fmt::Display for Token<'a> { fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { fmt::Display::fmt(self.1, formatter) } } struct RegexEntry { regex: regex::Regex, skip: bool, } pub struct MatcherBuilder { regex_set: regex::RegexSet, regex_vec: Vec, } impl MatcherBuilder { pub fn new( exprs: impl IntoIterator, ) -> Result where S: AsRef, { let exprs = exprs.into_iter(); let mut regex_vec = Vec::with_capacity(exprs.size_hint().0); let mut first_error = None; let regex_set_result = regex::RegexSet::new(exprs.scan((), |_, (s, skip)| { regex_vec.push(match regex::Regex::new(s.as_ref()) { Ok(regex) => RegexEntry { regex, skip }, Err(err) => { first_error = Some(err); return None; } }); Some(s) })); if let Some(err) = first_error { return Err(err); } let regex_set = regex_set_result?; Ok(MatcherBuilder { regex_set, regex_vec, }) } pub fn matcher<'input, 'builder, E>( &'builder self, s: &'input str, ) -> Matcher<'input, 'builder, E> { Matcher { text: s, consumed: 0, regex_set: &self.regex_set, regex_vec: &self.regex_vec, _marker: PhantomData, } } } pub struct Matcher<'input, 'builder, E> { text: &'input str, consumed: usize, regex_set: &'builder regex::RegexSet, regex_vec: &'builder Vec, _marker: PhantomData E>, } impl<'input, 'builder, E> Iterator for Matcher<'input, 'builder, E> { type Item = Result<(usize, Token<'input>, usize), ParseError, E>>; fn next(&mut self) -> Option { loop { let text = self.text; let start_offset = self.consumed; if text.is_empty() { self.consumed = start_offset; return None; } else { let matches = self.regex_set.matches(text); if !matches.matched_any() { return Some(Err(ParseError::InvalidToken { location: start_offset, })); } else { let mut longest_match = 0; let mut index = 0; let mut skip = false; for i in matches.iter() { let entry = &self.regex_vec[i]; let match_ = entry.regex.find(text).unwrap(); let len = match_.end(); if len >= longest_match { longest_match = len; index = i; skip = entry.skip; } } let result = &text[..longest_match]; let remaining = &text[longest_match..]; let end_offset = start_offset + longest_match; self.text = remaining; self.consumed = end_offset; // Skip any whitespace matches if skip { if longest_match == 0 { return Some(Err(ParseError::InvalidToken { location: start_offset, })); } continue; } return Some(Ok((start_offset, Token(index, result), end_offset))); } } } } } lalrpop-util-0.20.0/src/lib.rs000064400000000000000000000151241046102023000142350ustar 00000000000000#![cfg_attr(not(feature = "std"), no_std)] extern crate alloc; use alloc::{string::String, vec::Vec}; use core::fmt; #[cfg(feature = "std")] use std::error::Error; #[cfg(feature = "lexer")] pub mod lexer; pub mod state_machine; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum ParseError { /// Generated by the parser when it encounters a token (or EOF) it did not /// expect. InvalidToken { location: L }, /// Generated by the parser when it encounters an EOF it did not expect. UnrecognizedEof { /// The end of the final token location: L, /// The set of expected tokens: these names are taken from the /// grammar and hence may not necessarily be suitable for /// presenting to the user. expected: Vec, }, /// Generated by the parser when it encounters a token it did not expect. UnrecognizedToken { /// The unexpected token of type `T` with a span given by the two `L` values. token: (L, T, L), /// The set of expected tokens: these names are taken from the /// grammar and hence may not necessarily be suitable for /// presenting to the user. expected: Vec, }, /// Generated by the parser when it encounters additional, unexpected tokens. ExtraToken { token: (L, T, L) }, /// Custom error type. User { error: E }, } impl ParseError { fn map_intern( self, mut loc_op: impl FnMut(L) -> LL, tok_op: impl FnOnce(T) -> TT, err_op: impl FnOnce(E) -> EE, ) -> ParseError { let maptok = |(s, t, e): (L, T, L)| (loc_op(s), tok_op(t), loc_op(e)); match self { ParseError::InvalidToken { location } => ParseError::InvalidToken { location: loc_op(location), }, ParseError::UnrecognizedEof { location, expected } => ParseError::UnrecognizedEof { location: loc_op(location), expected, }, ParseError::UnrecognizedToken { token, expected } => ParseError::UnrecognizedToken { token: maptok(token), expected, }, ParseError::ExtraToken { token } => ParseError::ExtraToken { token: maptok(token), }, ParseError::User { error } => ParseError::User { error: err_op(error), }, } } pub fn map_location(self, op: impl FnMut(L) -> LL) -> ParseError { self.map_intern(op, |x| x, |x| x) } pub fn map_token(self, op: impl FnOnce(T) -> TT) -> ParseError { self.map_intern(|x| x, op, |x| x) } pub fn map_error(self, op: impl FnOnce(E) -> EE) -> ParseError { self.map_intern(|x| x, |x| x, op) } } /// Format a list of expected tokens. fn fmt_expected(f: &mut fmt::Formatter<'_>, expected: &[String]) -> fmt::Result { if !expected.is_empty() { writeln!(f)?; for (i, e) in expected.iter().enumerate() { let sep = match i { 0 => "Expected one of", _ if i < expected.len() - 1 => ",", // Last expected message to be written _ => " or", }; write!(f, "{} {}", sep, e)?; } } Ok(()) } impl fmt::Display for ParseError where L: fmt::Display, T: fmt::Display, E: fmt::Display, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use self::ParseError::*; match *self { User { ref error } => write!(f, "{}", error), InvalidToken { ref location } => write!(f, "Invalid token at {}", location), UnrecognizedEof { ref location, ref expected, } => { write!(f, "Unrecognized EOF found at {}", location)?; fmt_expected(f, expected) } UnrecognizedToken { token: (ref start, ref token, ref end), ref expected, } => { write!( f, "Unrecognized token `{}` found at {}:{}", token, start, end )?; fmt_expected(f, expected) } ExtraToken { token: (ref start, ref token, ref end), } => write!(f, "Extra token {} found at {}:{}", token, start, end), } } } impl From for ParseError { fn from(error: E) -> Self { ParseError::User { error } } } #[cfg(feature = "std")] impl Error for ParseError where L: fmt::Debug + fmt::Display, T: fmt::Debug + fmt::Display, E: fmt::Debug + fmt::Display, { fn description(&self) -> &str { "parse error" } } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct ErrorRecovery { pub error: ParseError, pub dropped_tokens: Vec<(L, T, L)>, } /// Define a module using the generated parse from a `.lalrpop` file. /// /// You have to specify the name of the module and the path of the file /// generated by LALRPOP. If the input is in the root directory, you can /// omit it. /// /// # Example /// ```ignore /// // load parser in src/parser.lalrpop /// lalrpop_mod!(parser); /// /// // load parser in src/lex/parser.lalrpop /// lalrpop_mod!(parser, "/lex/parser.rs"); /// /// // define a public module /// lalrpop_mod!(pub parser); /// ``` #[macro_export] macro_rules! lalrpop_mod { ($(#[$attr:meta])* $vis:vis $modname:ident) => { lalrpop_mod!($(#[$attr])* $vis $modname, concat!("/", stringify!($modname), ".rs")); }; ($(#[$attr:meta])* $vis:vis $modname:ident, $source:expr) => { #[rustfmt::skip] #[allow(clippy::extra_unused_lifetimes)] #[allow(clippy::needless_lifetimes)] #[allow(clippy::let_unit_value)] #[allow(clippy::just_underscores_and_digits)] $(#[$attr])* $vis mod $modname { include!(concat!(env!("OUT_DIR"), $source)); } }; } #[cfg(test)] mod tests { use super::*; use alloc::{format, string::ToString, vec}; #[test] fn test() { let err = ParseError::UnrecognizedToken:: { token: (1, "t0", 2), expected: vec!["t1", "t2", "t3"] .into_iter() .map(|s| s.to_string()) .collect(), }; assert_eq!( format!("{}", err), "Unrecognized token `t0` found at 1:2\n\ Expected one of t1, t2 or t3" ); } } lalrpop-util-0.20.0/src/state_machine.rs000064400000000000000000000620401046102023000162720ustar 00000000000000use alloc::{string::String, vec, vec::Vec}; use core::fmt::Debug; #[cfg(feature = "std")] const DEBUG_ENABLED: bool = false; macro_rules! debug { ($($args:expr),* $(,)*) => { #[cfg(feature = "std")] if DEBUG_ENABLED { eprintln!($($args),*); } } } pub trait ParserDefinition: Sized { /// Represents a location in the input text. If you are using the /// default tokenizer, this will be a `usize`. type Location: Clone + Debug; /// Represents a "user error" -- this can get produced by /// `reduce()` if the grammar includes `=>?` actions. type Error; /// The type emitted by the user's tokenizer (excluding the /// location information). type Token: Clone + Debug; /// We assign a unique index to each token in the grammar, which /// we call its *index*. When we pull in a new `Token` from the /// input, we then match against it to determine its index. Note /// that the actual `Token` is retained too, as it may carry /// additional information (e.g., an `ID` terminal often has a /// string value associated with it; this is not important to the /// parser, but the semantic analyzer will want it). type TokenIndex: Copy + Clone + Debug; /// The type representing things on the LALRPOP stack. Represents /// the union of terminals and nonterminals. type Symbol; /// Type produced by reducing the start symbol. type Success; /// Identifies a state. Typically an i8, i16, or i32 (depending on /// how many states you have). type StateIndex: Copy + Clone + Debug; /// Identifies an action. type Action: ParserAction; /// Identifies a reduction. type ReduceIndex: Copy + Clone + Debug; /// Identifies a nonterminal. type NonterminalIndex: Copy + Clone + Debug; /// Returns a location representing the "start of the input". fn start_location(&self) -> Self::Location; /// Returns the initial state. fn start_state(&self) -> Self::StateIndex; /// Converts the user's tokens into an internal index; this index /// is then used to index into actions and the like. When using an /// internal tokenizer, these indices are directly produced. When /// using an **external** tokenier, however, this function matches /// against the patterns given by the user: it is fallible /// therefore as these patterns may not be exhaustive. If a token /// value is found that doesn't match any of the patterns the user /// supplied, then this function returns `None`, which is /// translated into a parse error by LALRPOP ("unrecognized /// token"). fn token_to_index(&self, token: &Self::Token) -> Option; /// Given the top-most state and the pending terminal, returns an /// action. This can be either SHIFT(state), REDUCE(action), or /// ERROR. fn action(&self, state: Self::StateIndex, token_index: Self::TokenIndex) -> Self::Action; /// Returns the action to take if an error occurs in the given /// state. This function is the same as the ordinary `action`, /// except that it applies not to the user's terminals but to the /// "special terminal" `!`. fn error_action(&self, state: Self::StateIndex) -> Self::Action; /// Action to take if EOF occurs in the given state. This function /// is the same as the ordinary `action`, except that it applies /// not to the user's terminals but to the "special terminal" `$`. fn eof_action(&self, state: Self::StateIndex) -> Self::Action; /// If we reduce to a nonterminal in the given state, what state /// do we go to? This is infallible due to the nature of LR(1) /// grammars. fn goto(&self, state: Self::StateIndex, nt: Self::NonterminalIndex) -> Self::StateIndex; /// "Upcast" a terminal into a symbol so we can push it onto the /// parser stack. fn token_to_symbol(&self, token_index: Self::TokenIndex, token: Self::Token) -> Self::Symbol; /// Returns the expected tokens in a given state. This is used for /// error reporting. fn expected_tokens(&self, state: Self::StateIndex) -> Vec; /// Returns the expected tokens in a given state. This is used in the /// same way as `expected_tokens` but allows more precise reporting /// of accepted tokens in some cases. fn expected_tokens_from_states(&self, states: &[Self::StateIndex]) -> Vec { // Default to using the preexisting `expected_tokens` method self.expected_tokens(*states.last().unwrap()) } /// True if this grammar supports error recovery. fn uses_error_recovery(&self) -> bool; /// Given error information, creates an error recovery symbol that /// we push onto the stack (and supply to user actions). fn error_recovery_symbol(&self, recovery: ErrorRecovery) -> Self::Symbol; /// Execute a reduction in the given state: that is, execute user /// code. The start location indicates the "starting point" of the /// current lookahead that is triggering the reduction (it is /// `None` for EOF). /// /// The `states` and `symbols` vectors represent the internal /// state machine vectors; they are given to `reduce` so that it /// can pop off states that no longer apply (and consume their /// symbols). At the end, it should also push the new state and /// symbol produced. /// /// Returns a `Some` if we reduced the start state and hence /// parsing is complete, or if we encountered an irrecoverable /// error. /// /// FIXME. It would be nice to not have so much logic live in /// reduce. It should just be given an iterator of popped symbols /// and return the newly produced symbol (or error). We can use /// `simulate_reduce` and our own information to drive the rest, /// right? This would also allow us -- I think -- to extend error /// recovery to cover user-produced errors. fn reduce( &mut self, reduce_index: Self::ReduceIndex, start_location: Option<&Self::Location>, states: &mut Vec, symbols: &mut Vec>, ) -> Option>; /// Returns information about how many states will be popped /// during a reduction, and what nonterminal would be produced as /// a result. fn simulate_reduce(&self, action: Self::ReduceIndex) -> SimulatedReduce; } pub trait ParserAction: Copy + Clone + Debug { fn as_shift(self) -> Option; fn as_reduce(self) -> Option; fn is_shift(self) -> bool; fn is_reduce(self) -> bool; fn is_error(self) -> bool; } pub enum SimulatedReduce { Reduce { states_to_pop: usize, nonterminal_produced: D::NonterminalIndex, }, // This reduce is the "start" fn, so the parse is done. Accept, } // These aliases are an elaborate hack to get around // the warnings when you define a type alias like `type Foo` #[doc(hidden)] pub type Location = ::Location; #[doc(hidden)] pub type Token = ::Token; #[doc(hidden)] pub type Error = ::Error; #[doc(hidden)] pub type Success = ::Success; #[doc(hidden)] pub type Symbol = ::Symbol; pub type ParseError = crate::ParseError, Token, Error>; pub type ParseResult = Result, ParseError>; pub type TokenTriple = (Location, Token, Location); pub type SymbolTriple = (Location, Symbol, Location); pub type ErrorRecovery = crate::ErrorRecovery, Token, Error>; pub struct Parser where D: ParserDefinition, I: Iterator, ParseError>>, { definition: D, tokens: I, states: Vec, symbols: Vec>, last_location: D::Location, } enum NextToken { FoundToken(TokenTriple, D::TokenIndex), Eof, Done(ParseResult), } impl Parser where D: ParserDefinition, I: Iterator, ParseError>>, { pub fn drive(definition: D, tokens: I) -> ParseResult { let last_location = definition.start_location(); let start_state = definition.start_state(); Parser { definition, tokens, states: vec![start_state], symbols: vec![], last_location, } .parse() } fn top_state(&self) -> D::StateIndex { *self.states.last().unwrap() } fn parse(&mut self) -> ParseResult { // Outer loop: each time we continue around this loop, we // shift a new token from the input. We break from the loop // when the end of the input is reached (we return early if an // error occurs). 'shift: loop { let (mut lookahead, mut token_index) = match self.next_token() { NextToken::FoundToken(l, i) => (l, i), NextToken::Eof => return self.parse_eof(), NextToken::Done(e) => return e, }; debug!("+ SHIFT: {:?}", lookahead); debug!("\\ token_index: {:?}", token_index); 'inner: loop { let top_state = self.top_state(); let action = self.definition.action(top_state, token_index); debug!("\\ action: {:?}", action); if let Some(target_state) = action.as_shift() { debug!("\\ shift to: {:?}", target_state); // Shift and transition to state `action - 1` let symbol = self.definition.token_to_symbol(token_index, lookahead.1); self.states.push(target_state); self.symbols.push((lookahead.0, symbol, lookahead.2)); continue 'shift; } else if let Some(reduce_index) = action.as_reduce() { debug!("\\ reduce to: {:?}", reduce_index); if let Some(r) = self.reduce(reduce_index, Some(&lookahead.0)) { return match r { // we reached eof, but still have lookahead Ok(_) => Err(crate::ParseError::ExtraToken { token: lookahead }), Err(e) => Err(e), }; } } else { debug!("\\ error -- initiating error recovery!"); match self.error_recovery(Some(lookahead), Some(token_index)) { NextToken::FoundToken(l, i) => { lookahead = l; token_index = i; continue 'inner; } NextToken::Eof => return self.parse_eof(), NextToken::Done(e) => return e, } } } } } /// Invoked when we have no more tokens to consume. fn parse_eof(&mut self) -> ParseResult { loop { let top_state = self.top_state(); let action = self.definition.eof_action(top_state); if let Some(reduce_index) = action.as_reduce() { if let Some(result) = self.definition .reduce(reduce_index, None, &mut self.states, &mut self.symbols) { return result; } } else { match self.error_recovery(None, None) { NextToken::FoundToken(..) => panic!("cannot find token at EOF"), NextToken::Done(e) => return e, NextToken::Eof => continue, } } } } fn error_recovery( &mut self, mut opt_lookahead: Option>, mut opt_token_index: Option, ) -> NextToken { debug!( "\\+ error_recovery(opt_lookahead={:?}, opt_token_index={:?})", opt_lookahead, opt_token_index, ); if !self.definition.uses_error_recovery() { debug!("\\ error -- no error recovery!"); return NextToken::Done(Err( self.unrecognized_token_error(opt_lookahead, &self.states) )); } let error = self.unrecognized_token_error(opt_lookahead.clone(), &self.states); let mut dropped_tokens = vec![]; // We are going to insert ERROR into the lookahead. So, first, // perform all reductions from current state triggered by having // ERROR in the lookahead. loop { let state = self.top_state(); let action = self.definition.error_action(state); if let Some(reduce_index) = action.as_reduce() { debug!("\\\\ reducing: {:?}", reduce_index); if let Some(result) = self.reduce(reduce_index, opt_lookahead.as_ref().map(|l| &l.0)) { debug!("\\\\ reduced to a result"); return NextToken::Done(result); } } else { break; } } // Now try to find the recovery state. let states_len = self.states.len(); let top = 'find_state: loop { // Go backwards through the states... debug!( "\\\\+ error_recovery: find_state loop, {:?} states = {:?}", self.states.len(), self.states, ); for top in (0..states_len).rev() { let state = self.states[top]; debug!("\\\\\\ top = {:?}, state = {:?}", top, state); // ...fetch action for error token... let action = self.definition.error_action(state); debug!("\\\\\\ action = {:?}", action); if let Some(error_state) = action.as_shift() { // If action is a shift that takes us into `error_state`, // and `error_state` can accept this lookahead, we are done. if self.accepts(error_state, &self.states[..=top], opt_token_index) { debug!("\\\\\\ accepted!"); break 'find_state top; } } else { // ...else, if action is error or reduce, go to next state. continue; } } // Otherwise, if we couldn't find a state that would -- // after shifting the error token -- accept the lookahead, // then drop the lookahead and advance to next token in // the input. match opt_lookahead.take() { // If the lookahead is EOF, we can't drop any more // tokens, abort error recovery and just report the // original error (it might be nice if we would // propagate back the dropped tokens, though). None => { debug!("\\\\\\ no more lookahead, report error"); return NextToken::Done(Err(error)); } // Else, drop the current token and shift to the // next. If there is a next token, we will `continue` // to the start of the `'find_state` loop. Some(lookahead) => { debug!("\\\\\\ dropping lookahead token"); dropped_tokens.push(lookahead); match self.next_token() { NextToken::FoundToken(next_lookahead, next_token_index) => { opt_lookahead = Some(next_lookahead); opt_token_index = Some(next_token_index); } NextToken::Eof => { debug!("\\\\\\ reached EOF"); opt_lookahead = None; opt_token_index = None; } NextToken::Done(e) => { debug!("\\\\\\ no more tokens"); return NextToken::Done(e); } } } } }; // If we get here, we are ready to push the error recovery state. // We have to compute the span for the error recovery // token. We do this first, before we pop any symbols off the // stack. There are several possibilities, in order of // preference. // // For the **start** of the message, we prefer to use the start of any // popped states. This represents parts of the input we had consumed but // had to roll back and ignore. // // Example: // // a + (b + /) // ^ start point is here, since this `+` will be popped off // // If there are no popped states, but there *are* dropped tokens, we can use // the start of those. // // Example: // // a + (b + c e) // ^ start point would be here // // Finally, if there are no popped states *nor* dropped tokens, we can use // the end of the top-most state. let start = if let Some(popped_sym) = self.symbols.get(top) { popped_sym.0.clone() } else if let Some(dropped_token) = dropped_tokens.first() { dropped_token.0.clone() } else if top > 0 { self.symbols[top - 1].2.clone() } else { self.definition.start_location() }; // For the end span, here are the possibilities: // // We prefer to use the end of the last dropped token. // // Examples: // // a + (b + /) // --- // a + (b c) // - // // But, if there are no dropped tokens, we will use the end of the popped states, // if any: // // a + / // - // // If there are neither dropped tokens *or* popped states, // then the user is simulating insertion of an operator. In // this case, we prefer the start of the lookahead, but // fallback to the start if we are at EOF. // // Examples: // // a + (b c) // - let end = if let Some(dropped_token) = dropped_tokens.last() { dropped_token.2.clone() } else if states_len - 1 > top { self.symbols.last().unwrap().2.clone() } else if let Some(lookahead) = opt_lookahead.as_ref() { lookahead.0.clone() } else { start.clone() }; self.states.truncate(top + 1); self.symbols.truncate(top); let recover_state = self.states[top]; let error_action = self.definition.error_action(recover_state); let error_state = error_action.as_shift().unwrap(); self.states.push(error_state); let recovery = self.definition.error_recovery_symbol(crate::ErrorRecovery { error, dropped_tokens, }); self.symbols.push((start, recovery, end)); match (opt_lookahead, opt_token_index) { (Some(l), Some(i)) => NextToken::FoundToken(l, i), (None, None) => NextToken::Eof, (l, i) => panic!("lookahead and token_index mismatched: {:?}, {:?}", l, i), } } /// The `accepts` function has the job of figuring out whether the /// given error state would "accept" the given lookahead. We /// basically trace through the LR automaton looking for one of /// two outcomes: /// /// - the lookahead is eventually shifted /// - we reduce to the end state successfully (in the case of EOF). /// /// If we used the pure LR(1) algorithm, we wouldn't need this /// function, because we would be guaranteed to error immediately /// (and not after some number of reductions). But with an LALR /// (or Lane Table) generated automaton, it is possible to reduce /// some number of times before encountering an error. Failing to /// take this into account can lead error recovery into an /// infinite loop (see the `error_recovery_lalr_loop` test) or /// produce crappy results (see `error_recovery_lock_in`). fn accepts( &self, error_state: D::StateIndex, states: &[D::StateIndex], opt_token_index: Option, ) -> bool { debug!( "\\\\\\+ accepts(error_state={:?}, states={:?}, opt_token_index={:?})", error_state, states, opt_token_index, ); let mut states = states.to_vec(); states.push(error_state); loop { let mut states_len = states.len(); let top = states[states_len - 1]; let action = match opt_token_index { None => self.definition.eof_action(top), Some(i) => self.definition.action(top, i), }; // If we encounter an error action, we do **not** accept. if action.is_error() { debug!("\\\\\\\\ accepts: error"); return false; } // If we encounter a reduce action, we need to simulate its // effect on the state stack. if let Some(reduce_action) = action.as_reduce() { match self.definition.simulate_reduce(reduce_action) { SimulatedReduce::Reduce { states_to_pop, nonterminal_produced, } => { states_len -= states_to_pop; states.truncate(states_len); let top = states[states_len - 1]; let next_state = self.definition.goto(top, nonterminal_produced); states.push(next_state); } SimulatedReduce::Accept => { debug!("\\\\\\\\ accepts: reduce accepts!"); return true; } } } else { // If we encounter a shift action, we DO accept. debug!("\\\\\\\\ accepts: shift accepts!"); assert!(action.is_shift()); return true; } } } fn reduce( &mut self, action: D::ReduceIndex, lookahead_start: Option<&D::Location>, ) -> Option> { self.definition .reduce(action, lookahead_start, &mut self.states, &mut self.symbols) } fn unrecognized_token_error( &self, token: Option>, states: &[D::StateIndex], ) -> ParseError { match token { Some(token) => crate::ParseError::UnrecognizedToken { token, expected: self.definition.expected_tokens_from_states(states), }, None => crate::ParseError::UnrecognizedEof { location: self.last_location.clone(), expected: self.definition.expected_tokens_from_states(states), }, } } /// Consume the next token from the input and classify it into a /// token index. Classification can fail with an error. If there /// are no more tokens, signal EOF. fn next_token(&mut self) -> NextToken { let token = match self.tokens.next() { Some(Ok(v)) => v, Some(Err(e)) => return NextToken::Done(Err(e)), None => return NextToken::Eof, }; self.last_location = token.2.clone(); let token_index = match self.definition.token_to_index(&token.1) { Some(i) => i, None => { return NextToken::Done(Err( self.unrecognized_token_error(Some(token), &self.states) )) } }; NextToken::FoundToken(token, token_index) } } /// In LALRPOP generated rules, we actually use `i32`, `i16`, or `i8` /// to represent all of the various indices (we use the smallest one /// that will fit). So implement `ParserAction` for each of those. macro_rules! integral_indices { ($t:ty) => { impl> ParserAction for $t { fn as_shift(self) -> Option { if self > 0 { Some(self - 1) } else { None } } fn as_reduce(self) -> Option { if self < 0 { Some(-(self + 1)) } else { None } } fn is_shift(self) -> bool { self > 0 } fn is_reduce(self) -> bool { self < 0 } fn is_error(self) -> bool { self == 0 } } }; } integral_indices!(i32); integral_indices!(i16); integral_indices!(i8);