ungrammar-1.16.1/.cargo_vcs_info.json0000644000000001360000000000100131130ustar { "git": { "sha1": "814f04797bbd67f224cbb1b48c7541fdd8bc3d79" }, "path_in_vcs": "" }ungrammar-1.16.1/.gitignore000064400000000000000000000000300072674642500137140ustar 00000000000000/ci /Cargo.lock /target ungrammar-1.16.1/Cargo.toml0000644000000013260000000000100111130ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "ungrammar" version = "1.16.1" exclude = ["/bors.toml", "/.github"] description = "A DSL for describing concrete syntax trees" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" [dependencies] ungrammar-1.16.1/Cargo.toml.orig000064400000000000000000000004650072674642500146270ustar 00000000000000[package] name = "ungrammar" description = "A DSL for describing concrete syntax trees" version = "1.16.1" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/ungrammar" edition = "2018" exclude = ["/bors.toml", "/.github"] [workspace] members = ["ungrammar2json"] [dependencies] # nope ungrammar-1.16.1/README.md000064400000000000000000000010140072674642500132060ustar 00000000000000# ungrammar A DSL for specifying concrete syntax trees. See the [blog post][post] for an introduction. See [./rust.ungram](./rust.ungram) for an example. ## Editor support - Vim - [vim-ungrammar][] - [ungrammar.vim][] - VSCode - [ungrammar-tools][] [post]: https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html [vim-ungrammar]: https://github.com/Iron-E/vim-ungrammar [ungrammar.vim]: https://github.com/drtychai/ungrammar.vim [ungrammar-tools]: https://github.com/azdavis/ungrammar-tools ungrammar-1.16.1/rust.ungram000064400000000000000000000235040072674642500141470ustar 00000000000000// Rust Un-Grammar. // // This grammar specifies the structure of Rust's concrete syntax tree. // It does not specify parsing rules (ambiguities, precedence, etc are out of scope). // Tokens are processed -- contextual keywords are recognised, compound operators glued. // // Legend: // // // -- comment // Name = -- non-terminal definition // 'ident' -- token (terminal) // A B -- sequence // A | B -- alternation // A* -- zero or more repetition // A? -- zero or one repetition // (A) -- same as A // label:A -- suggested name for field of AST node //*************************// // Names, Paths and Macros // //*************************// Name = 'ident' | 'self' NameRef = 'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self' Lifetime = 'lifetime_ident' Path = (qualifier:Path '::')? segment:PathSegment PathSegment = '::'? NameRef | NameRef GenericArgList? | NameRef ParamList RetType? | '<' PathType ('as' PathType)? '>' GenericArgList = '::'? '<' (GenericArg (',' GenericArg)* ','?)? '>' GenericArg = TypeArg | AssocTypeArg | LifetimeArg | ConstArg TypeArg = Type AssocTypeArg = NameRef GenericParamList? (':' TypeBoundList | '=' Type) LifetimeArg = Lifetime ConstArg = Expr MacroCall = Attr* Path '!' TokenTree ';'? TokenTree = '(' ')' | '{' '}' | '[' ']' MacroItems = Item* MacroStmts = statements:Stmt* Expr? //*************************// // Items // //*************************// SourceFile = 'shebang'? Attr* Item* Item = Const | Enum | ExternBlock | ExternCrate | Fn | Impl | MacroCall | MacroRules | MacroDef | Module | Static | Struct | Trait | TypeAlias | Union | Use MacroRules = Attr* Visibility? 'macro_rules' '!' Name TokenTree MacroDef = Attr* Visibility? 'macro' Name args:TokenTree? body:TokenTree Module = Attr* Visibility? 'mod' Name (ItemList | ';') ItemList = '{' Attr* Item* '}' ExternCrate = Attr* Visibility? 'extern' 'crate' NameRef Rename? ';' Rename = 'as' (Name | '_') Use = Attr* Visibility? 'use' UseTree ';' UseTree = (Path? '::')? ('*' | UseTreeList) | Path Rename? UseTreeList = '{' (UseTree (',' UseTree)* ','?)? '}' Fn = Attr* Visibility? 'default'? 'const'? 'async'? 'unsafe'? Abi? 'fn' Name GenericParamList? ParamList RetType? WhereClause? (body:BlockExpr | ';') Abi = 'extern' 'string'? ParamList = '('( SelfParam | (SelfParam ',')? (Param (',' Param)* ','?)? )')' | '|' (Param (',' Param)* ','?)? '|' SelfParam = Attr* ( ('&' Lifetime?)? 'mut'? Name | 'mut'? Name ':' Type ) Param = Attr* ( Pat (':' Type)? | Type | '...' ) RetType = '->' Type TypeAlias = Attr* Visibility? 'default'? 'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause? ('=' Type)? ';' Struct = Attr* Visibility? 'struct' Name GenericParamList? ( WhereClause? (RecordFieldList | ';') | TupleFieldList WhereClause? ';' ) RecordFieldList = '{' fields:(RecordField (',' RecordField)* ','?)? '}' RecordField = Attr* Visibility? Name ':' Type TupleFieldList = '(' fields:(TupleField (',' TupleField)* ','?)? ')' TupleField = Attr* Visibility? Type FieldList = RecordFieldList | TupleFieldList Enum = Attr* Visibility? 'enum' Name GenericParamList? WhereClause? VariantList VariantList = '{' (Variant (',' Variant)* ','?)? '}' Variant = Attr* Visibility? Name FieldList? ('=' Expr)? Union = Attr* Visibility? 'union' Name GenericParamList? WhereClause? RecordFieldList // A Data Type. // // Not used directly in the grammar, but handy to have anyway. Adt = Enum | Struct | Union Const = Attr* Visibility? 'default'? 'const' (Name | '_') ':' Type ('=' body:Expr)? ';' Static = Attr* Visibility? 'static' 'mut'? Name ':' Type ('=' body:Expr)? ';' Trait = Attr* Visibility? 'unsafe'? 'auto'? 'trait' Name GenericParamList? (':' TypeBoundList?)? WhereClause? AssocItemList AssocItemList = '{' Attr* AssocItem* '}' AssocItem = Const | Fn | MacroCall | TypeAlias Impl = Attr* Visibility? 'default'? 'unsafe'? 'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause? AssocItemList ExternBlock = Attr* 'unsafe'? Abi ExternItemList ExternItemList = '{' Attr* ExternItem* '}' ExternItem = Fn | MacroCall | Static | TypeAlias GenericParamList = '<' (GenericParam (',' GenericParam)* ','?)? '>' GenericParam = ConstParam | LifetimeParam | TypeParam TypeParam = Attr* Name (':' TypeBoundList?)? ('=' default_type:Type)? ConstParam = Attr* 'const' Name ':' Type ('=' default_val:Expr)? LifetimeParam = Attr* Lifetime (':' TypeBoundList?)? WhereClause = 'where' predicates:(WherePred (',' WherePred)* ','?) WherePred = ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList? Visibility = 'pub' ('(' 'in'? Path ')')? Attr = '#' '!'? '[' Meta ']' Meta = Path ('=' Expr | TokenTree)? //****************************// // Statements and Expressions // //****************************// Stmt = ';' | ExprStmt | Item | LetStmt LetStmt = Attr* 'let' Pat (':' Type)? '=' initializer:Expr LetElse? ';' LetElse = 'else' BlockExpr ExprStmt = Expr ';'? Expr = ArrayExpr | AwaitExpr | BinExpr | BlockExpr | BoxExpr | BreakExpr | CallExpr | CastExpr | ClosureExpr | ContinueExpr | FieldExpr | ForExpr | IfExpr | IndexExpr | Literal | LoopExpr | MacroCall | MacroStmts | MatchExpr | MethodCallExpr | ParenExpr | PathExpr | PrefixExpr | RangeExpr | RecordExpr | RefExpr | ReturnExpr | TryExpr | TupleExpr | WhileExpr | YieldExpr | LetExpr | UnderscoreExpr Literal = Attr* value:( 'int_number' | 'float_number' | 'string' | 'raw_string' | 'byte_string' | 'raw_byte_string' | 'true' | 'false' | 'char' | 'byte' ) PathExpr = Attr* Path StmtList = '{' Attr* statements:Stmt* tail_expr:Expr? '}' RefExpr = Attr* '&' ('raw' | 'mut' | 'const') Expr TryExpr = Attr* Expr '?' BlockExpr = Attr* Label? ('try' | 'unsafe' | 'async' | 'const') StmtList PrefixExpr = Attr* op:('-' | '!' | '*') Expr BinExpr = Attr* lhs:Expr op:( '||' | '&&' | '==' | '!=' | '<=' | '>=' | '<' | '>' | '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&' | '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^=' ) rhs:Expr CastExpr = Attr* Expr 'as' Type ParenExpr = Attr* '(' Attr* Expr ')' ArrayExpr = Attr* '[' Attr* ( (Expr (',' Expr)* ','?)? | Expr ';' Expr ) ']' IndexExpr = Attr* base:Expr '[' index:Expr ']' TupleExpr = Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')' RecordExpr = Path RecordExprFieldList RecordExprFieldList = '{' Attr* fields:(RecordExprField (',' RecordExprField)* ','?)? ('..' spread:Expr?)? '}' RecordExprField = Attr* (NameRef ':')? Expr CallExpr = Attr* Expr ArgList ArgList = '(' args:(Expr (',' Expr)* ','?)? ')' MethodCallExpr = Attr* receiver:Expr '.' NameRef GenericArgList? ArgList FieldExpr = Attr* Expr '.' NameRef ClosureExpr = Attr* 'static'? 'async'? 'move'? ParamList RetType? body:Expr IfExpr = Attr* 'if' condition:Expr then_branch:BlockExpr ('else' else_branch:(IfExpr | BlockExpr))? LoopExpr = Attr* Label? 'loop' loop_body:BlockExpr ForExpr = Attr* Label? 'for' Pat 'in' iterable:Expr loop_body:BlockExpr WhileExpr = Attr* Label? 'while' condition:Expr loop_body:BlockExpr Label = Lifetime ':' BreakExpr = Attr* 'break' Lifetime? Expr? ContinueExpr = Attr* 'continue' Lifetime? RangeExpr = Attr* start:Expr? op:('..' | '..=') end:Expr? MatchExpr = Attr* 'match' Expr MatchArmList MatchArmList = '{' Attr* arms:MatchArm* '}' MatchArm = Attr* Pat guard:MatchGuard? '=>' Expr ','? MatchGuard = 'if' condition:Expr ReturnExpr = Attr* 'return' Expr? YieldExpr = Attr* 'yield' Expr? LetExpr = Attr* 'let' Pat '=' Expr UnderscoreExpr = Attr* '_' AwaitExpr = Attr* Expr '.' 'await' BoxExpr = Attr* 'box' Expr //*************************// // Types // //*************************// Type = ArrayType | DynTraitType | FnPtrType | ForType | ImplTraitType | InferType | MacroType | NeverType | ParenType | PathType | PtrType | RefType | SliceType | TupleType ParenType = '(' Type ')' NeverType = '!' MacroType = MacroCall PathType = Path TupleType = '(' fields:(Type (',' Type)* ','?)? ')' PtrType = '*' ('const' | 'mut') Type RefType = '&' Lifetime? 'mut'? Type ArrayType = '[' Type ';' Expr ']' SliceType = '[' Type ']' InferType = '_' FnPtrType = 'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType? ForType = 'for' GenericParamList Type ImplTraitType = 'impl' TypeBoundList DynTraitType = 'dyn' TypeBoundList TypeBoundList = bounds:(TypeBound ('+' TypeBound)* '+'?) TypeBound = Lifetime | ('?' | '~' 'const')? Type //************************// // Patterns // //************************// Pat = IdentPat | BoxPat | RestPat | LiteralPat | MacroPat | OrPat | ParenPat | PathPat | WildcardPat | RangePat | RecordPat | RefPat | SlicePat | TuplePat | TupleStructPat | ConstBlockPat LiteralPat = Literal IdentPat = Attr* 'ref'? 'mut'? Name ('@' Pat)? WildcardPat = '_' RangePat = // 1.. start:Pat op:('..' | '..=') // 1..2 | start:Pat op:('..' | '..=') end:Pat // ..2 | op:('..' | '..=') end:Pat RefPat = '&' 'mut'? Pat RecordPat = Path RecordPatFieldList RecordPatFieldList = '{' fields:(RecordPatField (',' RecordPatField)* ','?)? RestPat? '}' RecordPatField = Attr* (NameRef ':')? Pat TupleStructPat = Path '(' fields:(Pat (',' Pat)* ','?)? ')' TuplePat = '(' fields:(Pat (',' Pat)* ','?)? ')' ParenPat = '(' Pat ')' SlicePat = '[' (Pat (',' Pat)* ','?)? ']' PathPat = Path OrPat = (Pat ('|' Pat)* '|'?) BoxPat = 'box' Pat RestPat = Attr* '..' MacroPat = MacroCall ConstBlockPat = 'const' BlockExpr ungrammar-1.16.1/src/error.rs000064400000000000000000000023220072674642500142200ustar 00000000000000//! Boilerplate error definitions. use std::fmt; use crate::lexer::Location; /// A type alias for std's Result with the Error as our error type. pub type Result = std::result::Result; /// An error encountered when parsing a Grammar. #[derive(Debug)] pub struct Error { pub(crate) message: String, pub(crate) location: Option, } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(loc) = self.location { // Report 1-based indices, to match text editors write!(f, "{}:{}: ", loc.line + 1, loc.column + 1)? } write!(f, "{}", self.message) } } impl std::error::Error for Error {} impl Error { pub(crate) fn with_location(self, location: Location) -> Error { Error { location: Some(location), ..self } } } macro_rules! _format_err { ($($tt:tt)*) => { $crate::error::Error { message: format!($($tt)*), location: None, } }; } pub(crate) use _format_err as format_err; macro_rules! _bail { ($($tt:tt)*) => { return Err($crate::error::format_err!($($tt)*)) }; } pub(crate) use _bail as bail; ungrammar-1.16.1/src/lexer.rs000064400000000000000000000066550072674642500142230ustar 00000000000000//! Simple hand-written ungrammar lexer use crate::error::{bail, Result}; #[derive(Debug, Eq, PartialEq)] pub(crate) enum TokenKind { Node(String), Token(String), Eq, Star, Pipe, QMark, Colon, LParen, RParen, } #[derive(Debug)] pub(crate) struct Token { pub(crate) kind: TokenKind, pub(crate) loc: Location, } #[derive(Copy, Clone, Default, Debug)] pub(crate) struct Location { pub(crate) line: usize, pub(crate) column: usize, } impl Location { fn advance(&mut self, text: &str) { match text.rfind('\n') { Some(idx) => { self.line += text.chars().filter(|&it| it == '\n').count(); self.column = text[idx + 1..].chars().count(); } None => self.column += text.chars().count(), } } } pub(crate) fn tokenize(mut input: &str) -> Result> { let mut res = Vec::new(); let mut loc = Location::default(); while !input.is_empty() { let old_input = input; skip_ws(&mut input); skip_comment(&mut input); if old_input.len() == input.len() { match advance(&mut input) { Ok(kind) => { res.push(Token { kind, loc }); } Err(err) => return Err(err.with_location(loc)), } } let consumed = old_input.len() - input.len(); loc.advance(&old_input[..consumed]); } Ok(res) } fn skip_ws(input: &mut &str) { *input = input.trim_start_matches(is_whitespace) } fn skip_comment(input: &mut &str) { if input.starts_with("//") { let idx = input.find('\n').map_or(input.len(), |it| it + 1); *input = &input[idx..] } } fn advance(input: &mut &str) -> Result { let mut chars = input.chars(); let c = chars.next().unwrap(); let res = match c { '=' => TokenKind::Eq, '*' => TokenKind::Star, '?' => TokenKind::QMark, '(' => TokenKind::LParen, ')' => TokenKind::RParen, '|' => TokenKind::Pipe, ':' => TokenKind::Colon, '\'' => { let mut buf = String::new(); loop { match chars.next() { None => bail!("unclosed token literal"), Some('\\') => match chars.next() { Some(c) if is_escapable(c) => buf.push(c), _ => bail!("invalid escape in token literal"), }, Some('\'') => break, Some(c) => buf.push(c), } } TokenKind::Token(buf) } c if is_ident_char(c) => { let mut buf = String::new(); buf.push(c); loop { match chars.clone().next() { Some(c) if is_ident_char(c) => { chars.next(); buf.push(c); } _ => break, } } TokenKind::Node(buf) } '\r' => bail!("unexpected `\\r`, only Unix-style line endings allowed"), c => bail!("unexpected character: `{}`", c), }; *input = chars.as_str(); Ok(res) } fn is_escapable(c: char) -> bool { matches!(c, '\\' | '\'') } fn is_whitespace(c: char) -> bool { matches!(c, ' ' | '\t' | '\n') } fn is_ident_char(c: char) -> bool { matches!(c, 'a'..='z' | 'A'..='Z' | '_') } ungrammar-1.16.1/src/lib.rs000064400000000000000000000064720072674642500136470ustar 00000000000000//! Ungrammar -- a DSL for specifying concrete syntax tree grammar. //! //! Producing a parser is an explicit non-goal -- it's ok for this grammar to be //! ambiguous, non LL, non LR, etc. //! //! See this //! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html) //! for details. #![deny(missing_debug_implementations)] #![deny(missing_docs)] #![deny(rust_2018_idioms)] mod error; mod lexer; mod parser; use std::{ops, str::FromStr}; pub use error::{Error, Result}; /// Returns a Rust grammar. pub fn rust_grammar() -> Grammar { let src = include_str!("../rust.ungram"); src.parse().unwrap() } /// A node, like `A = 'b' | 'c'`. /// /// Indexing into a [`Grammar`] with a [`Node`] returns a reference to a /// [`NodeData`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Node(usize); /// A token, denoted with single quotes, like `'+'` or `'struct'`. /// /// Indexing into a [`Grammar`] with a [`Token`] returns a reference to a /// [`TokenData`]. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Token(usize); /// An Ungrammar grammar. #[derive(Default, Debug)] pub struct Grammar { nodes: Vec, tokens: Vec, } impl FromStr for Grammar { type Err = Error; fn from_str(s: &str) -> Result { let tokens = lexer::tokenize(s)?; parser::parse(tokens) } } impl Grammar { /// Returns an iterator over all nodes in the grammar. pub fn iter(&self) -> impl Iterator + '_ { (0..self.nodes.len()).map(Node) } /// Returns an iterator over all tokens in the grammar. pub fn tokens(&self) -> impl Iterator + '_ { (0..self.tokens.len()).map(Token) } } impl ops::Index for Grammar { type Output = NodeData; fn index(&self, Node(index): Node) -> &NodeData { &self.nodes[index] } } impl ops::Index for Grammar { type Output = TokenData; fn index(&self, Token(index): Token) -> &TokenData { &self.tokens[index] } } /// Data about a node. #[derive(Debug)] pub struct NodeData { /// The name of the node. /// /// In the rule `A = 'b' | 'c'`, this is `"A"`. pub name: String, /// The rule for this node. /// /// In the rule `A = 'b' | 'c'`, this represents `'b' | 'c'`. pub rule: Rule, } /// Data about a token. #[derive(Debug)] pub struct TokenData { /// The name of the token. pub name: String, } /// A production rule. #[derive(Debug, Eq, PartialEq)] pub enum Rule { /// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule). Labeled { /// The label. label: String, /// The rule. rule: Box, }, /// A node, like `A`. Node(Node), /// A token, like `'struct'`. Token(Token), /// A sequence of rules, like `'while' '(' Expr ')' Stmt`. Seq(Vec), /// An alternative between many rules, like `'+' | '-' | '*' | '/'`. Alt(Vec), /// An optional rule, like `A?`. Opt(Box), /// A repeated rule, like `A*`. Rep(Box), } #[test] fn smoke() { let grammar = include_str!("../ungrammar.ungram"); let grammar = grammar.parse::().unwrap(); drop(grammar) } #[test] fn test_rust_grammar() { let _ = rust_grammar(); } ungrammar-1.16.1/src/parser.rs000064400000000000000000000137050072674642500143720ustar 00000000000000//! Simple hand-written ungrammar parser. use std::collections::HashMap; use crate::{ error::{bail, format_err, Result}, lexer::{self, TokenKind}, Grammar, Node, NodeData, Rule, Token, TokenData, }; macro_rules! bail { ($loc:expr, $($tt:tt)*) => {{ let err = $crate::error::format_err!($($tt)*) .with_location($loc); return Err(err); }}; } pub(crate) fn parse(tokens: Vec) -> Result { let mut p = Parser::new(tokens); while !p.is_eof() { node(&mut p)?; } p.finish() } #[derive(Default)] struct Parser { grammar: Grammar, tokens: Vec, node_table: HashMap, token_table: HashMap, } const DUMMY_RULE: Rule = Rule::Node(Node(!0)); impl Parser { fn new(mut tokens: Vec) -> Parser { tokens.reverse(); Parser { tokens, ..Parser::default() } } fn peek(&self) -> Option<&lexer::Token> { self.peek_n(0) } fn peek_n(&self, n: usize) -> Option<&lexer::Token> { self.tokens.iter().nth_back(n) } fn bump(&mut self) -> Result { self.tokens .pop() .ok_or_else(|| format_err!("unexpected EOF")) } fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> { let token = self.bump()?; if token.kind != kind { bail!(token.loc, "unexpected token, expected `{}`", what); } Ok(()) } fn is_eof(&self) -> bool { self.tokens.is_empty() } fn finish(self) -> Result { for node_data in &self.grammar.nodes { if matches!(node_data.rule, DUMMY_RULE) { crate::error::bail!("Undefined node: {}", node_data.name) } } Ok(self.grammar) } fn intern_node(&mut self, name: String) -> Node { let len = self.node_table.len(); let grammar = &mut self.grammar; *self.node_table.entry(name.clone()).or_insert_with(|| { grammar.nodes.push(NodeData { name, rule: DUMMY_RULE, }); Node(len) }) } fn intern_token(&mut self, name: String) -> Token { let len = self.token_table.len(); let grammar = &mut self.grammar; *self.token_table.entry(name.clone()).or_insert_with(|| { grammar.tokens.push(TokenData { name }); Token(len) }) } } fn node(p: &mut Parser) -> Result<()> { let token = p.bump()?; let node = match token.kind { TokenKind::Node(it) => p.intern_node(it), _ => bail!(token.loc, "expected ident"), }; p.expect(TokenKind::Eq, "=")?; if !matches!(p.grammar[node].rule, DUMMY_RULE) { bail!(token.loc, "duplicate rule: `{}`", p.grammar[node].name) } let rule = rule(p)?; p.grammar.nodes[node.0].rule = rule; Ok(()) } fn rule(p: &mut Parser) -> Result { if let Some(lexer::Token { kind: TokenKind::Pipe, loc }) = p.peek() { bail!( *loc, "The first element in a sequence of productions or alternatives \ must not have a leading pipe (`|`)" ); } let lhs = seq_rule(p)?; let mut alt = vec![lhs]; while let Some(token) = p.peek() { if token.kind != TokenKind::Pipe { break; } p.bump()?; let rule = seq_rule(p)?; alt.push(rule) } let res = if alt.len() == 1 { alt.pop().unwrap() } else { Rule::Alt(alt) }; Ok(res) } fn seq_rule(p: &mut Parser) -> Result { let lhs = atom_rule(p)?; let mut seq = vec![lhs]; while let Some(rule) = opt_atom_rule(p)? { seq.push(rule) } let res = if seq.len() == 1 { seq.pop().unwrap() } else { Rule::Seq(seq) }; Ok(res) } fn atom_rule(p: &mut Parser) -> Result { match opt_atom_rule(p)? { Some(it) => Ok(it), None => { let token = p.bump()?; bail!(token.loc, "unexpected token") } } } fn opt_atom_rule(p: &mut Parser) -> Result> { let token = match p.peek() { Some(it) => it, None => return Ok(None), }; let mut res = match &token.kind { TokenKind::Node(name) => { if let Some(lookahead) = p.peek_n(1) { match lookahead.kind { TokenKind::Eq => return Ok(None), TokenKind::Colon => { let label = name.clone(); p.bump()?; p.bump()?; let rule = atom_rule(p)?; let res = Rule::Labeled { label, rule: Box::new(rule), }; return Ok(Some(res)); } _ => (), } } match p.peek_n(1) { Some(token) if token.kind == TokenKind::Eq => return Ok(None), _ => (), } let name = name.clone(); p.bump()?; let node = p.intern_node(name); Rule::Node(node) } TokenKind::Token(name) => { let name = name.clone(); p.bump()?; let token = p.intern_token(name); Rule::Token(token) } TokenKind::LParen => { p.bump()?; let rule = rule(p)?; p.expect(TokenKind::RParen, ")")?; rule } _ => return Ok(None), }; if let Some(token) = p.peek() { match &token.kind { TokenKind::QMark => { p.bump()?; res = Rule::Opt(Box::new(res)); } TokenKind::Star => { p.bump()?; res = Rule::Rep(Box::new(res)); } _ => (), } } Ok(Some(res)) } ungrammar-1.16.1/ungrammar.ungram000064400000000000000000000003100072674642500151310ustar 00000000000000/// ungrammar for ungrammar Grammar = Node * Node = name:'ident' '=' Rule Rule = 'ident' | 'token_ident' | Rule * | Rule ( '|' Rule) * | Rule '?' | Rule '*' | '(' Rule ')' | label:'ident' ':'