makefile-lossless-0.1.2/.cargo_vcs_info.json

{
  "git": {
    "sha1": "af25be4154a9380870c322c27837980f797b9aef"
  },
  "path_in_vcs": ""
}

makefile-lossless-0.1.2/.github/workflows/rust.yml

name: Rust

on:
  push:
  pull_request:

env:
  CARGO_TERM_COLOR: always

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Build
        run: cargo build --verbose
      - name: Run tests
        run: cargo test --verbose

makefile-lossless-0.1.2/Cargo.toml

# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2021"
name = "makefile-lossless"
version = "0.1.2"
authors = ["Jelmer Vernooij <jelmer@jelmer.uk>"]
description = "Lossless Parser for Makefiles"
readme = "README.md"
license = "Apache-2.0"
repository = "https://github.com/jelmer/makefile-lossless-rs"

[dependencies.log]
version = "0.4"

[dependencies.rowan]
version = "0.15.11"

[dev-dependencies.maplit]
version = "1.0.2"

makefile-lossless-0.1.2/Cargo.toml.orig

[package]
name = "makefile-lossless"
repository = "https://github.com/jelmer/makefile-lossless-rs"
description = "Lossless Parser for Makefiles"
version = "0.1.2"
edition = "2021"
license = "Apache-2.0"
readme = "README.md"
authors = [ "Jelmer Vernooij <jelmer@jelmer.uk>",]

[dependencies]
log = "0.4"
rowan = "0.15.11"

[dev-dependencies]
maplit = "1.0.2"

makefile-lossless-0.1.2/README.md

Lossless parser for Makefiles
=============================

This crate provides a lossless parser for makefiles, creating a modifiable CST.

Example:

```rust
use makefile_lossless::Makefile;

let mf = Makefile::read(std::fs::File::open("Makefile").unwrap()).unwrap();
println!(
    "Rules in the makefile: {:?}",
    mf.rules()
        .map(|r| r.targets().collect::<Vec<_>>().join(" "))
        .collect::<Vec<_>>()
);
```
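The CST is also modifiable in place. A minimal sketch of programmatic editing,
based on the `add_rule` API exercised in the crate's own tests (`add_rule`
takes a single target and creates an empty rule):

```rust
use makefile_lossless::Makefile;

let mut mf = Makefile::new();
mf.add_rule("all");

// Every token is preserved in the tree, so serializing
// the makefile back yields the exact textual representation.
assert_eq!(mf.to_string(), "all:\n");
```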
makefile-lossless-0.1.2/TODO

- Handle split lines (https://www.gnu.org/software/make/manual/make.html#Splitting-Lines)
- Support variables in rules

makefile-lossless-0.1.2/disperse.conf

timeout_days: 5
tag_name: "v$VERSION"

makefile-lossless-0.1.2/src/lex.rs

use crate::SyntaxKind;
use std::iter::Peekable;
use std::str::Chars;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum LineType {
    Recipe,
    Other,
}

pub struct Lexer<'a> {
    input: Peekable<Chars<'a>>,
    line_type: Option<LineType>,
}

impl<'a> Lexer<'a> {
    pub fn new(input: &'a str) -> Self {
        Lexer {
            input: input.chars().peekable(),
            line_type: None,
        }
    }

    fn is_whitespace(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    fn is_newline(c: char) -> bool {
        c == '\n' || c == '\r'
    }

    fn is_valid_identifier_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '_' || c == '.' || c == '-'
    }

    fn read_while<F>(&mut self, predicate: F) -> String
    where
        F: Fn(char) -> bool,
    {
        let mut result = String::new();
        while let Some(&c) = self.input.peek() {
            if predicate(c) {
                result.push(c);
                self.input.next();
            } else {
                break;
            }
        }
        result
    }

    fn next_token(&mut self) -> Option<(SyntaxKind, String)> {
        if let Some(&c) = self.input.peek() {
            match (c, self.line_type) {
                ('\t', None) => {
                    self.input.next();
                    self.line_type = Some(LineType::Recipe);
                    return Some((SyntaxKind::INDENT, "\t".to_string()));
                }
                (_, None) => {
                    self.line_type = Some(LineType::Other);
                }
                (_, _) => {}
            }
            match c {
                c if Self::is_newline(c) => {
                    self.line_type = None;
                    return Some((SyntaxKind::NEWLINE, self.input.next()?.to_string()));
                }
                '#' => {
                    return Some((
                        SyntaxKind::COMMENT,
                        self.read_while(|c| !Self::is_newline(c)),
                    ));
                }
                _ => {}
            }
            match self.line_type.unwrap() {
                LineType::Recipe => {
                    Some((SyntaxKind::TEXT, self.read_while(|c| !Self::is_newline(c))))
                }
                LineType::Other => match c {
                    c if Self::is_whitespace(c) => {
                        Some((SyntaxKind::WHITESPACE, self.read_while(Self::is_whitespace)))
                    }
                    c if Self::is_valid_identifier_char(c) => Some((
                        SyntaxKind::IDENTIFIER,
                        self.read_while(Self::is_valid_identifier_char),
                    )),
                    ':' | '=' | '?' | '+' => Some((
                        SyntaxKind::OPERATOR,
                        // '+' must be accepted by the predicate as well;
                        // otherwise a `+=` operator would produce an empty
                        // token without advancing the input.
                        self.read_while(|c| c == ':' || c == '=' || c == '?' || c == '+'),
                    )),
                    '(' => {
                        self.input.next();
                        Some((SyntaxKind::LPAREN, "(".to_string()))
                    }
                    ')' => {
                        self.input.next();
                        Some((SyntaxKind::RPAREN, ")".to_string()))
                    }
                    '$' => {
                        self.input.next();
                        Some((SyntaxKind::DOLLAR, "$".to_string()))
                    }
                    ',' => {
                        self.input.next();
                        Some((SyntaxKind::COMMA, ",".to_string()))
                    }
                    '\\' => {
                        self.input.next();
                        Some((SyntaxKind::BACKSLASH, "\\".to_string()))
                    }
                    '"' => {
                        self.input.next();
                        Some((SyntaxKind::QUOTE, "\"".to_string()))
                    }
                    _ => {
                        self.input.next();
                        Some((SyntaxKind::ERROR, c.to_string()))
                    }
                },
            }
        } else {
            None
        }
    }
}

impl Iterator for Lexer<'_> {
    type Item = (crate::SyntaxKind, String);

    fn next(&mut self) -> Option<Self::Item> {
        self.next_token()
    }
}

pub(crate) fn lex(input: &str) -> Vec<(SyntaxKind, String)> {
    Lexer::new(input).collect()
}

#[cfg(test)]
mod tests {
    use crate::SyntaxKind::*;

    #[test]
    fn test_empty() {
        assert_eq!(super::lex(""), vec![]);
    }

    #[test]
    fn test_simple() {
        assert_eq!(
            super::lex(
                r#"VARIABLE = value

rule: prerequisite
	recipe
"#
            )
            .iter()
            .map(|(kind, text)| (*kind, text.as_str()))
            .collect::<Vec<_>>(),
            vec![
                (IDENTIFIER, "VARIABLE"),
                (WHITESPACE, " "),
                (OPERATOR, "="),
                (WHITESPACE, " "),
                (IDENTIFIER, "value"),
                (NEWLINE, "\n"),
                (NEWLINE, "\n"),
                (IDENTIFIER, "rule"),
                (OPERATOR, ":"),
                (WHITESPACE, " "),
                (IDENTIFIER, "prerequisite"),
                (NEWLINE, "\n"),
                (INDENT, "\t"),
                (TEXT, "recipe"),
                (NEWLINE, "\n"),
            ]
        );
    }

    #[test]
    fn test_multiple_prerequisites() {
        assert_eq!(
            super::lex(
                r#"rule: prerequisite1 prerequisite2
	recipe

"#
            )
            .iter()
            .map(|(kind, text)| (*kind, text.as_str()))
            .collect::<Vec<_>>(),
            vec![
                (IDENTIFIER, "rule"),
                (OPERATOR, ":"),
                (WHITESPACE, " "),
                (IDENTIFIER, "prerequisite1"),
                (WHITESPACE, " "),
                (IDENTIFIER, "prerequisite2"),
                (NEWLINE, "\n"),
                (INDENT, "\t"),
                (TEXT, "recipe"),
                (NEWLINE, "\n"),
                (NEWLINE, "\n"),
            ]
        );
    }

    #[test]
    fn test_variable_question() {
        assert_eq!(
            super::lex("VARIABLE ?= value\n")
                .iter()
                .map(|(kind, text)| (*kind, text.as_str()))
                .collect::<Vec<_>>(),
            vec![
                (IDENTIFIER, "VARIABLE"),
                (WHITESPACE, " "),
                (OPERATOR, "?="),
                (WHITESPACE, " "),
                (IDENTIFIER, "value"),
                (NEWLINE, "\n"),
            ]
        );
    }

    #[test]
    fn test_conditional() {
        assert_eq!(
            super::lex(
                r#"ifneq (a, b)
endif
"#
            )
            .iter()
            .map(|(kind, text)| (*kind, text.as_str()))
            .collect::<Vec<_>>(),
            vec![
                (IDENTIFIER, "ifneq"),
                (WHITESPACE, " "),
                (LPAREN, "("),
                (IDENTIFIER, "a"),
                (COMMA, ","),
                (WHITESPACE, " "),
                (IDENTIFIER, "b"),
                (RPAREN, ")"),
                (NEWLINE, "\n"),
                (IDENTIFIER, "endif"),
                (NEWLINE, "\n"),
            ]
        );
    }
"ifneq"), (WHITESPACE, " "), (LPAREN, "("), (IDENTIFIER, "a"), (COMMA, ","), (WHITESPACE, " "), (IDENTIFIER, "b"), (RPAREN, ")"), (NEWLINE, "\n"), (IDENTIFIER, "endif"), (NEWLINE, "\n"), ] ); } #[test] fn test_variable_paren() { assert_eq!( super::lex("VARIABLE = $(value)\n").iter().map(|(kind, text)| (*kind, text.as_str())).collect::>(), vec![ (IDENTIFIER, "VARIABLE"), (WHITESPACE, " "), (OPERATOR, "="), (WHITESPACE, " "), (DOLLAR, "$"), (LPAREN, "("), (IDENTIFIER, "value"), (RPAREN, ")"), (NEWLINE, "\n"), ] ); } #[test] fn test_variable_paren2() { assert_eq!( super::lex("VARIABLE = $(value)$(value2)\n").iter().map(|(kind, text)| (*kind, text.as_str())).collect::>(), vec![ (IDENTIFIER, "VARIABLE"), (WHITESPACE, " "), (OPERATOR, "="), (WHITESPACE, " "), (DOLLAR, "$"), (LPAREN, "("), (IDENTIFIER, "value"), (RPAREN, ")"), (DOLLAR, "$"), (LPAREN, "("), (IDENTIFIER, "value2"), (RPAREN, ")"), (NEWLINE, "\n"), ] ); } } makefile-lossless-0.1.2/src/lib.rs000064400000000000000000000021301046102023000151450ustar 00000000000000//! A lossless parser for Makefiles //! //! Example: //! //! ```rust //! use std::io::Read; //! let contents = r#"PYTHON = python3 //! //! .PHONY: all //! //! all: build //! //! build: //! $(PYTHON) setup.py build //! "#; //! let makefile: makefile_lossless::Makefile = contents.parse().unwrap(); //! //! assert_eq!(makefile.rules().count(), 3); //! ``` mod lex; mod parse; pub use parse::Makefile; /// Let's start with defining all kinds of tokens and /// composite nodes. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[allow(non_camel_case_types)] #[repr(u16)] pub enum SyntaxKind { IDENTIFIER = 0, INDENT, TEXT, WHITESPACE, NEWLINE, DOLLAR, LPAREN, RPAREN, QUOTE, BACKSLASH, COMMA, OPERATOR, COMMENT, ERROR, // composite nodes ROOT, // The entire file RULE, // A single rule PREREQUISITES, RECIPE, VARIABLE, EXPR, } /// Convert our `SyntaxKind` into the rowan `SyntaxKind`. impl From for rowan::SyntaxKind { fn from(kind: SyntaxKind) -> Self { Self(kind as u16) } } makefile-lossless-0.1.2/src/parse.rs000064400000000000000000000346721046102023000155310ustar 00000000000000use crate::lex::lex; use crate::SyntaxKind; use crate::SyntaxKind::*; use rowan::ast::AstNode; use std::str::FromStr; #[derive(Debug)] pub enum Error { Io(std::io::Error), Parse(ParseError), } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match &self { Error::Io(e) => write!(f, "IO error: {}", e), Error::Parse(e) => write!(f, "Parse error: {}", e), } } } impl From for Error { fn from(e: std::io::Error) -> Self { Error::Io(e) } } impl std::error::Error for Error {} #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ParseError(Vec); impl std::fmt::Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { for err in &self.0 { writeln!(f, "{}", err)?; } Ok(()) } } impl std::error::Error for ParseError {} impl From for Error { fn from(e: ParseError) -> Self { Error::Parse(e) } } /// Second, implementing the `Language` trait teaches rowan to convert between /// these two SyntaxKind types, allowing for a nicer SyntaxNode API where /// "kinds" are values from our `enum SyntaxKind`, instead of plain u16 values. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Lang {}

impl rowan::Language for Lang {
    type Kind = SyntaxKind;

    fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
        unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
    }

    fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
        kind.into()
    }
}

/// GreenNode is an immutable tree, which is cheap to change,
/// but doesn't contain offsets and parent pointers.
use rowan::GreenNode;

/// You can construct GreenNodes by hand, but a builder
/// is helpful for top-down parsers: it maintains a stack
/// of currently in-progress nodes.
use rowan::GreenNodeBuilder;

/// The parse results are stored as a "green tree".
/// We'll discuss working with the results later.
#[derive(Debug)]
struct Parse {
    green_node: GreenNode,
    #[allow(unused)]
    errors: Vec<String>,
}

fn parse(text: &str) -> Parse {
    struct Parser {
        /// input tokens, including whitespace,
        /// in *reverse* order.
        tokens: Vec<(SyntaxKind, String)>,
        /// the in-progress tree.
        builder: GreenNodeBuilder<'static>,
        /// the list of syntax errors we've accumulated
        /// so far.
        errors: Vec<String>,
    }

    impl Parser {
        fn error(&mut self, msg: String) {
            self.builder.start_node(ERROR.into());
            if self.current().is_some() {
                self.bump();
            }
            self.errors.push(msg);
            self.builder.finish_node();
        }

        fn parse_expr(&mut self) {
            self.builder.start_node(EXPR.into());
            loop {
                match self.current() {
                    Some(NEWLINE) => {
                        break;
                    }
                    Some(_t) => {
                        self.bump();
                    }
                    None => {
                        break;
                    }
                }
            }
            self.builder.finish_node();
        }

        fn parse_recipe_line(&mut self) {
            self.builder.start_node(RECIPE.into());
            self.expect(INDENT);
            self.expect(TEXT);
            self.expect(NEWLINE);
            self.builder.finish_node();
        }

        fn parse_rule(&mut self) {
            self.builder.start_node(RULE.into());
            self.skip_ws();
            self.expect(IDENTIFIER);
            self.skip_ws();
            if self.tokens.pop() == Some((OPERATOR, ":".to_string())) {
                self.builder.token(OPERATOR.into(), ":");
            } else {
                self.error("expected ':'".into());
            }
            self.skip_ws();
            self.parse_expr();
            self.expect(NEWLINE);
            loop {
                match self.current() {
                    Some(INDENT) => {
                        self.parse_recipe_line();
                    }
                    Some(NEWLINE) => {
                        self.bump();
                        break;
                    }
                    _ => {
                        break;
                    }
                }
            }
            self.builder.finish_node();
        }

        fn parse_assignment(&mut self) {
            self.builder.start_node(VARIABLE.into());
            self.skip_ws();
            self.expect(IDENTIFIER);
            self.skip_ws();
            self.expect(OPERATOR);
            self.skip_ws();
            self.parse_expr();
            self.expect(NEWLINE);
            self.builder.finish_node();
        }

        fn parse(mut self) -> Parse {
            self.builder.start_node(ROOT.into());
            loop {
                match self.find(|&&(k, _)| k == OPERATOR || k == NEWLINE || k == LPAREN) {
                    Some((OPERATOR, ":")) => {
                        self.parse_rule();
                    }
                    Some((OPERATOR, "?="))
                    | Some((OPERATOR, "="))
                    | Some((OPERATOR, ":="))
                    | Some((OPERATOR, "::="))
                    | Some((OPERATOR, ":::="))
                    | Some((OPERATOR, "+="))
                    | Some((OPERATOR, "!=")) => {
                        self.parse_assignment();
                    }
                    Some((NEWLINE, _)) => {
                        self.bump();
                    }
                    Some(_) | None => {
                        self.error(format!("unexpected token {:?}", self.current()));
                        if self.current().is_some() {
                            self.bump();
                        }
                    }
                }
                if self.current().is_none() {
                    break;
                }
            }
            // Close the root node.
            self.builder.finish_node();

            // Turn the builder into a GreenNode.
            Parse {
                green_node: self.builder.finish(),
                errors: self.errors,
            }
        }

        /// Advance one token, adding it to the current branch of the tree builder.
        fn bump(&mut self) {
            let (kind, text) = self.tokens.pop().unwrap();
            self.builder.token(kind.into(), text.as_str());
        }

        /// Peek at the first unprocessed token.
        fn current(&self) -> Option<SyntaxKind> {
            self.tokens.last().map(|(kind, _)| *kind)
        }

        fn find(
            &self,
            finder: impl FnMut(&&(SyntaxKind, String)) -> bool,
        ) -> Option<(SyntaxKind, &str)> {
            self.tokens
                .iter()
                .rev()
                .find(finder)
                .map(|(kind, text)| (*kind, text.as_str()))
        }

        fn expect(&mut self, expected: SyntaxKind) {
            if self.current() != Some(expected) {
                self.error(format!("expected {:?}, got {:?}", expected, self.current()));
            } else {
                self.bump();
            }
        }

        fn skip_ws(&mut self) {
            while self.current() == Some(WHITESPACE) {
                self.bump()
            }
        }
    }

    let mut tokens = lex(text);
    tokens.reverse();

    Parser {
        tokens,
        builder: GreenNodeBuilder::new(),
        errors: Vec::new(),
    }
    .parse()
}

/// To work with the parse results we need a view into the
/// green tree - the syntax tree.
/// It is also immutable, like a GreenNode,
/// but it contains parent pointers, offsets, and
/// has identity semantics.
type SyntaxNode = rowan::SyntaxNode<Lang>;
#[allow(unused)]
type SyntaxToken = rowan::SyntaxToken<Lang>;
#[allow(unused)]
type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;

impl Parse {
    fn syntax(&self) -> SyntaxNode {
        SyntaxNode::new_root(self.green_node.clone())
    }

    fn root(&self) -> Makefile {
        Makefile::cast(self.syntax()).unwrap()
    }
}

macro_rules! ast_node {
    ($ast:ident, $kind:ident) => {
        #[derive(PartialEq, Eq, Hash)]
        #[repr(transparent)]
        pub struct $ast(SyntaxNode);

        impl AstNode for $ast {
            type Language = Lang;

            fn can_cast(kind: SyntaxKind) -> bool {
                kind == $kind
            }

            fn cast(syntax: SyntaxNode) -> Option<Self> {
                if Self::can_cast(syntax.kind()) {
                    Some(Self(syntax))
                } else {
                    None
                }
            }

            fn syntax(&self) -> &SyntaxNode {
                &self.0
            }
        }

        impl ToString for $ast {
            fn to_string(&self) -> String {
                self.0.text().to_string()
            }
        }
    };
}

ast_node!(Makefile, ROOT);
ast_node!(Rule, RULE);
ast_node!(Identifier, IDENTIFIER);
ast_node!(VariableDefinition, VARIABLE);

impl VariableDefinition {
    pub fn name(&self) -> Option<String> {
        self.syntax().children_with_tokens().find_map(|it| {
            it.as_token().and_then(|it| {
                if it.kind() == IDENTIFIER {
                    Some(it.text().to_string())
                } else {
                    None
                }
            })
        })
    }

    pub fn raw_value(&self) -> Option<String> {
        self.syntax()
            .children()
            .find(|it| it.kind() == EXPR)
            .map(|it| it.text().to_string())
    }
}

impl Makefile {
    pub fn new() -> Makefile {
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(ROOT.into());
        builder.finish_node();

        let syntax = SyntaxNode::new_root(builder.finish());
        Makefile(syntax.clone_for_update())
    }

    /// Read a makefile from a reader.
    pub fn read<R: std::io::Read>(mut r: R) -> Result<Makefile, Error> {
        let mut buf = String::new();
        r.read_to_string(&mut buf)?;
        Ok(buf.parse()?)
    }
    /// Read a makefile from a reader, keeping the tree even if parse errors occur.
    pub fn read_relaxed<R: std::io::Read>(mut r: R) -> Result<Makefile, Error> {
        let mut buf = String::new();
        r.read_to_string(&mut buf)?;

        let parsed = parse(&buf);
        Ok(parsed.root().clone_for_update())
    }

    pub fn rules(&self) -> impl Iterator<Item = Rule> {
        self.syntax().children().filter_map(Rule::cast)
    }

    pub fn variable_definitions(&self) -> impl Iterator<Item = VariableDefinition> {
        self.syntax()
            .children()
            .filter_map(VariableDefinition::cast)
    }

    pub fn add_rule(&mut self, target: &str) -> Rule {
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(RULE.into());
        builder.token(IDENTIFIER.into(), target);
        builder.token(OPERATOR.into(), ":");
        builder.token(NEWLINE.into(), "\n");
        builder.finish_node();

        let syntax = SyntaxNode::new_root(builder.finish()).clone_for_update();
        let pos = self.0.children().count();
        self.0.splice_children(pos..pos, vec![syntax.clone().into()]);
        Rule(syntax)
    }
}

impl Rule {
    pub fn targets(&self) -> impl Iterator<Item = String> {
        self.syntax()
            .children_with_tokens()
            .take_while(|it| it.as_token().map_or(true, |t| t.kind() != OPERATOR))
            .filter_map(|it| it.as_token().map(|t| t.text().to_string()))
    }

    pub fn prerequisites(&self) -> impl Iterator<Item = String> {
        self.syntax()
            .children()
            .find(|it| it.kind() == EXPR)
            .into_iter()
            .flat_map(|it| {
                it.children_with_tokens().filter_map(|it| {
                    it.as_token().and_then(|t| {
                        if t.kind() == IDENTIFIER {
                            Some(t.text().to_string())
                        } else {
                            None
                        }
                    })
                })
            })
    }
}

impl Default for Makefile {
    fn default() -> Self {
        Self::new()
    }
}

impl FromStr for Makefile {
    type Err = ParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = parse(s);
        if parsed.errors.is_empty() {
            Ok(parsed.root().clone_for_update())
        } else {
            Err(ParseError(parsed.errors))
        }
    }
}

#[test]
fn test_parse_simple() {
    const SIMPLE: &str = r#"VARIABLE = value

rule: dependency
	command
"#;
    let parsed = parse(SIMPLE);
    assert_eq!(parsed.errors, Vec::<String>::new());
    let node = parsed.syntax();
    assert_eq!(
        format!("{:#?}", node),
        r#"ROOT@0..44
  VARIABLE@0..17
    IDENTIFIER@0..8 "VARIABLE"
    WHITESPACE@8..9 " "
    OPERATOR@9..10 "="
    WHITESPACE@10..11 " "
    EXPR@11..16
      IDENTIFIER@11..16 "value"
    NEWLINE@16..17 "\n"
  NEWLINE@17..18 "\n"
  RULE@18..44
    IDENTIFIER@18..22 "rule"
    OPERATOR@22..23 ":"
    WHITESPACE@23..24 " "
    EXPR@24..34
      IDENTIFIER@24..34 "dependency"
    NEWLINE@34..35 "\n"
    RECIPE@35..44
      INDENT@35..36 "\t"
      TEXT@36..43 "command"
      NEWLINE@43..44 "\n"
"#
    );

    let root = parsed.root().clone_for_update();

    let mut rules = root.rules().collect::<Vec<_>>();
    assert_eq!(rules.len(), 1);
    let rule = rules.pop().unwrap();
    assert_eq!(rule.targets().collect::<Vec<_>>(), vec!["rule"]);
    assert_eq!(rule.prerequisites().collect::<Vec<_>>(), vec!["dependency"]);

    let mut variables = root.variable_definitions().collect::<Vec<_>>();
    assert_eq!(variables.len(), 1);
    let variable = variables.pop().unwrap();
    assert_eq!(variable.name(), Some("VARIABLE".to_string()));
    assert_eq!(variable.raw_value(), Some("value".to_string()));
}

#[test]
fn test_parse_multiple_prerequisites() {
    const MULTIPLE_PREREQUISITES: &str = r#"rule: dependency1 dependency2
	command

"#;
    let parsed = parse(MULTIPLE_PREREQUISITES);
    assert_eq!(parsed.errors, Vec::<String>::new());
    let node = parsed.syntax();
    assert_eq!(
        format!("{:#?}", node),
        r#"ROOT@0..40
  RULE@0..40
    IDENTIFIER@0..4 "rule"
    OPERATOR@4..5 ":"
    WHITESPACE@5..6 " "
    EXPR@6..29
      IDENTIFIER@6..17 "dependency1"
      WHITESPACE@17..18 " "
      IDENTIFIER@18..29 "dependency2"
    NEWLINE@29..30 "\n"
    RECIPE@30..39
      INDENT@30..31 "\t"
      TEXT@31..38 "command"
      NEWLINE@38..39 "\n"
    NEWLINE@39..40 "\n"
"#
    );

    let root = parsed.root().clone_for_update();
    let rule = root.rules().next().unwrap();
    assert_eq!(rule.targets().collect::<Vec<_>>(), vec!["rule"]);
    assert_eq!(
        rule.prerequisites().collect::<Vec<_>>(),
        vec!["dependency1", "dependency2"]
    );
}
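// An added illustrative test (not part of the original crate): `?=` is one of
// the assignment operators recognized by the parser's main loop, so a
// conditional assignment should surface as an ordinary VariableDefinition.
#[test]
fn test_parse_variable_question() {
    let makefile: Makefile = "VARIABLE ?= value\n".parse().unwrap();
    let variable = makefile.variable_definitions().next().unwrap();
    assert_eq!(variable.name(), Some("VARIABLE".to_string()));
    assert_eq!(variable.raw_value(), Some("value".to_string()));
}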
#[test]
fn test_add_rule() {
    let mut makefile = Makefile::new();
    let rule = makefile.add_rule("rule");
    assert_eq!(rule.targets().collect::<Vec<_>>(), vec!["rule"]);
    assert_eq!(
        rule.prerequisites().collect::<Vec<_>>(),
        Vec::<String>::new()
    );

    assert_eq!(makefile.to_string(), "rule:\n");
}
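// An added illustrative test (not part of the original crate): because the
// parser stores every token (whitespace, operators, indentation) in the green
// tree, a parse/serialize round trip should reproduce the input exactly.
#[test]
fn test_lossless_roundtrip() {
    const INPUT: &str = "VARIABLE = value\n\nrule: dependency\n\tcommand\n";
    let makefile: Makefile = INPUT.parse().unwrap();
    assert_eq!(makefile.to_string(), INPUT);
}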