deb822-lossless-0.1.23/.cargo_vcs_info.json0000644000000001360000000000100137520ustar { "git": { "sha1": "7aa983fe393bffe6f56752e186cd11531effb886" }, "path_in_vcs": "" }deb822-lossless-0.1.23/.github/CODEOWNERS000064400000000000000000000000121046102023000154660ustar 00000000000000* @jelmer deb822-lossless-0.1.23/.github/FUNDING.yml000064400000000000000000000000171046102023000157150ustar 00000000000000github: jelmer deb822-lossless-0.1.23/.github/dependabot.yml000064400000000000000000000006251046102023000167350ustar 00000000000000# Please see the documentation for all configuration options: # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates version: 2 updates: - package-ecosystem: "cargo" directory: "/" schedule: interval: "weekly" rebase-strategy: "disabled" - package-ecosystem: "github-actions" directory: "/" schedule: interval: weekly deb822-lossless-0.1.23/.github/workflows/rust.yml000064400000000000000000000005141046102023000176570ustar 00000000000000name: Rust on: push: branches: [ "master" ] pull_request: branches: [ "master" ] env: CARGO_TERM_COLOR: always jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Build run: cargo build --verbose --all - name: Run tests run: cargo test --verbose --all deb822-lossless-0.1.23/.gitignore000064400000000000000000000000131046102023000145240ustar 00000000000000/target *~ deb822-lossless-0.1.23/Cargo.lock0000644000000157600000000000100117360ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "autocfg" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "countme" version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636" [[package]] name = "deb822-lossless" version = "0.1.23" dependencies = [ "pyo3", "regex", "rowan", "serde", ] [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "indoc" version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "libc" version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memoffset" version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" dependencies = [ "autocfg", ] [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "portable-atomic" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" [[package]] name = "proc-macro2" version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433" dependencies = [ "cfg-if", "indoc", "libc", "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", "unindent", ] [[package]] name = "pyo3-build-config" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8" dependencies = [ "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6" dependencies = [ "libc", "pyo3-build-config", ] [[package]] name = "pyo3-macros" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", "syn", ] [[package]] name = "pyo3-macros-backend" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", "syn", ] [[package]] name = "quote" version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] [[package]] name = "regex" version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "rowan" version = "0.15.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a542b0253fa46e632d27a1dc5cf7b930de4df8659dc6e720b647fc72147ae3d" dependencies = [ "countme", "hashbrown", "rustc-hash", "text-size", ] [[package]] name = "rustc-hash" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "serde" version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", 
"quote", "syn", ] [[package]] name = "syn" version = "2.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "target-lexicon" version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "text-size" version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unindent" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" deb822-lossless-0.1.23/Cargo.toml0000644000000025100000000000100117460ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2021" name = "deb822-lossless" version = "0.1.23" authors = ["Jelmer Vernooij "] build = false autobins = false autoexamples = false autotests = false autobenches = false description = "A lossless parser for deb822 files" homepage = "https://github.com/jelmer/deb822-lossless" readme = "README.md" license = "Apache-2.0" repository = "https://github.com/jelmer/deb822-lossless" [lib] name = "deb822_lossless" path = "src/lib.rs" [[example]] name = "edit-field" path = "examples/edit-field.rs" [[example]] name = "readonly" path = "examples/readonly.rs" [dependencies.pyo3] version = "0.22" optional = true [dependencies.regex] version = "1" [dependencies.rowan] version = "0.15.11" [dependencies.serde] version = "1" features = ["derive"] optional = true [features] default = ["serde"] python-debian = ["dep:pyo3"] serde = ["dep:serde"] deb822-lossless-0.1.23/Cargo.toml.orig000064400000000000000000000014441046102023000154340ustar 00000000000000[package] name = "deb822-lossless" authors = ["Jelmer Vernooij "] version = { workspace = true } edition = "2021" license = "Apache-2.0" description = "A lossless parser for deb822 files" repository = { workspace = true } homepage = { workspace = true } [workspace] members = ["debian-control", "debian-copyright", "dep3"] [workspace.package] version = "0.1.23" repository = "https://github.com/jelmer/deb822-lossless" homepage = "https://github.com/jelmer/deb822-lossless" [workspace.dependencies] rowan = "0.15.11" pyo3 = "0.22" [dependencies] regex = "1" rowan = { workspace = true } serde = { version = "1", features = ["derive"], optional = true } pyo3 = { workspace = true, optional = true } [features] default = ["serde"] serde = ["dep:serde"] python-debian = ["dep:pyo3"] deb822-lossless-0.1.23/README.md000064400000000000000000000015101046102023000140160ustar 00000000000000Lossless parser for deb822 style files ====================================== This crate contains lossless parsers and editors for RFC822 style file 
as used in Debian. Three related crates that build on this one are: * ``debian-control`` * ``debian-copyright`` * ``dep3`` Example ```rust use deb822_lossless::Deb822; use std::str::FromStr; let input = r#"Package: deb822-lossless Maintainer: Jelmer Vernooij Section: rust Package: deb822-lossless Architecture: any Description: Lossless parser for deb822 style files. This parser can be used to parse files in the deb822 format, while preserving all whitespace and comments. It is based on the [rowan] library, which is a lossless parser library for Rust. "#; let deb822 = Deb822::from_str(input).unwrap(); assert_eq!(deb822.paragraphs().count(), 2); ``` deb822-lossless-0.1.23/disperse.conf000064400000000000000000000000461046102023000152270ustar 00000000000000timeout_days: 5 tag_name: "v$VERSION" deb822-lossless-0.1.23/examples/edit-field.rs000064400000000000000000000020641046102023000167360ustar 00000000000000fn main() { let d: deb822_lossless::Deb822 = r#"Source: golang-github-blah-blah Section: devel Priority: optional Standards-Version: 4.2.0 Maintainer: Some Maintainer Build-Depends: debhelper (>= 11~), # comment dh-golang, golang-any Homepage: https://github.com/j-keck/arping "# .parse() .unwrap(); let mut ps = d.paragraphs(); let mut p = ps.next().unwrap(); assert_eq!( "Some Maintainer ", p.get("Maintainer").unwrap() ); p.insert("Maintainer", "Some Other Maintainer "); assert_eq!( "Some Other Maintainer ", p.get("Maintainer").unwrap() ); assert_eq!( d.to_string(), r#"Source: golang-github-blah-blah Section: devel Priority: optional Standards-Version: 4.2.0 Maintainer: Some Other Maintainer Build-Depends: debhelper (>= 11~), # comment dh-golang, golang-any Homepage: https://github.com/j-keck/arping "# ); } deb822-lossless-0.1.23/examples/readonly.rs000064400000000000000000000014521046102023000165450ustar 00000000000000fn main() { use deb822_lossless::Deb822; use std::str::FromStr; let input = r#"Package: deb822-lossless Maintainer: Jelmer Vernooij Homepage: 
https://github.com/jelmer/deb822-lossless Section: rust Package: deb822-lossless Architecture: any Description: Lossless parser for deb822 style files. This parser can be used to parse files in the deb822 format, while preserving all whitespace and comments. It is based on the [rowan] library, which is a lossless parser library for Rust. "#; let deb822 = Deb822::from_str(input).unwrap(); assert_eq!(deb822.paragraphs().count(), 2); let homepage = deb822.paragraphs().next().unwrap().get("Homepage"); assert_eq!( homepage.as_deref(), Some("https://github.com/jelmer/deb822-lossless") ); } deb822-lossless-0.1.23/src/lex.rs000064400000000000000000000230051046102023000144670ustar 00000000000000use crate::SyntaxKind; use std::iter::Peekable; use std::str::Chars; pub struct Lexer<'a> { input: Peekable>, start_of_line: bool, indent: usize, colon_count: usize, } impl<'a> Lexer<'a> { pub fn new(input: &'a str) -> Self { Lexer { input: input.chars().peekable(), start_of_line: true, colon_count: 0, indent: 0, } } fn is_whitespace(c: char) -> bool { c == ' ' || c == '\t' } fn is_newline(c: char) -> bool { c == '\n' || c == '\r' } fn is_valid_key_char(c: char) -> bool { c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' 
} fn read_while(&mut self, predicate: F) -> String where F: Fn(char) -> bool, { let mut result = String::new(); while let Some(&c) = self.input.peek() { if predicate(c) { result.push(c); self.input.next(); } else { break; } } result } fn next_token(&mut self) -> Option<(SyntaxKind, String)> { if let Some(&c) = self.input.peek() { match c { ':' if self.colon_count == 0 => { self.colon_count += 1; self.input.next(); Some((SyntaxKind::COLON, ":".to_owned())) } _ if Self::is_newline(c) => { self.input.next(); self.start_of_line = true; self.colon_count = 0; self.indent = 0; Some((SyntaxKind::NEWLINE, c.to_string())) } _ if Self::is_whitespace(c) => { let whitespace = self.read_while(Self::is_whitespace); if self.start_of_line { self.indent = whitespace.len(); Some((SyntaxKind::INDENT, whitespace)) } else { Some((SyntaxKind::WHITESPACE, whitespace)) } } '#' if self.start_of_line => { self.input.next(); let comment = self.read_while(|c| c != '\n' && c != '\r'); self.start_of_line = true; self.colon_count = 0; Some((SyntaxKind::COMMENT, format!("#{}", comment))) } _ if Self::is_valid_key_char(c) && self.start_of_line && self.indent == 0 => { let key = self.read_while(Self::is_valid_key_char); self.start_of_line = false; Some((SyntaxKind::KEY, key)) } _ if !self.start_of_line || self.indent > 0 => { let value = self.read_while(|c| !Self::is_newline(c)); Some((SyntaxKind::VALUE, value)) } _ => { self.input.next(); Some((SyntaxKind::ERROR, c.to_string())) } } } else { None } } } impl Iterator for Lexer<'_> { type Item = (crate::SyntaxKind, String); fn next(&mut self) -> Option { self.next_token() } } pub(crate) fn lex(input: &str) -> Vec<(SyntaxKind, String)> { let mut lexer = Lexer::new(input); lexer.by_ref().collect::>() } #[cfg(test)] mod tests { use crate::SyntaxKind::*; #[test] fn test_empty() { assert_eq!(super::lex(""), vec![]); } #[test] fn test_simple() { assert_eq!( super::lex( r#"Source: syncthing-gtk Maintainer: Jelmer Vernooij Section: net # This is the first 
binary package: Package: syncthing-gtk Architecture: all Depends: foo, bar, blah (= 1.0) Description: a package with a loooong . long . description "# ) .iter() .map(|(kind, text)| (*kind, text.as_str())) .collect::>(), vec![ (KEY, "Source"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "syncthing-gtk"), (NEWLINE, "\n"), (KEY, "Maintainer"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "Jelmer Vernooij "), (NEWLINE, "\n"), (KEY, "Section"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "net "), (NEWLINE, "\n"), (NEWLINE, "\n"), (COMMENT, "# This is the first binary package:"), (NEWLINE, "\n"), (NEWLINE, "\n"), (KEY, "Package"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "syncthing-gtk"), (NEWLINE, "\n"), (KEY, "Architecture"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "all"), (NEWLINE, "\n"), (KEY, "Depends"), (COLON, ":"), (WHITESPACE, " "), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "foo,"), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "bar,"), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "blah (= 1.0)"), (NEWLINE, "\n"), (KEY, "Description"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "a package"), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "with a loooong"), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "."), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "long"), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "."), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "description"), (NEWLINE, "\n") ] ); } #[test] fn test_apt() { let text = r#"Package: cvsd Binary: cvsd Version: 1.0.24 Maintainer: Arthur de Jong Build-Depends: debhelper (>= 9), po-debconf Architecture: any Standards-Version: 3.9.3 Format: 3.0 (native) Files: b7a7d67a02974c52c408fdb5e118406d 890 cvsd_1.0.24.dsc b73ee40774c3086cb8490cdbb96ac883 258139 cvsd_1.0.24.tar.gz Vcs-Browser: http://arthurdejong.org/viewvc/cvsd/ Vcs-Cvs: :pserver:anonymous@arthurdejong.org:/arthur/ Checksums-Sha256: a7bb7a3aacee19cd14ce5c26cb86e348b1608e6f1f6e97c6ea7c58efa440ac43 890 cvsd_1.0.24.dsc 46bc517760c1070ae408693b89603986b53e6f068ae6bdc744e2e830e46b8cba 258139 cvsd_1.0.24.tar.gz 
Homepage: http://arthurdejong.org/cvsd/ Package-List: cvsd deb vcs optional Directory: pool/main/c/cvsd Priority: source Section: vcs "#; let tokens = super::lex(text); assert_eq!( tokens .iter() .map(|(kind, text)| (*kind, text.as_str())) .collect::>(), vec![ (KEY, "Package"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "cvsd"), (NEWLINE, "\n"), (KEY, "Binary"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "cvsd"), (NEWLINE, "\n"), (KEY, "Version"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "1.0.24"), (NEWLINE, "\n"), (KEY, "Maintainer"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "Arthur de Jong "), (NEWLINE, "\n"), (KEY, "Build-Depends"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "debhelper (>= 9), po-debconf"), (NEWLINE, "\n"), (KEY, "Architecture"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "any"), (NEWLINE, "\n"), (KEY, "Standards-Version"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "3.9.3"), (NEWLINE, "\n"), (KEY, "Format"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "3.0 (native)"), (NEWLINE, "\n"), (KEY, "Files"), (COLON, ":"), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "b7a7d67a02974c52c408fdb5e118406d 890 cvsd_1.0.24.dsc"), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "b73ee40774c3086cb8490cdbb96ac883 258139 cvsd_1.0.24.tar.gz"), (NEWLINE, "\n"), (KEY, "Vcs-Browser"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "http://arthurdejong.org/viewvc/cvsd/"), (NEWLINE, "\n"), (KEY, "Vcs-Cvs"), (COLON, ":"), (WHITESPACE, " "), (VALUE, ":pserver:anonymous@arthurdejong.org:/arthur/"), (NEWLINE, "\n"), (KEY, "Checksums-Sha256"), (COLON, ":"), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "a7bb7a3aacee19cd14ce5c26cb86e348b1608e6f1f6e97c6ea7c58efa440ac43 890 cvsd_1.0.24.dsc"), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "46bc517760c1070ae408693b89603986b53e6f068ae6bdc744e2e830e46b8cba 258139 cvsd_1.0.24.tar.gz"), (NEWLINE, "\n"), (KEY, "Homepage"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "http://arthurdejong.org/cvsd/"), (NEWLINE, "\n"), (KEY, "Package-List"), (COLON, ":"), (NEWLINE, "\n"), (INDENT, " 
"), (VALUE, "cvsd deb vcs optional"), (NEWLINE, "\n"), (KEY, "Directory"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "pool/main/c/cvsd"), (NEWLINE, "\n"), (KEY, "Priority"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "source"), (NEWLINE, "\n"), (KEY, "Section"), (COLON, ":"), (WHITESPACE, " "), (VALUE, "vcs"), (NEWLINE, "\n"), (NEWLINE, "\n") ] ); } } deb822-lossless-0.1.23/src/lib.rs000064400000000000000000001221101046102023000144420ustar 00000000000000#![allow(clippy::type_complexity)] //! Lossless parser for deb822 style files. //! //! This parser can be used to parse files in the deb822 format, while preserving //! all whitespace and comments. It is based on the [rowan] library, which is a //! lossless parser library for Rust. //! //! Once parsed, the file can be traversed or modified, and then written back to //! a file. //! //! # Example //! //! ```rust //! use deb822_lossless::Deb822; //! use std::str::FromStr; //! //! let input = r#"Package: deb822-lossless //! Maintainer: Jelmer Vernooij //! Homepage: https://github.com/jelmer/deb822-lossless //! Section: rust //! //! Package: deb822-lossless //! Architecture: any //! Description: Lossless parser for deb822 style files. //! This parser can be used to parse files in the deb822 format, while preserving //! all whitespace and comments. It is based on the [rowan] library, which is a //! lossless parser library for Rust. //! "#; //! //! let deb822 = Deb822::from_str(input).unwrap(); //! assert_eq!(deb822.paragraphs().count(), 2); //! let homepage = deb822.paragraphs().nth(0).unwrap().get("Homepage"); //! assert_eq!(homepage.as_deref(), Some("https://github.com/jelmer/deb822-lossless")); //! ``` mod lex; use crate::lex::lex; use rowan::ast::AstNode; use std::path::Path; use std::str::FromStr; /// Let's start with defining all kinds of tokens and /// composite nodes. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[allow(non_camel_case_types)] #[repr(u16)] pub enum SyntaxKind { KEY = 0, VALUE, COLON, INDENT, NEWLINE, WHITESPACE, // whitespaces is explicit COMMENT, // comments ERROR, // as well as errors // composite nodes ROOT, // The entire file PARAGRAPH, // A deb822 paragraph ENTRY, // A single key-value pair EMPTY_LINE, // An empty line } use SyntaxKind::*; /// Convert our `SyntaxKind` into the rowan `SyntaxKind`. impl From for rowan::SyntaxKind { fn from(kind: SyntaxKind) -> Self { Self(kind as u16) } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ParseError(Vec); impl std::fmt::Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { for err in &self.0 { writeln!(f, "{}", err)?; } Ok(()) } } impl std::error::Error for ParseError {} #[derive(Debug)] pub enum Error { ParseError(ParseError), IoError(std::io::Error), } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match &self { Error::ParseError(err) => write!(f, "{}", err), Error::IoError(err) => write!(f, "{}", err), } } } impl From for Error { fn from(err: ParseError) -> Self { Self::ParseError(err) } } impl From for Error { fn from(err: std::io::Error) -> Self { Self::IoError(err) } } impl std::error::Error for Error {} /// Second, implementing the `Language` trait teaches rowan to convert between /// these two SyntaxKind types, allowing for a nicer SyntaxNode API where /// "kinds" are values from our `enum SyntaxKind`, instead of plain u16 values. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Lang {} impl rowan::Language for Lang { type Kind = SyntaxKind; fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind { unsafe { std::mem::transmute::(raw.0) } } fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind { kind.into() } } /// GreenNode is an immutable tree, which is cheap to change, /// but doesn't contain offsets and parent pointers. use rowan::GreenNode; /// You can construct GreenNodes by hand, but a builder /// is helpful for top-down parsers: it maintains a stack /// of currently in-progress nodes use rowan::GreenNodeBuilder; /// The parse results are stored as a "green tree". /// We'll discuss working with the results later struct Parse { green_node: GreenNode, #[allow(unused)] errors: Vec, } fn parse(text: &str) -> Parse { struct Parser { /// input tokens, including whitespace, /// in *reverse* order. tokens: Vec<(SyntaxKind, String)>, /// the in-progress tree. builder: GreenNodeBuilder<'static>, /// the list of syntax errors we've accumulated /// so far. 
errors: Vec, } impl Parser { fn parse_entry(&mut self) { self.builder.start_node(ENTRY.into()); // First, parse the key and colon if self.current() == Some(KEY) { self.bump(); self.skip_ws(); } else { self.builder.start_node(ERROR.into()); self.bump(); self.errors.push("expected key".to_string()); self.builder.finish_node(); } if self.current() == Some(COLON) { self.bump(); self.skip_ws(); } else { self.builder.start_node(ERROR.into()); if self.current().is_some() { self.bump(); } self.errors.push("expected ':'".to_string()); self.builder.finish_node(); } loop { while self.current() == Some(WHITESPACE) || self.current() == Some(VALUE) { self.bump(); } match self.current() { None => { break; } Some(NEWLINE) => { self.bump(); } Some(g) => { self.builder.start_node(ERROR.into()); self.bump(); self.errors.push(format!("expected newline, got {:?}", g)); self.builder.finish_node(); } } if self.current() == Some(INDENT) { self.bump(); self.skip_ws(); } else { break; } } self.builder.finish_node(); } fn parse_paragraph(&mut self) { self.builder.start_node(PARAGRAPH.into()); while self.current() != Some(NEWLINE) && self.current().is_some() { self.parse_entry(); } self.builder.finish_node(); } fn parse(mut self) -> Parse { // Make sure that the root node covers all source self.builder.start_node(ROOT.into()); while self.current().is_some() { self.skip_ws_and_newlines(); if self.current().is_some() { self.parse_paragraph(); } } // Don't forget to eat *trailing* whitespace self.skip_ws_and_newlines(); // Close the root node. self.builder.finish_node(); // Turn the builder into a GreenNode Parse { green_node: self.builder.finish(), errors: self.errors, } } /// Advance one token, adding it to the current branch of the tree builder. 
fn bump(&mut self) { let (kind, text) = self.tokens.pop().unwrap(); self.builder.token(kind.into(), text.as_str()); } /// Peek at the first unprocessed token fn current(&self) -> Option { self.tokens.last().map(|(kind, _)| *kind) } fn skip_ws(&mut self) { while self.current() == Some(WHITESPACE) || self.current() == Some(COMMENT) { self.bump() } } fn skip_ws_and_newlines(&mut self) { while self.current() == Some(WHITESPACE) || self.current() == Some(COMMENT) || self.current() == Some(NEWLINE) { self.builder.start_node(EMPTY_LINE.into()); while self.current() != Some(NEWLINE) && self.current().is_some() { self.bump(); } if self.current() == Some(NEWLINE) { self.bump(); } self.builder.finish_node(); } } } let mut tokens = lex(text); tokens.reverse(); Parser { tokens, builder: GreenNodeBuilder::new(), errors: Vec::new(), } .parse() } /// To work with the parse results we need a view into the /// green tree - the Syntax tree. /// It is also immutable, like a GreenNode, /// but it contains parent pointers, offsets, and /// has identity semantics. type SyntaxNode = rowan::SyntaxNode; #[allow(unused)] type SyntaxToken = rowan::SyntaxToken; #[allow(unused)] type SyntaxElement = rowan::NodeOrToken; impl Parse { fn syntax(&self) -> SyntaxNode { SyntaxNode::new_root(self.green_node.clone()) } fn root(&self) -> Deb822 { Deb822::cast(self.syntax()).unwrap() } } macro_rules! 
ast_node { ($ast:ident, $kind:ident) => { #[derive(PartialEq, Eq, Hash)] #[repr(transparent)] pub struct $ast(SyntaxNode); impl $ast { #[allow(unused)] fn cast(node: SyntaxNode) -> Option { if node.kind() == $kind { Some(Self(node)) } else { None } } } impl AstNode for $ast { type Language = Lang; fn can_cast(kind: SyntaxKind) -> bool { kind == $kind } fn cast(syntax: SyntaxNode) -> Option { Self::cast(syntax) } fn syntax(&self) -> &SyntaxNode { &self.0 } } impl std::fmt::Display for $ast { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}", self.0.text()) } } }; } impl std::fmt::Debug for Deb822 { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Deb822").finish() } } ast_node!(Deb822, ROOT); ast_node!(Paragraph, PARAGRAPH); ast_node!(Entry, ENTRY); impl Default for Deb822 { fn default() -> Self { Self::new() } } impl Deb822 { pub fn new() -> Deb822 { let mut builder = GreenNodeBuilder::new(); builder.start_node(ROOT.into()); builder.finish_node(); Deb822(SyntaxNode::new_root(builder.finish()).clone_for_update()) } /// Provide a formatter that can handle indentation and trailing separators /// /// # Arguments /// * `control` - The control file to format /// * `indentation` - The indentation to use /// * `immediate_empty_line` - Whether the value should always start with an empty line. If true, /// then the result becomes something like "Field:\n value". This parameter /// only applies to the values that will be formatted over more than one line. /// * `max_line_length_one_liner` - If set, then this is the max length of the value /// if it is crammed into a "one-liner" value. If the value(s) fit into /// one line, this parameter will overrule immediate_empty_line. /// * `sort_paragraphs` - If set, then this function will sort the paragraphs according to the /// given function. /// * `sort_entries` - If set, then this function will sort the entries according to the /// given function. 
#[must_use] pub fn wrap_and_sort( self: &Deb822, indentation: Indentation, immediate_empty_line: bool, max_line_length_one_liner: Option, sort_paragraphs: Option<&dyn Fn(&Paragraph, &Paragraph) -> std::cmp::Ordering>, sort_entries: Option<&dyn Fn(&Entry, &Entry) -> std::cmp::Ordering>, ) -> Deb822 { let mut builder = GreenNodeBuilder::new(); builder.start_node(ROOT.into()); let mut current = vec![]; let mut paragraphs = vec![]; for c in self.0.children_with_tokens() { match c.kind() { PARAGRAPH => { paragraphs.push(( current, Paragraph::cast(c.as_node().unwrap().clone()).unwrap(), )); current = vec![]; } COMMENT | ERROR => { current.push(c); } EMPTY_LINE => { current.extend( c.as_node() .unwrap() .children_with_tokens() .skip_while(|c| matches!(c.kind(), EMPTY_LINE | NEWLINE | WHITESPACE)), ); } _ => {} } } if let Some(sort_paragraph) = sort_paragraphs { paragraphs.sort_by(|a, b| { let a_key = &a.1; let b_key = &b.1; sort_paragraph(a_key, b_key) }); } for (i, mut paragraph) in paragraphs.into_iter().enumerate() { if i > 0 { builder.start_node(EMPTY_LINE.into()); builder.token(NEWLINE.into(), "\n"); builder.finish_node(); } for c in paragraph.0.into_iter() { builder.token(c.kind().into(), c.as_token().unwrap().text()); } inject( &mut builder, paragraph .1 .wrap_and_sort( indentation, immediate_empty_line, max_line_length_one_liner, sort_entries, ) .0, ); } for c in current { builder.token(c.kind().into(), c.as_token().unwrap().text()); } builder.finish_node(); Self(SyntaxNode::new_root(builder.finish()).clone_for_update()) } /// Returns an iterator over all paragraphs in the file. pub fn paragraphs(&self) -> impl Iterator { self.0.children().filter_map(Paragraph::cast) } /// Add a new empty paragraph to the end of the file. 
pub fn add_paragraph(&mut self) -> Paragraph { let paragraph = Paragraph::new(); let mut to_insert = vec![]; if self.0.children().count() > 0 { let mut builder = GreenNodeBuilder::new(); builder.start_node(EMPTY_LINE.into()); builder.token(NEWLINE.into(), "\n"); builder.finish_node(); to_insert.push( SyntaxNode::new_root(builder.finish()) .clone_for_update() .into(), ); } to_insert.push(paragraph.0.clone().into()); self.0.splice_children( self.0.children().count()..self.0.children().count(), to_insert, ); paragraph } /// Read a deb822 file from the given path. pub fn from_file(path: impl AsRef) -> Result { let text = std::fs::read_to_string(path)?; Ok(Self::from_str(&text)?) } /// Read a deb822 file from the given path, ignoring any syntax errors. pub fn from_file_relaxed( path: impl AsRef, ) -> Result<(Self, Vec), std::io::Error> { let text = std::fs::read_to_string(path)?; Ok(Self::from_str_relaxed(&text)) } pub fn from_str_relaxed(s: &str) -> (Self, Vec) { let parsed = parse(s); (parsed.root().clone_for_update(), parsed.errors) } pub fn read(mut r: R) -> Result { let mut buf = String::new(); r.read_to_string(&mut buf)?; Ok(Self::from_str(&buf)?) 
} pub fn read_relaxed(mut r: R) -> Result<(Self, Vec), std::io::Error> { let mut buf = String::new(); r.read_to_string(&mut buf)?; Ok(Self::from_str_relaxed(&buf)) } } fn inject(builder: &mut GreenNodeBuilder, node: SyntaxNode) { builder.start_node(node.kind().into()); for child in node.children_with_tokens() { match child { rowan::NodeOrToken::Node(child) => { inject(builder, child); } rowan::NodeOrToken::Token(token) => { builder.token(token.kind().into(), token.text()); } } } builder.finish_node(); } impl Paragraph { pub fn new() -> Paragraph { let mut builder = GreenNodeBuilder::new(); builder.start_node(PARAGRAPH.into()); builder.finish_node(); Paragraph(SyntaxNode::new_root(builder.finish()).clone_for_update()) } #[must_use] pub fn wrap_and_sort( &mut self, indentation: Indentation, immediate_empty_line: bool, max_line_length_one_liner: Option, sort_entries: Option<&dyn Fn(&Entry, &Entry) -> std::cmp::Ordering>, ) -> Paragraph { let mut builder = GreenNodeBuilder::new(); let mut current = vec![]; let mut entries = vec![]; builder.start_node(PARAGRAPH.into()); for c in self.0.children_with_tokens() { match c.kind() { ENTRY => { entries.push((current, Entry::cast(c.as_node().unwrap().clone()).unwrap())); current = vec![]; } ERROR | COMMENT => { current.push(c); } _ => {} } } if let Some(sort_entry) = sort_entries { entries.sort_by(|a, b| { let a_key = &a.1; let b_key = &b.1; sort_entry(a_key, b_key) }); } for (pre, mut entry) in entries.into_iter() { for c in pre.into_iter() { builder.token(c.kind().into(), c.as_token().unwrap().text()); } inject( &mut builder, entry .wrap_and_sort(indentation, immediate_empty_line, max_line_length_one_liner) .0, ); } for c in current { builder.token(c.kind().into(), c.as_token().unwrap().text()); } builder.finish_node(); Self(SyntaxNode::new_root(builder.finish()).clone_for_update()) } /// Returns the value of the given key in the paragraph. 
pub fn get(&self, key: &str) -> Option { self.entries() .find(|e| e.key().as_deref() == Some(key)) .map(|e| e.value()) } /// Returns whether the paragraph contains the given key. pub fn contains_key(&self, key: &str) -> bool { self.get(key).is_some() } /// Returns an iterator over all entries in the paragraph. fn entries(&self) -> impl Iterator + '_ { self.0.children().filter_map(Entry::cast) } /// Returns an iterator over all items in the paragraph. pub fn items(&self) -> impl Iterator + '_ { self.entries() .filter_map(|e| e.key().map(|k| (k, e.value()))) } /// Returns an iterator over all values for the given key in the paragraph. pub fn get_all<'a>(&'a self, key: &'a str) -> impl Iterator + '_ { self.items() .filter_map(move |(k, v)| if k.as_str() == key { Some(v) } else { None }) } #[deprecated(note = "use `contains_key` instead")] /// Returns true if the paragraph contains the given key. pub fn contains(&self, key: &str) -> bool { self.get_all(key).any(|_| true) } /// Returns an iterator over all keys in the paragraph. pub fn keys(&self) -> impl Iterator + '_ { self.entries().filter_map(|e| e.key()) } /// Remove the given field from the paragraph. pub fn remove(&mut self, key: &str) { for mut entry in self.entries() { if entry.key().as_deref() == Some(key) { entry.detach(); } } } /// Add a new field to the paragraph. pub fn insert(&mut self, key: &str, value: &str) { let new_entry = Entry::new(key, value); for entry in self.entries() { if entry.key().as_deref() == Some(key) { self.0.splice_children( entry.0.index()..entry.0.index() + 1, vec![new_entry.0.clone_for_update().into()], ); return; } } let entry = Entry::new(key, value); self.0.splice_children( self.0.children().count()..self.0.children().count(), vec![entry.0.clone_for_update().into()], ); } /// Rename the given field in the paragraph. 
pub fn rename(&mut self, old_key: &str, new_key: &str) -> bool { for entry in self.entries() { if entry.key().as_deref() == Some(old_key) { self.0.splice_children( entry.0.index()..entry.0.index() + 1, vec![Entry::new(new_key, entry.value().as_str()) .0 .clone_for_update() .into()], ); return true; } } false } } impl Default for Paragraph { fn default() -> Self { Self::new() } } impl std::str::FromStr for Paragraph { type Err = ParseError; fn from_str(text: &str) -> Result { let deb822 = Deb822::from_str(text)?; let mut paragraphs = deb822.paragraphs(); paragraphs .next() .ok_or_else(|| ParseError(vec!["no paragraphs".to_string()])) } } #[cfg(feature = "python-debian")] impl pyo3::ToPyObject for Paragraph { fn to_object(&self, py: pyo3::Python) -> pyo3::PyObject { use pyo3::prelude::*; let d = pyo3::types::PyDict::new_bound(py); for (k, v) in self.items() { d.set_item(k, v).unwrap(); } let m = py.import_bound("debian.deb822").unwrap(); let cls = m.getattr("Deb822").unwrap(); cls.call1((d,)).unwrap().to_object(py) } } #[cfg(feature = "python-debian")] impl pyo3::FromPyObject<'_> for Paragraph { fn extract_bound(obj: &pyo3::Bound) -> pyo3::PyResult { use pyo3::prelude::*; let d = obj.call_method0("__str__")?.extract::()?; Ok(Paragraph::from_str(&d) .map_err(|e| pyo3::exceptions::PyValueError::new_err((e.to_string(),)))?) } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Indentation { /// Use the same indentation as the original line for the value. FieldNameLength, /// The number of spaces to use for indentation. 
FixedIndentation(u32), } impl Entry { pub fn new(key: &str, value: &str) -> Entry { let mut builder = GreenNodeBuilder::new(); builder.start_node(ENTRY.into()); builder.token(KEY.into(), key); builder.token(COLON.into(), ":"); builder.token(WHITESPACE.into(), " "); for (i, line) in value.split("\n").enumerate() { if i > 0 { builder.token(INDENT.into(), " "); } builder.token(VALUE.into(), line); builder.token(NEWLINE.into(), "\n"); } builder.finish_node(); Entry(SyntaxNode::new_root(builder.finish())) } #[must_use] pub fn wrap_and_sort( &mut self, mut indentation: Indentation, immediate_empty_line: bool, max_line_length_one_liner: Option, ) -> Entry { let mut builder = GreenNodeBuilder::new(); let mut content = vec![]; builder.start_node(ENTRY.into()); for c in self.0.children_with_tokens() { let text = c.as_token().map(|t| t.text()); match c.kind() { KEY => { builder.token(KEY.into(), text.unwrap()); if indentation == Indentation::FieldNameLength { indentation = Indentation::FixedIndentation(text.unwrap().len() as u32); } } COLON => { builder.token(COLON.into(), ":"); } INDENT => { // Discard original whitespace } ERROR | COMMENT | VALUE | WHITESPACE | NEWLINE => { content.push(c); } EMPTY_LINE | ENTRY | ROOT | PARAGRAPH => unreachable!(), } } let indentation = if let Indentation::FixedIndentation(i) = indentation { i } else { 1 }; assert!(indentation > 0); // Strip trailing whitespace and newlines while let Some(c) = content.last() { if c.kind() == NEWLINE || c.kind() == WHITESPACE { content.pop(); } else { break; } } let first_line_len = content .iter() .take_while(|c| c.kind() != NEWLINE) .map(|c| c.as_token().unwrap().text().len()) .sum::() + self.key().map_or(0, |k| k.len()) + 2 /* ": " */; let has_newline = content.iter().any(|c| c.kind() == NEWLINE); let mut last_was_newline = false; if max_line_length_one_liner .map(|mll| first_line_len <= mll) .unwrap_or(false) && !has_newline { for c in content { builder.token(c.kind().into(), 
// NOTE(review): from here to the end of the file the text appears
// extraction-damaged — every `<...>` span (generic parameters such as
// `Option<String>`, `collect::<Vec<_>>()`, email addresses inside string
// literals, and large chunks of the `r###"..."###` syntax-tree dumps,
// including their terminators) was stripped.  The code below is kept
// byte-identical; restore from the upstream crate before compiling.
c.as_token().unwrap().text()); } } else { if immediate_empty_line && has_newline { builder.token(NEWLINE.into(), "\n"); last_was_newline = true; } else { builder.token(WHITESPACE.into(), " "); } // Strip leading whitespace and newlines while let Some(c) = content.first() { if c.kind() == NEWLINE || c.kind() == WHITESPACE { content.remove(0); } else { break; } } for c in content { if last_was_newline { builder.token(INDENT.into(), &" ".repeat(indentation as usize)); } builder.token(c.kind().into(), c.as_token().unwrap().text()); last_was_newline = c.kind() == NEWLINE; } } if !last_was_newline { builder.token(NEWLINE.into(), "\n"); } builder.finish_node(); Self(SyntaxNode::new_root(builder.finish()).clone_for_update()) } pub fn key(&self) -> Option { self.0 .children_with_tokens() .filter_map(|it| it.into_token()) .find(|it| it.kind() == KEY) .map(|it| it.text().to_string()) } pub fn value(&self) -> String { self.0 .children_with_tokens() .filter_map(|it| it.into_token()) .filter(|it| it.kind() == VALUE) .map(|it| it.text().to_string()) .collect::>() .join("\n") } pub fn detach(&mut self) { self.0.detach(); } } impl FromStr for Deb822 { type Err = ParseError; fn from_str(s: &str) -> Result { let parsed = parse(s); if parsed.errors.is_empty() { Ok(parsed.root().clone_for_update()) } else { Err(ParseError(parsed.errors)) } } } #[test] fn test_parse_simple() { const CONTROLV1: &str = r#"Source: foo Maintainer: Foo Bar Section: net # This is a comment Package: foo Architecture: all Depends: bar, blah Description: This is a description And it is .
multiple lines "#; let parsed = parse(CONTROLV1); let node = parsed.syntax(); assert_eq!( format!("{:#?}", node), r###"ROOT@0..203 PARAGRAPH@0..63 ENTRY@0..12 KEY@0..6 "Source" COLON@6..7 ":" WHITESPACE@7..8 " " VALUE@8..11 "foo" NEWLINE@11..12 "\n" ENTRY@12..50 KEY@12..22 "Maintainer" COLON@22..23 ":" WHITESPACE@23..24 " " VALUE@24..49 "Foo Bar ::new()); let root = parsed.root(); assert_eq!(root.paragraphs().count(), 2); let source = root.paragraphs().next().unwrap(); assert_eq!( source.keys().collect::>(), vec!["Source", "Maintainer", "Section"] ); assert_eq!(source.get("Source").as_deref(), Some("foo")); assert_eq!( source.get("Maintainer").as_deref(), Some("Foo Bar ") ); assert_eq!(source.get("Section").as_deref(), Some("net")); assert_eq!( source.items().collect::>(), vec![ ("Source".into(), "foo".into()), ("Maintainer".into(), "Foo Bar ".into()), ("Section".into(), "net".into()), ] ); let binary = root.paragraphs().nth(1).unwrap(); assert_eq!( binary.keys().collect::>(), vec!["Package", "Architecture", "Depends", "Description"] ); assert_eq!(binary.get("Package").as_deref(), Some("foo")); assert_eq!(binary.get("Architecture").as_deref(), Some("all")); assert_eq!(binary.get("Depends").as_deref(), Some("bar,\nblah")); assert_eq!( binary.get("Description").as_deref(), Some("This is a description\nAnd it is\n.\nmultiple\nlines") ); assert_eq!(node.text(), CONTROLV1); } #[test] fn test_with_trailing_whitespace() { const CONTROLV1: &str = r#"Source: foo Maintainer: Foo Bar "#; let parsed = parse(CONTROLV1); let node = parsed.syntax(); assert_eq!( format!("{:#?}", node), r###"ROOT@0..52 PARAGRAPH@0..50 ENTRY@0..12 KEY@0..6 "Source" COLON@6..7 ":" WHITESPACE@7..8 " " VALUE@8..11 "foo" NEWLINE@11..12 "\n" ENTRY@12..50 KEY@12..22 "Maintainer" COLON@22..23 ":" WHITESPACE@23..24 " " VALUE@24..49 "Foo Bar ::new()); let root = parsed.root(); assert_eq!(root.paragraphs().count(), 1); let source = root.paragraphs().next().unwrap(); assert_eq!( source.items().collect::>(),
vec![ ("Source".into(), "foo".into()), ("Maintainer".into(), "Foo Bar ".into()), ] ); } #[cfg(test)] mod tests { #[test] fn test_parse() { let d: super::Deb822 = r#"Source: foo Maintainer: Foo Bar Section: net Package: foo Architecture: all Depends: libc6 Description: This is a description With details "# .parse() .unwrap(); let mut ps = d.paragraphs(); let p = ps.next().unwrap(); assert_eq!(p.get("Source").as_deref(), Some("foo")); assert_eq!( p.get("Maintainer").as_deref(), Some("Foo Bar ") ); assert_eq!(p.get("Section").as_deref(), Some("net")); let b = ps.next().unwrap(); assert_eq!(b.get("Package").as_deref(), Some("foo")); } #[test] fn test_after_multi_line() { let d: super::Deb822 = r#"Source: golang-github-blah-blah Section: devel Priority: optional Standards-Version: 4.2.0 Maintainer: Some Maintainer Build-Depends: debhelper (>= 11~), dh-golang, golang-any Homepage: https://github.com/j-keck/arping "# .parse() .unwrap(); let mut ps = d.paragraphs(); let p = ps.next().unwrap(); assert_eq!(p.get("Source").as_deref(), Some("golang-github-blah-blah")); assert_eq!(p.get("Section").as_deref(), Some("devel")); assert_eq!(p.get("Priority").as_deref(), Some("optional")); assert_eq!(p.get("Standards-Version").as_deref(), Some("4.2.0")); assert_eq!( p.get("Maintainer").as_deref(), Some("Some Maintainer ") ); assert_eq!( p.get("Build-Depends").as_deref(), Some("debhelper (>= 11~),\ndh-golang,\ngolang-any") ); assert_eq!( p.get("Homepage").as_deref(), Some("https://github.com/j-keck/arping") ); } #[test] fn test_remove_field() { let d: super::Deb822 = r#"Source: foo Maintainer: Foo Bar Section: net Package: foo Architecture: all Depends: libc6 Description: This is a description With details "# .parse() .unwrap(); let mut ps = d.paragraphs(); let mut p = ps.next().unwrap(); p.insert("Foo", "Bar"); p.remove("Section"); p.remove("Nonexistant"); assert_eq!(p.get("Foo").as_deref(), Some("Bar")); assert_eq!( p.to_string(), r#"Source: foo Maintainer: Foo Bar Foo: Bar "# ); }
// NOTE(review): the string literals in the tests below lost their `<...>`
// spans during extraction (e.g. "Foo Bar " was presumably
// "Foo Bar <some@email>", and `collect::>()` was `collect::<Vec<_>>()`).
// Kept byte-identical — restore the literals from the upstream crate
// rather than guessing the addresses.
#[test] fn test_rename_field() { let d: super::Deb822 = r#"Source: foo Vcs-Browser: https://salsa.debian.org/debian/foo "# .parse() .unwrap(); let mut ps = d.paragraphs(); let mut p = ps.next().unwrap(); assert!(p.rename("Vcs-Browser", "Homepage")); assert_eq!( p.to_string(), r#"Source: foo Homepage: https://salsa.debian.org/debian/foo "# ); assert_eq!( p.get("Homepage").as_deref(), Some("https://salsa.debian.org/debian/foo") ); assert_eq!(p.get("Vcs-Browser").as_deref(), None); } #[test] fn test_set_field() { let d: super::Deb822 = r#"Source: foo Maintainer: Foo Bar "# .parse() .unwrap(); let mut ps = d.paragraphs(); let mut p = ps.next().unwrap(); p.insert("Maintainer", "Somebody Else "); assert_eq!( p.get("Maintainer").as_deref(), Some("Somebody Else ") ); assert_eq!( p.to_string(), r#"Source: foo Maintainer: Somebody Else "# ); } #[test] fn test_set_new_field() { let d: super::Deb822 = r#"Source: foo "# .parse() .unwrap(); let mut ps = d.paragraphs(); let mut p = ps.next().unwrap(); p.insert("Maintainer", "Somebody "); assert_eq!( p.get("Maintainer").as_deref(), Some("Somebody ") ); assert_eq!( p.to_string(), r#"Source: foo Maintainer: Somebody "# ); } #[test] fn test_add_paragraph() { let mut d = super::Deb822::new(); let mut p = d.add_paragraph(); p.insert("Foo", "Bar"); assert_eq!(p.get("Foo").as_deref(), Some("Bar")); assert_eq!( p.to_string(), r#"Foo: Bar "# ); assert_eq!( d.to_string(), r#"Foo: Bar "# ); let mut p = d.add_paragraph(); p.insert("Foo", "Blah"); assert_eq!(p.get("Foo").as_deref(), Some("Blah")); assert_eq!( d.to_string(), r#"Foo: Bar Foo: Blah "# ); } #[test] fn test_multiline_entry() { use super::SyntaxKind::*; use rowan::ast::AstNode; let entry = super::Entry::new("foo", "bar\nbaz"); let tokens: Vec<_> = entry .syntax() .descendants_with_tokens() .filter_map(|tok| tok.into_token()) .collect(); assert_eq!("foo: bar\n baz\n", entry.to_string()); assert_eq!("bar\nbaz", entry.value()); assert_eq!( vec![ (KEY, "foo"), (COLON, ":"), (WHITESPACE,
" "), (VALUE, "bar"), (NEWLINE, "\n"), (INDENT, " "), (VALUE, "baz"), (NEWLINE, "\n"), ], tokens .iter() .map(|token| (token.kind(), token.text())) .collect::>() ); } #[test] fn test_apt_entry() { let text = r#"Package: cvsd Binary: cvsd Version: 1.0.24 Maintainer: Arthur de Jong Build-Depends: debhelper (>= 9), po-debconf Architecture: any Standards-Version: 3.9.3 Format: 3.0 (native) Files: b7a7d67a02974c52c408fdb5e118406d 890 cvsd_1.0.24.dsc b73ee40774c3086cb8490cdbb96ac883 258139 cvsd_1.0.24.tar.gz Vcs-Browser: http://arthurdejong.org/viewvc/cvsd/ Vcs-Cvs: :pserver:anonymous@arthurdejong.org:/arthur/ Checksums-Sha256: a7bb7a3aacee19cd14ce5c26cb86e348b1608e6f1f6e97c6ea7c58efa440ac43 890 cvsd_1.0.24.dsc 46bc517760c1070ae408693b89603986b53e6f068ae6bdc744e2e830e46b8cba 258139 cvsd_1.0.24.tar.gz Homepage: http://arthurdejong.org/cvsd/ Package-List: cvsd deb vcs optional Directory: pool/main/c/cvsd Priority: source Section: vcs "#; let d: super::Deb822 = text.parse().unwrap(); let p = d.paragraphs().next().unwrap(); assert_eq!(p.get("Binary").as_deref(), Some("cvsd")); assert_eq!(p.get("Version").as_deref(), Some("1.0.24")); assert_eq!( p.get("Maintainer").as_deref(), Some("Arthur de Jong ") ); } #[test] fn test_format() { let d: super::Deb822 = r#"Source: foo Maintainer: Foo Bar Section: net Blah: blah # comment Multi-Line: Ahoi! Matey! "# .parse() .unwrap(); let mut ps = d.paragraphs(); let mut p = ps.next().unwrap(); let result = p.wrap_and_sort( super::Indentation::FieldNameLength, false, None, None::<&dyn Fn(&super::Entry, &super::Entry) -> std::cmp::Ordering>, ); assert_eq!( result.to_string(), r#"Source: foo Maintainer: Foo Bar Section: net Blah: blah # comment Multi-Line: Ahoi! Matey!
"# ); } #[test] fn test_format_sort_paragraphs() { let d: super::Deb822 = r#"Source: foo Maintainer: Foo Bar # This is a comment Source: bar Maintainer: Bar Foo "# .parse() .unwrap(); let result = d.wrap_and_sort( super::Indentation::FieldNameLength, false, None, Some(&|a: &super::Paragraph, b: &super::Paragraph| { a.get("Source").cmp(&b.get("Source")) }), None, ); assert_eq!( result.to_string(), r#"# This is a comment Source: bar Maintainer: Bar Foo Source: foo Maintainer: Foo Bar "#, ); } #[test] fn test_format_sort_fields() { let d: super::Deb822 = r#"Source: foo Maintainer: Foo Bar Build-Depends: debhelper (>= 9), po-debconf Homepage: https://example.com/ "# .parse() .unwrap(); let result = d.wrap_and_sort( super::Indentation::FieldNameLength, false, None, None, Some(&|a: &super::Entry, b: &super::Entry| a.key().cmp(&b.key())), ); assert_eq!( result.to_string(), r#"Build-Depends: debhelper (>= 9), po-debconf Homepage: https://example.com/ Maintainer: Foo Bar Source: foo "# ); } }