deb822-fast-0.2.3/.cargo_vcs_info.json0000644000000001510000000000100127540ustar { "git": { "sha1": "96c86ae791228826a9be7c8b9926240769615206" }, "path_in_vcs": "deb822-fast" }deb822-fast-0.2.3/Cargo.lock0000644000000025070000000000100107360ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 4 [[package]] name = "deb822-derive" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86bf2d0fa4ce2457e94bd7efb15aeadc115297f04b660bd0da706729e0d91442" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "deb822-fast" version = "0.2.3" dependencies = [ "deb822-derive", ] [[package]] name = "proc-macro2" version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] [[package]] name = "syn" version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" deb822-fast-0.2.3/Cargo.toml0000644000000024070000000000100107600ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. 
# # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "deb822-fast" version = "0.2.3" authors = ["Jelmer Vernooij "] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Fast parsing of Debian control files in the deb822 format" homepage = "https://github.com/jelmer/deb822-lossless" readme = "README.md" keywords = [ "debian", "deb822", "control", "rfc822", ] categories = ["parser-implementations"] license = "Apache-2.0" repository = "https://github.com/jelmer/deb822-lossless" [features] derive = ["dep:deb822-derive"] [lib] name = "deb822_fast" path = "src/lib.rs" [[test]] name = "field_formatting" path = "tests/field_formatting.rs" required-features = ["derive"] [dependencies.deb822-derive] version = "^0.3" optional = true deb822-fast-0.2.3/Cargo.toml.orig000064400000000000000000000011321046102023000144330ustar 00000000000000[package] name = "deb822-fast" version = "0.2.3" edition = "2021" repository.workspace = true homepage.workspace = true description = "Fast parsing of Debian control files in the deb822 format" authors = ["Jelmer Vernooij "] categories = ["parser-implementations"] license = "Apache-2.0" keywords = ["debian", "deb822", "control", "rfc822"] [dependencies] deb822-derive = { path = "../deb822-derive", version = "^0.3", optional = true } [features] derive = ["dep:deb822-derive"] [[test]] path = "tests/field_formatting.rs" name = "field_formatting" required-features = ["derive"] deb822-fast-0.2.3/README.md000064400000000000000000000012731046102023000130310ustar 00000000000000Lossy parser for deb822 format. This parser is lossy in the sense that it will discard whitespace and comments in the input. This parser is optimized for speed and memory usage. 
It provides two APIs: - **Owned API (default)**: Returns owned `String` values. Easy to use, no lifetime management. - **Borrowed API** (`borrowed` module): Returns borrowed string slices. Lower allocation overhead (avoids String allocations for field data, but still allocates Vec structures for paragraphs and fields). Requires lifetime management. For editing purposes where you need to preserve formatting, whitespace and comments, you may want to use a more feature-complete parser like ``deb822-lossless``. deb822-fast-0.2.3/src/borrowed.rs000064400000000000000000000665011046102023000145370ustar 00000000000000//! Low-allocation borrowed API for deb822 parsing. //! //! This module provides a borrowed-data parser that avoids allocating owned Strings, //! instead returning borrowed string slices from the source. This is significantly //! faster than the owned API but requires lifetime management. //! //! ## Allocations //! //! While string data is borrowed (zero string allocations), the parser does allocate: //! - `Vec` to hold paragraphs //! - `Vec` to hold fields within each paragraph //! - `Vec<&str>` for multi-line field values (single-line fields avoid this) //! //! Despite these allocations, this is still much faster than the owned API since //! it avoids copying all the field names and values into owned Strings. use crate::Error; /// Field value representation that avoids allocation for single-line fields. #[derive(Debug, Clone, PartialEq, Eq)] enum FieldValue<'a> { /// Single-line field (no allocation) Single(&'a str), /// Multi-line field with continuation lines Multi(Vec<&'a str>), } /// A borrowed field that references data in the source string. /// /// The name is always borrowed. The value is either a single line or multiple lines. /// Single-line fields (the common case) avoid Vec allocation entirely. 
///
/// # Examples
///
/// ```
/// use deb822_fast::borrowed::parse_borrowed;
///
/// let input = "Package: test\nDescription: short\n long description\n";
/// let paragraphs = parse_borrowed(input).unwrap();
///
/// let pkg_field = paragraphs[0].get_field("Package").unwrap();
/// assert_eq!(pkg_field.name(), "Package");
/// assert_eq!(pkg_field.as_single_line(), Some("test"));
///
/// let desc_field = paragraphs[0].get_field("Description").unwrap();
/// assert_eq!(desc_field.lines(), &["short", "long description"]);
/// assert_eq!(desc_field.join(), "short\nlong description");
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BorrowedField<'a> {
    /// The field name (borrowed from source)
    name: &'a str,
    /// The field value (single or multiple lines)
    value: FieldValue<'a>,
}

impl<'a> BorrowedField<'a> {
    /// Get the field name, exactly as it is spelled in the source.
    pub fn name(&self) -> &'a str {
        self.name
    }

    /// Get the value as a single line (for single-line fields).
    ///
    /// Returns `None` if the field has continuation lines.
    pub fn as_single_line(&self) -> Option<&'a str> {
        match self.value {
            FieldValue::Single(line) => Some(line),
            FieldValue::Multi(_) => None,
        }
    }

    /// Get the value lines as a slice.
    ///
    /// A single-line field yields a one-element slice; no allocation takes
    /// place in either case.
    ///
    /// # Examples
    ///
    /// ```
    /// use deb822_fast::borrowed::parse_borrowed;
    ///
    /// let input = "Description: line1\n line2\n line3\n";
    /// let paragraphs = parse_borrowed(input).unwrap();
    /// let field = paragraphs[0].get_field("Description").unwrap();
    ///
    /// assert_eq!(field.lines(), &["line1", "line2", "line3"]);
    /// ```
    pub fn lines(&self) -> &[&'a str] {
        match &self.value {
            FieldValue::Single(line) => std::slice::from_ref(line),
            FieldValue::Multi(lines) => lines.as_slice(),
        }
    }

    /// Join the value lines into an owned String, separated by newlines.
    ///
    /// # Examples
    ///
    /// ```
    /// use deb822_fast::borrowed::parse_borrowed;
    ///
    /// let input = "Description: line1\n line2\n";
    /// let paragraphs = parse_borrowed(input).unwrap();
    /// let field = paragraphs[0].get_field("Description").unwrap();
    ///
    /// assert_eq!(field.join(), "line1\nline2");
    /// ```
    pub fn join(&self) -> String {
        match &self.value {
            FieldValue::Single(line) => line.to_string(),
            FieldValue::Multi(lines) => lines.join("\n"),
        }
    }

    /// Check if this is a single-line field.
    pub fn is_single_line(&self) -> bool {
        matches!(self.value, FieldValue::Single(_))
    }

    /// Check if this is a multi-line field.
    pub fn is_multi_line(&self) -> bool {
        matches!(self.value, FieldValue::Multi(_))
    }
}

/// A borrowed paragraph that references data in the source string.
///
/// # Examples
///
/// ```
/// use deb822_fast::borrowed::parse_borrowed;
///
/// let input = "Package: hello\nVersion: 1.0\nDescription: short\n long description\n";
/// let paragraphs = parse_borrowed(input).unwrap();
/// let para = &paragraphs[0];
///
/// // Get single-line fields
/// assert_eq!(para.get_single("Package"), Some("hello"));
/// assert_eq!(para.get_single("Version"), Some("1.0"));
///
/// // Get multi-line field as lines
/// let desc = para.get("Description").unwrap();
/// assert_eq!(desc, &["short", "long description"]);
///
/// // Iterate over all fields
/// for field in para.iter() {
///     println!("{}: {:?}", field.name(), field.lines());
/// }
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BorrowedParagraph<'a> {
    fields: Vec<BorrowedField<'a>>,
}

impl<'a> BorrowedParagraph<'a> {
    /// Get a field by name.
    ///
    /// Field names are compared case-insensitively (ASCII only). If the
    /// paragraph holds several fields with the same name, the first one wins.
    pub fn get_field(&self, name: &str) -> Option<&BorrowedField<'a>> {
        self.fields
            .iter()
            .find(|field| field.name.eq_ignore_ascii_case(name))
    }

    /// Get a field value by name as lines.
    ///
    /// Field names are compared case-insensitively.
    ///
    /// # Examples
    ///
    /// ```
    /// use deb822_fast::borrowed::parse_borrowed;
    ///
    /// let input = "Package: test\nDescription: short\n long description\n";
    /// let paragraphs = parse_borrowed(input).unwrap();
    /// let para = &paragraphs[0];
    ///
    /// // Single-line field returns slice with one element
    /// assert_eq!(para.get("Package"), Some(&["test"][..]));
    ///
    /// // Multi-line field returns all lines
    /// let desc = para.get("Description").unwrap();
    /// assert_eq!(desc, &["short", "long description"]);
    /// ```
    pub fn get(&self, name: &str) -> Option<&[&'a str]> {
        // Same lookup rule as `get_field`, so the two can never disagree.
        self.get_field(name).map(|field| field.lines())
    }

    /// Get a single-line field value by name.
    ///
    /// Returns None if the field doesn't exist or has multiple lines.
    /// Field names are compared case-insensitively.
    ///
    /// # Examples
    ///
    /// ```
    /// use deb822_fast::borrowed::parse_borrowed;
    ///
    /// let input = "Package: test\nDescription: short\n long\n";
    /// let paragraphs = parse_borrowed(input).unwrap();
    /// let para = &paragraphs[0];
    ///
    /// // Works for single-line fields
    /// assert_eq!(para.get_single("Package"), Some("test"));
    ///
    /// // Returns None for multi-line fields
    /// assert_eq!(para.get_single("Description"), None);
    ///
    /// // Case-insensitive
    /// assert_eq!(para.get_single("package"), Some("test"));
    /// ```
    pub fn get_single(&self, name: &str) -> Option<&'a str> {
        self.get_field(name)
            .and_then(|field| field.as_single_line())
    }

    /// Iterate over all fields, in source order.
    pub fn iter(&self) -> impl Iterator<Item = &BorrowedField<'a>> + '_ {
        self.fields.iter()
    }

    /// Number of fields in the paragraph.
    pub fn len(&self) -> usize {
        self.fields.len()
    }

    /// Check if the paragraph is empty.
    pub fn is_empty(&self) -> bool {
        self.fields.is_empty()
    }
}

/// Low-allocation parser that returns borrowed paragraphs.
///
/// This parser borrows all string data from the input, avoiding String allocations.
/// It does allocate Vec structures to hold the paragraph and field lists.
/// /// # Examples /// /// ``` /// use deb822_fast::borrowed::BorrowedParser; /// /// let input = "Package: hello\nVersion: 1.0\n\nPackage: world\nVersion: 2.0\n"; /// let parser = BorrowedParser::new(input); /// let paragraphs = parser.parse_all().unwrap(); /// /// assert_eq!(paragraphs.len(), 2); /// assert_eq!(paragraphs[0].get_single("Package"), Some("hello")); /// assert_eq!(paragraphs[1].get_single("Package"), Some("world")); /// ``` pub struct BorrowedParser<'a> { input: &'a str, bytes: &'a [u8], pos: usize, } impl<'a> BorrowedParser<'a> { /// Create a new borrowed parser from a string slice. pub fn new(input: &'a str) -> Self { Self { input, bytes: input.as_bytes(), pos: 0, } } /// Parse all paragraphs and return them as a Vec. /// /// Note: This still allocates the Vec and field lists, but the strings /// themselves are borrowed. pub fn parse_all(mut self) -> Result>, Error> { let mut paragraphs = Vec::with_capacity(16); while let Some(para) = self.next_paragraph()? { paragraphs.push(para); } Ok(paragraphs) } /// Parse the next paragraph, returning None if at end. 
fn next_paragraph(&mut self) -> Result>, Error> { let len = self.bytes.len(); // Skip leading whitespace and comments between paragraphs loop { if self.pos >= len { return Ok(None); } let b = self.bytes[self.pos]; if b == b'#' { // Skip comment line while self.pos < len && self.bytes[self.pos] != b'\n' { self.pos += 1; } if self.pos < len { self.pos += 1; } } else if b == b'\n' || b == b'\r' { // Skip newline self.pos += 1; } else if b == b' ' || b == b'\t' { // Skip whitespace-only line (but check if it contains actual content) let line_start = self.pos; while self.pos < len && (self.bytes[self.pos] == b' ' || self.bytes[self.pos] == b'\t') { self.pos += 1; } if self.pos < len && self.bytes[self.pos] != b'\n' && self.bytes[self.pos] != b'\r' { // There's non-whitespace content after spaces - this is an error while self.pos < len && self.bytes[self.pos] != b'\n' { self.pos += 1; } let token = &self.input[line_start..self.pos]; return Err(Error::UnexpectedToken(token.to_string())); } // Just whitespace on the line, skip past it if self.pos < len { self.pos += 1; // Skip newline } } else { // Found start of content break; } } let mut fields: Vec> = Vec::with_capacity(8); loop { if self.pos >= len { break; } // Check for blank line (end of paragraph) if self.bytes[self.pos] == b'\n' { self.pos += 1; break; } // Skip comment lines if self.bytes[self.pos] == b'#' { while self.pos < len && self.bytes[self.pos] != b'\n' { self.pos += 1; } if self.pos < len { self.pos += 1; } continue; } // Check for continuation line if self.bytes[self.pos] == b' ' || self.bytes[self.pos] == b'\t' { if fields.is_empty() { // Indented line before any field - this is an error let line_start = self.pos; while self.pos < len && self.bytes[self.pos] != b'\n' { self.pos += 1; } let token = &self.input[line_start..self.pos]; return Err(Error::UnexpectedToken(token.to_string())); } // This is a continuation line - append to the last field's value vec // Skip all leading whitespace (deb822 format 
strips leading spaces) while self.pos < len && (self.bytes[self.pos] == b' ' || self.bytes[self.pos] == b'\t') { self.pos += 1; } // Read the continuation line let line_start = self.pos; while self.pos < len && self.bytes[self.pos] != b'\n' { self.pos += 1; } if let Some(last_field) = fields.last_mut() { // Add the continuation line - convert Single to Multi if needed match &mut last_field.value { FieldValue::Single(first) => { // Convert to Multi with two lines let first = *first; last_field.value = FieldValue::Multi(vec![first, &self.input[line_start..self.pos]]); } FieldValue::Multi(lines) => { lines.push(&self.input[line_start..self.pos]); } } } if self.pos < len { self.pos += 1; // Skip newline } continue; } // Parse field name let name_start = self.pos; while self.pos < len && self.bytes[self.pos] != b':' && self.bytes[self.pos] != b'\n' { self.pos += 1; } if self.pos >= len || self.bytes[self.pos] != b':' { // Invalid line - return error let line_start = name_start; while self.pos < len && self.bytes[self.pos] != b'\n' { self.pos += 1; } let token = &self.input[line_start..self.pos]; return Err(Error::UnexpectedToken(token.to_string())); } let name = &self.input[name_start..self.pos]; // Check for empty field name if name.is_empty() { let line_start = name_start; let mut end = self.pos; while end < len && self.bytes[end] != b'\n' { end += 1; } let token = &self.input[line_start..end]; return Err(Error::UnexpectedToken(token.to_string())); } self.pos += 1; // Skip colon // Skip whitespace after colon while self.pos < len && (self.bytes[self.pos] == b' ' || self.bytes[self.pos] == b'\t') { self.pos += 1; } // Parse field value (first line) let value_start = self.pos; while self.pos < len && self.bytes[self.pos] != b'\n' { self.pos += 1; } let value = FieldValue::Single(&self.input[value_start..self.pos]); fields.push(BorrowedField { name, value }); if self.pos < len { self.pos += 1; // Skip newline } } if fields.is_empty() { Ok(None) } else { 
Ok(Some(BorrowedParagraph { fields })) } } } /// Iterator that yields borrowed paragraphs. pub struct BorrowedParagraphIter<'a> { parser: BorrowedParser<'a>, done: bool, } impl<'a> BorrowedParagraphIter<'a> { /// Create a new iterator from input. pub fn new(input: &'a str) -> Self { Self { parser: BorrowedParser::new(input), done: false, } } } impl<'a> Iterator for BorrowedParagraphIter<'a> { type Item = Result, Error>; fn next(&mut self) -> Option { if self.done { return None; } match self.parser.next_paragraph() { Ok(Some(para)) => Some(Ok(para)), Ok(None) => { self.done = true; None } Err(e) => { self.done = true; Some(Err(e)) } } } } /// Parse borrowed paragraphs from input. /// /// This is the main entry point for parsing deb822 data with the borrowed API. /// All string data is borrowed from the input without allocation. /// /// # Examples /// /// ``` /// use deb822_fast::borrowed::parse_borrowed; /// /// let input = r#"Package: hello /// Version: 2.10 /// Description: classic greeting program /// The GNU hello program produces a familiar, friendly greeting. /// /// Package: world /// Version: 1.0 /// "#; /// /// let paragraphs = parse_borrowed(input).unwrap(); /// assert_eq!(paragraphs.len(), 2); /// assert_eq!(paragraphs[0].get_single("Package"), Some("hello")); /// assert_eq!(paragraphs[0].get_single("Version"), Some("2.10")); /// /// // Multi-line field /// let desc = paragraphs[0].get("Description").unwrap(); /// assert_eq!(desc[0], "classic greeting program"); /// assert_eq!(desc[1], "The GNU hello program produces a familiar, friendly greeting."); /// ``` pub fn parse_borrowed(input: &str) -> Result>, Error> { BorrowedParser::new(input).parse_all() } /// Iterate over borrowed paragraphs. /// /// Returns an iterator that yields paragraphs one at a time without /// allocating a Vec to hold all paragraphs upfront. 
/// /// # Examples /// /// ``` /// use deb822_fast::borrowed::iter_paragraphs_borrowed; /// /// let input = "Package: test1\nVersion: 1.0\n\nPackage: test2\nVersion: 2.0\n"; /// /// for result in iter_paragraphs_borrowed(input) { /// let para = result.unwrap(); /// let pkg = para.get_single("Package").unwrap(); /// let ver = para.get_single("Version").unwrap(); /// println!("{}: {}", pkg, ver); /// } /// ``` pub fn iter_paragraphs_borrowed(input: &str) -> BorrowedParagraphIter<'_> { BorrowedParagraphIter::new(input) } #[cfg(test)] mod tests { use super::*; #[test] fn test_borrowed_parser_simple() { let input = "Package: hello\nVersion: 1.0\n\nPackage: world\nVersion: 2.0\n"; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 2); assert_eq!(paragraphs[0].get_single("Package"), Some("hello")); assert_eq!(paragraphs[0].get_single("Version"), Some("1.0")); assert_eq!(paragraphs[1].get_single("Package"), Some("world")); assert_eq!(paragraphs[1].get_single("Version"), Some("2.0")); } #[test] fn test_borrowed_iter() { let input = "Package: test\nVersion: 1.0\n"; let mut iter = iter_paragraphs_borrowed(input); let para = iter.next().unwrap().unwrap(); assert_eq!(para.get_single("Package"), Some("test")); assert!(iter.next().is_none()); } #[test] fn test_borrowed_field_iter() { let input = "A: 1\nB: 2\nC: 3\n"; let paragraphs = parse_borrowed(input).unwrap(); let fields: Vec<_> = paragraphs[0].iter().collect(); assert_eq!(fields.len(), 3); assert_eq!(fields[0].name(), "A"); assert_eq!(fields[0].value, FieldValue::Single("1")); assert_eq!(fields[0].as_single_line(), Some("1")); } #[test] fn test_borrowed_multiline_values() { let input = "Package: test\nDescription: short desc\n continuation line\n another line\n"; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 1); assert_eq!(paragraphs[0].get_single("Package"), Some("test")); // For multiline fields, get() returns a slice of lines let desc_lines = 
paragraphs[0].get("Description").unwrap(); assert_eq!(desc_lines.len(), 3); assert_eq!(desc_lines[0], "short desc"); assert_eq!(desc_lines[1], "continuation line"); assert_eq!(desc_lines[2], "another line"); // Can also join to get the full value assert_eq!( paragraphs[0].get_field("Description").unwrap().join(), "short desc\ncontinuation line\nanother line" ); } #[test] fn test_borrowed_with_comments() { let input = "# Comment at start\nPackage: hello\n# Mid comment\nVersion: 1.0\n"; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 1); assert_eq!(paragraphs[0].get_single("Package"), Some("hello")); assert_eq!(paragraphs[0].get_single("Version"), Some("1.0")); } #[test] fn test_borrowed_empty_value() { let input = "Package: test\nDescription:\n extra line\n"; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 1); let desc_lines = paragraphs[0].get("Description").unwrap(); assert_eq!(desc_lines.len(), 2); assert_eq!(desc_lines[0], ""); assert_eq!(desc_lines[1], "extra line"); } #[test] fn test_borrowed_multiple_paragraphs() { let input = "A: 1\n\nB: 2\n\n\nC: 3\n"; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 3); assert_eq!(paragraphs[0].get_single("A"), Some("1")); assert_eq!(paragraphs[1].get_single("B"), Some("2")); assert_eq!(paragraphs[2].get_single("C"), Some("3")); } #[test] fn test_borrowed_error_unexpected_indent() { let input = " Indented: value\n"; let result = parse_borrowed(input); assert!(matches!(result, Err(Error::UnexpectedToken(_)))); } #[test] fn test_borrowed_error_missing_colon() { let input = "Package test\n"; let result = parse_borrowed(input); assert!(matches!(result, Err(Error::UnexpectedToken(_)))); } #[test] fn test_borrowed_error_empty_field_name() { let input = "Package: test\n:\n"; let result = parse_borrowed(input); assert!(matches!(result, Err(Error::UnexpectedToken(_)))); } #[test] fn test_borrowed_continuation_with_colon() { let input = 
"Package: test\nDescription: short\n line: with colon\n"; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 1); let desc_lines = paragraphs[0].get("Description").unwrap(); assert_eq!(desc_lines.len(), 2); assert_eq!(desc_lines[0], "short"); assert_eq!(desc_lines[1], "line: with colon"); } #[test] fn test_borrowed_paragraph_len() { let input = "A: 1\nB: 2\nC: 3\n"; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs[0].len(), 3); assert!(!paragraphs[0].is_empty()); } #[test] fn test_borrowed_iter_paragraphs() { let input = "A: 1\n\nB: 2\n\nC: 3\n"; let result: Result, _> = iter_paragraphs_borrowed(input).collect(); let paragraphs = result.unwrap(); assert_eq!(paragraphs.len(), 3); assert_eq!(paragraphs[0].get_single("A"), Some("1")); assert_eq!(paragraphs[1].get_single("B"), Some("2")); assert_eq!(paragraphs[2].get_single("C"), Some("3")); } #[test] fn test_borrowed_empty_input() { let input = ""; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 0); } #[test] fn test_borrowed_only_whitespace() { let input = "\n\n \n\t\n"; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 0); } #[test] fn test_borrowed_only_comments() { let input = "# Comment 1\n# Comment 2\n"; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 0); } #[test] fn test_borrowed_complex_debian_control() { let input = r#"Source: test-package Section: utils Priority: optional Maintainer: Test User Build-Depends: debhelper (>= 10) Standards-Version: 4.1.3 Package: test-package Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends} Description: A test package This is a longer description that spans multiple lines. . It even has a paragraph break. 
"#; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 2); // Source paragraph assert_eq!(paragraphs[0].get_single("Source"), Some("test-package")); assert_eq!(paragraphs[0].get_single("Section"), Some("utils")); assert_eq!(paragraphs[0].get_single("Priority"), Some("optional")); assert_eq!( paragraphs[0].get_single("Maintainer"), Some("Test User ") ); // Binary paragraph assert_eq!(paragraphs[1].get_single("Package"), Some("test-package")); assert_eq!(paragraphs[1].get_single("Architecture"), Some("any")); // Description is multi-line let desc_lines = paragraphs[1].get("Description").unwrap(); assert_eq!(desc_lines[0], "A test package"); assert_eq!(desc_lines[1], "This is a longer description"); assert_eq!(desc_lines[2], "that spans multiple lines."); assert_eq!(desc_lines[3], "."); assert_eq!(desc_lines[4], "It even has a paragraph break."); let full_desc = paragraphs[1].get_field("Description").unwrap().join(); assert!(full_desc.contains("test package")); assert!(full_desc.contains("paragraph break")); } #[test] fn test_borrowed_case_insensitive_get() { let input = "Package: test\nVersion: 1.0\n"; let paragraphs = parse_borrowed(input).unwrap(); assert_eq!(paragraphs.len(), 1); let para = ¶graphs[0]; // Test different case variations for get_single assert_eq!(para.get_single("Package"), Some("test")); assert_eq!(para.get_single("package"), Some("test")); assert_eq!(para.get_single("PACKAGE"), Some("test")); assert_eq!(para.get_single("PaCkAgE"), Some("test")); assert_eq!(para.get_single("Version"), Some("1.0")); assert_eq!(para.get_single("version"), Some("1.0")); assert_eq!(para.get_single("VERSION"), Some("1.0")); // Test case variations for get_field assert!(para.get_field("Package").is_some()); assert!(para.get_field("package").is_some()); assert!(para.get_field("PACKAGE").is_some()); // Test case variations for get (returns lines) assert!(para.get("Package").is_some()); assert!(para.get("package").is_some()); 
assert!(para.get("PACKAGE").is_some()); } #[test] fn test_borrowed_case_insensitive_multiline() { let input = "Package: test\nDescription: short desc\n continuation line\n"; let paragraphs = parse_borrowed(input).unwrap(); let para = ¶graphs[0]; // Test with different cases let desc_lower = para.get("description"); let desc_upper = para.get("DESCRIPTION"); let desc_mixed = para.get("Description"); assert!(desc_lower.is_some()); assert!(desc_upper.is_some()); assert!(desc_mixed.is_some()); assert_eq!(desc_lower, desc_upper); assert_eq!(desc_lower, desc_mixed); assert_eq!(desc_lower.unwrap().len(), 2); assert_eq!(desc_lower.unwrap()[0], "short desc"); assert_eq!(desc_lower.unwrap()[1], "continuation line"); } #[test] fn test_borrowed_case_preservation() { let input = "Package: test\nVersion: 1.0\n"; let paragraphs = parse_borrowed(input).unwrap(); let para = ¶graphs[0]; // Get field with lowercase query let field = para.get_field("package").unwrap(); // But the original case should be preserved in the field name assert_eq!(field.name(), "Package"); } } deb822-fast-0.2.3/src/convert.rs000064400000000000000000000206551046102023000143740ustar 00000000000000//! Conversion between Deb822-like paragraphs and Rust objects. /// Abstract trait for accessing and modifying key-value pairs in a paragraph. pub trait Deb822LikeParagraph: FromIterator<(String, String)> { /// Get the value for the given key. fn get(&self, key: &str) -> Option; /// Insert a key-value pair. fn set(&mut self, key: &str, value: &str); /// Remove a key-value pair. fn remove(&mut self, key: &str); } impl Deb822LikeParagraph for crate::Paragraph { fn get(&self, key: &str) -> Option { crate::Paragraph::get(self, key).map(|v| v.to_string()) } fn set(&mut self, key: &str, value: &str) { crate::Paragraph::set(self, key, value); } fn remove(&mut self, key: &str) { crate::Paragraph::remove(self, key); } } /// Convert a paragraph to this object. 
pub trait FromDeb822Paragraph { /// Convert a paragraph to this object. fn from_paragraph(paragraph: &P) -> Result where Self: Sized; } /// Convert this object to a paragraph. pub trait ToDeb822Paragraph { /// Convert this object to a paragraph. fn to_paragraph(&self) -> P; /// Update the given paragraph with the values from this object. fn update_paragraph(&self, paragraph: &mut P); } /// Format a field value as a single line. /// /// # Panics /// /// Panics if the value contains newline characters. pub fn format_single_line(value: &str, field_name: &str) -> String { assert!( !value.contains('\n'), "Field '{}' is marked as single_line but contains newlines", field_name ); value.to_string() } /// Format a field value as multi-line, ensuring continuation lines start with a space. /// /// If the value is already single-line, it is returned as-is. /// For multi-line values: /// - The first line is kept as-is /// - Empty continuation lines are replaced with " ." (space followed by dot) /// - Non-empty continuation lines are prefixed with a space pub fn format_multi_line(value: &str) -> String { if !value.contains('\n') { value.to_string() } else { value .lines() .enumerate() .map(|(i, line)| { if i == 0 { line.to_string() } else if line.is_empty() { " .".to_string() } else { format!(" {}", line) } }) .collect::>() .join("\n") } } /// Format a field value as folded, stripping whitespace and joining lines with spaces. 
/// /// This implements RFC 822 folding behavior by: /// - Trimming leading and trailing whitespace from each line /// - Filtering out empty lines /// - Joining the remaining lines with single spaces pub fn format_folded(value: &str) -> String { value .lines() .map(|line| line.trim()) .filter(|line| !line.is_empty()) .collect::>() .join(" ") } #[cfg(test)] mod tests { use super::*; #[test] fn test_trait_impl_directly() { // Test the trait methods directly to improve coverage let mut para = crate::Paragraph { fields: vec![crate::Field { name: "Test".to_string(), value: "Value".to_string(), }], }; // Test Deb822LikeParagraph::get let result: Option = Deb822LikeParagraph::get(¶, "Test"); assert_eq!(result, Some("Value".to_string())); // Test Deb822LikeParagraph::set Deb822LikeParagraph::set(&mut para, "Test", "NewValue"); assert_eq!(para.get("Test"), Some("NewValue")); // Test Deb822LikeParagraph::remove Deb822LikeParagraph::remove(&mut para, "Test"); assert_eq!(para.get("Test"), None); } #[test] fn test_deb822like_paragraph_impl() { // Create mock crate::Paragraph for tests let mut para = crate::Paragraph { fields: vec![crate::Field { name: "Name".to_string(), value: "Test".to_string(), }], }; // Test get() - this calls the implementation on line 16-17 assert_eq!(para.get("Name"), Some("Test")); assert_eq!(para.get("NonExistent"), None); // Test set() - this calls the implementation on line 20-21 para.set("Name", "NewValue"); assert_eq!(para.get("Name"), Some("NewValue")); // Test set() with new key para.set("NewKey", "Value"); assert_eq!(para.get("NewKey"), Some("Value")); // Test remove() - this calls the implementation on line 24-25 para.remove("Name"); assert_eq!(para.get("Name"), None); assert_eq!(para.get("NewKey"), Some("Value")); // Create a new paragraph with multiple fields of the same name let mut para = crate::Paragraph { fields: vec![ crate::Field { name: "Duplicate".to_string(), value: "Value1".to_string(), }, crate::Field { name: 
"Duplicate".to_string(), value: "Value2".to_string(), }, ], }; // Test remove() removes all matches para.remove("Duplicate"); assert_eq!(para.get("Duplicate"), None); assert_eq!(para.fields.len(), 0); } #[cfg(feature = "derive")] mod derive { use super::*; use crate as deb822_fast; use crate::{FromDeb822, ToDeb822}; #[test] fn test_derive() { #[derive(ToDeb822)] struct Foo { bar: String, baz: i32, blah: Option, } let foo = Foo { bar: "hello".to_string(), baz: 42, blah: None, }; let paragraph: crate::Paragraph = foo.to_paragraph(); assert_eq!(paragraph.get("bar"), Some("hello")); assert_eq!(paragraph.get("baz"), Some("42")); assert_eq!(paragraph.get("blah"), None); } #[test] fn test_optional_missing() { #[derive(ToDeb822)] struct Foo { bar: String, baz: Option, } let foo = Foo { bar: "hello".to_string(), baz: None, }; let paragraph: crate::Paragraph = foo.to_paragraph(); assert_eq!(paragraph.get("bar"), Some("hello")); assert_eq!(paragraph.get("baz"), None); assert_eq!("bar: hello\n", paragraph.to_string()); } #[test] fn test_deserialize_with() { let mut para: crate::Paragraph = "bar: bar\n# comment\nbaz: blah\n".parse().unwrap(); fn to_bool(s: &str) -> Result { Ok(s == "ja") } fn from_bool(s: &bool) -> String { if *s { "ja".to_string() } else { "nee".to_string() } } #[derive(FromDeb822, ToDeb822)] struct Foo { bar: String, #[deb822(deserialize_with = to_bool, serialize_with = from_bool)] baz: bool, } let mut foo: Foo = Foo::from_paragraph(¶).unwrap(); assert_eq!(foo.bar, "bar"); assert!(!foo.baz); foo.bar = "new".to_string(); foo.update_paragraph(&mut para); assert_eq!(para.get("bar"), Some("new")); assert_eq!(para.get("baz"), Some("nee")); assert_eq!(para.to_string(), "bar: new\nbaz: nee\n"); } #[test] fn test_update_remove() { let mut para: crate::Paragraph = "bar: bar\n# comment\nbaz: blah\n".parse().unwrap(); #[derive(FromDeb822, ToDeb822)] struct Foo { bar: Option, baz: String, } let mut foo: Foo = Foo::from_paragraph(¶).unwrap(); assert_eq!(foo.bar, 
Some("bar".to_string())); assert_eq!(foo.baz, "blah"); foo.bar = None; foo.update_paragraph(&mut para); assert_eq!(para.get("bar"), None); assert_eq!(para.get("baz"), Some("blah")); assert_eq!(para.to_string(), "baz: blah\n"); } } } deb822-fast-0.2.3/src/lib.rs000064400000000000000000001233531046102023000134610ustar 00000000000000//! Fast parser for deb822 format. //! //! This parser is lossy in the sense that it will discard whitespace and comments //! in the input. //! //! ## API Variants //! //! This crate provides two parsing APIs: //! //! ### Owned API (default) //! The main API using [`Deb822`], [`Paragraph`], and [`Field`] types that own their data. //! - Easy to use - no lifetime management required //! - Can be stored, moved, and outlive the source string //! - Good performance with moderate allocations //! //! ### Borrowed API (low-allocation) //! The [`borrowed`] module provides a low-allocation API using borrowed string slices. //! - Maximum performance - avoids allocating owned Strings for field data //! - Still allocates Vec structures for paragraphs and fields //! - Requires lifetime management //! - Data cannot outlive the source string //! - Best for parsing large files where you process data immediately //! //! ```rust //! use deb822_fast::{Deb822, borrowed::BorrowedParser}; //! //! let input = "Package: hello\nVersion: 1.0\n"; //! //! // Owned API - easy to use //! let doc: Deb822 = input.parse().unwrap(); //! let package = doc.iter().next().unwrap().get("Package"); //! //! // Borrowed API - maximum performance //! let paragraphs = BorrowedParser::new(input).parse_all().unwrap(); //! let package = paragraphs[0].get("Package"); //! 
``` #[cfg(feature = "derive")] pub use deb822_derive::{FromDeb822, ToDeb822}; pub mod convert; pub use convert::{FromDeb822Paragraph, ToDeb822Paragraph}; pub mod borrowed; /// Canonical field order for source paragraphs in debian/control files pub const SOURCE_FIELD_ORDER: &[&str] = &[ "Source", "Section", "Priority", "Maintainer", "Uploaders", "Build-Depends", "Build-Depends-Indep", "Build-Depends-Arch", "Build-Conflicts", "Build-Conflicts-Indep", "Build-Conflicts-Arch", "Standards-Version", "Vcs-Browser", "Vcs-Git", "Vcs-Svn", "Vcs-Bzr", "Vcs-Hg", "Vcs-Darcs", "Vcs-Cvs", "Vcs-Arch", "Vcs-Mtn", "Homepage", "Rules-Requires-Root", "Testsuite", "Testsuite-Triggers", ]; /// Canonical field order for binary packages in debian/control files pub const BINARY_FIELD_ORDER: &[&str] = &[ "Package", "Architecture", "Section", "Priority", "Multi-Arch", "Essential", "Build-Profiles", "Built-Using", "Pre-Depends", "Depends", "Recommends", "Suggests", "Enhances", "Conflicts", "Breaks", "Replaces", "Provides", "Description", ]; /// Error type for the parser. #[derive(Debug)] pub enum Error { /// An unexpected token was encountered. UnexpectedToken(String), /// Unexpected end-of-file. UnexpectedEof, /// Expected end-of-file. ExpectedEof, /// IO error. Io(std::io::Error), } impl From for Error { fn from(e: std::io::Error) -> Self { Self::Io(e) } } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { match self { Self::UnexpectedToken(t) => write!(f, "Unexpected token: {}", t), Self::UnexpectedEof => f.write_str("Unexpected end-of-file"), Self::Io(e) => write!(f, "IO error: {}", e), Self::ExpectedEof => f.write_str("Expected end-of-file"), } } } impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Self::Io(e) => Some(e), _ => None, } } } /// A field in a deb822 paragraph. #[derive(Debug, PartialEq, Eq, Clone)] pub struct Field { /// The name of the field. 
pub name: String, /// The value of the field. pub value: String, } /// A deb822 paragraph. #[derive(Debug, PartialEq, Eq, Clone)] pub struct Paragraph { /// Fields in the paragraph. pub fields: Vec, } impl Paragraph { /// Get the value of a field by name. /// /// Field names are compared case-insensitively. /// Returns `None` if the field does not exist. pub fn get(&self, name: &str) -> Option<&str> { for field in &self.fields { if field.name.eq_ignore_ascii_case(name) { return Some(&field.value); } } None } /// Check if the paragraph is empty. pub fn is_empty(&self) -> bool { self.fields.is_empty() } /// Return the number of fields in the paragraph. pub fn len(&self) -> usize { self.fields.len() } /// Iterate over the fields in the paragraph. pub fn iter(&self) -> impl Iterator { self.fields .iter() .map(|field| (field.name.as_str(), field.value.as_str())) } /// Iterate over the fields in the paragraph, mutably. pub fn iter_mut(&mut self) -> impl Iterator { self.fields .iter_mut() .map(|field| (field.name.as_str(), &mut field.value)) } /// Insert a field into the paragraph. /// /// If a field with the same name already exists, a /// new field will be added. pub fn insert(&mut self, name: &str, value: &str) { self.fields.push(Field { name: name.to_string(), value: value.to_string(), }); } /// Set the value of a field, inserting at the appropriate location if new. /// /// Field names are compared case-insensitively. /// If a field with the same name already exists, its value will be updated. /// If the field doesn't exist, it will be inserted at the appropriate position /// based on canonical field ordering. 
pub fn set(&mut self, name: &str, value: &str) { // Check if field already exists and update it for field in &mut self.fields { if field.name.eq_ignore_ascii_case(name) { field.value = value.to_string(); return; } } // By default, insert at the end let insertion_index = self.fields.len(); self.fields.insert( insertion_index, Field { name: name.to_string(), value: value.to_string(), }, ); } /// Set a field using a specific field ordering. /// /// Field names are compared case-insensitively. pub fn set_with_field_order(&mut self, name: &str, value: &str, field_order: &[&str]) { // Check if field already exists and update it for field in &mut self.fields { if field.name.eq_ignore_ascii_case(name) { field.value = value.to_string(); return; } } let insertion_index = self.find_insertion_index(name, field_order); self.fields.insert( insertion_index, Field { name: name.to_string(), value: value.to_string(), }, ); } /// Find the appropriate insertion index for a new field based on field ordering. 
fn find_insertion_index(&self, name: &str, field_order: &[&str]) -> usize { // Find position of the new field in the canonical order (case-insensitive) let new_field_position = field_order .iter() .position(|&field| field.eq_ignore_ascii_case(name)); let mut insertion_index = self.fields.len(); // Find the right position based on canonical field order for (i, field) in self.fields.iter().enumerate() { let existing_position = field_order .iter() .position(|&f| f.eq_ignore_ascii_case(&field.name)); match (new_field_position, existing_position) { // Both fields are in the canonical order (Some(new_pos), Some(existing_pos)) => { if new_pos < existing_pos { insertion_index = i; break; } } // New field is in canonical order, existing is not (Some(_), None) => { // Continue looking - unknown fields go after known ones } // New field is not in canonical order, existing is (None, Some(_)) => { // Continue until we find all known fields } // Neither field is in canonical order, maintain alphabetical (None, None) => { if name < &field.name { insertion_index = i; break; } } } } // If we have a position in canonical order but haven't found where to insert yet, // we need to insert after all known fields that come before it if new_field_position.is_some() && insertion_index == self.fields.len() { // Look for the position after the last known field that comes before our field for (i, field) in self.fields.iter().enumerate().rev() { if field_order .iter() .any(|&f| f.eq_ignore_ascii_case(&field.name)) { // Found a known field, insert after it insertion_index = i + 1; break; } } } insertion_index } /// Remove a field from the paragraph. /// /// Field names are compared case-insensitively. 
pub fn remove(&mut self, name: &str) {
    // `retain` keeps every field whose name does NOT match, so all
    // occurrences of a repeated field are dropped in a single pass.
    self.fields
        .retain(|field| !field.name.eq_ignore_ascii_case(name));
}
} // end of `impl Paragraph`

/// Writes the field as `Name: first line`, followed by one line per remaining
/// value line, each prefixed with a single space. Every emitted line ends
/// with a newline.
impl std::fmt::Display for Field {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let mut lines = self.value.lines();

        // Always go through `lines()`, even for single-line values: writing
        // the raw value would copy a trailing newline into the output and
        // produce a blank line, which ends the paragraph when parsed back.
        writeln!(f, "{}: {}", self.name, lines.next().unwrap_or(""))?;
        for line in lines {
            writeln!(f, " {}", line)?;
        }
        Ok(())
    }
}

/// Writes every field of the paragraph in order, using the field's `Display`
/// output. An empty paragraph produces no output.
impl std::fmt::Display for Paragraph {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        for field in &self.fields {
            write!(f, "{}", field)?;
        }
        Ok(())
    }
}

/// Writes the paragraphs in order, separated by a single blank line.
impl std::fmt::Display for Deb822 {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        for (i, paragraph) in self.0.iter().enumerate() {
            if i > 0 {
                writeln!(f)?;
            }
            write!(f, "{}", paragraph)?;
        }
        Ok(())
    }
}

/// Parses a string that must contain exactly one paragraph.
///
/// # Errors
///
/// - Any error produced while parsing the input as a [`Deb822`] document.
/// - [`Error::UnexpectedEof`] if the input contains no paragraph.
/// - [`Error::ExpectedEof`] if the input contains more than one paragraph.
impl std::str::FromStr for Paragraph {
    type Err = Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let doc: Deb822 = s.parse()?;
        let mut paragraphs = doc.0.into_iter();
        // Look at the first two paragraphs to tell "none", "exactly one" and
        // "more than one" apart without unwrapping.
        match (paragraphs.next(), paragraphs.next()) {
            (None, _) => Err(Error::UnexpectedEof),
            (Some(_), Some(_)) => Err(Error::ExpectedEof),
            (Some(paragraph), None) => Ok(paragraph),
        }
    }
}

/// Builds a paragraph from `(name, value)` pairs, keeping their order.
impl From<Vec<(String, String)>> for Paragraph {
    fn from(fields: Vec<(String, String)>) -> Self {
        fields.into_iter().collect()
    }
}

/// Collects `(name, value)` pairs into a paragraph, one field per pair, in
/// iteration order. Repeated names are kept as separate fields.
impl FromIterator<(String, String)> for Paragraph {
    fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self {
        let fields = iter
            .into_iter()
            .map(|(name, value)| Field { name, value })
            .collect();
        Paragraph { fields }
    }
}

/// Consumes the paragraph and yields its fields as owned `(name, value)` pairs.
impl IntoIterator for Paragraph {
    type Item = (String, String);
    type IntoIter = std::iter::Map<std::vec::IntoIter<Field>, fn(Field) -> (String, String)>;

    fn into_iter(self) -> Self::IntoIter {
        self.fields
            .into_iter()
            .map(|field| (field.name, field.value))
    }
}

/// A deb822 document.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Deb822(Vec<Paragraph>);

/// Unwraps the document into its list of paragraphs.
impl From<Deb822> for Vec<Paragraph> {
    fn from(doc: Deb822) -> Self {
        doc.0
    }
}

/// Consumes the document and yields its paragraphs in order.
impl IntoIterator for Deb822 {
    type Item = Paragraph;
    type IntoIter = std::vec::IntoIter<Paragraph>;

    fn into_iter(self) -> Self::IntoIter {
        self.0.into_iter()
    }
}

impl Deb822 {
    /// Number of paragraphs in the document.
pub fn len(&self) -> usize { self.0.len() } /// Check if the document is empty. pub fn is_empty(&self) -> bool { self.0.is_empty() } /// Iterate over the paragraphs in the document. pub fn iter(&self) -> impl Iterator { self.0.iter() } /// Iterate over the paragraphs in the document, mutably. pub fn iter_mut(&mut self) -> impl Iterator { self.0.iter_mut() } /// Read from a reader. pub fn from_reader(mut r: R) -> Result { let mut buf = String::new(); r.read_to_string(&mut buf)?; buf.parse() } /// Stream paragraphs from a reader. /// /// This returns an iterator that reads and parses paragraphs one at a time, /// which is more memory-efficient for large files. pub fn iter_paragraphs_from_reader(reader: R) -> ParagraphReader { ParagraphReader::new(reader) } } /// Reader that streams paragraphs from a buffered reader. pub struct ParagraphReader { reader: R, buffer: String, finished: bool, } impl ParagraphReader { /// Create a new paragraph reader from a buffered reader. pub fn new(reader: R) -> Self { Self { reader, buffer: String::new(), finished: false, } } } impl Iterator for ParagraphReader { type Item = Result; fn next(&mut self) -> Option { if self.finished { return None; } self.buffer.clear(); let mut found_content = false; loop { let mut line = String::new(); match self.reader.read_line(&mut line) { Ok(0) => { // End of file self.finished = true; if found_content { // Parse the buffered paragraph return Some(self.buffer.parse()); } return None; } Ok(_) => { // Check if this is a blank line (paragraph separator) if line.trim().is_empty() && found_content { // End of current paragraph return Some(self.buffer.parse()); } // Skip leading blank lines and comments before first field if !found_content && (line.trim().is_empty() || line.trim_start().starts_with('#')) { continue; } // Check if this starts a new field (not indented) if !line.starts_with(|c: char| c.is_whitespace()) && line.contains(':') { found_content = true; } else if found_content { // Continuation 
line or comment within paragraph } else if !line.trim_start().starts_with('#') { // Non-blank, non-comment line before any field - this is content found_content = true; } self.buffer.push_str(&line); } Err(e) => { self.finished = true; return Some(Err(Error::Io(e))); } } } } } impl std::str::FromStr for Deb822 { type Err = Error; fn from_str(s: &str) -> Result { // Optimized zero-copy byte-level parser let bytes = s.as_bytes(); let mut paragraphs = Vec::new(); let mut pos = 0; let len = bytes.len(); while pos < len { // Skip leading newlines and comments between paragraphs while pos < len { let b = bytes[pos]; if b == b'#' { while pos < len && bytes[pos] != b'\n' { pos += 1; } if pos < len { pos += 1; } } else if b == b'\n' || b == b'\r' { pos += 1; } else { break; } } if pos >= len { break; } // Check for unexpected leading space/tab before paragraph if bytes[pos] == b' ' || bytes[pos] == b'\t' { let line_start = pos; while pos < len && bytes[pos] != b'\n' { pos += 1; } let token = unsafe { std::str::from_utf8_unchecked(&bytes[line_start..pos]) }; return Err(Error::UnexpectedToken(token.to_string())); } // Parse paragraph let mut fields: Vec = Vec::new(); loop { if pos >= len { break; } // Check for blank line (end of paragraph) if bytes[pos] == b'\n' { pos += 1; break; } // Skip comment lines if bytes[pos] == b'#' { while pos < len && bytes[pos] != b'\n' { pos += 1; } if pos < len { pos += 1; } continue; } // Check for continuation line (starts with space/tab) if bytes[pos] == b' ' || bytes[pos] == b'\t' { if fields.is_empty() { // Indented line before any field - this is an error let line_start = pos; while pos < len && bytes[pos] != b'\n' { pos += 1; } let token = unsafe { std::str::from_utf8_unchecked(&bytes[line_start..pos]) }; return Err(Error::UnexpectedToken(token.to_string())); } // Skip all leading whitespace (deb822 format strips leading spaces) while pos < len && (bytes[pos] == b' ' || bytes[pos] == b'\t') { pos += 1; } // Read the rest of the 
continuation line let line_start = pos; while pos < len && bytes[pos] != b'\n' { pos += 1; } // Add to previous field value if let Some(last_field) = fields.last_mut() { last_field.value.push('\n'); last_field.value.push_str(unsafe { std::str::from_utf8_unchecked(&bytes[line_start..pos]) }); } if pos < len { pos += 1; // Skip newline } continue; } // Parse field name let name_start = pos; while pos < len && bytes[pos] != b':' && bytes[pos] != b'\n' { pos += 1; } if pos >= len || bytes[pos] != b':' { // Invalid line - missing colon or value without key let line_start = name_start; while pos < len && bytes[pos] != b'\n' { pos += 1; } let token = unsafe { std::str::from_utf8_unchecked(&bytes[line_start..pos]) }; return Err(Error::UnexpectedToken(token.to_string())); } let name = unsafe { std::str::from_utf8_unchecked(&bytes[name_start..pos]) }; // Check for empty field name (e.g., line starting with ':') if name.is_empty() { let line_start = name_start; let mut end = pos; while end < len && bytes[end] != b'\n' { end += 1; } let token = unsafe { std::str::from_utf8_unchecked(&bytes[line_start..end]) }; return Err(Error::UnexpectedToken(token.to_string())); } pos += 1; // Skip colon // Skip whitespace after colon while pos < len && (bytes[pos] == b' ' || bytes[pos] == b'\t') { pos += 1; } // Read field value (rest of line) let value_start = pos; while pos < len && bytes[pos] != b'\n' { pos += 1; } let value = unsafe { std::str::from_utf8_unchecked(&bytes[value_start..pos]) }; fields.push(Field { name: name.to_string(), value: value.to_string(), }); if pos < len { pos += 1; // Skip newline } } if !fields.is_empty() { paragraphs.push(Paragraph { fields }); } } Ok(Deb822(paragraphs)) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_error_display() { let err = Error::UnexpectedToken("invalid".to_string()); assert_eq!(err.to_string(), "Unexpected token: invalid"); let err = Error::UnexpectedEof; assert_eq!(err.to_string(), "Unexpected end-of-file"); let err = 
Error::ExpectedEof; assert_eq!(err.to_string(), "Expected end-of-file"); let io_err = std::io::Error::other("test error"); let err = Error::Io(io_err); assert!(err.to_string().contains("IO error: test error")); } #[test] fn test_parse() { let input = r#"Package: hello Version: 2.10 Description: A program that says hello Some more text Package: world Version: 1.0 Description: A program that says world And some more text Another-Field: value # A comment "#; let mut deb822: Deb822 = input.parse().unwrap(); assert_eq!( deb822, Deb822(vec![ Paragraph { fields: vec![ Field { name: "Package".to_string(), value: "hello".to_string(), }, Field { name: "Version".to_string(), value: "2.10".to_string(), }, Field { name: "Description".to_string(), value: "A program that says hello\nSome more text".to_string(), }, ], }, Paragraph { fields: vec![ Field { name: "Package".to_string(), value: "world".to_string(), }, Field { name: "Version".to_string(), value: "1.0".to_string(), }, Field { name: "Description".to_string(), value: "A program that says world\nAnd some more text".to_string(), }, Field { name: "Another-Field".to_string(), value: "value".to_string(), }, ], }, ]) ); assert_eq!(deb822.len(), 2); assert!(!deb822.is_empty()); assert_eq!(deb822.iter().count(), 2); let para = deb822.iter().next().unwrap(); assert_eq!(para.get("Package"), Some("hello")); assert_eq!(para.get("Version"), Some("2.10")); assert_eq!( para.get("Description"), Some("A program that says hello\nSome more text") ); assert_eq!(para.get("Another-Field"), None); assert!(!para.is_empty()); assert_eq!(para.len(), 3); assert_eq!( para.iter().collect::>(), vec![ ("Package", "hello"), ("Version", "2.10"), ("Description", "A program that says hello\nSome more text"), ] ); let para = deb822.iter_mut().next().unwrap(); para.insert("Another-Field", "value"); assert_eq!(para.get("Another-Field"), Some("value")); let mut newpara = Paragraph { fields: vec![] }; newpara.insert("Package", "new"); 
assert_eq!(newpara.to_string(), "Package: new\n"); } #[test] fn test_paragraph_iter() { let input = r#"Package: hello Version: 2.10 "#; let para: Paragraph = input.parse().unwrap(); let mut iter = para.into_iter(); assert_eq!( iter.next(), Some(("Package".to_string(), "hello".to_string())) ); assert_eq!( iter.next(), Some(("Version".to_string(), "2.10".to_string())) ); assert_eq!(iter.next(), None); } #[test] fn test_format_multiline() { let para = Paragraph { fields: vec![Field { name: "Description".to_string(), value: "A program that says hello\nSome more text".to_string(), }], }; assert_eq!( para.to_string(), "Description: A program that says hello\n Some more text\n" ); } #[test] fn test_paragraph_from_str_errors() { // Test ExpectedEof error let result = "Package: foo\n\nPackage: bar\n".parse::(); assert!(matches!(result, Err(Error::ExpectedEof))); // Test UnexpectedEof error let result = "".parse::(); assert!(matches!(result, Err(Error::UnexpectedEof))); } #[test] fn test_from_vec() { let fields = vec![ ("Package".to_string(), "hello".to_string()), ("Version".to_string(), "1.0".to_string()), ]; let para: Paragraph = fields.into(); assert_eq!(para.get("Package"), Some("hello")); assert_eq!(para.get("Version"), Some("1.0")); } #[test] fn test_unexpected_tokens() { // Test parsing with unexpected tokens let input = "Value before key\nPackage: hello\n"; let result = input.parse::(); assert!(matches!(result, Err(Error::UnexpectedToken(_)))); // Test parsing with missing colon after key let input = "Package hello\n"; let result = input.parse::(); assert!(matches!(result, Err(Error::UnexpectedToken(_)))); // Test parsing with unexpected indent let input = " Indented: value\n"; let result = input.parse::(); assert!(matches!(result, Err(Error::UnexpectedToken(_)))); // Test parsing with unexpected value let input = "Key: value\nvalue without key\n"; let result = input.parse::(); assert!(matches!(result, Err(Error::UnexpectedToken(_)))); // Test parsing with unexpected 
colon let input = "Key: value\n:\n"; let result = input.parse::(); assert!(matches!(result, Err(Error::UnexpectedToken(_)))); } #[test] fn test_parse_continuation_with_colon() { // Test that continuation lines with colons are properly parsed let input = "Package: test\nDescription: short\n line: with colon\n"; let result = input.parse::(); assert!(result.is_ok()); let deb822 = result.unwrap(); assert_eq!(deb822.0.len(), 1); assert_eq!(deb822.0[0].fields.len(), 2); assert_eq!(deb822.0[0].fields[0].name, "Package"); assert_eq!(deb822.0[0].fields[0].value, "test"); assert_eq!(deb822.0[0].fields[1].name, "Description"); assert_eq!(deb822.0[0].fields[1].value, "short\nline: with colon"); } #[test] fn test_parse_continuation_starting_with_colon() { // Test continuation line STARTING with a colon (issue #315) let input = "Package: test\nDescription: short\n :value\n"; let result = input.parse::(); assert!(result.is_ok()); let deb822 = result.unwrap(); assert_eq!(deb822.0.len(), 1); assert_eq!(deb822.0[0].fields.len(), 2); assert_eq!(deb822.0[0].fields[0].name, "Package"); assert_eq!(deb822.0[0].fields[0].value, "test"); assert_eq!(deb822.0[0].fields[1].name, "Description"); assert_eq!(deb822.0[0].fields[1].value, "short\n:value"); } #[test] fn test_from_reader() { // Test Deb822::from_reader with valid input let input = "Package: hello\nVersion: 1.0\n"; let result = Deb822::from_reader(input.as_bytes()).unwrap(); assert_eq!(result.len(), 1); let para = result.iter().next().unwrap(); assert_eq!(para.get("Package"), Some("hello")); // Test with IO error use std::io::Error as IoError; struct FailingReader; impl std::io::Read for FailingReader { fn read(&mut self, _: &mut [u8]) -> std::io::Result { Err(IoError::other("test error")) } } let result = Deb822::from_reader(FailingReader); assert!(matches!(result, Err(Error::Io(_)))); } #[test] fn test_deb822_vec_conversion() { let paragraphs = vec![ Paragraph { fields: vec![Field { name: "Package".to_string(), value: 
"hello".to_string(), }], }, Paragraph { fields: vec![Field { name: "Package".to_string(), value: "world".to_string(), }], }, ]; let deb822 = Deb822(paragraphs.clone()); let vec: Vec = deb822.into(); assert_eq!(vec, paragraphs); } #[test] fn test_deb822_iteration() { let paragraphs = vec![ Paragraph { fields: vec![Field { name: "Package".to_string(), value: "hello".to_string(), }], }, Paragraph { fields: vec![Field { name: "Package".to_string(), value: "world".to_string(), }], }, ]; let deb822 = Deb822(paragraphs.clone()); // Test IntoIterator implementation let collected: Vec<_> = deb822.into_iter().collect(); assert_eq!(collected, paragraphs); // Test iter() and iter_mut() let deb822 = Deb822(paragraphs.clone()); let iter_refs: Vec<&Paragraph> = deb822.iter().collect(); assert_eq!(iter_refs.len(), 2); assert_eq!(iter_refs[0].get("Package"), Some("hello")); let mut deb822 = Deb822(paragraphs.clone()); for para in deb822.iter_mut() { if para.get("Package") == Some("hello") { para.set("Version", "1.0"); } } assert_eq!(deb822.iter().next().unwrap().get("Version"), Some("1.0")); } #[test] fn test_empty_collections() { // Test empty Deb822 let deb822 = Deb822(vec![]); assert!(deb822.is_empty()); assert_eq!(deb822.len(), 0); assert_eq!(deb822.iter().count(), 0); // Test empty Paragraph let para = Paragraph { fields: vec![] }; assert!(para.is_empty()); assert_eq!(para.len(), 0); assert_eq!(para.iter().count(), 0); assert_eq!(para.get("Any"), None); // Test formatting of empty paragraph assert_eq!(para.to_string(), ""); // Test formatting of empty Deb822 assert_eq!(deb822.to_string(), ""); } #[test] fn test_paragraph_mutable_iteration() { let mut para = Paragraph { fields: vec![ Field { name: "First".to_string(), value: "1".to_string(), }, Field { name: "Second".to_string(), value: "2".to_string(), }, ], }; // Test iter_mut for (_, value) in para.iter_mut() { *value = format!("{}0", value); } assert_eq!(para.get("First"), Some("10")); assert_eq!(para.get("Second"), 
Some("20")); } #[test] fn test_insert_duplicate_key() { let mut para = Paragraph { fields: vec![Field { name: "Key".to_string(), value: "Value1".to_string(), }], }; // Insert will add a new field, even if the key already exists para.insert("Key", "Value2"); assert_eq!(para.fields.len(), 2); assert_eq!(para.fields[0].value, "Value1"); assert_eq!(para.fields[1].value, "Value2"); // But get() will return the first occurrence assert_eq!(para.get("Key"), Some("Value1")); } #[test] fn test_multiline_field_format() { // Test display formatting for multiline field values let field = Field { name: "MultiField".to_string(), value: "line1\nline2\nline3".to_string(), }; let formatted = format!("{}", field); assert_eq!(formatted, "MultiField: line1\n line2\n line3\n"); // Test formatting within paragraph context let para = Paragraph { fields: vec![field], }; let formatted = format!("{}", para); assert_eq!(formatted, "MultiField: line1\n line2\n line3\n"); } #[test] fn test_paragraph_parsing_edge_cases() { // Test parsing empty value let input = "Key:\n"; let para: Paragraph = input.parse().unwrap(); assert_eq!(para.get("Key"), Some("")); // Test parsing value with just whitespace // Note: whitespace after the colon appears to be trimmed by the parser let input = "Key: \n"; let para: Paragraph = input.parse().unwrap(); assert_eq!(para.get("Key"), Some("")); // Test parsing multiple empty lines between paragraphs let input = "Key1: value1\n\n\n\nKey2: value2\n"; let deb822: Deb822 = input.parse().unwrap(); assert_eq!(deb822.len(), 2); // Test parsing complex indentation // The parser preserves the indentation from the original file let input = "Key: value\n with\n indentation\n levels\n"; let para: Paragraph = input.parse().unwrap(); assert_eq!(para.get("Key"), Some("value\nwith\nindentation\nlevels")); } #[test] fn test_parse_complex() { // Test various edge cases in the parser let input = "# Comment at start\nKey1: val1\nKey2: \n indented\nKey3: val3\n\n# Comment between 
paragraphs\n\nKey4: val4\n"; let deb822: Deb822 = input.parse().unwrap(); assert_eq!(deb822.len(), 2); let paragraphs: Vec = deb822.into(); assert_eq!(paragraphs[0].get("Key2"), Some("\nindented")); assert_eq!(paragraphs[1].get("Key4"), Some("val4")); // Test parsing with an indented line immediately after a key let input = "Key:\n indented value\n"; let para: Paragraph = input.parse().unwrap(); assert_eq!(para.get("Key"), Some("\nindented value")); } #[test] fn test_deb822_display() { // Test the Deb822::fmt Display implementation (lines 158-164) let para1 = Paragraph { fields: vec![Field { name: "Key1".to_string(), value: "Value1".to_string(), }], }; let para2 = Paragraph { fields: vec![Field { name: "Key2".to_string(), value: "Value2".to_string(), }], }; let deb822 = Deb822(vec![para1, para2]); let formatted = format!("{}", deb822); assert_eq!(formatted, "Key1: Value1\n\nKey2: Value2\n"); } #[test] fn test_parser_edge_cases() { // Let's focus on testing various parser behaviors rather than expecting errors // Test comment handling let input = "# Comment\nKey: value"; let deb822: Deb822 = input.parse().unwrap(); assert_eq!(deb822.len(), 1); // Test for unexpected token at line 303 let input = "Key: value\n .indented"; let deb822: Deb822 = input.parse().unwrap(); assert_eq!( deb822.iter().next().unwrap().get("Key"), Some("value\n.indented") ); // Test multi-line values let input = "Key: value\n line1\n line2\n\nNextKey: value"; let deb822: Deb822 = input.parse().unwrap(); assert_eq!(deb822.len(), 2); assert_eq!( deb822.iter().next().unwrap().get("Key"), Some("value\nline1\nline2") ); } #[test] fn test_iter_paragraphs_from_reader() { use std::io::BufReader; let input = r#"Package: hello Version: 2.10 Description: A program that says hello Some more text Package: world Version: 1.0 Description: A program that says world And some more text Another-Field: value # A comment "#; let reader = BufReader::new(input.as_bytes()); let paragraphs: Result, _> = 
Deb822::iter_paragraphs_from_reader(reader).collect(); let paragraphs = paragraphs.unwrap(); assert_eq!(paragraphs.len(), 2); assert_eq!(paragraphs[0].get("Package"), Some("hello")); assert_eq!(paragraphs[0].get("Version"), Some("2.10")); assert_eq!( paragraphs[0].get("Description"), Some("A program that says hello\nSome more text") ); assert_eq!(paragraphs[1].get("Package"), Some("world")); assert_eq!(paragraphs[1].get("Version"), Some("1.0")); assert_eq!( paragraphs[1].get("Description"), Some("A program that says world\nAnd some more text") ); assert_eq!(paragraphs[1].get("Another-Field"), Some("value")); } #[test] fn test_iter_paragraphs_from_reader_empty() { use std::io::BufReader; let input = ""; let reader = BufReader::new(input.as_bytes()); let paragraphs: Result, _> = Deb822::iter_paragraphs_from_reader(reader).collect(); let paragraphs = paragraphs.unwrap(); assert_eq!(paragraphs.len(), 0); } #[test] fn test_iter_paragraphs_from_reader_with_leading_comments() { use std::io::BufReader; let input = r#"# Leading comment # Another comment Package: test Version: 1.0 "#; let reader = BufReader::new(input.as_bytes()); let paragraphs: Result, _> = Deb822::iter_paragraphs_from_reader(reader).collect(); let paragraphs = paragraphs.unwrap(); assert_eq!(paragraphs.len(), 1); assert_eq!(paragraphs[0].get("Package"), Some("test")); } #[test] fn test_case_insensitive_get() { let para = Paragraph { fields: vec![ Field { name: "Package".to_string(), value: "test".to_string(), }, Field { name: "Version".to_string(), value: "1.0".to_string(), }, ], }; // Test different case variations assert_eq!(para.get("Package"), Some("test")); assert_eq!(para.get("package"), Some("test")); assert_eq!(para.get("PACKAGE"), Some("test")); assert_eq!(para.get("PaCkAgE"), Some("test")); assert_eq!(para.get("Version"), Some("1.0")); assert_eq!(para.get("version"), Some("1.0")); assert_eq!(para.get("VERSION"), Some("1.0")); } #[test] fn test_case_insensitive_set() { let mut para = Paragraph { 
fields: vec![Field {
                name: "Package".to_string(),
                value: "test".to_string(),
            }],
        };

        // Set with different case should update the existing field
        para.set("package", "updated");
        assert_eq!(para.fields.len(), 1);
        assert_eq!(para.get("Package"), Some("updated"));
        assert_eq!(para.get("package"), Some("updated"));

        // Set with UPPERCASE
        para.set("PACKAGE", "updated2");
        assert_eq!(para.fields.len(), 1);
        assert_eq!(para.get("Package"), Some("updated2"));
    }

    /// `Paragraph::remove` must match field names without regard to case and
    /// leave the other fields untouched.
    #[test]
    fn test_case_insensitive_remove() {
        let package = Field {
            name: String::from("Package"),
            value: String::from("test"),
        };
        let version = Field {
            name: String::from("Version"),
            value: String::from("1.0"),
        };
        let mut para = Paragraph {
            fields: vec![package, version],
        };

        // Lowercase name removes the "Package" field only.
        para.remove("package");
        assert_eq!(para.fields.len(), 1);
        assert!(para.get("Package").is_none());
        assert_eq!(para.get("Version"), Some("1.0"));

        // Uppercase name removes the remaining "Version" field.
        para.remove("VERSION");
        assert_eq!(para.fields.len(), 0);
        assert!(para.get("Version").is_none());
    }

    /// Updating a field through a differently-cased name must keep the
    /// spelling the field was first inserted with.
    #[test]
    fn test_case_preservation() {
        let mut para = Paragraph { fields: Vec::new() };

        // The name is stored exactly as given on insertion.
        para.insert("Package", "test");
        assert_eq!(para.fields[0].name, "Package");

        // A lowercase `set` changes the value but not the stored name.
        para.set("package", "updated");
        let field = &para.fields[0];
        assert_eq!(field.name, "Package");
        assert_eq!(field.value, "updated");
    }
}
deb822-fast-0.2.3/tests/field_formatting.rs000064400000000000000000000102211046102023000165700ustar 00000000000000
use deb822_derive::{FromDeb822, ToDeb822};
use deb822_fast::convert::ToDeb822Paragraph;
use deb822_fast::Paragraph;

// Fixture with one field carrying the `single_line` attribute.
#[derive(Debug, FromDeb822, ToDeb822)]
struct SingleLineTest {
    #[deb822(field = "Name", single_line)]
    name: String,
}

// Fixture with one field carrying the `multi_line` attribute.
#[derive(Debug, FromDeb822, ToDeb822)]
struct MultiLineTest {
    #[deb822(field = "Description", multi_line)]
    description: String,
}

// Fixture with one field carrying the `folded` attribute.
#[derive(Debug, FromDeb822, ToDeb822)]
struct FoldedTest {
    #[deb822(field = "Depends", folded)]
    depends: String,
}

#[test]
fn test_single_line_valid() {
    let test = SingleLineTest
{
        name: "test-package".to_string(),
    };
    let para: Paragraph = test.to_paragraph();
    assert_eq!(para.get("Name"), Some("test-package"));
}

/// Converting a `single_line` field whose value contains a newline must
/// panic with the message named in `should_panic`.
#[test]
#[should_panic(expected = "Field 'Name' is marked as single_line but contains newlines")]
fn test_single_line_with_newlines_panics() {
    let test = SingleLineTest {
        name: "test\npackage".to_string(),
    };
    // This should panic
    let _para: Paragraph = test.to_paragraph();
}

/// A `multi_line` value without any newline is stored unchanged.
#[test]
fn test_multi_line_single_line_value() {
    let test = MultiLineTest {
        description: "A simple description".to_string(),
    };
    let para: Paragraph = test.to_paragraph();
    assert_eq!(para.get("Description"), Some("A simple description"));
}

/// Every line after the first of a `multi_line` value gets one leading space.
#[test]
fn test_multi_line_adds_spaces_to_continuation_lines() {
    let test = MultiLineTest {
        description: "First line\nSecond line\nThird line".to_string(),
    };
    let para: Paragraph = test.to_paragraph();
    let value = para.get("Description").unwrap();
    // Should add space to continuation lines
    assert_eq!(value, "First line\n Second line\n Third line");
}

/// The leading space is added even when a continuation line already starts
/// with whitespace, so existing indentation grows by one.
#[test]
fn test_multi_line_always_adds_spaces() {
    let test = MultiLineTest {
        // The last line really starts with two spaces, as its text says.
        description: "First line\nAlready has text\n  Two spaces".to_string(),
    };
    let para: Paragraph = test.to_paragraph();
    let value = para.get("Description").unwrap();
    // Should always add space to continuation lines: two existing spaces
    // become three rather than being left alone.
    assert_eq!(value, "First line\n Already has text\n   Two spaces");
}

#[test]
fn test_multi_line_empty_lines_become_dot() {
    let test = MultiLineTest {
        description: "First line\n\nThird line".to_string(),
    };
    let para: Paragraph = test.to_paragraph();
    let value = para.get("Description").unwrap();
    // Empty lines should become " ."
assert_eq!(value, "First line\n .\n Third line");
}

/// A `folded` value consisting of a single token is stored unchanged.
#[test]
fn test_folded_single_line() {
    let subject = FoldedTest {
        depends: String::from("libc6"),
    };
    let rendered: Paragraph = subject.to_paragraph();
    assert_eq!(rendered.get("Depends"), Some("libc6"));
}

/// Each line of a `folded` value is trimmed and the pieces are joined with
/// single spaces.
#[test]
fn test_folded_strips_whitespace_and_joins() {
    let subject = FoldedTest {
        depends: String::from(" libc6 \n libssl3 \n zlib1g "),
    };
    let rendered: Paragraph = subject.to_paragraph();
    assert_eq!(rendered.get("Depends"), Some("libc6 libssl3 zlib1g"));
}

/// Empty and whitespace-only lines of a `folded` value are dropped.
#[test]
fn test_folded_filters_empty_lines() {
    let subject = FoldedTest {
        depends: String::from("libc6\n\nlibssl3\n \nzlib1g"),
    };
    let rendered: Paragraph = subject.to_paragraph();
    assert_eq!(rendered.get("Depends"), Some("libc6 libssl3 zlib1g"));
}

/// `update_paragraph` writes a `single_line` field into an existing paragraph.
#[test]
fn test_update_paragraph_single_line() {
    let subject = SingleLineTest {
        name: String::from("updated-package"),
    };
    let mut target: Paragraph = vec![].into();
    subject.update_paragraph(&mut target);
    assert_eq!(target.get("Name"), Some("updated-package"));
}

/// `update_paragraph` indents continuation lines of a `multi_line` field.
#[test]
fn test_update_paragraph_multi_line() {
    let subject = MultiLineTest {
        description: String::from("Line one\nLine two"),
    };
    let mut target: Paragraph = vec![].into();
    subject.update_paragraph(&mut target);
    assert_eq!(target.get("Description"), Some("Line one\n Line two"));
}

/// `update_paragraph` trims and joins the lines of a `folded` field.
#[test]
fn test_update_paragraph_folded() {
    let subject = FoldedTest {
        depends: String::from(" pkg1 \n pkg2 "),
    };
    let mut target: Paragraph = vec![].into();
    subject.update_paragraph(&mut target);
    assert_eq!(target.get("Depends"), Some("pkg1 pkg2"));
}