deser-hjson-2.2.4/.cargo_vcs_info.json0000644000000001360000000000100132620ustar { "git": { "sha1": "ad8b02c186f18438c59d6fab8482a229b0f0d88d" }, "path_in_vcs": "" }deser-hjson-2.2.4/.gitignore000064400000000000000000000000641046102023000140420ustar 00000000000000.bacon-locations /target Cargo.lock glassbench_*.db deser-hjson-2.2.4/CHANGELOG.md000064400000000000000000000035201046102023000136630ustar 00000000000000 ### v2.2.4 - 2023-11-28 - fix wrong handling of some multiline strings - Fix #19 ### v2.2.3 - 2023-11-19 - fix a case of non understood hjson (regression introduced in 2.2.1) - Fix #20 ### v2.2.2 - 2023-11-04 - fix non optional boolean value not parsed after a space in a struct - Fix #18 ### v2.2.1 - 2023-10-27 - performance improvements ### v2.2.0 - 2023-09-09 - Allow single-quoted identifiers and enum values - thanks @jwnrt ### v2.1.0 - 2023-07-09 - discard trailing whitespaces in quoteless strings ### v2.0.0 - 2023-07-09 - `from_reader` function - Error type no longer `Clone` and `PartialEq`, flagged `non_exhaustive` ### v1.2.0 - 2023-05-25 - `from_slice` function ### v1.1.1 - 2023-04-22 - accept quotes in "quoteless" keys - Fix #9 ### v1.1.0 - 2022-12-21 - support for braceless Hjson - Fix #7 ### v1.0.2 - 2021-07-31 - fix tab after quoteless map key being read as part of the key ### v1.0.1 - 2021-06-22 - properly parse single quote strings - fix type guessing in some cases for null, false, and true ### v1.0.0 - 2021-06-15 - it's stable. 
Calling it a 1.0 ### v0.1.13 - 2021-05-26 - make \r\n behave like \n - allow more liberty for enum variants ### v0.1.12 - 2021-02-13 - more precise number type guessing ### v0.1.11 - 2021-02-11 - fix primitive types (ie not Hjson texts but primitives like integers and floats) needing a space at the end - Fix #1 ### v0.1.10 - 2021-02-11 - make from_str parse a `DeserializeOwned` instead of a borrowed `Deserialize<'a>` deser-hjson-2.2.4/Cargo.toml0000644000000020220000000000100112540ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "deser-hjson" version = "2.2.4" authors = ["dystroy "] description = "a Hjson deserializer for Serde" readme = "README.md" keywords = [ "hjson", "deserialization", "serde", "derive", "json", ] categories = ["encoding"] license = "MIT" repository = "https://github.com/Canop/deser-hjson" [profile.bench] lto = true [profile.release] lto = true [[bench]] name = "parse" harness = false [dependencies.serde] version = "1.0" features = ["derive"] [dev-dependencies.glassbench] version = "0.3.5" deser-hjson-2.2.4/Cargo.toml.orig000064400000000000000000000010471046102023000147430ustar 00000000000000[package] name = "deser-hjson" version = "2.2.4" authors = ["dystroy "] repository = "https://github.com/Canop/deser-hjson" description = "a Hjson deserializer for Serde" edition = "2018" keywords = ["hjson", "deserialization", "serde", "derive", "json"] license = "MIT" categories = ["encoding"] readme = "README.md" [dependencies] serde = { version = "1.0", features = ["derive"] } [dev-dependencies] 
glassbench = "0.3.5" [[bench]] name = "parse" harness = false [profile.bench] lto = true [profile.release] lto = true deser-hjson-2.2.4/LICENSE000064400000000000000000000020461046102023000130610ustar 00000000000000MIT License Copyright (c) 2020 Canop Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. deser-hjson-2.2.4/README.md000064400000000000000000000066011046102023000133340ustar 00000000000000[![MIT][s2]][l2] [![Latest Version][s1]][l1] [![docs][s3]][l3] [![Chat on Miaou][s4]][l4] [s1]: https://img.shields.io/crates/v/deser-hjson.svg [l1]: https://crates.io/crates/deser-hjson [s2]: https://img.shields.io/badge/license-MIT-blue.svg [l2]: LICENSE [s3]: https://docs.rs/deser-hjson/badge.svg [l3]: https://docs.rs/deser-hjson/ [s4]: https://miaou.dystroy.org/static/shields/room.svg [l4]: https://miaou.dystroy.org/3768 # deser_hjson This is a Serde deserializer for [Hjson](https://hjson.github.io/), tailored for derive powered deserialization. Hjson is a good language for a configuration file. 
Such files should be written by a human, read and modified by other humans, then deserialized into a precise structure by a program: ```rust let file_content = fs::read_to_string(&file_path)?; let configuration = deser_hjson::from_str(&file_content); ``` If the configuration file is invalid or doesn't match the expected type, the error details what was expected and the precise location of the problem. ## Example ```rust use { deser_hjson::*, serde::Deserialize, std::collections::HashMap, }; // This Hjson document comes from https://hjson.github.io/ let hjson = r#" // use #, // or /**/ for comments, // omit quotes for keys key: 1 // omit quotes for strings contains: everything on this line // omit commas at the end of a line cool: { foo: 1 bar: 2 } // allow trailing commas list: [ 1, 2, ] // and use multiline strings realist: ''' My half empty glass, I will fill your empty half. Now you are half full. ''' "#; // we'll deserialize it into this struct: #[derive(Deserialize, PartialEq, Debug)] struct Example { key: i32, contains: Option<String>, cool: HashMap<String, u16>, list: Vec<usize>, realist: String, missing: Option<f64>, } let mut cool = HashMap::new(); cool.insert("foo".to_owned(), 1); cool.insert("bar".to_owned(), 2); let expected = Example { key: 1, contains: Some("everything on this line".to_owned()), cool, list: vec![1, 2], realist: "My half empty glass,\nI will fill your empty half.\nNow you are half full.".to_owned(), missing: None, }; // Here's the deserialization and the equality check: assert_eq!(expected, from_str(hjson).unwrap()); ``` ## Known open-source usages * [Broot](https://dystroy.org/broot) can be configured either with TOML or with Hjson (the selection is dynamic, based on the file extension). * [lemmy](https://github.com/LemmyNet/lemmy) is configured in Hjson * [Resc](https://github.com/Canop/resc) can be configured either with JSON or with Hjson ## FAQ ### Does it work with JSON ? Yes, as any JSON file can be read as Hjson. ### Why only a derive-based deserializer?
Guessing the types in a format with implicit typing is way too dangerous. When your user typed `false`, was it a string or a boolean ? When she typed `3`, was it a string or a number ? While [not as crazy as YAML](https://hitchdev.com/strictyaml/why/implicit-typing-removed/), Hjson has no internal guard for this, and thus should only be deserialized into explicit types. ### Why a deserializer and no serializer? Hjson isn't a data exchange format. It's intended to be written by humans, be full of comments and with a meaningful formatting. While serializers would make sense in some context, they would have to be template based, or offer other means to specify comments and formatting, and serde isn't the right tool for that. deser-hjson-2.2.4/bacon.toml000064400000000000000000000033241046102023000140330ustar 00000000000000# This is a configuration file for the bacon tool # # Bacon repository: https://github.com/Canop/bacon # Complete help on configuration: https://dystroy.org/bacon/config/ default_job = "check" [jobs.check] command = ["cargo", "check", "--color", "always"] need_stdout = false [jobs.check-all] command = ["cargo", "check", "--all-targets", "--color", "always"] need_stdout = false [jobs.clippy] command = [ "cargo", "clippy", "--color", "always", "--", "-A", "clippy::vec_init_then_push", ] need_stdout = false [jobs.test] command = [ "cargo", "test", "--color", "always", "--", "--color", "always", # see https://github.com/Canop/bacon/issues/124 ] need_stdout = true [jobs.doc] command = ["cargo", "doc", "--color", "always", "--no-deps"] need_stdout = false # If the doc compiles, then it opens in your browser and bacon switches # to the previous job [jobs.doc-open] command = ["cargo", "doc", "--color", "always", "--no-deps", "--open"] need_stdout = false on_success = "back" # so that we don't open the browser at each change # You can run your application and have the result displayed in bacon, # *if* it makes sense for this crate.
You can run an example the same # way. Don't forget the `--color always` part or the errors won't be # properly parsed. # If you want to pass options to your program, a `--` separator # will be needed. [jobs.run] command = [ "cargo", "run", "--color", "always" ] need_stdout = true allow_warnings = true # You may define here keybindings that would be specific to # a project, for example a shortcut to launch a specific job. # Shortcuts to internal functions (scrolling, toggling, etc.) # should go in your personal global prefs.toml file instead. [keybindings] # alt-m = "job:my-job" deser-hjson-2.2.4/benches/parse.rs000064400000000000000000000025251046102023000151450ustar 00000000000000use { deser_hjson::from_str, serde:: Deserialize, glassbench::*, }; static GIFTS: &[&str] = &[ "{gift:null}", "{gift:false}", "{gift: true}", "{gift:'bar'}", r#"{gift:"bar"}"#, "{gift:42}", "{gift:42457811247}", "{gift:-42}", r#"{gift: "abcㅈ"}"#, "{gift:[15, -50]}", "{gift:[\"abc\"]}", r#"{gift:["abc", "another string"]}"#, r#" { gift: [ "abc", "another string" and a third one (unquoted) ] }"#, "{gift:''}", ]; #[derive(Deserialize, PartialEq, Debug)] #[serde(untagged)] enum Guess { Bool(bool), U8(u8), I8(i8), U16(u16), I16(i16), U32(u32), I32(i32), U64(u64), I64(i64), F64(f64), Char(char), String(Option), U16Array(Vec), I16Array(Vec), StrArray(Vec), } #[derive(Deserialize, PartialEq, Debug)] struct WrappedGuess { gift: Guess, } fn bench_parse(bench: &mut Bench) { bench.task("guess wrapped", |task| { task.iter(|| { for hjson in GIFTS { let guessed = from_str::(hjson) .unwrap_or_else(|e| panic!("Parsing failed for {:?} : {}", hjson, e)); pretend_used(guessed); } }); }); } glassbench!( "Parse", bench_parse, ); deser-hjson-2.2.4/src/de.rs000064400000000000000000000726301046102023000136070ustar 00000000000000//! A Hjson deserializer. //! 
use { crate::{ de_enum::*, de_map::*, de_number::*, de_seq::*, error::{ Error, ErrorCode::{self, *}, Result, }, utf8::*, }, serde::de::{self, IntoDeserializer, Visitor}, }; /// The deserializer. You normally don't call it directly /// but use the `from_str` function available at crate's level. pub struct Deserializer<'de> { // the complete string we received src: &'de str, // where we're at, in bytes pos: usize, // Make it possible to avoid reading a string as a quoteless // string when a key map is waited for (for example in // { // key: value // } // ) so that the key doesn't go til the end of the line. pub(crate) accept_quoteless_value: bool, } impl<'de> Deserializer<'de> { pub fn from_str(src: &'de str) -> Self { Deserializer { src, pos: 0, accept_quoteless_value: true, } } /// Compute the number of lines and columns to current pos. /// First line and first col are of index 1. #[cold] fn location(&self) -> (usize, usize) { let (mut line, mut col) = (1, 1); for ch in self.src[..self.pos].chars() { if ch == '\n' { col = 1; line += 1; } else { col += 1; } } (line, col) } fn col(&self) -> usize { let mut p = self.pos; loop { if p == 0 { break; } let b = self.src.as_bytes()[p]; if b == b'\r' || b == b'\n' { break; } p -= 1; } self.pos - p } /// build a syntax error #[cold] pub(crate) fn err(&self, code: ErrorCode) -> Error { let (line, col) = self.location(); // we'll show the next 15 chars in the error message let at = self.input().chars().take(15).collect(); Error::Syntax { line, col, code, at, } } /// convert a serde raised error into one with precise location #[cold] pub(crate) fn cook_err(&self, err: Error) -> Result { match err { Error::RawSerde(message) => { let (line, col) = self.location(); // we have no real idea where Serde found the problem // so we write the position but not the characters around Err(Error::Serde { line, col, message, }) } e => Err(e), } } #[cold] pub(crate) fn fail(&self, code: ErrorCode) -> Result { Err(self.err(code)) } /// return an 
error if there's more than just spaces /// and comments in the remaining input pub fn check_all_consumed(&mut self) -> Result<()> { self.eat_shit().ok(); if self.input().is_empty() { Ok(()) } else { self.fail(TrailingCharacters) } } /// what remains to be parsed (including the /// character we peeked at, if any) #[inline(always)] pub(crate) fn input(&self) -> &'de str { &self.src[self.pos..] } /// takes all remaining characters #[inline(always)] pub(crate) fn take_all(&mut self) -> &'de str { let s = &self.src[self.pos..]; self.pos = self.src.len(); s } /// return the next code point and its byte size, without advancing the cursor // adapted from https://doc.rust-lang.org/src/core/str/validations.rs.html #[inline] fn peek_code_point(&self) -> Result<(u32, usize)> { let bytes = self.src.as_bytes(); if self.pos >= bytes.len() { return self.fail(Eof); } // As we start from an already verified UTF8 str, and a valid position, // we can safely assume the bytes here are consistent with an UTF8 string let x = bytes[self.pos]; if x < 128 { return Ok(((x as u32), 1)); } // Decode from a byte combination out of: [[[x y] z] w] let init = utf8_first_byte(x, 2); // SAFETY bytes assumed valid utf8 let y = unsafe { *bytes.get_unchecked(self.pos+1) }; let mut ch = utf8_acc_cont_byte(init, y); if x >= 0xE0 { // [[x y z] w] case // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid let z = unsafe { *bytes.get_unchecked(self.pos+2) }; let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z); ch = init << 12 | y_z; if x >= 0xF0 { // [x y z w] case // use only the lower 3 bits of `init` let w = unsafe { *bytes.get_unchecked(self.pos+3) }; ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w); Ok((ch, 4)) } else { Ok((ch, 3)) } } else { Ok((ch, 2)) } } /// return the next byte (or an error on EOF). 
/// There's no guarantee the byte is a whole char #[inline] pub(crate) fn peek_byte(&self) -> Result { let bytes = self.src.as_bytes(); if self.pos >= bytes.len() { self.fail(Eof) } else { Ok(bytes[self.pos]) } } /// Return the next byte (at position pos). As it advances the cursor, /// caller MUST throw an error if the byte isn't a valid full character. #[inline] pub(crate) fn next_byte(&mut self) -> Result { let bytes = self.src.as_bytes(); if self.pos >= bytes.len() { self.fail(Eof) } else { let b = bytes[self.pos]; self.pos += 1; Ok(b) } } /// Look at the first character in the input without consuming it. #[inline] pub(crate) fn peek_char(&self) -> Result { self.peek_code_point() .map(|(code, _)| unsafe { char::from_u32_unchecked(code) }) } /// Consume the first character in the input. #[inline] pub(crate) fn next_char(&mut self) -> Result { let (code, len) = self.peek_code_point()?; self.pos += len; let ch = unsafe { char::from_u32_unchecked(code) }; Ok(ch) } /// read bytes_count bytes of a string. /// /// The validity of pos + bytes_count as a valid UTF8 position must /// have been checked before. #[inline] pub(crate) fn take_str(&mut self, bytes_count: usize) -> Result<&str> { if self.src.len() >= self.pos + bytes_count { let pos = self.pos; self.pos += bytes_count; Ok(&self.src[pos..pos + bytes_count]) } else { self.fail(Eof) } } /// if the next bytes are s, then advance its length and return true /// otherwise return false. 
/// We do a comparison with a &[u8] to avoid the risk of trying read /// at arbitrary positions and fall between valid UTF8 positions #[inline] pub(crate) fn try_read(&mut self, s: &[u8]) -> bool { #[allow(clippy::collapsible_if)] if self.src.len() >= self.pos + s.len() { if &self.src.as_bytes()[self.pos..self.pos + s.len()] == s { self.pos += s.len(); return true; } } false } /// return the `len` first bytes of the input, without checking anything /// (assuming it has been done) nor consuming anything #[inline] pub(crate) fn start(&self, len: usize) -> &'de str { &self.src[self.pos..self.pos + len] } /// remove the next character (which is assumed to be ch) #[inline] pub(crate) fn drop(&mut self, ch: char) { self.advance(ch.len_utf8()); } /// advance the cursor (assuming bytes_count is consistent with chars) #[inline] pub(crate) fn advance(&mut self, bytes_count: usize) { self.pos += bytes_count; } /// tells whether the next tree bytes are `'''` which /// is the start or end of a multiline string literal in Hjson #[inline] fn is_at_triple_quote(&self) -> bool { self.src.len() >= self.pos + 3 && &self.src[self.pos..self.pos + 3] == "'''" } #[inline] fn eat_line(&mut self) -> Result<()> { self.accept_quoteless_value = true; let bytes = self.src.as_bytes(); unsafe { for i in self.pos..bytes.len() { if *bytes.get_unchecked(i) == b'\n' { self.advance(i - self.pos + 1); return Ok(()); } } } self.fail(Eof) } #[inline] pub(crate) fn eat_until_star_slash(&mut self) -> Result<()> { match self.input().find("*/") { Some(len) => { self.advance(len + 2); Ok(()) } None => self.fail(Eof), } } /// consume spaces, new lines, comments, and stop before /// first interesting char #[inline] pub(crate) fn eat_shit(&mut self) -> Result<()> { let mut last_is_slash = false; loop { match self.peek_byte()? 
{ b'#' => { self.eat_line()?; last_is_slash = false; } b'*' => { if last_is_slash { self.eat_until_star_slash()?; } else { self.advance(1); } last_is_slash = false; } b'/' => { if last_is_slash { self.eat_line()?; last_is_slash = false; } else { self.advance(1); last_is_slash = true; } } b'\n' => { self.accept_quoteless_value = true; self.advance(1); last_is_slash = false; } b' ' | b'\t'| b'\x0C' | b'\r' => { // Hjson whitespaces self.advance(1); last_is_slash = false; } _ => { if last_is_slash { // we don't consume the /: it's the start of a string self.pos -= 1; } return Ok(()); } } } } pub(crate) fn eat_shit_and(&mut self, mut including: Option) -> Result<()> { let mut last_is_slash = false; loop { let ch = self.peek_char()?; match ch { '#' => { self.eat_line()?; last_is_slash = false; } '*' => { if last_is_slash { self.eat_until_star_slash()?; } else { self.advance(1); } last_is_slash = false; } '/' => { if last_is_slash { self.eat_line()?; last_is_slash = false; } else { self.advance(1); last_is_slash = true; } } '\n' => { self.accept_quoteless_value = true; self.advance(1); last_is_slash = false; } _ if including == Some(ch) => { self.drop(ch); including = None; last_is_slash = false; } _ if ch.is_whitespace() => { self.drop(ch); last_is_slash = false; } _ => { if last_is_slash { self.pos -= 1; } return Ok(()); } } } } /// Parse the JSON identifier `true` or `false`. 
fn parse_bool(&mut self) -> Result { self.eat_shit()?; if self.try_read(b"true") { Ok(true) } else if self.try_read(b"false") { Ok(false) } else { self.fail(ExpectedBoolean) } } /// read the characters of the coming integer, without parsing the /// resulting string #[inline] fn read_integer(&mut self, unsigned: bool) -> Result<&'de str> { // parsing could be done in the same loop but then I would have // to handle overflow self.eat_shit()?; let bytes = self.src.as_bytes(); for (idx, b) in bytes.iter().skip(self.pos).enumerate() { match b { b'-' if unsigned => { return self.fail(ExpectedPositiveInteger); } b'-' if idx > 0 => { return self.fail(UnexpectedChar); } b'0'..=b'9' | b'-' => { // if it's too long, this will be handled at conversion } _ => { let s = self.start(idx); self.advance(idx); // we keep the last char return Ok(s); } } } Ok(self.take_all()) } /// read the characters of the coming floating point number, without parsing #[inline] fn read_float(&mut self) -> Result<&'de str> { self.eat_shit()?; let bytes = &self.src.as_bytes()[self.pos..]; for (idx, b) in bytes.iter().enumerate() { match b { b'0'..=b'9' | b'-' | b'+' | b'.' | b'e' | b'E' => { // if it's invalid, this will be handled at conversion } _ => { let s = self.start(idx); self.advance(idx); // we keep the last char return Ok(s); } } } Ok(self.take_all()) } /// Parse a string until the next unescaped quote #[inline] fn parse_quoted_string(&mut self) -> Result { let mut s = String::new(); let starting_quote = self.next_char()?; loop { let mut c = self.next_char()?; if c == starting_quote { break; } else if c == '\\' { c = match self.next_byte()? { b'\"' => '\"', b'\'' => '\'', b'\\' => '\\', b'/' => '/', b'b' => '\x08', // why did they put this in JSON ? b'f' => '\x0c', // and this one ?! b'n' => '\n', b'r' => '\r', b't' => '\t', b'u' => { self.take_str(4).ok() .and_then(|s| u32::from_str_radix(s, 16).ok()) .and_then(std::char::from_u32) .ok_or_else(|| self.err(InvalidEscapeSequence))? 
} _ => { return self.fail(InvalidEscapeSequence); } }; } s.push(c); } Ok(s) } /// Parse a string until end of line fn parse_quoteless_str(&mut self) -> Result<&'de str> { for (idx, ch) in self.input().char_indices() { if ch == '\r' || ch == '\n' { let s = self.start(idx); self.advance(idx + 1); return Ok(s.trim_end()); } } Ok(self.take_all().trim_end()) } /// Parse a string until the next triple quote. fn parse_multiline_string(&mut self) -> Result { let indent = self.col() - 1; self.advance(3); // consume the triple quote // if the multiline string starts on the same line // than the triple quote, we must ignore the leading // spaces loop { let b = self.peek_byte()?; match b { b'\n' => { self.advance(1); break; } b' ' | b'\t'| b'\x0C' | b'\r' => { self.advance(1); } _ => { break; } } } // we then loop on lines let mut v = String::new(); let mut rem = indent; // the number of leading spaces we remove while let Ok(ch) = self.next_char() { match ch { '\'' if self.src.as_bytes()[self.pos] == b'\'' && self.src.as_bytes()[self.pos+1] == b'\'' => { self.advance(2); // the 2 other quotes v.truncate(v.trim_end_matches(|c| c=='\n' || c=='\r').len()); // trimming \n at end return Ok(v); } '\n' => { v.push(ch); rem = indent; } '\r' => { // a \r not followed by a \n is probably not // valid but I'm not sure an error would be // more useful here than silently ignoring it } ' ' | '\t'| '\x0C' => { if rem > 0 { rem -= 1; } else { v.push(ch); } } _ => { rem = 0; v.push(ch); } } } self.fail(Eof) // it's not legal to not have the triple quotes } /// Parse an identifier without quotes: /// - map key /// - enum variant fn parse_quoteless_identifier(&mut self) -> Result<&'de str> { self.eat_shit()?; for (idx, ch) in self.input().char_indices() { match ch { ',' | '[' | ']' | '{' | '}' | ':' | '\r'| '\n' => { let s = self.start(idx); self.advance(idx); return Ok(s); } ' ' | '\t' => { let s = self.start(idx); self.advance(idx + 1); return Ok(s); } _ => {} } } Ok(self.take_all()) } /// 
/// parse a string which may be a value
    /// (i.e. not an map key or variant identifier )
    ///
    /// Whatever the outcome, quoteless values are accepted again
    /// after this call.
    fn parse_string_value(&mut self) -> Result<String> {
        self.eat_shit()?;
        let first = self.peek_byte()?;
        let parsed = match first {
            b',' | b':' | b'[' | b']' | b'{' | b'}' => self.fail(UnexpectedChar),
            b'\'' if self.is_at_triple_quote() => self.parse_multiline_string(),
            b'"' | b'\'' => self.parse_quoted_string(),
            // no quote: the string goes till the end of the line, unless
            // we're where a quoteless value isn't acceptable
            _ if self.accept_quoteless_value => self.parse_quoteless_str().map(String::from),
            _ => self.parse_quoteless_identifier().map(String::from),
        };
        self.accept_quoteless_value = true;
        parsed
    }

    /// parse a map key or a variant identifier, quoted or not
    #[inline]
    fn parse_identifier(&mut self) -> Result<String> {
        self.eat_shit()?;
        let first = self.peek_byte()?;
        // a quoteless string can be accepted *after* the current
        // identifier, hence the flag being set before the parsing
        self.accept_quoteless_value = true;
        match first {
            b',' | b':' | b'[' | b']' | b'{' | b'}' => self.fail(UnexpectedChar),
            b'"' | b'\'' => self.parse_quoted_string(),
            _ => self.parse_quoteless_identifier().map(String::from),
        }
    }

    /// Braceless Hjson: same than usual but not within { and },
    /// can only be for the whole document
    fn deserialize_braceless_map<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let mut reader = MapReader::braceless(self);
        reader.braceless = true;
        match visitor.visit_map(reader) {
            Ok(value) => Ok(value),
            // give a location to the errors raised by serde
            Err(e) => self.cook_err(e),
        }
    }
}

impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
    type Error = Error;

    /// Guess the type from the first significant byte: quote, digit or
    /// minus sign, bracket, brace, then `null`, `true`, `false`, and
    /// finally a string for anything else.
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        self.eat_shit()?;
        match self.peek_byte()? {
            b'"' | b'\'' => self.deserialize_string(visitor),
            b'0'..=b'9' | b'-' => {
                let guessed = Number::read(self)?;
                guessed.visit(self, visitor)
            }
            b'[' => self.deserialize_seq(visitor),
            b'{' => self.deserialize_map(visitor),
            _ => {
                if self.try_read(b"null") {
                    visitor.visit_none()
                } else if self.try_read(b"true") {
                    visitor.visit_bool(true)
                } else if self.try_read(b"false") {
                    visitor.visit_bool(false)
                } else {
                    let s = self.parse_string_value()?;
                    visitor.visit_string(s)
                }
            }
        }
    }

    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let b = self.parse_bool()?;
        visitor.visit_bool(b)
    }

    // For all numbers, the characters are first read, then converted
    // with the std parser, whose failure is reported as a syntax
    // error located just after those characters.

    fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let digits = self.read_integer(false)?;
        let n = digits.parse::<i8>().map_err(|_| self.err(ExpectedI8))?;
        visitor.visit_i8(n)
    }

    fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let digits = self.read_integer(false)?;
        let n = digits.parse::<i16>().map_err(|_| self.err(ExpectedI16))?;
        visitor.visit_i16(n)
    }

    fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let digits = self.read_integer(false)?;
        let n = digits.parse::<i32>().map_err(|_| self.err(ExpectedI32))?;
        visitor.visit_i32(n)
    }

    fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let digits = self.read_integer(false)?;
        let n = digits.parse::<i64>().map_err(|_| self.err(ExpectedI64))?;
        visitor.visit_i64(n)
    }

    fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let digits = self.read_integer(true)?;
        let n = digits.parse::<u8>().map_err(|_| self.err(ExpectedU8))?;
        visitor.visit_u8(n)
    }

    fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let digits = self.read_integer(true)?;
        let n = digits.parse::<u16>().map_err(|_| self.err(ExpectedU16))?;
        visitor.visit_u16(n)
    }

    fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let digits = self.read_integer(true)?;
        let n = digits.parse::<u32>().map_err(|_| self.err(ExpectedU32))?;
        visitor.visit_u32(n)
    }

    fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let digits = self.read_integer(true)?;
        let n = digits.parse::<u64>().map_err(|_| self.err(ExpectedU64))?;
        visitor.visit_u64(n)
    }

    fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let chars = self.read_float()?;
        let x = chars.parse::<f32>().map_err(|_| self.err(ExpectedF32))?;
        visitor.visit_f32(x)
    }

    fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let chars = self.read_float()?;
        let x = chars.parse::<f64>().map_err(|_| self.err(ExpectedF64))?;
        visitor.visit_f64(x)
    }

    /// The char is the first one of the string value, which
    /// mustn't be empty.
    fn deserialize_char<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let s = self.parse_string_value()?;
        match s.chars().next() {
            Some(c) => visitor.visit_char(c),
            None => self.fail(ExpectedSingleChar),
        }
    }

    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        // we can't always borrow strs from the source as it's not possible
        // when there's an escape sequence. So str are parsed as strings.
        self.deserialize_string(visitor)
    }

    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let s = self.parse_string_value()?;
        visitor.visit_string(s)
    }

    // bytes are read as a sequence

    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        self.deserialize_seq(visitor)
    }

    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        self.deserialize_seq(visitor)
    }

    /// `null` is `None`, anything else is `Some`
    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        self.eat_shit()?;
        let is_null = self.try_read(b"null");
        if is_null {
            visitor.visit_none()
        } else {
            visitor.visit_some(self)
        }
    }

    // In Serde, unit means an anonymous value containing no data.
    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        self.eat_shit()?;
        if !self.try_read(b"null") {
            return self.fail(ExpectedNull);
        }
        visitor.visit_unit()
    }

    // Unit struct means a named value containing no data.
fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result where V: Visitor<'de>, { self.eat_shit()?; visitor.visit_newtype_struct(self) } fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { self.eat_shit()?; if self.next_byte()? == b'[' { let value = visitor.visit_seq(SeqReader::new(self))?; if self.next_byte()? == b']' { Ok(value) } else { self.fail(ExpectedArrayEnd) } } else { self.fail(ExpectedArray) } } fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_seq(visitor) } fn deserialize_tuple_struct( self, _name: &'static str, _len: usize, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_seq(visitor) } fn deserialize_map(self, visitor: V) -> Result where V: Visitor<'de>, { let on_start = self.pos == 0; if let Err(e) = self.eat_shit() { if on_start && e.is_eof() { return self.deserialize_braceless_map(visitor); } else { return Err(e); } } if self.peek_byte()? == b'{' { self.advance(1); let value = match visitor.visit_map(MapReader::within_braces(self)) { Ok(v) => v, Err(e) => { return self.cook_err(e); } }; self.eat_shit()?; if self.next_byte()? == b'}' { Ok(value) } else { self.fail(ExpectedMapEnd) } } else if on_start { self.deserialize_braceless_map(visitor) } else { self.fail(ExpectedMap) } } fn deserialize_struct( self, _name: &'static str, _fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_map(visitor) } fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { self.eat_shit()?; match self.peek_byte()? { b'"' | b'\'' => { // Visit a unit variant. 
visitor.visit_enum(self.parse_quoted_string()?.into_deserializer()) } b'{' => { self.advance(1); // Visit a newtype variant, tuple variant, or struct variant. let value = visitor.visit_enum(EnumReader::new(self))?; self.eat_shit()?; if self.next_byte()? == b'}' { Ok(value) } else { self.fail(ExpectedMapEnd) } } _ => { visitor.visit_enum(self.parse_quoteless_identifier()?.into_deserializer()) } } } fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_string(self.parse_identifier()?) } fn deserialize_ignored_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_any(visitor) } } deser-hjson-2.2.4/src/de_enum.rs000064400000000000000000000051411046102023000146240ustar 00000000000000use { crate::{ de::Deserializer, error::{Error, ErrorCode::*, Result}, }, serde::de::{self, DeserializeSeed, EnumAccess, VariantAccess, Visitor}, }; pub struct EnumReader<'a, 'de: 'a> { de: &'a mut Deserializer<'de>, } impl<'a, 'de> EnumReader<'a, 'de> { pub fn new(de: &'a mut Deserializer<'de>) -> Self { EnumReader { de } } } // `EnumAccess` is provided to the `Visitor` to give it the ability to determine // which variant of the enum is supposed to be deserialized. // // Note that all enum deserialization methods in Serde refer exclusively to the // "externally tagged" enum representation. impl<'de, 'a> EnumAccess<'de> for EnumReader<'a, 'de> { type Error = Error; type Variant = Self; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> where V: DeserializeSeed<'de>, { // The `deserialize_enum` method parsed a `{` character so we are // currently inside of a map. The seed will be deserializing itself from // the key of the map. let val = seed.deserialize(&mut *self.de)?; self.de.eat_shit()?; if self.de.next_byte()? 
== b':' { Ok((val, self)) } else { self.de.fail(ExpectedMapColon) } } } // `VariantAccess` is provided to the `Visitor` to give it the ability to see // the content of the single variant that it decided to deserialize. impl<'de, 'a> VariantAccess<'de> for EnumReader<'a, 'de> { type Error = Error; // If the `Visitor` expected this variant to be a unit variant, the input // should have been the plain string case handled in `deserialize_enum`. fn unit_variant(self) -> Result<()> { self.de.fail(ExpectedString) } // Newtype variants are represented in JSON as `{ NAME: VALUE }` so // deserialize the value here. fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { seed.deserialize(self.de) } // Tuple variants are represented in JSON as `{ NAME: [DATA...] }` so // deserialize the sequence of data here. fn tuple_variant(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, { de::Deserializer::deserialize_seq(self.de, visitor) } // Struct variants are represented in JSON as `{ NAME: { K: V, ... } }` so // deserialize the inner map here. fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result where V: Visitor<'de>, { de::Deserializer::deserialize_map(self.de, visitor) } } deser-hjson-2.2.4/src/de_map.rs000064400000000000000000000050001046102023000144270ustar 00000000000000use { crate::{ de::Deserializer, error::{Error, ErrorCode::*, Result}, }, serde::de::{DeserializeSeed, MapAccess}, }; pub struct MapReader<'a, 'de: 'a> { de: &'a mut Deserializer<'de>, /// if braceless is true, the map may be closed by an eof instead of a '}' pub braceless: bool, } impl<'a, 'de> MapReader<'a, 'de> { pub fn braceless(de: &'a mut Deserializer<'de>) -> Self { MapReader { de, braceless: true } } pub fn within_braces(de: &'a mut Deserializer<'de>) -> Self { MapReader { de, braceless: false } } } // `MapAccess` is provided to the `Visitor` to give it the ability to iterate // through entries of the map. 
impl<'de, 'a> MapAccess<'de> for MapReader<'a, 'de> { type Error = Error; /// read a map key and the following colon fn next_key_seed(&mut self, seed: K) -> Result> where K: DeserializeSeed<'de>, { if let Err(e) = self.de.eat_shit_and(Some(',')) { if !self.braceless || !e.is_eof() { return Err(e); } } match self.de.peek_byte() { Ok(b'}') => { return Ok(None); } Err(e) => { if e.is_eof() && self.braceless { return Ok(None); } else { return Err(e); } } _ => {} } // Here's there's a problem: if the key is a string it should be // parsed as an identifier but serde will call deserialize_string. // The problem here is that I thus can't accept colons in quoteless // strings, even when not in a identifier location :\ self.de.accept_quoteless_value = false; let v = seed.deserialize(&mut *self.de)?; self.de.eat_shit()?; if self.de.next_byte()? == b':' { Ok(Some(v)) } else { self.de.fail(ExpectedMapColon) } } /// read a map value and eat the optional comma which may follow it fn next_value_seed(&mut self, seed: V) -> Result where V: DeserializeSeed<'de>, { self.de.eat_shit()?; match seed.deserialize(&mut *self.de) { Err(e) => self.de.cook_err(e), Ok(v) => { if let Err(e) = self.de.eat_shit_and(Some(',')) { if !self.braceless || !e.is_eof() { return Err(e); } } Ok(v) } } } } deser-hjson-2.2.4/src/de_number.rs000064400000000000000000000042641046102023000151550ustar 00000000000000use { crate::{ de::Deserializer, error::{ErrorCode::*, Result}, }, serde::de::Visitor, }; /// an intermediate representation of number which /// are read into undefinite types pub(crate) struct Number<'de> { negative: bool, s: &'de str, has_float_chars: bool, } impl<'de> Number<'de> { /// read the characters of the coming floating point number, without parsing. 
/// The sign at the start is assumed to have been already read pub fn read<'a>( de: &'a mut Deserializer<'de>, ) -> Result { de.eat_shit()?; let mut negative = false; let mut has_float_chars = false; for (idx, ch) in de.input().char_indices() { match ch { '0'..='9' => { } '-' if idx == 0 => { negative = true; } '-' | '+' | '.' | 'e' | 'E' => { has_float_chars = true; } _ => { let s = de.start(idx); de.advance(idx); // we keep the last char return Ok(Self { negative, s, has_float_chars }); } } } let s = de.take_all(); Ok(Self { negative, s, has_float_chars }) } /// deserialize into a relevant number type pub fn visit<'a, V>( &self, de: &'a mut Deserializer<'de>, visitor: V, ) -> Result where V: Visitor<'de>, { if self.has_float_chars { // this is a floating point number (or an error) let v: f64 = self.s.parse() .map_err(|_| de.err(ExpectedF64))?; visitor.visit_f64(v) } else if self.negative { // this is a negative integer (or an error) let v: i64 = self.s.parse() .map_err(|_| de.err(ExpectedI64))?; visitor.visit_i64(v) } else { // this is a positive integer (or a number) let v: u64 = self.s.parse() .map_err(|_| de.err(ExpectedU64))?; visitor.visit_u64(v) } } } deser-hjson-2.2.4/src/de_seq.rs000064400000000000000000000020301046102023000144420ustar 00000000000000use { crate::{ de::Deserializer, error::{Error, Result}, }, serde::de::{DeserializeSeed, SeqAccess}, }; /// an implementation of serde's SeqAccess interface which /// is used to deserialize arrays pub struct SeqReader<'a, 'de: 'a> { de: &'a mut Deserializer<'de>, } impl<'a, 'de> SeqReader<'a, 'de> { pub fn new(de: &'a mut Deserializer<'de>) -> Self { SeqReader { de } } } // `SeqAccess` is provided to the `Visitor` to give it the ability to iterate // through elements of the sequence. 
impl<'de, 'a> SeqAccess<'de> for SeqReader<'a, 'de> { type Error = Error; /// read an array item and eat the optional comma which may follow it fn next_element_seed(&mut self, seed: T) -> Result> where T: DeserializeSeed<'de>, { self.de.eat_shit()?; if self.de.peek_byte()? == b']' { return Ok(None); } let v = seed.deserialize(&mut *self.de)?; self.de.eat_shit_and(Some(','))?; Ok(Some(v)) } } deser-hjson-2.2.4/src/error.rs000064400000000000000000000053721046102023000143470ustar 00000000000000use { serde::de, std::{ fmt, io, str::Utf8Error, }, }; pub type Result = std::result::Result; /// The types of errors which can happen in our code /// during deserialization #[derive(Debug, Clone, PartialEq)] pub enum ErrorCode { Eof, ExpectedBoolean, ExpectedInteger, ExpectedI8, ExpectedI16, ExpectedI32, ExpectedI64, ExpectedU8, ExpectedU16, ExpectedU32, ExpectedU64, ExpectedF32, ExpectedF64, ExpectedPositiveInteger, ExpectedString, ExpectedNull, ExpectedArray, ExpectedArrayComma, ExpectedArrayEnd, ExpectedMap, ExpectedMapColon, ExpectedMapComma, ExpectedMapEnd, ExpectedEnum, ExpectedSingleChar, InvalidEscapeSequence, TrailingCharacters, UnexpectedChar, } #[derive(Debug)] #[non_exhaustive] pub enum Error { /// a Hjson syntax error raised in our code, /// with location Syntax { line: usize, col: usize, // in chars (tab is one char) code: ErrorCode, at: String, // next few chars }, /// A Serde error, with approximate location Serde { line: usize, col: usize, // in chars (tab is one char) message: String, }, /// a raw Serde error. We should try to /// convert them to Serde located errors as /// much as possible RawSerde(String), /// an UTF8 error, raised when using from_slice /// with an invalid UTF8 slice Utf8(Utf8Error), /// an IO error, raised when using from_reader Io(io::Error), } impl Error { pub fn is_eof(&self) -> bool { matches!(self, Error::Syntax { code: ErrorCode::Eof, .. 
}) } } impl de::Error for Error { fn custom(msg: T) -> Self { Error::RawSerde(msg.to_string()) } } impl From for Error { fn from(source: Utf8Error) -> Self { Self::Utf8(source) } } impl From for Error { fn from(source: io::Error) -> Self { Self::Io(source) } } impl fmt::Display for Error { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { match self { Self::Syntax { line, col, code, at } => { write!(formatter, "{:?} at {}:{} at {:?}", code, line, col, at) } Self::Serde { line, col, message } => { write!(formatter, "{:?} near {}:{}", message, line, col) } Self::RawSerde(msg) => { write!(formatter, "error message: {:?}", msg) } Self::Utf8(source) => { source.fmt(formatter) } Self::Io(source) => { source.fmt(formatter) } } } } impl std::error::Error for Error {} deser-hjson-2.2.4/src/lib.rs000064400000000000000000000064521046102023000137640ustar 00000000000000/*! A Serde deserializer for Hjson ``` use { deser_hjson::*, serde::Deserialize, std::collections::HashMap, }; // This example comes from https://hjson.github.io/ let hjson = r#" // use #, // or /**/ for comments, // omit quotes for keys key: 1 // omit quotes for strings contains: everything on this line // omit commas at the end of a line cool: { foo: 1 bar: 2 } // allow trailing commas list: [ 1, 2, ] // and use multiline strings realist: ''' My half empty glass, I will fill your empty half. Now you are half full. 
''' "#; // we'll deserialize it into this struct: #[derive(Deserialize, PartialEq, Debug)] struct Example { key: i32, contains: Option, cool: HashMap, list: Vec, realist: String, missing: Option, } let mut cool = HashMap::new(); cool.insert("foo".to_owned(), 1); cool.insert("bar".to_owned(), 2); let expected = Example { key: 1, contains: Some("everything on this line".to_owned()), cool, list: vec![1, 2], realist: "My half empty glass,\nI will fill your empty half.\nNow you are half full.".to_owned(), missing: None, }; assert_eq!(expected, from_str(hjson).unwrap()); ``` */ mod de; mod de_enum; mod de_map; mod de_number; mod de_seq; mod error; mod utf8; pub use error::*; /// Deserialize an instance of type `T` from a reader of Hjson text /// /// # Example /// /// ``` /// use serde::Deserialize; /// use std::io::Cursor; /// /// #[derive(Deserialize, Debug)] /// struct User { /// fingerprint: String, /// location: String, /// } /// /// // The type of `j` is `Cursor` which implements the `Read` trait /// let j = Cursor::new(" /// fingerprint: 0xF9BA143B95FF6D82 /// location: Menlo Park, CA /// "); /// /// let u: User = deser_hjson::from_reader(j).unwrap(); /// println!("{:#?}", u); /// ``` pub fn from_reader(mut reader: R) -> Result where R: std::io::Read, T: serde::de::DeserializeOwned, { let mut buf = Vec::new(); reader.read_to_end(&mut buf)?; from_slice(&buf) } /// Deserialize an instance of type `T` from bytes of Hjson text /// /// # Example /// /// ``` /// use serde::Deserialize; /// /// #[derive(Deserialize, Debug)] /// struct User { /// fingerprint: String, /// location: String, /// } /// /// // The type of `j` is `&[u8]` /// let j = b" /// fingerprint: 0xF9BA143B95FF6D82 /// location: Menlo Park, CA /// "; /// /// let u: User = deser_hjson::from_slice(j).unwrap(); /// println!("{:#?}", u); /// ``` pub fn from_slice(bytes: &[u8]) -> Result where T: serde::de::DeserializeOwned, { let s = std::str::from_utf8(bytes)?; from_str(s) } /// Deserialize an instance of 
// UTF8 decoding helpers, following the functions of the same names
// in the Rust standard library.

/// Mask of the value bits of a continuation byte.
pub(crate) const CONT_MASK: u8 = 0b0011_1111;

/// Returns the initial codepoint accumulator for the first byte.
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
/// for width 3, and 3 bits for width 4.
#[inline]
pub(crate) const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let value_bits: u8 = 0x7F >> width;
    (byte & value_bits) as u32
}

/// Returns the value of `ch` updated with continuation byte `byte`.
#[inline]
pub(crate) const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
    let payload = (byte & CONT_MASK) as u32;
    (ch << 6) | payload
}
// allows writing vo!["a", "b"] to build a vec of strings:
// every literal is converted with to_owned
macro_rules! vo {
    ($($item:literal),* $(,)?) => {
        vec![$($item.to_owned()),*]
    }
}

// allows writing mo!{"a":"b", "c":"d"} to build a map of strings to strings.
// HashMap must be in scope where the macro is called
macro_rules! mo {
    ($($key:literal:$value:literal),* $(,)?) => {
        HashMap::from([$(($key.to_owned(), $value.to_owned())),*])
    }
}
from_str(j).unwrap()); let j = r#" { Tuple : [ # Tuple variant 1 2 ] } "#; let expected = E::Tuple(1, 2); assert_eq!(expected, from_str(j).unwrap()); let j = r#" { # this variant is explitely defined Struct: {a:1} }"#; let expected = E::Struct { a: 1 }; assert_eq!(expected, from_str(j).unwrap()); } #[test] fn test_quoteless_tag_variant() { #[derive(Deserialize, PartialEq, Debug)] enum E { A, B, } let hjson = "B\n"; assert_eq!(E::B, from_str(hjson).unwrap()); let hjson = "B"; assert_eq!(E::B, from_str(hjson).unwrap()); #[derive(Deserialize, PartialEq, Debug)] struct S { e: E, } let hjson = r#"{ e: B }"#; assert_eq!(S{e:E::B}, from_str(hjson).unwrap()); let hjson = r#"{"e": "B"}"#; assert_eq!(S{e:E::B}, from_str(hjson).unwrap()); let hjson = "{e:B}"; assert_eq!(S{e:E::B}, from_str(hjson).unwrap()); } #[test] fn test_arr_struct_untagged() { // this enum is untagged: the variant is automatically recognized #[derive(Deserialize, PartialEq, Debug)] #[serde(untagged)] enum Untagged { Int(u16), Float(f32), String(String), Array(Vec), } #[derive(Deserialize, PartialEq, Debug)] struct InnerThing { name: String, untagged: Untagged, } #[derive(Deserialize, PartialEq, Debug)] struct OuterThing { outer_name: String, items: Vec, } let hjson = r#" { outer_name: the thing items: [ { name: first item untagged: "xterm -e \"nvim {file}\"" } { name: "also an \"item\"" untagged: ["bla", "et", "bla"] } { name: third untagged: 4 } { name: fourth untagged: 4.3 } ] } "#; let outer_thing = OuterThing { outer_name: "the thing".to_owned(), items: vec![ InnerThing { name: "first item".to_owned(), untagged: Untagged::String("xterm -e \"nvim {file}\"".to_string()), }, InnerThing { name: r#"also an "item""#.to_owned(), untagged: Untagged::Array(vo!["bla", "et", "bla"]), }, InnerThing { name: "third".to_owned(), untagged: Untagged::Int(4), }, InnerThing { name: "fourth".to_owned(), untagged: Untagged::Float(4.3), }, ], }; assert_eq!(outer_thing, from_str::(hjson).unwrap()); } 
deser-hjson-2.2.4/tests/guess.rs000064400000000000000000000070421046102023000147130ustar 00000000000000use { deser_hjson::from_str, serde:: Deserialize, }; #[macro_use] mod common; #[derive(Deserialize, PartialEq, Debug)] #[serde(untagged)] enum Guess { Bool(bool), U8(u8), I8(i8), U16(u16), I16(i16), U32(u32), I32(i32), U64(u64), I64(i64), F64(f64), Char(char), String(Option), U16Array(Vec), I16Array(Vec), StrArray(Vec), } fn string(s: &str) -> Guess { Guess::String(Some(s.to_owned())) } fn guess(hjson: &str, answer: Guess) { let guessed = from_str::(hjson) .unwrap_or_else(|e| panic!("Parsing failed for {:?} : {}", hjson, e)); if guessed != answer { panic!("Wrong guess for {:?} : guessed {:?} instead of {:?}", hjson, guessed, answer); } } #[derive(Deserialize, PartialEq, Debug)] struct WrappedGuess { gift: Guess, } fn guess_wrapped(hjson: &str, answer: Guess) { let wrapped = from_str::(hjson) .unwrap_or_else(|e| panic!("Parsing failed for {:?} : {}", hjson, e)); let guessed = wrapped.gift; if guessed != answer { panic!("Wrong guess for {:?} : guessed {:?} instead of {:?}", hjson, guessed, answer); } } /// test precise primitive type guessing. /// Note to users: be cautious with this, guessing types is /// dangerous as Hjson is inherently ambiguous. 
#[test]
fn test_guess_type() {
    // Every Hjson text comes with the guess it must lead to.
    // The pairs are checked in order, the first wrong one panics.
    let cases = vec![
        ("false", Guess::Bool(false)),
        ("-45", Guess::I8(-45)),
        ("45", Guess::U8(45)),
        ("453", Guess::U16(453)),
        ("-15453", Guess::I16(-15453)),
        ("39453", Guess::U16(39453)),
        ("-39453", Guess::I32(-39453)),
        ("139453", Guess::U32(139453)),
        ("34359738368", Guess::U64(34359738368)),
        ("-34359738368", Guess::I64(-34359738368)),
        ("-34e3", Guess::F64(-34000.0)),
        ("45.1", Guess::F64(45.1)),
        ("a", Guess::Char('a')),
        ("abcㅈ", string("abcㅈ")),
        ("\"abc\"", string("abc")),
        ("'abc'", string("abc")),
        ("''", string("")),
        ("\"\"", string("")),
        ("null", Guess::String(None)),
        ("[15, 50]", Guess::U16Array(vec![15, 50])),
        ("[15, -50]", Guess::I16Array(vec![15, -50])),
        ("[\"abc\"]", Guess::StrArray(vo!["abc"])),
        ("[\"\"]", Guess::StrArray(vo![""])),
    ];
    for (hjson, answer) in cases {
        guess(hjson, answer);
    }
}
de::Error, Deserialize, Deserializer, }, std::collections::HashMap, }; #[macro_use] mod common; // this example tries to test all the hard things of Hjson #[test] fn test_struct() { #[derive(PartialEq, Debug)] enum Enum { A, B, } // read "a" or "A" as A and "b" or "B" as B impl<'de> Deserialize<'de> for Enum { fn deserialize(deserializer: D) -> std::result::Result where D: Deserializer<'de> { let s = String::deserialize(deserializer)?; let s = s.to_lowercase(); match s.as_ref() { "a" => Ok(Enum::A), "b" => Ok(Enum::B), _ => Err(D::Error::custom(format!("unrecognized enum variant: {:?}", s))), } } } #[derive(Deserialize, PartialEq, Debug)] struct Test { int: i32, float: f64, txt1: Option, txt2: Option, txt3: String, seq: Vec, enum_map: HashMap, numbers1: Vec, numbers2: Vec, } let hjson = r#" { # Hjson accepts several types of comments. /** * even the ugly java ones! * @WhatAmIDoingHere */ // quotes around keys are optional "int": -1 # this comment goes to end of line txt2: a quoteless string : with a colon! 
txt3: ''' you can have multiline strings and they're free of unexpected spacing ''' // Hjson accepts trailing commas seq : [ another quoteless string "b1\nb2", "c", ] enum_map: { "some key" : a "another key" : B } # order of keys doesn't matter and you can # have a single value after a map float: -5.7 numbers1: [ 559999, 87, 45,], numbers2: [ -32 876 -111 582 ] } "#; let mut enum_map = HashMap::new(); enum_map.insert("some key".to_owned(), Enum::A); enum_map.insert("another key".to_owned(), Enum::B); let expected = Test { int: -1, float: -5.7, txt1: None, txt2: Some("a quoteless string : with a colon!".to_owned()), txt3: "you can have multiline strings\nand they're free of unexpected spacing".to_owned(), seq: vo!["another quoteless string", "b1\nb2", "c"], enum_map, numbers1: vec![559999, 87, 45], numbers2: vec![-32, 876, -111, 582], }; assert_eq!(expected, deser_hjson::from_str(hjson).unwrap()); } deser-hjson-2.2.4/tests/multiline_strings.rs000064400000000000000000000041761046102023000173450ustar 00000000000000use { deser_hjson::from_str, serde:: Deserialize, }; #[macro_use] mod common; #[test] fn test_weird_multiline_strings() { #[derive(Deserialize, PartialEq, Debug)] struct W { a: String, b: String, c: String, d: String, e: String, f: String, } let hjson = r#"{ a: '''all on the key line''' b: '''line 1 line 2''' c: ''' line 1 line 2''' d: ''' line 1 line 2 line 3''' e: ''' line 1 line 2 ''' f: ''' line 1 line 2 line 3 ''' } "#; let value = W { a: "all on the key line".to_string(), b: "line 1\n line 2".to_string(), c: "line 1\nline 2".to_string(), d: "line 1\nline 2\nline 3".to_string(), e: " line 1\n line 2\n ".to_string(), f: "line 1\n line 2\nline 3".to_string(), }; assert_eq!(value, from_str(hjson).unwrap()); } /// check issue #19 https://github.com/Canop/deser-hjson/issues/19 #[test] fn issue_19() { #[derive(Deserialize, PartialEq, Debug)] struct W { title: String, repo: String, target: String, prefix: String, meta_image: String, description: String, 
#[test]
fn test_quoteless_key() {
    // Each string below is used, without quotes, as both the key and
    // the value of an entry, one entry per line:
    //   s1:s1
    //   s2:s2
    // The whole must parse as a map even though the strings
    // contain special characters
    let strings = [
        "this-one-is-easy",
        r#"@?;'"\/."#, // see https://github.com/Canop/deser-hjson/issues/9
        "abcd",
        "l'éléphant",
        "a=\"a\"",
        "z''''''",
        "こんにちわ",
    ];
    let mut hjson = String::new();
    for &entry in strings.iter() {
        writeln!(&mut hjson, "{}:{}", entry, entry).unwrap();
    }
    println!("Hjson:\n{}", &hjson);
    let map: HashMap<String, String> = deser_hjson::from_str(&hjson).unwrap();
    for &entry in strings.iter() {
        assert_eq!(map.get(entry).unwrap(), entry);
    }
}
/// Every input below is invalid for `Data`: the failure must be either
/// a syntax error or a located serde error, never a raw serde error
#[test]
fn test_no_raw_serde_error() {
    use deser_hjson::Error as HjsonError;

    #[derive(Deserialize, PartialEq, Debug)]
    struct Data {
        name: String,
        pos: Vec<Pos>,
    }
    #[derive(Deserialize, PartialEq, Debug)]
    enum Pos {
        Int(u8),
        Point(u8, u8),
    }
    let hjson_strings = [
        r#"{}"#,
        r#"[]"#,
        r#""#,
        r#"a"#,
        r#"{name: "albert", name: "alfred"}"#,
        r#"{pos: "not a pos"}"#,
        r#"{name: "", pos: [{3}]}"#,
        r#"{name: "", pos: [{(3, 4)}]}"#,
    ];
    for hjson in hjson_strings.iter() {
        let error = match deser_hjson::from_str::<Data>(hjson) {
            Ok(_) => panic!("Unexpected Success deserializing {:?}", hjson),
            Err(error) => error,
        };
        match error {
            // the two kinds of located errors are the accepted ones
            HjsonError::Serde { .. } | HjsonError::Syntax { .. } => {}
            e @ HjsonError::RawSerde(_) => panic!("Unexpected Raw Serde Error: {:?}", e),
            e @ HjsonError::Utf8(_) => panic!("Unexpected Utf8 Error: {:?}", e),
            e @ HjsonError::Io(_) => panic!("Unexpected Io Error: {:?}", e),
            // the enum is non exhaustive
            e => panic!("Unexpected Unknown Error: {:?}", e),
        }
    }
}
Bool { ping: true }); let b: Bool = deser_hjson::from_str(r#"ping: true"#).unwrap(); assert_eq!(b, Bool { ping: true }); } /// cf https://github.com/Canop/deser-hjson/issues/20 #[test] fn issue_20() { #[derive(Deserialize, PartialEq, Debug)] struct Thing { dirs: Vec, } let hjson = "{\n dirs: [\n ]\n}"; let _: Thing = deser_hjson::from_str(hjson).unwrap(); } deser-hjson-2.2.4/tests/strings.rs000064400000000000000000000027421046102023000152600ustar 00000000000000use { deser_hjson::from_str, serde:: Deserialize, std::collections::HashMap, }; #[macro_use] mod common; #[test] fn test_string() { #[derive(Deserialize, PartialEq, Debug)] struct W { c: String, } assert_eq!(W{c:"test".to_string()}, from_str("{c:test\n}").unwrap()); assert_eq!(W{c:"test".to_string()}, from_str("{c:\"test\"}").unwrap()); assert_eq!(W{c:"test".to_string()}, from_str("{c:'test'}").unwrap()); assert_eq!(W{c:"test".to_string()}, from_str("{'c':'test'}").unwrap()); assert_eq!(W{c:"test".to_string()}, from_str("{\"c\":'test'}").unwrap()); assert_eq!( W {c:"xterm -e \"vi /some/path\"".to_string()}, from_str(r#"{ c: "xterm -e \"vi /some/path\"" }"#).unwrap(), ); assert_eq!(W{c:"\x0C\x0C".to_string()}, from_str("{c:\"\\f\\u000C\"}").unwrap()); } #[test] fn test_weird_map_keys() { #[derive(Deserialize, PartialEq, Debug)] struct W { map: HashMap, } let hjson = r#"{ map: { : 0 // π: 3.14 τ: 6.28 'τ/2': π /: slash // hard one \: "" // no trap here } }"#; let value = W { map: mo!{ "": "0", "τ": "6.28", "τ/2": "π", "/": "slash // hard one", // quoteless string values go til line end "\\": "", }, }; assert_eq!(value, from_str(hjson).unwrap()); } deser-hjson-2.2.4/tests/trailing_chars.rs000064400000000000000000000017671046102023000165660ustar 00000000000000use { serde::{ Deserialize, }, }; #[macro_use] mod common; /// check we fixed the bug #1 /// ("1" was resulting in an EOF) #[test] fn test_dont_need_trailing_spaces() { #[derive(Debug, Deserialize)] struct T {} deser_hjson::from_str::("{}").unwrap(); 
deser_hjson::from_str::("1").unwrap(); deser_hjson::from_str::("1e-3").unwrap(); deser_hjson::from_str::("-1.3").unwrap(); deser_hjson::from_str::>("[]").unwrap(); } #[test] fn test_accept_trailing_spaces() { #[derive(Debug, Deserialize)] struct T {} deser_hjson::from_str::("{} ").unwrap(); deser_hjson::from_str::("1 ").unwrap(); deser_hjson::from_str::("1e-3 ").unwrap(); } #[test] fn test_choke_on_trailing_chars() { #[derive(Debug, Deserialize)] struct T {} assert!(deser_hjson::from_str::("{} e ").is_err()); assert!(deser_hjson::from_str::("1 -").is_err()); assert!(deser_hjson::from_str::("1e-3 e").is_err()); } deser-hjson-2.2.4/tests/trim_quoteless.rs000064400000000000000000000021341046102023000166410ustar 00000000000000use { deser_hjson::from_str, serde:: Deserialize, std::collections::HashMap, }; #[macro_use] mod common; /// Check that preceding and trailing whitespaces in /// quoteless strings are ignored #[test] fn preceding_and_trailing_whitespaces_in_quoteless() { #[derive(Deserialize, PartialEq, Debug)] struct W { map: HashMap, single: String, arr: Vec, } let hjson = "{\n\ map: {\n\ \" \": 0 \n\ pi: 3.14 \t\n\ τ:\t\t\t6.28 \n\ 'τ/2': π \n\ /: some tabs\t\t\t\n\ }, single: \t z -. \n\ arr: [\n\ \t bah\n\ zz \n\ ]\n\ }"; let w: W = from_str(hjson).unwrap(); dbg!(&w); let value = W { map: mo!{ "τ/2": "π", "/": "some tabs", " ": "0", "pi": "3.14", "τ": "6.28", }, single: "z -.".to_string(), arr: vo![ "bah", "zz", ], }; assert_eq!(value, w); }