protobuf-support-3.1.0/.cargo_vcs_info.json

{
  "git": {
    "sha1": "c65c7d5c57c478504bd3f1f06c33d140abcff101"
  },
  "path_in_vcs": "protobuf-support"
}

protobuf-support-3.1.0/Cargo.toml

# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2021"
name = "protobuf-support"
version = "3.1.0"
authors = ["Stepan Koltsov <stepan.koltsov@gmail.com>"]
description = """
Code supporting protobuf implementation.
None of the code in this crate is public API.
"""
homepage = "https://github.com/stepancheg/rust-protobuf/"
documentation = "https://github.com/stepancheg/rust-protobuf/blob/master/README.md"
readme = "README.md"
license = "MIT"
repository = "https://github.com/stepancheg/rust-protobuf/"
resolver = "2"

[package.metadata.docs.rs]
all-features = true

[lib]
bench = false

[dependencies.thiserror]
version = "1.0.30"

[features]

protobuf-support-3.1.0/Cargo.toml.orig

[package]
name = "protobuf-support"
version = "3.1.0"
authors = ["Stepan Koltsov <stepan.koltsov@gmail.com>"]
edition = "2021"
license = "MIT"
homepage = "https://github.com/stepancheg/rust-protobuf/"
repository = "https://github.com/stepancheg/rust-protobuf/"
documentation = "https://github.com/stepancheg/rust-protobuf/blob/master/README.md"
description = """
Code supporting protobuf implementation.
None of the code in this crate is public API.
"""

[lib]
bench = false

[features]

[dependencies]
thiserror = "1.0.30"

[package.metadata.docs.rs]
all-features = true

protobuf-support-3.1.0/README.md

# Supporting code for protobuf crates

Code in this crate is used in protobuf crates like `protobuf` or `protobuf-parse`.

None of the code in this crate has a public API.
protobuf-support-3.1.0/src/json_name.rs

/// Implementation must match exactly
/// `ToJsonName()` function in C++ `descriptor.cc`.
pub fn json_name(input: &str) -> String {
    let mut capitalize_next = false;
    let mut result = String::with_capacity(input.len());

    for c in input.chars() {
        if c == '_' {
            capitalize_next = true;
        } else if capitalize_next {
            result.extend(c.to_uppercase());
            capitalize_next = false;
        } else {
            result.push(c);
        }
    }

    result
}
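// Editor's note: a minimal usage sketch, not part of the original crate.
// `json_name` mirrors protobuf's `ToJsonName()`: each underscore is dropped
// and the character that follows it is upper-cased, e.g. `foo_bar_baz`
// becomes `fooBarBaz`.
#[cfg(test)]
mod example {
    use super::json_name;

    #[test]
    fn json_name_examples() {
        assert_eq!("fooBarBaz", json_name("foo_bar_baz"));
        // A trailing underscore sets `capitalize_next` with nothing left to
        // capitalize, so it simply disappears.
        assert_eq!("fooBar", json_name("foo_bar_"));
    }
}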
#[error("Incorrect float literal")] IncorrectFloatLit, #[error("Incorrect JSON escape")] IncorrectJsonEscape, #[error("Incorrect JSON number")] IncorrectJsonNumber, #[error("Incorrect Unicode character")] IncorrectUnicodeChar, #[error("Expecting hex digit")] ExpectHexDigit, #[error("Expecting oct digit")] ExpectOctDigit, #[error("Expecting dec digit")] ExpectDecDigit, #[error(transparent)] StrLitDecodeError(#[from] StrLitDecodeError), #[error("Expecting identifier")] ExpectedIdent, } pub type LexerResult = Result; impl From for LexerError { fn from(_: ParseIntError) -> Self { LexerError::ParseIntError } } impl From for LexerError { fn from(_: ParseFloatError) -> Self { LexerError::ParseFloatError } } impl From for LexerError { fn from(_: ProtobufFloatParseError) -> Self { LexerError::IncorrectFloatLit } } #[derive(Copy, Clone)] pub struct Lexer<'a> { language: ParserLanguage, input: &'a str, pos: usize, pub loc: Loc, } fn is_letter(c: char) -> bool { c.is_alphabetic() || c == '_' } impl<'a> Lexer<'a> { pub fn new(input: &'a str, language: ParserLanguage) -> Lexer<'a> { Lexer { language, input, pos: 0, loc: Loc::start(), } } /// No more chars pub fn eof(&self) -> bool { self.pos == self.input.len() } /// Remaining chars fn rem_chars(&self) -> &'a str { &self.input[self.pos..] } pub fn lookahead_char_is bool>(&self, p: P) -> bool { self.lookahead_char().map_or(false, p) } fn lookahead_char_is_in(&self, alphabet: &str) -> bool { self.lookahead_char_is(|c| alphabet.contains(c)) } fn next_char_opt(&mut self) -> Option { let rem = self.rem_chars(); if rem.is_empty() { None } else { let mut char_indices = rem.char_indices(); let (_, c) = char_indices.next().unwrap(); let c_len = char_indices.next().map(|(len, _)| len).unwrap_or(rem.len()); self.pos += c_len; if c == '\n' { self.loc.line += 1; self.loc.col = FIRST_COL; } else { self.loc.col += 1; } Some(c) } } fn next_char(&mut self) -> LexerResult { self.next_char_opt().ok_or(LexerError::UnexpectedEof) } /// Skip whitespaces fn skip_whitespaces(&mut self) { self.take_while(|c| c.is_whitespace()); } fn skip_c_comment(&mut self) -> LexerResult<()> { if self.skip_if_lookahead_is_str("/*") { let end = "*/"; match self.rem_chars().find(end) { None => Err(LexerError::UnexpectedEof), Some(len) => { let new_pos = self.pos + len + end.len(); self.skip_to_pos(new_pos); Ok(()) } } } else { Ok(()) } } fn skip_cpp_comment(&mut self) { if self.skip_if_lookahead_is_str("//") { loop { match self.next_char_opt() { Some('\n') | None => break, _ => {} } } } } fn skip_sh_comment(&mut self) { if self.skip_if_lookahead_is_str("#") { loop { match self.next_char_opt() { Some('\n') | None => break, _ => {} } } } } fn skip_comment(&mut self) -> LexerResult<()> { match self.language { ParserLanguage::Proto => { self.skip_c_comment()?; self.skip_cpp_comment(); } ParserLanguage::TextFormat => { self.skip_sh_comment(); } ParserLanguage::Json => {} } Ok(()) } pub fn skip_ws(&mut self) -> LexerResult<()> { loop { let pos = self.pos; self.skip_whitespaces(); self.skip_comment()?; if pos == self.pos { // Did not advance return Ok(()); } } } pub fn take_while(&mut self, f: F) -> &'a str where F: Fn(char) -> bool, { let start = self.pos; while self.lookahead_char().map(&f) == Some(true) { self.next_char_opt().unwrap(); } let end = self.pos; &self.input[start..end] } fn lookahead_char(&self) -> Option { self.clone().next_char_opt() } fn lookahead_is_str(&self, s: &str) -> bool { self.rem_chars().starts_with(s) } fn skip_if_lookahead_is_str(&mut self, s: &str) -> bool { if 
protobuf-support-3.1.0/src/lexer/int.rs

pub struct Overflow;

/// Negate `u64` checking for overflow.
pub fn neg(value: u64) -> Result<i64, Overflow> {
    if value <= 0x7fff_ffff_ffff_ffff {
        Ok(-(value as i64))
    } else if value == 0x8000_0000_0000_0000 {
        Ok(-0x8000_0000_0000_0000)
    } else {
        Err(Overflow)
    }
}
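// Editor's note: a boundary-behavior sketch, not part of the original crate.
// `i64::MIN` (= -2^63) is representable even though 2^63 itself overflows
// `i64`, which is why `neg` special-cases 0x8000_0000_0000_0000.
#[cfg(test)]
mod example {
    use super::neg;

    #[test]
    fn neg_boundaries() {
        assert!(matches!(neg(1), Ok(-1)));
        assert!(matches!(neg(0x8000_0000_0000_0000), Ok(i64::MIN)));
        assert!(neg(0x8000_0000_0000_0001).is_err());
    }
}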
protobuf-support-3.1.0/src/lexer/json_number_lit.rs

use std::fmt;

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct JsonNumberLit(pub String);

impl fmt::Display for JsonNumberLit {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
protobuf-support-3.1.0/src/lexer/lexer_impl.rs

use std::char;
use std::convert::TryFrom;
use std::num::ParseFloatError;
use std::num::ParseIntError;

use crate::lexer::float;
use crate::lexer::float::ProtobufFloatParseError;
use crate::lexer::json_number_lit::JsonNumberLit;
use crate::lexer::loc::Loc;
use crate::lexer::loc::FIRST_COL;
use crate::lexer::parser_language::ParserLanguage;
use crate::lexer::str_lit::StrLit;
use crate::lexer::str_lit::StrLitDecodeError;
use crate::lexer::token::Token;
use crate::lexer::token::TokenWithLocation;

#[derive(Debug, thiserror::Error)]
pub enum LexerError {
    // TODO: something better than this
    #[error("Incorrect input")]
    IncorrectInput,
    #[error("Unexpected EOF")]
    UnexpectedEof,
    #[error("Expecting char: {:?}", .0)]
    ExpectChar(char),
    #[error("Parse int error")]
    ParseIntError,
    #[error("Parse float error")]
    ParseFloatError,
    // TODO: how is it different from ParseFloatError?
    #[error("Incorrect float literal")]
    IncorrectFloatLit,
    #[error("Incorrect JSON escape")]
    IncorrectJsonEscape,
    #[error("Incorrect JSON number")]
    IncorrectJsonNumber,
    #[error("Incorrect Unicode character")]
    IncorrectUnicodeChar,
    #[error("Expecting hex digit")]
    ExpectHexDigit,
    #[error("Expecting oct digit")]
    ExpectOctDigit,
    #[error("Expecting dec digit")]
    ExpectDecDigit,
    #[error(transparent)]
    StrLitDecodeError(#[from] StrLitDecodeError),
    #[error("Expecting identifier")]
    ExpectedIdent,
}

pub type LexerResult<T> = Result<T, LexerError>;

impl From<ParseIntError> for LexerError {
    fn from(_: ParseIntError) -> Self {
        LexerError::ParseIntError
    }
}

impl From<ParseFloatError> for LexerError {
    fn from(_: ParseFloatError) -> Self {
        LexerError::ParseFloatError
    }
}

impl From<ProtobufFloatParseError> for LexerError {
    fn from(_: ProtobufFloatParseError) -> Self {
        LexerError::IncorrectFloatLit
    }
}
#[derive(Copy, Clone)]
pub struct Lexer<'a> {
    language: ParserLanguage,
    input: &'a str,
    pos: usize,
    pub loc: Loc,
}

fn is_letter(c: char) -> bool {
    c.is_alphabetic() || c == '_'
}

impl<'a> Lexer<'a> {
    pub fn new(input: &'a str, language: ParserLanguage) -> Lexer<'a> {
        Lexer {
            language,
            input,
            pos: 0,
            loc: Loc::start(),
        }
    }

    /// No more chars
    pub fn eof(&self) -> bool {
        self.pos == self.input.len()
    }

    /// Remaining chars
    fn rem_chars(&self) -> &'a str {
        &self.input[self.pos..]
    }

    pub fn lookahead_char_is<P: FnOnce(char) -> bool>(&self, p: P) -> bool {
        self.lookahead_char().map_or(false, p)
    }

    fn lookahead_char_is_in(&self, alphabet: &str) -> bool {
        self.lookahead_char_is(|c| alphabet.contains(c))
    }

    fn next_char_opt(&mut self) -> Option<char> {
        let rem = self.rem_chars();
        if rem.is_empty() {
            None
        } else {
            let mut char_indices = rem.char_indices();
            let (_, c) = char_indices.next().unwrap();
            let c_len = char_indices.next().map(|(len, _)| len).unwrap_or(rem.len());
            self.pos += c_len;
            if c == '\n' {
                self.loc.line += 1;
                self.loc.col = FIRST_COL;
            } else {
                self.loc.col += 1;
            }
            Some(c)
        }
    }

    fn next_char(&mut self) -> LexerResult<char> {
        self.next_char_opt().ok_or(LexerError::UnexpectedEof)
    }

    /// Skip whitespaces
    fn skip_whitespaces(&mut self) {
        self.take_while(|c| c.is_whitespace());
    }

    fn skip_c_comment(&mut self) -> LexerResult<()> {
        if self.skip_if_lookahead_is_str("/*") {
            let end = "*/";
            match self.rem_chars().find(end) {
                None => Err(LexerError::UnexpectedEof),
                Some(len) => {
                    let new_pos = self.pos + len + end.len();
                    self.skip_to_pos(new_pos);
                    Ok(())
                }
            }
        } else {
            Ok(())
        }
    }

    fn skip_cpp_comment(&mut self) {
        if self.skip_if_lookahead_is_str("//") {
            loop {
                match self.next_char_opt() {
                    Some('\n') | None => break,
                    _ => {}
                }
            }
        }
    }

    fn skip_sh_comment(&mut self) {
        if self.skip_if_lookahead_is_str("#") {
            loop {
                match self.next_char_opt() {
                    Some('\n') | None => break,
                    _ => {}
                }
            }
        }
    }

    fn skip_comment(&mut self) -> LexerResult<()> {
        match self.language {
            ParserLanguage::Proto => {
                self.skip_c_comment()?;
                self.skip_cpp_comment();
            }
            ParserLanguage::TextFormat => {
                self.skip_sh_comment();
            }
            ParserLanguage::Json => {}
        }
        Ok(())
    }

    pub fn skip_ws(&mut self) -> LexerResult<()> {
        loop {
            let pos = self.pos;
            self.skip_whitespaces();
            self.skip_comment()?;
            if pos == self.pos {
                // Did not advance
                return Ok(());
            }
        }
    }

    pub fn take_while<F>(&mut self, f: F) -> &'a str
    where
        F: Fn(char) -> bool,
    {
        let start = self.pos;
        while self.lookahead_char().map(&f) == Some(true) {
            self.next_char_opt().unwrap();
        }
        let end = self.pos;
        &self.input[start..end]
    }

    fn lookahead_char(&self) -> Option<char> {
        self.clone().next_char_opt()
    }

    fn lookahead_is_str(&self, s: &str) -> bool {
        self.rem_chars().starts_with(s)
    }

    fn skip_if_lookahead_is_str(&mut self, s: &str) -> bool {
        if self.lookahead_is_str(s) {
            let new_pos = self.pos + s.len();
            self.skip_to_pos(new_pos);
            true
        } else {
            false
        }
    }
    fn next_char_if<P>(&mut self, p: P) -> Option<char>
    where
        P: FnOnce(char) -> bool,
    {
        let mut clone = self.clone();
        match clone.next_char_opt() {
            Some(c) if p(c) => {
                *self = clone;
                Some(c)
            }
            _ => None,
        }
    }

    pub fn next_char_if_eq(&mut self, expect: char) -> bool {
        self.next_char_if(|c| c == expect) != None
    }

    fn next_char_if_in(&mut self, alphabet: &str) -> Option<char> {
        for c in alphabet.chars() {
            if self.next_char_if_eq(c) {
                return Some(c);
            }
        }
        None
    }

    fn next_char_expect_eq(&mut self, expect: char) -> LexerResult<()> {
        if self.next_char_if_eq(expect) {
            Ok(())
        } else {
            Err(LexerError::ExpectChar(expect))
        }
    }

    fn next_char_expect<P>(&mut self, expect: P, err: LexerError) -> LexerResult<char>
    where
        P: FnOnce(char) -> bool,
    {
        self.next_char_if(expect).ok_or(err)
    }

    // str functions

    /// properly update line and column
    fn skip_to_pos(&mut self, new_pos: usize) -> &'a str {
        assert!(new_pos >= self.pos);
        assert!(new_pos <= self.input.len());
        let pos = self.pos;
        while self.pos != new_pos {
            self.next_char_opt().unwrap();
        }
        &self.input[pos..new_pos]
    }

    // Protobuf grammar

    // char functions

    // letter = "A" … "Z" | "a" … "z"
    // https://github.com/google/protobuf/issues/4565
    fn next_letter_opt(&mut self) -> Option<char> {
        self.next_char_if(is_letter)
    }

    // capitalLetter = "A" … "Z"
    fn _next_capital_letter_opt(&mut self) -> Option<char> {
        self.next_char_if(|c| c >= 'A' && c <= 'Z')
    }

    fn next_ident_part(&mut self) -> Option<char> {
        self.next_char_if(|c| c.is_ascii_alphanumeric() || c == '_')
    }

    // Identifiers

    // ident = letter { letter | decimalDigit | "_" }
    fn next_ident_opt(&mut self) -> LexerResult<Option<String>> {
        if let Some(c) = self.next_letter_opt() {
            let mut ident = String::new();
            ident.push(c);
            while let Some(c) = self.next_ident_part() {
                ident.push(c);
            }
            Ok(Some(ident))
        } else {
            Ok(None)
        }
    }

    // Integer literals

    // hexLit = "0" ( "x" | "X" ) hexDigit { hexDigit }
    fn next_hex_lit_opt(&mut self) -> LexerResult<Option<u64>> {
        Ok(
            if self.skip_if_lookahead_is_str("0x") || self.skip_if_lookahead_is_str("0X") {
                let s = self.take_while(|c| c.is_ascii_hexdigit());
                Some(u64::from_str_radix(s, 16)? as u64)
            } else {
                None
            },
        )
    }

    // decimalLit = ( "1" … "9" ) { decimalDigit }
    // octalLit   = "0" { octalDigit }
    fn next_decimal_octal_lit_opt(&mut self) -> LexerResult<Option<u64>> {
        // do not advance on number parse error
        let mut clone = self.clone();
        let pos = clone.pos;
        Ok(if clone.next_char_if(|c| c.is_ascii_digit()) != None {
            clone.take_while(|c| c.is_ascii_digit());
            let value = clone.input[pos..clone.pos].parse()?;
            *self = clone;
            Some(value)
        } else {
            None
        })
    }

    // hexDigit = "0" … "9" | "A" … "F" | "a" … "f"
    fn next_hex_digit(&mut self) -> LexerResult<u32> {
        let mut clone = self.clone();
        let r = match clone.next_char()? {
            c if c >= '0' && c <= '9' => c as u32 - b'0' as u32,
            c if c >= 'A' && c <= 'F' => c as u32 - b'A' as u32 + 10,
            c if c >= 'a' && c <= 'f' => c as u32 - b'a' as u32 + 10,
            _ => return Err(LexerError::ExpectHexDigit),
        };
        *self = clone;
        Ok(r)
    }

    // octalDigit = "0" … "7"
    // Editor's note: the range here originally read `'0'..='9'`, which
    // contradicts the grammar comment above; narrowed to octal digits.
    fn next_octal_digit(&mut self) -> LexerResult<u32> {
        self.next_char_expect(|c| c >= '0' && c <= '7', LexerError::ExpectOctDigit)
            .map(|c| c as u32 - '0' as u32)
    }

    // decimalDigit = "0" … "9"
    fn next_decimal_digit(&mut self) -> LexerResult<u32> {
        self.next_char_expect(|c| c >= '0' && c <= '9', LexerError::ExpectDecDigit)
            .map(|c| c as u32 - '0' as u32)
    }

    // decimals = decimalDigit { decimalDigit }
    fn next_decimal_digits(&mut self) -> LexerResult<()> {
        self.next_decimal_digit()?;
        self.take_while(|c| c >= '0' && c <= '9');
        Ok(())
    }

    // intLit = decimalLit | octalLit | hexLit
    pub fn next_int_lit_opt(&mut self) -> LexerResult<Option<u64>> {
        assert_ne!(ParserLanguage::Json, self.language);
        self.skip_ws()?;
        if let Some(i) = self.next_hex_lit_opt()? {
            return Ok(Some(i));
        }
        if let Some(i) = self.next_decimal_octal_lit_opt()? {
            return Ok(Some(i));
        }
        Ok(None)
    }
    // Floating-point literals

    // exponent = ( "e" | "E" ) [ "+" | "-" ] decimals
    fn next_exponent_opt(&mut self) -> LexerResult<Option<()>> {
        if self.next_char_if_in("eE") != None {
            self.next_char_if_in("+-");
            self.next_decimal_digits()?;
            Ok(Some(()))
        } else {
            Ok(None)
        }
    }

    // floatLit = ( decimals "." [ decimals ] [ exponent ]
    //            | decimals exponent
    //            | "." decimals [ exponent ] )
    //            | "inf" | "nan"
    fn next_float_lit(&mut self) -> LexerResult<()> {
        assert_ne!(ParserLanguage::Json, self.language);
        // "inf" and "nan" are handled as part of ident
        if self.next_char_if_eq('.') {
            self.next_decimal_digits()?;
            self.next_exponent_opt()?;
        } else {
            self.next_decimal_digits()?;
            if self.next_char_if_eq('.') {
                self.next_decimal_digits()?;
                self.next_exponent_opt()?;
            } else {
                if self.next_exponent_opt()? == None {
                    return Err(LexerError::IncorrectFloatLit);
                }
            }
        }
        Ok(())
    }

    // String literals

    // charValue = hexEscape | octEscape | charEscape | /[^\0\n\\]/
    // hexEscape = '\' ( "x" | "X" ) hexDigit hexDigit
    // https://github.com/google/protobuf/issues/4560
    // octEscape = '\' octalDigit octalDigit octalDigit
    // charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
    // quote = "'" | '"'
    pub fn next_byte_value(&mut self) -> LexerResult<u8> {
        match self.next_char()? {
            '\\' => {
                match self.next_char()? {
                    '\'' => Ok(b'\''),
                    '"' => Ok(b'"'),
                    '\\' => Ok(b'\\'),
                    'a' => Ok(b'\x07'),
                    'b' => Ok(b'\x08'),
                    'f' => Ok(b'\x0c'),
                    'n' => Ok(b'\n'),
                    'r' => Ok(b'\r'),
                    't' => Ok(b'\t'),
                    'v' => Ok(b'\x0b'),
                    'x' => {
                        let d1 = self.next_hex_digit()? as u8;
                        let d2 = self.next_hex_digit()? as u8;
                        Ok(((d1 << 4) | d2) as u8)
                    }
                    d if d >= '0' && d <= '7' => {
                        let mut r = d as u8 - b'0';
                        for _ in 0..2 {
                            match self.next_octal_digit() {
                                Err(_) => break,
                                Ok(d) => r = (r << 3) + d as u8,
                            }
                        }
                        Ok(r)
                    }
                    // https://github.com/google/protobuf/issues/4562
                    // TODO: overflow
                    c => Ok(c as u8),
                }
            }
            '\n' | '\0' => Err(LexerError::IncorrectInput),
            // TODO: check overflow
            c => Ok(c as u8),
        }
    }

    fn char_try_from(i: u32) -> LexerResult<char> {
        char::try_from(i).map_err(|_| LexerError::IncorrectUnicodeChar)
    }
{ '"' => Ok('"'), '\'' => Ok('\''), '\\' => Ok('\\'), '/' => Ok('/'), 'b' => Ok('\x08'), 'f' => Ok('\x0c'), 'n' => Ok('\n'), 'r' => Ok('\r'), 't' => Ok('\t'), 'u' => { let mut v = 0; for _ in 0..4 { let digit = self.next_hex_digit()?; v = v * 16 + digit; } Self::char_try_from(v) } _ => Err(LexerError::IncorrectJsonEscape), }, c => Ok(c), } } // https://github.com/google/protobuf/issues/4564 // strLit = ( "'" { charValue } "'" ) | ( '"' { charValue } '"' ) fn next_str_lit_raw(&mut self) -> LexerResult { let mut raw = String::new(); let mut first = true; loop { if !first { self.skip_ws()?; } let start = self.pos; let q = match self.next_char_if_in("'\"") { Some(q) => q, None if !first => break, None => return Err(LexerError::IncorrectInput), }; first = false; while self.lookahead_char() != Some(q) { self.next_byte_value()?; } self.next_char_expect_eq(q)?; raw.push_str(&self.input[start + 1..self.pos - 1]); } Ok(raw) } fn next_str_lit_raw_opt(&mut self) -> LexerResult> { if self.lookahead_char_is_in("'\"") { Ok(Some(self.next_str_lit_raw()?)) } else { Ok(None) } } /// Parse next token as JSON number fn next_json_number_opt(&mut self) -> LexerResult> { assert_eq!(ParserLanguage::Json, self.language); fn is_digit(c: char) -> bool { c >= '0' && c <= '9' } fn is_digit_1_9(c: char) -> bool { c >= '1' && c <= '9' } if !self.lookahead_char_is_in("-0123456789") { return Ok(None); } let mut s = String::new(); if self.next_char_if_eq('-') { s.push('-'); } if self.next_char_if_eq('0') { s.push('0'); } else { s.push(self.next_char_expect(is_digit_1_9, LexerError::IncorrectJsonNumber)?); while let Some(c) = self.next_char_if(is_digit) { s.push(c); } } if self.next_char_if_eq('.') { s.push('.'); s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?); while let Some(c) = self.next_char_if(is_digit) { s.push(c); } } if let Some(c) = self.next_char_if_in("eE") { s.push(c); if let Some(c) = self.next_char_if_in("+-") { s.push(c); } s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?); while let Some(c) = self.next_char_if(is_digit) { s.push(c); } } Ok(Some(JsonNumberLit(s))) } fn next_token_inner(&mut self) -> LexerResult { if self.language == ParserLanguage::Json { if let Some(v) = self.next_json_number_opt()? { return Ok(Token::JsonNumber(v)); } } if let Some(ident) = self.next_ident_opt()? { let token = if self.language != ParserLanguage::Json && ident == float::PROTOBUF_NAN { Token::FloatLit(f64::NAN) } else if self.language != ParserLanguage::Json && ident == float::PROTOBUF_INF { Token::FloatLit(f64::INFINITY) } else { Token::Ident(ident.to_owned()) }; return Ok(token); } if self.language != ParserLanguage::Json { let mut clone = self.clone(); let pos = clone.pos; if let Ok(_) = clone.next_float_lit() { let f = float::parse_protobuf_float(&self.input[pos..clone.pos])?; *self = clone; return Ok(Token::FloatLit(f)); } if let Some(lit) = self.next_int_lit_opt()? { return Ok(Token::IntLit(lit)); } } if let Some(escaped) = self.next_str_lit_raw_opt()? { return Ok(Token::StrLit(StrLit { escaped })); } // This branch must be after str lit if let Some(c) = self.next_char_if(|c| c.is_ascii_punctuation()) { return Ok(Token::Symbol(c)); } if let Some(ident) = self.next_ident_opt()? 
    pub fn next_token(&mut self) -> LexerResult<Option<TokenWithLocation>> {
        self.skip_ws()?;
        let loc = self.loc;

        Ok(if self.eof() {
            None
        } else {
            let token = self.next_token_inner()?;
            // Skip whitespace here to update location
            // to the beginning of the next token
            self.skip_ws()?;
            Some(TokenWithLocation { token, loc })
        })
    }
}

#[cfg(test)]
mod test {
    use super::*;

    fn lex<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Lexer) -> LexerResult<R>,
    {
        let mut lexer = Lexer::new(input, ParserLanguage::Proto);
        let r = parse_what(&mut lexer).expect(&format!("lexer failed at {}", lexer.loc));
        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
        r
    }

    fn lex_opt<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Lexer) -> LexerResult<Option<R>>,
    {
        let mut lexer = Lexer::new(input, ParserLanguage::Proto);
        let o = parse_what(&mut lexer).expect(&format!("lexer failed at {}", lexer.loc));
        let r = o.expect(&format!("lexer returned none at {}", lexer.loc));
        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
        r
    }

    #[test]
    fn test_lexer_int_lit() {
        let msg = r#"10"#;
        let mess = lex_opt(msg, |p| p.next_int_lit_opt());
        assert_eq!(10, mess);
    }

    #[test]
    fn test_lexer_float_lit() {
        let msg = r#"12.3"#;
        let mess = lex(msg, |p| p.next_token_inner());
        assert_eq!(Token::FloatLit(12.3), mess);
    }

    #[test]
    fn test_lexer_float_lit_leading_zeros_in_exp() {
        let msg = r#"1e00009"#;
        let mess = lex(msg, |p| p.next_token_inner());
        assert_eq!(Token::FloatLit(1_000_000_000.0), mess);
    }
}

protobuf-support-3.1.0/src/lexer/loc.rs

use std::fmt;

pub const FIRST_LINE: u32 = 1;
pub const FIRST_COL: u32 = 1;

/// Location in file
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct Loc {
    /// 1-based
    pub line: u32,
    /// 1-based
    pub col: u32,
}

impl fmt::Display for Loc {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}:{}", self.line, self.col)
    }
}

impl Loc {
    pub fn start() -> Loc {
        Loc {
            line: FIRST_LINE,
            col: FIRST_COL,
        }
    }
}

protobuf-support-3.1.0/src/lexer/mod.rs

//! Implementation of lexer for both protobuf parser and for text format parser.

pub mod float;
pub mod int;
pub mod json_number_lit;
pub mod lexer_impl;
pub mod loc;
pub mod num_lit;
pub mod parser_language;
pub mod str_lit;
pub mod token;
pub mod tokenizer;

protobuf-support-3.1.0/src/lexer/num_lit.rs

#[derive(Copy, Clone)]
pub enum NumLit {
    U64(u64),
    F64(f64),
}

protobuf-support-3.1.0/src/lexer/parser_language.rs

/// We use the same lexer/tokenizer for all parsers for simplicity
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParserLanguage {
    // `.proto` files
    Proto,
    // Protobuf text format
    TextFormat,
    // JSON
    Json,
}
protobuf-support-3.1.0/src/lexer/str_lit.rs

use std::fmt;
use std::string::FromUtf8Error;

use crate::lexer::lexer_impl::Lexer;
use crate::lexer::parser_language::ParserLanguage;

#[derive(Debug, thiserror::Error)]
pub enum StrLitDecodeError {
    #[error(transparent)]
    FromUtf8Error(#[from] FromUtf8Error),
    #[error("String literal decode error")]
    OtherError,
}

pub type StrLitDecodeResult<T> = Result<T, StrLitDecodeError>;

/// String literal, both `string` and `bytes`.
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct StrLit {
    pub escaped: String,
}

impl fmt::Display for StrLit {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "\"{}\"", &self.escaped)
    }
}

impl StrLit {
    /// May fail if not valid UTF8
    pub fn decode_utf8(&self) -> StrLitDecodeResult<String> {
        let mut lexer = Lexer::new(&self.escaped, ParserLanguage::Json);
        let mut r = Vec::new();
        while !lexer.eof() {
            r.push(
                lexer
                    .next_byte_value()
                    .map_err(|_| StrLitDecodeError::OtherError)?,
            );
        }
        Ok(String::from_utf8(r)?)
    }

    pub fn decode_bytes(&self) -> StrLitDecodeResult<Vec<u8>> {
        let mut lexer = Lexer::new(&self.escaped, ParserLanguage::Json);
        let mut r = Vec::new();
        while !lexer.eof() {
            r.push(
                lexer
                    .next_byte_value()
                    .map_err(|_| StrLitDecodeError::OtherError)?,
            );
        }
        Ok(r)
    }

    pub fn quoted(&self) -> String {
        format!("\"{}\"", self.escaped)
    }
}

#[cfg(test)]
mod test {
    use crate::lexer::str_lit::StrLit;

    #[test]
    fn decode_utf8() {
        assert_eq!(
            "\u{1234}".to_owned(),
            StrLit {
                escaped: "\\341\\210\\264".to_owned()
            }
            .decode_utf8()
            .unwrap()
        )
    }
}
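// Editor's note: a decoding sketch, not part of the original crate.
// `escaped` holds the raw text between the quotes; `decode_*` interprets the
// backslash escapes with the same lexer used by the text-format parser.
#[cfg(test)]
mod example {
    use super::StrLit;

    #[test]
    fn decode_escapes() {
        let lit = StrLit {
            escaped: "a\\x41\\n".to_owned(),
        };
        assert_eq!(b"aA\n".to_vec(), lit.decode_bytes().unwrap());
        assert_eq!("aA\n", lit.decode_utf8().unwrap());
    }
}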
protobuf-support-3.1.0/src/lexer/token.rs

use crate::lexer::json_number_lit::JsonNumberLit;
use crate::lexer::lexer_impl::LexerError;
use crate::lexer::lexer_impl::LexerResult;
use crate::lexer::loc::Loc;
use crate::lexer::num_lit::NumLit;
use crate::lexer::str_lit::StrLit;

#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    Ident(String),
    Symbol(char),
    // Protobuf tokenizer has separate tokens for int and float.
    // Tokens do not include sign.
    IntLit(u64),
    FloatLit(f64),
    JsonNumber(JsonNumberLit),
    // including quotes
    StrLit(StrLit),
}

impl Token {
    /// Back to original
    pub fn format(&self) -> String {
        match self {
            &Token::Ident(ref s) => s.clone(),
            &Token::Symbol(c) => c.to_string(),
            &Token::IntLit(ref i) => i.to_string(),
            &Token::StrLit(ref s) => s.quoted(),
            &Token::FloatLit(ref f) => f.to_string(),
            &Token::JsonNumber(ref f) => f.to_string(),
        }
    }

    pub fn to_num_lit(&self) -> LexerResult<NumLit> {
        match self {
            &Token::IntLit(i) => Ok(NumLit::U64(i)),
            &Token::FloatLit(f) => Ok(NumLit::F64(f)),
            _ => Err(LexerError::IncorrectInput),
        }
    }
}

#[derive(Clone)]
pub struct TokenWithLocation {
    pub token: Token,
    pub loc: Loc,
}

protobuf-support-3.1.0/src/lexer/tokenizer.rs

use crate::lexer::lexer_impl::Lexer;
use crate::lexer::lexer_impl::LexerError;
use crate::lexer::loc::Loc;
use crate::lexer::parser_language::ParserLanguage;
use crate::lexer::str_lit::StrLit;
use crate::lexer::str_lit::StrLitDecodeError;
use crate::lexer::token::Token;
use crate::lexer::token::TokenWithLocation;

#[derive(Debug, thiserror::Error)]
pub enum TokenizerError {
    #[error(transparent)]
    LexerError(#[from] LexerError),
    #[error(transparent)]
    StrLitDecodeError(#[from] StrLitDecodeError),
    #[error("Internal tokenizer error")]
    InternalError,
    // TODO: too broad
    #[error("Incorrect input")]
    IncorrectInput,
    #[error("Not allowed in this context: {0}")]
    NotAllowedInThisContext(&'static str),
    #[error("Unexpected end of input")]
    UnexpectedEof,
    #[error("Expecting string literal")]
    ExpectStrLit,
    #[error("Expecting int literal")]
    ExpectIntLit,
    #[error("Expecting float literal")]
    ExpectFloatLit,
    #[error("Expecting identifier")]
    ExpectIdent,
    #[error("Expecting identifier `{}`", .0)]
    ExpectNamedIdent(String),
    #[error("While parsing {}, expecting char `{}`", .1, .0)]
    ExpectChar(char, &'static str),
    #[error("Expecting any char of: {}", .0.iter().map(|c| format!("`{}`", c)).collect::<Vec<_>>().join(", "))]
    ExpectAnyChar(Vec<char>),
}

pub type TokenizerResult<R> = Result<R, TokenizerError>;

#[derive(Clone)]
pub struct Tokenizer<'a> {
    lexer: Lexer<'a>,
    next_token: Option<TokenWithLocation>,
    last_token_loc: Option<Loc>,
}

impl<'a> Tokenizer<'a> {
    pub fn new(input: &'a str, comment_style: ParserLanguage) -> Tokenizer<'a> {
        Tokenizer {
            lexer: Lexer::new(input, comment_style),
            next_token: None,
            last_token_loc: None,
        }
    }

    pub fn loc(&self) -> Loc {
        // After lookahead return the location of the next token
        self.next_token
            .as_ref()
            .map(|t| t.loc.clone())
            // After token consumed return the location of that token
            .or(self.last_token_loc.clone())
            // Otherwise return the position of lexer
            .unwrap_or(self.lexer.loc)
    }

    pub fn lookahead_loc(&mut self) -> Loc {
        drop(self.lookahead());
        // TODO: does not handle EOF properly
        self.loc()
    }

    fn lookahead(&mut self) -> TokenizerResult<Option<&Token>> {
        Ok(match self.next_token {
            Some(ref token) => Some(&token.token),
            None => {
                self.next_token = self.lexer.next_token()?;
                self.last_token_loc = self.next_token.as_ref().map(|t| t.loc.clone());
                match self.next_token {
                    Some(ref token) => Some(&token.token),
                    None => None,
                }
            }
        })
    }

    pub fn lookahead_some(&mut self) -> TokenizerResult<&Token> {
        match self.lookahead()? {
            Some(token) => Ok(token),
            None => Err(TokenizerError::UnexpectedEof),
        }
    }

    fn next(&mut self) -> TokenizerResult<Option<Token>> {
        self.lookahead()?;
        Ok(self
            .next_token
            .take()
            .map(|TokenWithLocation { token, .. }| token))
    }

    pub fn next_some(&mut self) -> TokenizerResult<Token> {
        match self.next()? {
            Some(token) => Ok(token),
            None => Err(TokenizerError::UnexpectedEof),
        }
    }

    /// Can be called only after lookahead, otherwise it's error
    pub fn advance(&mut self) -> TokenizerResult<Token> {
        self.next_token
            .take()
            .map(|TokenWithLocation { token, .. }| token)
            .ok_or(TokenizerError::InternalError)
    }

    /// No more tokens
    pub fn syntax_eof(&mut self) -> TokenizerResult<bool> {
        Ok(self.lookahead()?.is_none())
    }
    pub fn next_token_if_map<P, R>(&mut self, p: P) -> TokenizerResult<Option<R>>
    where
        P: FnOnce(&Token) -> Option<R>,
    {
        self.lookahead()?;
        let v = match self.next_token {
            Some(ref token) => match p(&token.token) {
                Some(v) => v,
                None => return Ok(None),
            },
            _ => return Ok(None),
        };
        self.next_token = None;
        Ok(Some(v))
    }

    pub fn next_token_check_map<P, R, E>(&mut self, p: P) -> Result<R, E>
    where
        P: FnOnce(&Token) -> Result<R, E>,
        E: From<TokenizerError>,
    {
        self.lookahead()?;
        let r = match self.next_token {
            Some(ref token) => p(&token.token)?,
            None => return Err(TokenizerError::UnexpectedEof.into()),
        };
        self.next_token = None;
        Ok(r)
    }

    fn next_token_if<P>(&mut self, p: P) -> TokenizerResult<Option<Token>>
    where
        P: FnOnce(&Token) -> bool,
    {
        self.next_token_if_map(|token| if p(token) { Some(token.clone()) } else { None })
    }
    pub fn next_ident_if_in(&mut self, idents: &[&str]) -> TokenizerResult<Option<String>> {
        let v = match self.lookahead()? {
            Some(&Token::Ident(ref next)) => {
                if idents.into_iter().find(|&i| i == next).is_some() {
                    next.clone()
                } else {
                    return Ok(None);
                }
            }
            _ => return Ok(None),
        };
        self.advance()?;
        Ok(Some(v))
    }

    pub fn next_ident_if_eq(&mut self, word: &str) -> TokenizerResult<bool> {
        Ok(self.next_ident_if_in(&[word])? != None)
    }

    pub fn next_ident_expect_eq(&mut self, word: &str) -> TokenizerResult<()> {
        if self.next_ident_if_eq(word)? {
            Ok(())
        } else {
            Err(TokenizerError::ExpectNamedIdent(word.to_owned()))
        }
    }

    pub fn next_ident_if_eq_error(&mut self, word: &'static str) -> TokenizerResult<()> {
        if self.clone().next_ident_if_eq(word)? {
            // TODO: which context?
            return Err(TokenizerError::NotAllowedInThisContext(word));
        }
        Ok(())
    }

    pub fn next_symbol_if_eq(&mut self, symbol: char) -> TokenizerResult<bool> {
        Ok(self.next_token_if(|token| match token {
            &Token::Symbol(c) if c == symbol => true,
            _ => false,
        })? != None)
    }

    pub fn next_symbol_expect_eq(
        &mut self,
        symbol: char,
        desc: &'static str,
    ) -> TokenizerResult<()> {
        if self.lookahead_is_symbol(symbol)? {
            self.advance()?;
            Ok(())
        } else {
            Err(TokenizerError::ExpectChar(symbol, desc))
        }
    }

    pub fn next_symbol_expect_eq_oneof(&mut self, symbols: &[char]) -> TokenizerResult<char> {
        for symbol in symbols {
            if let Ok(()) = self.next_symbol_expect_eq(*symbol, "ignored") {
                return Ok(*symbol);
            }
        }
        Err(TokenizerError::ExpectAnyChar(symbols.to_owned()))
    }
    pub fn lookahead_is_str_lit(&mut self) -> TokenizerResult<bool> {
        Ok(match self.lookahead()? {
            Some(&Token::StrLit(..)) => true,
            _ => false,
        })
    }

    pub fn lookahead_is_int_lit(&mut self) -> TokenizerResult<bool> {
        Ok(match self.lookahead()? {
            Some(&Token::IntLit(..)) => true,
            _ => false,
        })
    }

    pub fn lookahead_is_json_number(&mut self) -> TokenizerResult<bool> {
        Ok(match self.lookahead()? {
            Some(&Token::JsonNumber(..)) => true,
            _ => false,
        })
    }

    pub fn lookahead_if_symbol(&mut self) -> TokenizerResult<Option<char>> {
        Ok(match self.lookahead()? {
            Some(&Token::Symbol(c)) => Some(c),
            _ => None,
        })
    }

    pub fn lookahead_is_symbol(&mut self, symbol: char) -> TokenizerResult<bool> {
        Ok(self.lookahead_if_symbol()? == Some(symbol))
    }

    pub fn lookahead_is_ident(&mut self, ident: &str) -> TokenizerResult<bool> {
        Ok(match self.lookahead()? {
            Some(Token::Ident(i)) => i == ident,
            _ => false,
        })
    }

    pub fn next_ident(&mut self) -> TokenizerResult<String> {
        self.next_token_check_map(|token| match token {
            &Token::Ident(ref ident) => Ok(ident.clone()),
            _ => Err(TokenizerError::ExpectIdent),
        })
    }

    pub fn next_str_lit(&mut self) -> TokenizerResult<StrLit> {
        self.next_token_check_map(|token| match token {
            &Token::StrLit(ref str_lit) => Ok(str_lit.clone()),
            _ => Err(TokenizerError::ExpectStrLit),
        })
    }

    pub fn next_int_lit(&mut self) -> TokenizerResult<u64> {
        self.next_token_check_map(|token| match token {
            &Token::IntLit(v) => Ok(v),
            _ => Err(TokenizerError::ExpectIntLit),
        })
    }

    pub fn next_float_lit(&mut self) -> TokenizerResult<f64> {
        self.next_token_check_map(|token| match token {
            &Token::FloatLit(v) => Ok(v),
            _ => Err(TokenizerError::ExpectFloatLit),
        })
    }
}

#[cfg(test)]
mod test {
    use super::*;

    fn tokenize<P, R>(input: &str, what: P) -> R
    where
        P: FnOnce(&mut Tokenizer) -> TokenizerResult<R>,
    {
        let mut tokenizer = Tokenizer::new(input, ParserLanguage::Proto);
        let r = what(&mut tokenizer).expect(&format!("parse failed at {}", tokenizer.loc()));
        let eof = tokenizer
            .syntax_eof()
            .expect(&format!("check eof failed at {}", tokenizer.loc()));
        assert!(eof, "{}", tokenizer.loc());
        r
    }

    #[test]
    fn test_ident() {
        let msg = r#" aabb_c "#;
        let mess = tokenize(msg, |p| p.next_ident().map(|s| s.to_owned()));
        assert_eq!("aabb_c", mess);
    }

    #[test]
    fn test_str_lit() {
        let msg = r#" "a\nb" "#;
        let mess = tokenize(msg, |p| p.next_str_lit());
        assert_eq!(
            StrLit {
                escaped: r#"a\nb"#.to_owned()
            },
            mess
        );
    }
}
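// Editor's note: a minimal end-to-end sketch, not part of the original crate,
// showing the lookahead/advance protocol on a tiny `.proto`-style input.
#[cfg(test)]
mod example {
    use super::*;

    #[test]
    fn walk_tokens() {
        let mut t = Tokenizer::new("option = 42;", ParserLanguage::Proto);
        assert!(t.next_ident_if_eq("option").unwrap());
        t.next_symbol_expect_eq('=', "example").unwrap();
        assert_eq!(42, t.next_int_lit().unwrap());
        assert!(t.next_symbol_if_eq(';').unwrap());
        assert!(t.syntax_eof().unwrap());
    }
}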
protobuf-support-3.1.0/src/lib.rs

//! # Supporting code for protobuf crates
//!
//! Code in this crate is used in protobuf crates like `protobuf` or `protobuf-parse`.
//! None of the code in this crate has a public API.

pub mod json_name;
pub mod lexer;
pub mod text_format;
pub mod toposort;

protobuf-support-3.1.0/src/text_format.rs

pub fn escape_bytes_to(bytes: &[u8], buf: &mut String) {
    for &c in bytes {
        match c {
            b'\n' => buf.push_str(r"\n"),
            b'\r' => buf.push_str(r"\r"),
            b'\t' => buf.push_str(r"\t"),
            b'\'' => buf.push_str("\\\'"),
            b'"' => buf.push_str("\\\""),
            b'\\' => buf.push_str(r"\\"),
            b'\x20'..=b'\x7e' => buf.push(c as char),
            _ => {
                buf.push('\\');
                buf.push((b'0' + (c >> 6)) as char);
                buf.push((b'0' + ((c >> 3) & 7)) as char);
                buf.push((b'0' + (c & 7)) as char);
            }
        }
    }
}

pub fn quote_bytes_to(bytes: &[u8], buf: &mut String) {
    buf.push('"');
    escape_bytes_to(bytes, buf);
    buf.push('"');
}

#[cfg(test)]
mod test {
    use crate::lexer::str_lit::StrLit;
    use crate::text_format::escape_bytes_to;

    fn escape(data: &[u8]) -> String {
        let mut s = String::with_capacity(data.len() * 4);
        escape_bytes_to(data, &mut s);
        s
    }

    fn unescape_string(escaped: &str) -> Vec<u8> {
        StrLit {
            escaped: escaped.to_owned(),
        }
        .decode_bytes()
        .expect("decode_bytes")
    }

    fn test_escape_unescape(text: &str, escaped: &str) {
        assert_eq!(text.as_bytes(), &unescape_string(escaped)[..]);
        assert_eq!(escaped, &escape(text.as_bytes())[..]);
    }

    #[test]
    fn test_print_to_bytes() {
        assert_eq!("ab", escape(b"ab"));
        assert_eq!("a\\\\023", escape(b"a\\023"));
        assert_eq!("a\\r\\n\\t '\\'\\\"\\\\", escape(b"a\r\n\t ''\"\\"));
        assert_eq!("\\344\\275\\240\\345\\245\\275", escape("你好".as_bytes()));
    }

    #[test]
    fn test_unescape_string() {
        test_escape_unescape("", "");
        test_escape_unescape("aa", "aa");
        test_escape_unescape("\n", "\\n");
        test_escape_unescape("\r", "\\r");
        test_escape_unescape("\t", "\\t");
        test_escape_unescape("你好", "\\344\\275\\240\\345\\245\\275");
        // hex
        assert_eq!(b"aaa\x01bbb", &unescape_string("aaa\\x01bbb")[..]);
        assert_eq!(b"aaa\xcdbbb", &unescape_string("aaa\\xCDbbb")[..]);
        assert_eq!(b"aaa\xcdbbb", &unescape_string("aaa\\xcdbbb")[..]);
        // quotes
        assert_eq!(b"aaa\"bbb", &unescape_string("aaa\\\"bbb")[..]);
        assert_eq!(b"aaa\'bbb", &unescape_string("aaa\\\'bbb")[..]);
    }
}
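// Editor's note: a small sketch, not part of the original crate, of the
// escaping rules above: printable ASCII passes through unchanged and
// everything else becomes a three-digit octal escape.
#[cfg(test)]
mod example {
    use super::quote_bytes_to;

    #[test]
    fn quote_bytes() {
        let mut s = String::new();
        quote_bytes_to(b"a\x01", &mut s);
        assert_eq!("\"a\\001\"", s);
    }
}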
protobuf-support-3.1.0/src/toposort.rs

use std::collections::HashSet;
use std::hash::Hash;

#[derive(Debug, thiserror::Error)]
#[error("Cycle detected")]
pub struct TopoSortCycle;

pub fn toposort<K, I>(
    input: impl IntoIterator<Item = K>,
    deps: impl Fn(&K) -> I,
) -> Result<Vec<K>, TopoSortCycle>
where
    K: Eq + Hash + Clone,
    I: Iterator<Item = K>,
{
    struct Ts<K, I, D>
    where
        K: Eq + Hash + Clone,
        I: Iterator<Item = K>,
        D: Fn(&K) -> I,
    {
        result_set: HashSet<K>,
        result: Vec<K>,
        deps: D,
        stack: HashSet<K>,
    }

    impl<K, I, D> Ts<K, I, D>
    where
        K: Eq + Hash + Clone,
        I: Iterator<Item = K>,
        D: Fn(&K) -> I,
    {
        fn visit(&mut self, i: &K) -> Result<(), TopoSortCycle> {
            if self.result_set.contains(i) {
                return Ok(());
            }
            if !self.stack.insert(i.clone()) {
                return Err(TopoSortCycle);
            }
            for dep in (self.deps)(i) {
                self.visit(&dep)?;
            }
            let removed = self.stack.remove(i);
            assert!(removed);
            self.result.push(i.clone());
            self.result_set.insert(i.clone());
            Ok(())
        }
    }

    let mut ts = Ts {
        result: Vec::new(),
        result_set: HashSet::new(),
        deps,
        stack: HashSet::new(),
    };
    for i in input {
        ts.visit(&i)?;
    }
    Ok(ts.result)
}

#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use crate::toposort::toposort;
    use crate::toposort::TopoSortCycle;

    fn test_toposort(input: &str) -> Result<Vec<&str>, TopoSortCycle> {
        let mut keys: Vec<&str> = Vec::new();
        let mut edges: HashMap<&str, Vec<&str>> = HashMap::new();
        for part in input.split(" ") {
            match part.split_once("->") {
                Some((k, vs)) => {
                    keys.push(k);
                    edges.insert(k, vs.split(",").collect());
                }
                None => keys.push(part),
            };
        }
        toposort(keys, |k| {
            edges
                .get(k)
                .map(|v| v.as_slice())
                .unwrap_or_default()
                .into_iter()
                .copied()
        })
    }

    fn test_toposort_check(input: &str, expected: &str) {
        let sorted = test_toposort(input).unwrap();
        let expected = expected.split(" ").collect::<Vec<_>>();
        assert_eq!(expected, sorted);
    }

    #[test]
    fn test() {
        test_toposort_check("1 2 3", "1 2 3");
        test_toposort_check("1->2 2->3 3", "3 2 1");
        test_toposort_check("1 2->1 3->2", "1 2 3");
        test_toposort_check("1->2,3 2->3 3", "3 2 1");
    }

    #[test]
    fn cycle() {
        assert!(test_toposort("1->1").is_err());
        assert!(test_toposort("1->2 2->1").is_err());
    }
}
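// Editor's note: a usage sketch, not part of the original crate. The depth-first
// `visit` pushes a key only after all of its dependencies, so dependencies come
// out before their dependents.
#[cfg(test)]
mod example {
    use super::toposort;

    #[test]
    fn deps_first() {
        // "main" depends on "lib"; "lib" depends on nothing.
        let sorted = toposort(vec!["main", "lib"], |&k| {
            if k == "main" { vec!["lib"] } else { vec![] }.into_iter()
        })
        .unwrap();
        assert_eq!(vec!["lib", "main"], sorted);
    }
}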