mailparse-0.14.0/.cargo_vcs_info.json0000644000000001360000000000100130730ustar { "git": { "sha1": "b985c5b5166ab037a91fa17e6840efa957eb7d1c" }, "path_in_vcs": "" }mailparse-0.14.0/Cargo.lock0000644000000103000000000000100110400ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "Inflector" version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" [[package]] name = "aliasable" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" [[package]] name = "base64" version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "charset" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18e9079d1a12a2cc2bffb5db039c43661836ead4082120d5844f02555aca2d46" dependencies = [ "base64", "encoding_rs", ] [[package]] name = "data-encoding" version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb" [[package]] name = "encoding_rs" version = "0.8.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7896dc8abb250ffdda33912550faa54c88ec8b998dec0b2c55ab224921ce11df" dependencies = [ "cfg-if", ] [[package]] name = "mailparse" version = "0.14.0" dependencies = [ "charset", "data-encoding", "ouroboros", "quoted_printable", ] [[package]] name = "ouroboros" version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "06562f88448f4f22a6cfb76b3d0d01af62e82e3dacb2a88c1a3a8a1694a501c6" dependencies = [ "aliasable", "ouroboros_macro", "stable_deref_trait", ] [[package]] name = "ouroboros_macro" version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cdc8f765173c2dad6c1f371d0997c2d21003bd14949da99d910ad409d88a85" dependencies = [ "Inflector", "proc-macro-error", "proc-macro2", "quote", "syn", ] [[package]] name = "proc-macro-error" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", "syn", "version_check", ] [[package]] name = "proc-macro-error-attr" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2", "quote", "version_check", ] [[package]] name = "proc-macro2" version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f84e92c0f7c9d58328b85a78557813e4bd845130db68d7184635344399423b1" dependencies = [ "unicode-xid", ] [[package]] name = "quote" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" dependencies = [ "proc-macro2", ] [[package]] name = "quoted_printable" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20f14e071918cbeefc5edc986a7aa92c425dae244e003a35e1cdddb5ca39b5cb" [[package]] name = "stable_deref_trait" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "syn" version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59" dependencies = [ "proc-macro2", "quote", "unicode-xid", ] [[package]] name = "unicode-xid" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" [[package]] name = "version_check" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" mailparse-0.14.0/Cargo.toml0000644000000023400000000000100110700ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" name = "mailparse" version = "0.14.0" authors = ["Kartikaya Gupta"] exclude = [ ".gitattributes", ".gitignore", ".github/**", "examples/**", ] description = "A simple parser for MIME e-mail messages" homepage = "https://github.com/staktrace/mailparse/blob/master/README.md" readme = "README.md" keywords = [ "parser", "email", "rfc822", "mime", "maildir", ] categories = [ "email", "parsing", ] license = "0BSD" repository = "https://github.com/staktrace/mailparse" [dependencies.charset] version = "0.1.3" [dependencies.data-encoding] version = "2.3.3" [dependencies.quoted_printable] version = "0.4.6" [dev-dependencies.ouroboros] version = "0.14.0" [badges.maintenance] status = "passively-maintained" mailparse-0.14.0/Cargo.toml.orig000064400000000000000000000012371046102023000145550ustar 00000000000000[package] name = "mailparse" version = "0.14.0" authors = ["Kartikaya Gupta"] edition = "2018" license = "0BSD" description = "A simple parser for MIME e-mail messages" homepage = "https://github.com/staktrace/mailparse/blob/master/README.md" repository = "https://github.com/staktrace/mailparse" readme = "README.md" keywords = ["parser", "email", "rfc822", "mime", "maildir"] categories = ["email", "parsing"] exclude = [".gitattributes", ".gitignore", ".github/**", "examples/**"] [badges] maintenance = { status = "passively-maintained" } [dependencies] data-encoding = "2.3.3" quoted_printable = "0.4.6" charset = "0.1.3" [dev-dependencies] ouroboros = "0.14.0" mailparse-0.14.0/LICENSE000064400000000000000000000012071046102023000126700ustar 00000000000000Copyright (C) 2019 by Kartikaya Gupta Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. mailparse-0.14.0/README.md000064400000000000000000000062101046102023000131410ustar 00000000000000mailparse === ![Build Status](https://github.com/staktrace/mailparse/actions/workflows/test.yml/badge.svg) [![Crate](https://img.shields.io/crates/v/mailparse.svg)](https://crates.io/crates/mailparse) A simple parser for MIME email messages. API --- The primary entry point for this library is the following function: ```rust parse_mail(&[u8]) -> Result ``` This function takes the raw message data, including headers and body, and returns a structured object to more easily access pieces of the email message. There are other public functions that allow parsing smaller parts of the message as well; refer to the [full documentation](https://docs.rs/mailparse/). The library is designed to process real-world email data such as might be obtained by using the FETCH command on an IMAP server, or in a Maildir. As such, this library should successfully handle any valid MIME-formatted message, although it may not follow all the strict requirements in the various specifications that cover the format (predominantly IETF RFCs 822, 2045, 2047, 2822, and 5322). As an example, this library accepts raw message data which uses \n (ASCII LF) as line delimiters rather than the RFC-mandated \r\n (ASCII CRLF) line delimiters. 
Example usage --- ```rust use mailparse::*; let parsed = parse_mail(concat!( "Subject: This is a test email\n", "Content-Type: multipart/alternative; boundary=foobar\n", "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\n", "\n", "--foobar\n", "Content-Type: text/plain; charset=utf-8\n", "Content-Transfer-Encoding: quoted-printable\n", "\n", "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\n", "--foobar\n", "Content-Type: text/html\n", "Content-Transfer-Encoding: base64\n", "\n", "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \n", "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \n", "--foobar--\n", "After the final boundary stuff gets ignored.\n").as_bytes()) .unwrap(); assert_eq!(parsed.headers.get_first_value("Subject"), Some("This is a test email".to_string())); assert_eq!(parsed.subparts.len(), 2); assert_eq!(parsed.subparts[0].get_body().unwrap(), "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}"); assert_eq!(parsed.subparts[1].headers[1].get_value(), "base64"); assert_eq!(parsed.subparts[1].ctype.mimetype, "text/html"); assert!(parsed.subparts[1].get_body().unwrap().starts_with("")); assert_eq!(dateparse(parsed.headers.get_first_value("Date").unwrap().as_str()).unwrap(), 1475417182); ``` Documentation --- See the rustdoc at [docs.rs](https://docs.rs/mailparse/). Support mailparse --- If you want to support development of `mailparse`, please do so by donating your money, time, and/or energy to fighting climate change. A quick and easy way is to send a donation to [Replant.ca Environmental](http://www.replant-environmental.ca/donate.html), where every dollar gets a tree planted! mailparse-0.14.0/src/addrparse.rs000064400000000000000000001222451046102023000147730ustar 00000000000000use std::fmt; use crate::header::HeaderToken; use crate::{MailHeader, MailParseError}; /// A representation of a single mailbox. Each mailbox has /// a routing address `addr` and an optional display name. 
#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct SingleInfo { pub display_name: Option, pub addr: String, } impl SingleInfo { fn new(name: Option, addr: String) -> Result { if addr.contains('@') { Ok(SingleInfo { display_name: name, addr: addr, }) } else { Err(MailParseError::Generic( "Invalid address found: must contain a '@' symbol", )) } } } impl fmt::Display for SingleInfo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(name) = &self.display_name { write!(f, r#""{}" <{}>"#, name.replace('"', r#"\""#), self.addr) } else { write!(f, "{}", self.addr) } } } /// A representation of a group address. It has a name and /// a list of mailboxes. #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct GroupInfo { pub group_name: String, pub addrs: Vec, } impl GroupInfo { fn new(name: String, addrs: Vec) -> Self { GroupInfo { group_name: name, addrs: addrs, } } } impl fmt::Display for GroupInfo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, r#""{}":"#, self.group_name.replace('"', r#"\""#))?; for (i, addr) in self.addrs.iter().enumerate() { if i == 0 { write!(f, " ")?; } else { write!(f, ", ")?; } addr.fmt(f)?; } write!(f, ";") } } /// An abstraction over the two different kinds of top-level addresses allowed /// in email headers. Group addresses have a name and a list of mailboxes. Single /// addresses are just a mailbox. Each mailbox consists of what you would consider /// an email address (e.g. foo@bar.com) and optionally a display name ("Foo Bar"). /// Groups are represented in email headers with colons and semicolons, e.g. /// To: my-peeps: foo@peeps.org, bar@peeps.org; #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum MailAddr { Group(GroupInfo), Single(SingleInfo), } #[derive(Debug)] enum AddrParseState { Initial, QuotedName, EscapedChar, AfterQuotedName, BracketedAddr, AfterBracketedAddr, Unquoted, NameWithEncodedWord, Comment, } /// A simple wrapper around `Vec`. 
This is primarily here so we can /// implement the Display trait on it, and allow user code to easily convert /// the return value from `addrparse` back into a string. However there are some /// additional utility functions on this wrapper as well. #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct MailAddrList(Vec); impl std::ops::Deref for MailAddrList { type Target = Vec; fn deref(&self) -> &Vec { &self.0 } } impl std::ops::DerefMut for MailAddrList { fn deref_mut(&mut self) -> &mut Vec { &mut self.0 } } impl fmt::Display for MailAddrList { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut last_was_group = false; for (i, addr) in self.iter().enumerate() { if i > 0 { if last_was_group { write!(f, " ")?; } else { write!(f, ", ")?; } } match addr { MailAddr::Group(g) => { g.fmt(f)?; last_was_group = true; } MailAddr::Single(s) => { s.fmt(f)?; last_was_group = false; } } } Ok(()) } } impl From> for MailAddrList { fn from(addrs: Vec) -> Self { MailAddrList(addrs) } } impl MailAddrList { /// Count the number of `SingleInfo` instances in this list of addresses. pub fn count_addrs(&self) -> usize { self.iter().fold(0, |acc, elem| match elem { MailAddr::Single(_) => acc + 1, MailAddr::Group(g) => acc + g.addrs.len(), }) } /// Convenience function to check if this list of addresses contains exactly /// one `SingleInfo`, and if it does, to return it. If there is not exactly /// one `SingleInfo`, this function returns None. pub fn extract_single_info(self) -> Option { if self.len() == 1 { match &self[0] { MailAddr::Group(_) => None, MailAddr::Single(s) => Some(s.clone()), } } else { None } } /// Consumes the `MailAddrList`, returning the wrapped value. 
pub fn into_inner(self) -> Vec { self.0 } } enum HeaderTokenItem<'a> { Char(char), Whitespace(&'a str), Newline(String), DecodedWord(String), } struct HeaderTokenWalker<'a> { tokens: Vec>, cur_token: usize, cur_char_offset: usize, } impl<'a> Iterator for HeaderTokenWalker<'a> { type Item = HeaderTokenItem<'a>; fn next(&mut self) -> Option { loop { if self.cur_token >= self.tokens.len() { return None; } match &self.tokens[self.cur_token] { HeaderToken::Text(s) => { let s = &s[self.cur_char_offset..]; let mut chars = s.char_indices(); let c = chars.next(); if let Some((_, c)) = c { self.cur_char_offset += chars.next().map(|(o, _)| o).unwrap_or(s.len()); return Some(HeaderTokenItem::Char(c)); } else { self.cur_char_offset = 0; self.cur_token += 1; continue; } } HeaderToken::Whitespace(ws) => { self.cur_token += 1; return Some(HeaderTokenItem::Whitespace(ws)); } HeaderToken::Newline(Some(ws)) => { self.cur_token += 1; return Some(HeaderTokenItem::Newline(String::from(ws))); } HeaderToken::Newline(None) => { panic!("Should never reach here"); } HeaderToken::DecodedWord(word) => { self.cur_token += 1; return Some(HeaderTokenItem::DecodedWord(String::from(word))); } } } } } impl<'a> HeaderTokenWalker<'a> { fn new(tokens: Vec>) -> Self { Self { tokens, cur_token: 0, cur_char_offset: 0, } } } /// Convert an address field from an email header into a structured type. /// This function handles the most common formatting of to/from/cc/bcc fields /// found in email headers. Note that if you are attempting to parse the /// value of a `MailHeader`, it is better (both for correctness and performance /// to use the `addrparse_header` function instead of this one. Correctness /// is impacted because of the way encoded words within the header are /// processed; using `MailHeader::get_value()` will decode encoded words, /// which may then contain characters like commas that affect how `addrparse` /// parses the value. 
This can produce incorrect results in some cases; using /// `addrparse_header` will avoid this problem. /// /// # Examples /// ``` /// use mailparse::{addrparse, MailAddr, SingleInfo}; /// match &addrparse("John Doe ").unwrap()[0] { /// MailAddr::Single(info) => { /// assert_eq!(info.display_name, Some("John Doe".to_string())); /// assert_eq!(info.addr, "john@doe.com".to_string()); /// } /// _ => panic!() /// }; /// ``` pub fn addrparse(addrs: &str) -> Result { let v = vec![HeaderToken::Text(addrs)]; let mut w = HeaderTokenWalker::new(v); addrparse_inner(&mut w, false) } /// Take a `MailHeader` that contains addresses in the value (e.g. from/to/cc/bcc) /// and produce a structured type representing those addresses. /// /// # Examples /// ``` /// use mailparse::{addrparse_header, parse_mail, MailAddr, MailHeaderMap, SingleInfo}; /// let mail = parse_mail(b"From: John Doe \n\nBlah Blah").unwrap(); /// match &addrparse_header(mail.headers.get_first_header("From").unwrap()).unwrap()[0] { /// MailAddr::Single(info) => { /// assert_eq!(info.display_name, Some("John Doe".to_string())); /// assert_eq!(info.addr, "john@doe.com".to_string()); /// } /// _ => panic!() /// }; /// ``` pub fn addrparse_header(header: &MailHeader) -> Result { let chars = header.decode_utf8_or_latin1(); let v = crate::header::normalized_tokens(&chars); let mut w = HeaderTokenWalker::new(v); addrparse_inner(&mut w, false) } fn addrparse_inner( it: &mut HeaderTokenWalker, in_group: bool, ) -> Result { let mut result = vec![]; let mut state = AddrParseState::Initial; let mut hti = match it.next() { None => return Ok(MailAddrList(vec![])), Some(v) => v, }; let mut name = None; let mut addr = None; let mut post_quote_ws = None; let mut comment_return = None; loop { match state { AddrParseState::Initial => { match hti { HeaderTokenItem::Char(c) => { if c.is_whitespace() { // continue in same state } else if c == '"' { state = AddrParseState::QuotedName; name = Some(String::new()); } else if c == '<' { 
state = AddrParseState::BracketedAddr; addr = Some(String::new()); } else if c == ';' { if !in_group { return Err(MailParseError::Generic( "Unexpected group terminator found in initial list", )); } return Ok(MailAddrList(result)); } else { state = AddrParseState::Unquoted; addr = Some(String::new()); addr.as_mut().unwrap().push(c); } } HeaderTokenItem::Whitespace(_) => { // continue in same state } HeaderTokenItem::Newline(_) => { // continue in same state } HeaderTokenItem::DecodedWord(word) => { state = AddrParseState::NameWithEncodedWord; addr = Some(String::new()); addr.as_mut().unwrap().push_str(&word); } } } AddrParseState::QuotedName => match hti { HeaderTokenItem::Char(c) => { if c == '\\' { state = AddrParseState::EscapedChar; } else if c == '"' { state = AddrParseState::AfterQuotedName; } else { name.as_mut().unwrap().push(c); } } HeaderTokenItem::Whitespace(ws) => { name.as_mut().unwrap().push_str(ws); } HeaderTokenItem::Newline(ws) => { name.as_mut().unwrap().push_str(&ws); } HeaderTokenItem::DecodedWord(word) => { name.as_mut().unwrap().push_str(&word); } }, AddrParseState::EscapedChar => match hti { HeaderTokenItem::Char(c) => { state = AddrParseState::QuotedName; name.as_mut().unwrap().push(c); } HeaderTokenItem::Whitespace(ws) => { state = AddrParseState::QuotedName; name.as_mut().unwrap().push_str(ws); } HeaderTokenItem::Newline(ws) => { state = AddrParseState::QuotedName; name.as_mut().unwrap().push_str(&ws); } HeaderTokenItem::DecodedWord(_) => { return Err(MailParseError::Generic( "Unexpected encoded word found inside a quoted name", )); } }, AddrParseState::AfterQuotedName => { match hti { HeaderTokenItem::Char(c) => { if c.is_whitespace() { if post_quote_ws.is_none() { post_quote_ws = Some(String::new()); } post_quote_ws.as_mut().unwrap().push(c); } else if c == '<' { state = AddrParseState::BracketedAddr; addr = Some(String::new()); } else if c == ':' { if in_group { return Err(MailParseError::Generic( "Found unexpected nested group", )); } 
let group_addrs = addrparse_inner(it, true)?; state = AddrParseState::Initial; result.push(MailAddr::Group(GroupInfo::new( name.unwrap(), group_addrs .0 .into_iter() .map(|addr| match addr { MailAddr::Single(s) => s, MailAddr::Group(_) => { panic!("Unexpected nested group encountered") } }) .collect(), ))); name = None; } else { // I think technically not valid, but this occurs in real-world corpus, so // handle gracefully if c == '"' { post_quote_ws.map(|ws| name.as_mut().unwrap().push_str(&ws)); state = AddrParseState::QuotedName; } else { post_quote_ws.map(|ws| name.as_mut().unwrap().push_str(&ws)); name.as_mut().unwrap().push(c); } post_quote_ws = None; } } HeaderTokenItem::Whitespace(ws) => { if post_quote_ws.is_none() { post_quote_ws = Some(String::new()); } post_quote_ws.as_mut().unwrap().push_str(ws); } HeaderTokenItem::Newline(ws) => { if post_quote_ws.is_none() { post_quote_ws = Some(String::new()); } post_quote_ws.as_mut().unwrap().push_str(&ws); } HeaderTokenItem::DecodedWord(word) => { post_quote_ws.map(|ws| name.as_mut().unwrap().push_str(&ws)); name.as_mut().unwrap().push_str(&word); post_quote_ws = None; } } } AddrParseState::BracketedAddr => match hti { HeaderTokenItem::Char(c) => { if c == '>' { state = AddrParseState::AfterBracketedAddr; result.push(MailAddr::Single(SingleInfo::new(name, addr.unwrap())?)); name = None; addr = None; } else { addr.as_mut().unwrap().push(c); } } HeaderTokenItem::Whitespace(ws) => { addr.as_mut().unwrap().push_str(ws); } HeaderTokenItem::Newline(ws) => { addr.as_mut().unwrap().push_str(&ws); } HeaderTokenItem::DecodedWord(_) => { return Err(MailParseError::Generic( "Unexpected encoded word found inside bracketed address", )); } }, AddrParseState::AfterBracketedAddr => { match hti { HeaderTokenItem::Char(c) => { if c.is_whitespace() { // continue in same state } else if c == ',' { state = AddrParseState::Initial; } else if c == ';' { if in_group { return Ok(MailAddrList(result)); } // Technically not valid, but a 
similar case occurs in real-world corpus, so handle it gracefully state = AddrParseState::Initial; } else if c == '(' { comment_return = Some(AddrParseState::AfterBracketedAddr); state = AddrParseState::Comment; } else { return Err(MailParseError::Generic( "Unexpected char found after bracketed address", )); } } HeaderTokenItem::Whitespace(_) => { // continue in same state } HeaderTokenItem::Newline(_) => { // continue in same state } HeaderTokenItem::DecodedWord(_) => { return Err(MailParseError::Generic( "Unexpected encoded word found after bracketed address", )); } } } AddrParseState::NameWithEncodedWord => match hti { HeaderTokenItem::Char(c) => { if c == '<' { state = AddrParseState::BracketedAddr; name = addr.map(|s| s.trim_end().to_owned()); addr = Some(String::new()); } else if c == ':' { if in_group { return Err(MailParseError::Generic("Found unexpected nested group")); } let group_addrs = addrparse_inner(it, true)?; state = AddrParseState::Initial; result.push(MailAddr::Group(GroupInfo::new( addr.unwrap().trim_end().to_owned(), group_addrs .0 .into_iter() .map(|addr| match addr { MailAddr::Single(s) => s, MailAddr::Group(_) => { panic!("Unexpected nested group encountered") } }) .collect(), ))); addr = None; } else { addr.as_mut().unwrap().push(c); } } HeaderTokenItem::Whitespace(ws) => { addr.as_mut().unwrap().push_str(ws); } HeaderTokenItem::Newline(ws) => { addr.as_mut().unwrap().push_str(&ws); } HeaderTokenItem::DecodedWord(word) => { addr.as_mut().unwrap().push_str(&word); } }, AddrParseState::Unquoted => { match hti { HeaderTokenItem::Char(c) => { if c == '<' { state = AddrParseState::BracketedAddr; name = addr.map(|s| s.trim_end().to_owned()); addr = Some(String::new()); } else if c == ',' { state = AddrParseState::Initial; result.push(MailAddr::Single(SingleInfo::new( None, addr.unwrap().trim_end().to_owned(), )?)); addr = None; } else if c == ';' { result.push(MailAddr::Single(SingleInfo::new( None, addr.unwrap().trim_end().to_owned(), )?)); if 
in_group { return Ok(MailAddrList(result)); } // Technically not valid, but occurs in real-world corpus, so handle it gracefully state = AddrParseState::Initial; addr = None; } else if c == ':' { if in_group { return Err(MailParseError::Generic( "Found unexpected nested group", )); } let group_addrs = addrparse_inner(it, true)?; state = AddrParseState::Initial; result.push(MailAddr::Group(GroupInfo::new( addr.unwrap().trim_end().to_owned(), group_addrs .0 .into_iter() .map(|addr| match addr { MailAddr::Single(s) => s, MailAddr::Group(_) => { panic!("Unexpected nested group encountered") } }) .collect(), ))); addr = None; } else if c == '(' { comment_return = Some(AddrParseState::Unquoted); state = AddrParseState::Comment; } else { addr.as_mut().unwrap().push(c); } } HeaderTokenItem::Whitespace(ws) => { addr.as_mut().unwrap().push_str(ws); } HeaderTokenItem::Newline(ws) => { addr.as_mut().unwrap().push_str(&ws); } HeaderTokenItem::DecodedWord(word) => { state = AddrParseState::NameWithEncodedWord; addr.as_mut().unwrap().push_str(&word); } } } AddrParseState::Comment => { match hti { HeaderTokenItem::Char(c) => { if c == ')' { state = comment_return.take().unwrap(); } } HeaderTokenItem::Whitespace(_) => { // ignore and stay in same state } HeaderTokenItem::Newline(_) => { // ignore and stay in same state } HeaderTokenItem::DecodedWord(_) => { // ignore and stay in same state } } } } hti = match it.next() { None => break, Some(v) => v, }; } if in_group { return Err(MailParseError::Generic("Found unterminated group address")); } match state { AddrParseState::QuotedName | AddrParseState::EscapedChar | AddrParseState::AfterQuotedName | AddrParseState::BracketedAddr | AddrParseState::Comment | AddrParseState::NameWithEncodedWord => Err(MailParseError::Generic( "Address string unexpectedly terminated", )), AddrParseState::Unquoted => { result.push(MailAddr::Single(SingleInfo::new( None, addr.unwrap().trim_end().to_owned(), )?)); Ok(MailAddrList(result)) } _ => 
Ok(MailAddrList(result)), } } #[cfg(test)] mod tests { use super::*; #[test] fn parse_basic() { assert_eq!( addrparse("foo bar ").unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new(Some("foo bar".to_string()), "foo@bar.com".to_string()).unwrap() )]) ); assert_eq!( addrparse("\"foo bar\" ").unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new(Some("foo bar".to_string()), "foo@bar.com".to_string()).unwrap() )]) ); assert_eq!( addrparse("foo@bar.com ").unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new(None, "foo@bar.com".to_string()).unwrap() )]) ); assert_eq!( addrparse("foo ").unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new(Some("foo".to_string()), "bar@baz.com".to_string()).unwrap() )]) ); assert_eq!( addrparse("\"foo\" ").unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new(Some("foo".to_string()), "bar@baz.com".to_string()).unwrap() )]) ); assert_eq!( addrparse("\"foo \" ").unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new(Some("foo ".to_string()), "bar@baz.com".to_string()).unwrap() )]) ); } #[test] fn parse_backslashes() { assert_eq!( addrparse(r#" "First \"nick\" Last" "#).unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new( Some("First \"nick\" Last".to_string()), "user@host.tld".to_string() ) .unwrap() )]) ); assert_eq!( addrparse(r#" First \"nick\" Last "#).unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new( Some("First \\\"nick\\\" Last".to_string()), "user@host.tld".to_string() ) .unwrap() )]) ); } #[test] fn parse_multi() { assert_eq!( addrparse("foo , jo@e, baz ").unwrap(), MailAddrList(vec![ MailAddr::Single( SingleInfo::new(Some("foo".to_string()), "ba@r".to_string()).unwrap() ), MailAddr::Single(SingleInfo::new(None, "jo@e".to_string()).unwrap()), MailAddr::Single( SingleInfo::new(Some("baz".to_string()), "qu@ux".to_string()).unwrap() ), ]) ); } #[test] fn parse_empty_group() { assert_eq!( addrparse("empty-group:;").unwrap(), 
MailAddrList(vec![MailAddr::Group(GroupInfo::new( "empty-group".to_string(), vec![] ))]) ); assert_eq!( addrparse(" empty-group : ; ").unwrap(), MailAddrList(vec![MailAddr::Group(GroupInfo::new( "empty-group".to_string(), vec![] ))]) ); } #[test] fn parse_simple_group() { assert_eq!( addrparse("bar-group: foo ;").unwrap(), MailAddrList(vec![MailAddr::Group(GroupInfo::new( "bar-group".to_string(), vec![SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()).unwrap(),] ))]) ); assert_eq!( addrparse("bar-group: foo , baz@bar.com;").unwrap(), MailAddrList(vec![MailAddr::Group(GroupInfo::new( "bar-group".to_string(), vec![ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()).unwrap(), SingleInfo::new(None, "baz@bar.com".to_string()).unwrap(), ] ))]) ); } #[test] fn parse_mixed() { assert_eq!( addrparse("joe@bloe.com, bar-group: foo ;").unwrap(), MailAddrList(vec![ MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string()).unwrap()), MailAddr::Group(GroupInfo::new( "bar-group".to_string(), vec![ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()) .unwrap(), ] )), ]) ); assert_eq!( addrparse("bar-group: foo ; joe@bloe.com").unwrap(), MailAddrList(vec![ MailAddr::Group(GroupInfo::new( "bar-group".to_string(), vec![ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()) .unwrap(), ] )), MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string()).unwrap()), ]) ); assert_eq!( addrparse("flim@flam.com, bar-group: foo ; joe@bloe.com").unwrap(), MailAddrList(vec![ MailAddr::Single(SingleInfo::new(None, "flim@flam.com".to_string()).unwrap()), MailAddr::Group(GroupInfo::new( "bar-group".to_string(), vec![ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()) .unwrap(), ] )), MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string()).unwrap()), ]) ); assert_eq!( addrparse("first-group:; flim@flam.com, bar-group: foo ; joe@bloe.com, final-group: zi@p, za@p, \"Zaphod\" ;").unwrap(), 
MailAddrList(vec![ MailAddr::Group(GroupInfo::new("first-group".to_string(), vec![])), MailAddr::Single(SingleInfo::new(None, "flim@flam.com".to_string()).unwrap()), MailAddr::Group(GroupInfo::new("bar-group".to_string(), vec![ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()).unwrap(), ])), MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string()).unwrap()), MailAddr::Group(GroupInfo::new("final-group".to_string(), vec![ SingleInfo::new(None, "zi@p".to_string()).unwrap(), SingleInfo::new(None, "za@p".to_string()).unwrap(), SingleInfo::new(Some("Zaphod".to_string()), "zaphod@beeblebrox".to_string()).unwrap(), ])), ]) ); } #[test] fn real_world_examples() { // taken from a real "From" header. This might not be valid according to the RFC // but obviously made it through the internet so we should at least not crash. assert_eq!( addrparse("\"The Foo of Bar\" Course Staff ").unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new( Some("The Foo of Bar Course Staff".to_string()), "foo-no-reply@bar.edx.org".to_string() ) .unwrap() )]) ); // This one has a comment tacked on to the end. Adding proper support for comments seems // complicated so I just added trailer comment support. assert_eq!( addrparse("John Doe (GitHub Staff)").unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new( Some("John Doe".to_string()), "support@github.com".to_string() ) .unwrap() )]) ); // Taken from a real world "To" header. It was spam, but still... 
assert_eq!( addrparse("foo@bar.com;").unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new(None, "foo@bar.com".to_string()).unwrap() )]) ); // From https://github.com/deltachat/deltachat-core-rust/pull/1476#issuecomment-629681157 assert_eq!( addrparse("mailer-daemon@hq5.merlinux.eu (mail delivery system)").unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new(None, "mailer-daemon@hq5.merlinux.eu".to_string()).unwrap() )]) ); } #[test] fn stringify_single() { let tc = SingleInfo::new(Some("John Doe".to_string()), "john@doe.com".to_string()).unwrap(); assert_eq!(tc.to_string(), r#""John Doe" "#); assert_eq!( addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Single(tc)]) ); let tc = SingleInfo::new( Some(r#"John "Jack" Doe"#.to_string()), "john@doe.com".to_string(), ) .unwrap(); assert_eq!(tc.to_string(), r#""John \"Jack\" Doe" "#); assert_eq!( addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Single(tc)]) ); let tc = SingleInfo::new(None, "foo@bar.com".to_string()).unwrap(); assert_eq!(tc.to_string(), r#"foo@bar.com"#); assert_eq!( addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Single(tc)]) ); } #[test] fn stringify_group() { let tc = GroupInfo::new( "group-name".to_string(), vec![ SingleInfo::new(None, "foo@bar.com".to_string()).unwrap(), SingleInfo::new(Some("A".to_string()), "a@b".to_string()).unwrap(), ], ); assert_eq!(tc.to_string(), r#""group-name": foo@bar.com, "A" ;"#); assert_eq!( addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Group(tc)]) ); let tc = GroupInfo::new("empty-group".to_string(), vec![]); assert_eq!(tc.to_string(), r#""empty-group":;"#); assert_eq!( addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Group(tc)]) ); let tc = GroupInfo::new(r#"group-with"quote"#.to_string(), vec![]); assert_eq!(tc.to_string(), r#""group-with\"quote":;"#); assert_eq!( addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Group(tc)]) ); } #[test] fn 
stringify_list() { let tc = MailAddrList(vec![ MailAddr::Group(GroupInfo::new( "marvel".to_string(), vec![ SingleInfo::new(None, "ironman@marvel.com".to_string()).unwrap(), SingleInfo::new(None, "spiderman@marvel.com".to_string()).unwrap(), ], )), MailAddr::Single( SingleInfo::new(Some("b-man".to_string()), "b@man.com".to_string()).unwrap(), ), MailAddr::Group(GroupInfo::new( "dc".to_string(), vec![ SingleInfo::new(None, "batman@dc.com".to_string()).unwrap(), SingleInfo::new(None, "superman@dc.com".to_string()).unwrap(), ], )), MailAddr::Single( SingleInfo::new(Some("d-woman".to_string()), "d@woman.com".to_string()).unwrap(), ), ]); assert_eq!( tc.to_string(), r#""marvel": ironman@marvel.com, spiderman@marvel.com; "b-man" , "dc": batman@dc.com, superman@dc.com; "d-woman" "# ); } #[test] fn count_addrs() { let tc = MailAddrList(vec![ MailAddr::Group(GroupInfo::new( "marvel".to_string(), vec![ SingleInfo::new(None, "ironman@marvel.com".to_string()).unwrap(), SingleInfo::new(None, "spiderman@marvel.com".to_string()).unwrap(), ], )), MailAddr::Single( SingleInfo::new(Some("b-man".to_string()), "b@man.com".to_string()).unwrap(), ), MailAddr::Group(GroupInfo::new( "dc".to_string(), vec![ SingleInfo::new(None, "batman@dc.com".to_string()).unwrap(), SingleInfo::new(None, "superman@dc.com".to_string()).unwrap(), ], )), MailAddr::Single( SingleInfo::new(Some("d-woman".to_string()), "d@woman.com".to_string()).unwrap(), ), ]); assert_eq!(tc.count_addrs(), 6); assert_eq!(tc.extract_single_info(), None); let tc = MailAddrList(vec![]); assert_eq!(tc.count_addrs(), 0); assert_eq!(tc.extract_single_info(), None); let tc = MailAddrList(vec![MailAddr::Group(GroupInfo::new( "group".to_string(), vec![SingleInfo::new(None, "foo@bar.com".to_string()).unwrap()], ))]); assert_eq!(tc.count_addrs(), 1); assert_eq!(tc.extract_single_info(), None); let tc = MailAddrList(vec![MailAddr::Single( SingleInfo::new(None, "foo@bar.com".to_string()).unwrap(), )]); assert_eq!(tc.count_addrs(), 1); 
assert_eq!( tc.extract_single_info(), Some(SingleInfo::new(None, "foo@bar.com".to_string()).unwrap()) ); let tc = MailAddrList(vec![ MailAddr::Group(GroupInfo::new("group".to_string(), vec![])), MailAddr::Group(GroupInfo::new("group".to_string(), vec![])), ]); assert_eq!(tc.count_addrs(), 0); assert_eq!(tc.extract_single_info(), None); } #[test] fn parse_invalid() { assert!(addrparse("foo").is_err()); assert!(addrparse("foo ").is_err()); assert!(addrparse("group: foo ;").is_err()); } #[test] fn parse_with_encoded() { let (parsed, _) = crate::parse_header( b"From: =?UTF-8?B?0JjQvNGPLCDQpNCw0LzQuNC70LjRjw==?= ", ) .unwrap(); assert_eq!( addrparse_header(&parsed).unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new( Some("Имя, Фамилия".to_string()), "foobar@example.com".to_string() ) .unwrap() )]) ); } #[test] fn parse_quoted_encoded() { let (parsed, _) = crate::parse_header(b"From: \"=?utf-8?q?G=C3=B6tz?= C\" ").unwrap(); assert_eq!( addrparse_header(&parsed).unwrap(), MailAddrList(vec![MailAddr::Single( SingleInfo::new(Some("Götz C".to_string()), "g@c.de".to_string()).unwrap() )]) ); } #[test] fn parse_second_encoded() { let (parsed, _) = crate::parse_header( b"To: foo ,=?UTF-8?B?Zm9v8J+Qm2Jhcg==?= ", ) .unwrap(); assert_eq!( addrparse_header(&parsed).unwrap(), MailAddrList(vec![ MailAddr::Single( SingleInfo::new(Some("foo".to_string()), "foo@example.org".to_string()) .unwrap() ), MailAddr::Single( SingleInfo::new( Some("foo\u{1f41b}bar".to_string()), "bar@example.org".to_string() ) .unwrap() ) ]) ); } } mailparse-0.14.0/src/body.rs000064400000000000000000000130021046102023000137510ustar 00000000000000use crate::{MailParseError, ParsedContentType}; use charset::{decode_ascii, Charset}; /// Represents the body of an email (or mail subpart) pub enum Body<'a> { /// A body with 'base64' Content-Transfer-Encoding. Base64(EncodedBody<'a>), /// A body with 'quoted-printable' Content-Transfer-Encoding. 
QuotedPrintable(EncodedBody<'a>), /// A body with '7bit' Content-Transfer-Encoding. SevenBit(TextBody<'a>), /// A body with '8bit' Content-Transfer-Encoding. EightBit(TextBody<'a>), /// A body with 'binary' Content-Transfer-Encoding. Binary(BinaryBody<'a>), } impl<'a> Body<'a> { pub fn new( body: &'a [u8], ctype: &'a ParsedContentType, transfer_encoding: &Option, ) -> Body<'a> { transfer_encoding .as_ref() .map(|encoding| match encoding.as_ref() { "base64" => Body::Base64(EncodedBody { decoder: decode_base64, body, ctype, }), "quoted-printable" => Body::QuotedPrintable(EncodedBody { decoder: decode_quoted_printable, body, ctype, }), "7bit" => Body::SevenBit(TextBody { body, ctype }), "8bit" => Body::EightBit(TextBody { body, ctype }), "binary" => Body::Binary(BinaryBody { body, ctype }), _ => Body::get_default(body, ctype), }) .unwrap_or_else(|| Body::get_default(body, ctype)) } fn get_default(body: &'a [u8], ctype: &'a ParsedContentType) -> Body<'a> { Body::SevenBit(TextBody { body, ctype }) } } /// Struct that holds the encoded body representation of the message (or message subpart). pub struct EncodedBody<'a> { decoder: fn(&[u8]) -> Result, MailParseError>, ctype: &'a ParsedContentType, body: &'a [u8], } impl<'a> EncodedBody<'a> { /// Get the body Content-Type pub fn get_content_type(&self) -> &'a ParsedContentType { self.ctype } /// Get the raw body of the message exactly as it is written in the message (or message subpart). pub fn get_raw(&self) -> &'a [u8] { self.body } /// Get the decoded body of the message (or message subpart). pub fn get_decoded(&self) -> Result, MailParseError> { (self.decoder)(self.body) } /// Get the body of the message as a Rust string. /// This function tries to decode the body and then converts /// the result into a Rust UTF-8 string using the charset in the Content-Type /// (or "us-ascii" if the charset was missing or not recognized). /// This operation returns a valid result only if the decoded body /// has a text format. 
pub fn get_decoded_as_string(&self) -> Result { get_body_as_string(&self.get_decoded()?, &self.ctype) } } /// Struct that holds the textual body representation of the message (or message subpart). pub struct TextBody<'a> { ctype: &'a ParsedContentType, body: &'a [u8], } impl<'a> TextBody<'a> { /// Get the body Content-Type pub fn get_content_type(&self) -> &'a ParsedContentType { self.ctype } /// Get the raw body of the message exactly as it is written in the message (or message subpart). pub fn get_raw(&self) -> &'a [u8] { self.body } /// Get the body of the message as a Rust string. /// This function converts the body into a Rust UTF-8 string using the charset /// in the Content-Type /// (or "us-ascii" if the charset was missing or not recognized). pub fn get_as_string(&self) -> Result { get_body_as_string(self.body, &self.ctype) } } /// Struct that holds a binary body representation of the message (or message subpart). pub struct BinaryBody<'a> { ctype: &'a ParsedContentType, body: &'a [u8], } impl<'a> BinaryBody<'a> { /// Get the body Content-Type pub fn get_content_type(&self) -> &'a ParsedContentType { self.ctype } /// Get the raw body of the message exactly as it is written in the message (or message subpart). pub fn get_raw(&self) -> &'a [u8] { self.body } /// Get the body of the message as a Rust string. This function attempts /// to convert the body into a Rust UTF-8 string using the charset in the /// Content-Type header (or "us-ascii" as default). However, this may not /// always work for "binary" data. The API is provided anyway for /// convenient handling of real-world emails that may provide textual data /// with a binary transfer encoding, but use this at your own risk! pub fn get_as_string(&self) -> Result { get_body_as_string(self.body, &self.ctype) } } fn decode_base64(body: &[u8]) -> Result, MailParseError> { let cleaned = body .iter() .filter(|c| !c.is_ascii_whitespace()) .cloned() .collect::>(); Ok(data_encoding::BASE64_MIME.decode(&cleaned)?) 
} fn decode_quoted_printable(body: &[u8]) -> Result, MailParseError> { Ok(quoted_printable::decode( body, quoted_printable::ParseMode::Robust, )?) } fn get_body_as_string(body: &[u8], ctype: &ParsedContentType) -> Result { let cow = if let Some(charset) = Charset::for_label(ctype.charset.as_bytes()) { let (cow, _, _) = charset.decode(body); cow } else { decode_ascii(body) }; Ok(cow.into_owned()) } mailparse-0.14.0/src/dateparse.rs000064400000000000000000000160711046102023000147750ustar 00000000000000use crate::MailParseError; enum DateParseState { Date, Month, Year, Hour, Minute, Second, Timezone, } fn days_in_month(month: i64, year: i64) -> i64 { match month { 0 | 2 | 4 | 6 | 7 | 9 | 11 => 31, 3 | 5 | 8 | 10 => 30, 1 => { if (year % 400) == 0 { 29 } else if (year % 100) == 0 { 28 } else if (year % 4) == 0 { 29 } else { 28 } } _ => 0, } } fn seconds_to_date(year: i64, month: i64, day: i64) -> i64 { let mut result: i64 = 0; for y in 1970..2001 { if y == year { break; } result += 86400 * 365; if (y % 4) == 0 { result += 86400; } } let mut y = 2001; while y < year { if year - y >= 400 { result += (86400 * 365 * 400) + (86400 * 97); y += 400; continue; } if year - y >= 100 { result += (86400 * 365 * 100) + (86400 * 24); y += 100; continue; } if year - y >= 4 { result += (86400 * 365 * 4) + (86400); y += 4; continue; } result += 86400 * 365; y += 1; } for m in 0..month { result += 86400 * days_in_month(m, year) } result + 86400 * (day - 1) } /// Convert a date field from an email header into a UNIX epoch timestamp. /// This function handles the most common formatting of date fields found in /// email headers. It may fail to parse some of the more creative formattings. 
/// /// # Examples /// ``` /// use mailparse::dateparse; /// assert_eq!(dateparse("Sun, 02 Oct 2016 07:06:22 -0700 (PDT)").unwrap(), 1475417182); /// ``` pub fn dateparse(date: &str) -> Result { let mut result = 0; let mut month = 0; let mut day_of_month = 0; let mut state = DateParseState::Date; for tok in date.split(|c| c == ' ' || c == ':') { if tok.is_empty() { continue; } match state { DateParseState::Date => { if let Ok(v) = tok.parse::() { day_of_month = v; state = DateParseState::Month; }; continue; } DateParseState::Month => { month = match tok.to_uppercase().as_str() { "JAN" | "JANUARY" => 0, "FEB" | "FEBRUARY" => 1, "MAR" | "MARCH" => 2, "APR" | "APRIL" => 3, "MAY" => 4, "JUN" | "JUNE" => 5, "JUL" | "JULY" => 6, "AUG" | "AUGUST" => 7, "SEP" | "SEPTEMBER" => 8, "OCT" | "OCTOBER" => 9, "NOV" | "NOVEMBER" => 10, "DEC" | "DECEMBER" => 11, _ => return Err(MailParseError::Generic("Unrecognized month")), }; state = DateParseState::Year; continue; } DateParseState::Year => { let year = match tok.parse::() { Ok(v) if v < 70 => 2000 + v, Ok(v) if v < 100 => 1900 + v, Ok(v) if v < 1970 => return Err(MailParseError::Generic("Disallowed year")), Ok(v) => v, Err(_) => return Err(MailParseError::Generic("Invalid year")), }; result = seconds_to_date(i64::from(year), i64::from(month), i64::from(day_of_month)); state = DateParseState::Hour; continue; } DateParseState::Hour => { let hour = match tok.parse::() { Ok(v) => v, Err(_) => return Err(MailParseError::Generic("Invalid hour")), }; result += 3600 * i64::from(hour); state = DateParseState::Minute; continue; } DateParseState::Minute => { let minute = match tok.parse::() { Ok(v) => v, Err(_) => return Err(MailParseError::Generic("Invalid minute")), }; result += 60 * i64::from(minute); state = DateParseState::Second; continue; } DateParseState::Second => { let second = match tok.parse::() { Ok(v) => v, Err(_) => return Err(MailParseError::Generic("Invalid second")), }; result += i64::from(second); state = 
DateParseState::Timezone; continue; } DateParseState::Timezone => { let (tz, tz_sign) = match tok.parse::() { Ok(v) if v < 0 => (-v, -1), Ok(v) => (v, 1), Err(_) => { match tok.to_uppercase().as_str() { // This list taken from IETF RFC 822 "UTC" | "UT" | "GMT" | "Z" => (0, 1), "EDT" => (400, -1), "EST" | "CDT" => (500, -1), "CST" | "MDT" => (600, -1), "MST" | "PDT" => (700, -1), "PST" => (800, -1), "A" => (100, -1), "M" => (1200, -1), "N" => (100, 1), "Y" => (1200, 1), _ => return Err(MailParseError::Generic("Invalid timezone")), } } }; let tz_hours = tz / 100; let tz_mins = tz % 100; let tz_delta = (tz_hours * 3600) + (tz_mins * 60); if tz_sign < 0 { result += i64::from(tz_delta); } else { result -= i64::from(tz_delta); } break; } } } Ok(result) } #[cfg(test)] mod tests { use super::*; #[test] fn parse_dates() { assert_eq!( dateparse("Sun, 25 Sep 2016 18:36:33 -0400").unwrap(), 1474842993 ); assert_eq!( dateparse("Fri, 01 Jan 2100 11:12:13 +0000").unwrap(), 4102485133 ); assert_eq!( dateparse("Fri, 31 Dec 2100 00:00:00 +0000").unwrap(), 4133894400 ); assert_eq!( dateparse("Fri, 31 Dec 2399 00:00:00 +0000").unwrap(), 13569379200 ); assert_eq!( dateparse("Fri, 31 Dec 2400 00:00:00 +0000").unwrap(), 13601001600 ); assert_eq!(dateparse("17 Sep 2016 16:05:38 -1000").unwrap(), 1474164338); assert_eq!( dateparse("Fri, 30 Nov 2012 20:57:23 GMT").unwrap(), 1354309043 ); } } mailparse-0.14.0/src/header.rs000064400000000000000000000224671046102023000142630ustar 00000000000000use charset::Charset; use crate::find_from; /// Some types of tokens that might be present in a MIME header. This /// list is incomplete relative the types of tokens defined in the RFC, /// but can be expanded as needed. Currently the list of tokens is /// sufficient to properly handle encoded words and line unfolding. pub enum HeaderToken<'a> { /// A bunch of not-encoded text. This can include whitespace and /// non-whitespace chars. Text(&'a str), /// A bunch of text that is purely whitespace. 
Whitespace(&'a str), /// An end-of-line marker. If it contains None, then it represents /// a raw CRLF that has not yet been line-unfolded. If it contains /// a string, that represents the whitespace that was produced /// around that CRLF during line unfolding. This may include whitespace /// from the end of the previous line. Newline(Option), /// The decoded value of an encoded word found in the header. DecodedWord(String), } fn is_boundary(line: &str, ix: Option) -> bool { ix.and_then(|v| line.chars().nth(v)) .map(|c| { c.is_whitespace() || c == '"' || c == '(' || c == ')' || c == '<' || c == '>' || c == ',' }) .unwrap_or(true) } fn decode_word(encoded: &str) -> Option { let ix_delim1 = encoded.find('?')?; let ix_delim2 = find_from(encoded, ix_delim1 + 1, "?")?; let charset = &encoded[0..ix_delim1]; let transfer_coding = &encoded[ix_delim1 + 1..ix_delim2]; let input = &encoded[ix_delim2 + 1..]; let decoded = match transfer_coding { "B" | "b" => data_encoding::BASE64_MIME.decode(input.as_bytes()).ok()?, "Q" | "q" => { // The quoted_printable module does a trim_end on the input, so if // that affects the output we should save and restore the trailing // whitespace let to_decode = input.replace("_", " "); let trimmed = to_decode.trim_end(); let mut d = quoted_printable::decode(&trimmed, quoted_printable::ParseMode::Robust); if d.is_ok() && to_decode.len() != trimmed.len() { d.as_mut() .unwrap() .extend_from_slice(to_decode[trimmed.len()..].as_bytes()); } d.ok()? } _ => return None, }; let charset = Charset::for_label_no_replacement(charset.as_bytes())?; let (cow, _) = charset.decode_without_bom_handling(&decoded); Some(cow.into_owned()) } /// Tokenizes a single line of the header and produces a vector of /// tokens. Because this only processes a single line, it will never /// generate `HeaderToken::Newline` tokens. 
fn tokenize_header_line(line: &str) -> Vec<HeaderToken> {
    // Classify a run of non-encoded text: a run that is empty or consists
    // only of whitespace becomes `Whitespace`, anything else becomes `Text`.
    fn maybe_whitespace(text: &str) -> HeaderToken {
        if text.trim_end().is_empty() {
            HeaderToken::Whitespace(text)
        } else {
            HeaderToken::Text(text)
        }
    }

    let mut result = Vec::new();
    // Byte index in `line` from which the next "=?" opener is searched.
    let mut ix_search = 0;
    loop {
        match find_from(line, ix_search, "=?") {
            Some(v) => {
                // `ix_begin` is the first byte after the "=?" opener.
                let ix_begin = v + 2;
                // `ix_begin - 3` is the position just before the opener;
                // `checked_sub` yields `None` when the opener is at the very
                // start of the line, which `is_boundary` treats as a boundary.
                if !is_boundary(line, ix_begin.checked_sub(3)) {
                    // Not a real encoded-word start: emit everything up to and
                    // including this "=?" as plain text and keep scanning.
                    result.push(HeaderToken::Text(&line[ix_search..ix_begin]));
                    ix_search = ix_begin;
                    continue;
                }
                // Emit whatever preceded the opener.
                result.push(maybe_whitespace(&line[ix_search..ix_begin - 2]));
                let mut ix_end_search = ix_begin;
                loop {
                    match find_from(line, ix_end_search, "?=") {
                        Some(ix_end) => {
                            // A "?=" that is not followed by a boundary is not
                            // the terminator (for example the "?=" inside
                            // "?Q?=41"), so skip past it and look again.
                            if !is_boundary(line, ix_end.checked_add(2)) {
                                ix_end_search = ix_end + 2;
                                continue;
                            }
                            match decode_word(&line[ix_begin..ix_end]) {
                                Some(v) => result.push(HeaderToken::DecodedWord(v)),
                                None => {
                                    // Undecodable: pass the raw "=?...?=" through.
                                    result.push(HeaderToken::Text(&line[ix_begin - 2..ix_end + 2]));
                                }
                            };
                            ix_search = ix_end;
                        }
                        None => {
                            // Opener without a terminator: emit the "=?" itself
                            // as text and resume scanning right after it.
                            result.push(HeaderToken::Text("=?"));
                            ix_search = ix_begin - 2;
                        }
                    };
                    break;
                }
                // Step over the two-byte "?=" terminator (or the unmatched "=?").
                ix_search += 2;
                continue;
            }
            None => {
                // No more openers: the rest of the line is a single token.
                result.push(maybe_whitespace(&line[ix_search..]));
                break;
            }
        };
    }
    result
}

/// Tokenize an entire header, including newlines. This includes
/// decoded words, but doesn't do line unfolding, so any `HeaderToken::Newline`
/// tokens will always have a `None` inner value. Whitespace preceding
/// the newline will be in a separate `HeaderToken::Whitespace` or
/// `HeaderToken::Text` token. Semantically the `HeaderToken::Newline`
/// tokens that come out of this still represent the CRLF newline.
fn tokenize_header(value: &str) -> Vec<HeaderToken> {
    let mut tokens = Vec::new();
    // Leading whitespace of every physical line is dropped before tokenizing.
    for (ix, line) in value.lines().map(str::trim_start).enumerate() {
        // A `Newline(None)` separates consecutive lines; none precedes the first.
        if ix > 0 {
            tokens.push(HeaderToken::Newline(None));
        }
        tokens.extend(tokenize_header_line(line));
    }
    tokens
}

/// Takes in a list of tokens and processes them to normalize the whitespace
/// per the RFC.
This includes dropping any whitespace between two adjacent /// encoded words, and also doing line unfolding. As a result, the `HeaderToken::Newline` /// tokens that come out of this no longer represent the CRLF newline, but instead /// their contained `Option` will be populated with whatever whitespace gets /// generated from unfolding the line. This might include end-of-line whitespace from /// the previous line. fn normalize_header_whitespace(tokens: Vec) -> Vec { let mut result = Vec::::new(); let mut saved_token = None; // See RFC 2047 section 6.2 for what's going on here. Basically whitespace // that's between two adjacent encoded words should be thrown away. for tok in tokens { match &tok { HeaderToken::Text(_) => { // If we saved some whitespace, put it in since we encountered // non-whitespace chars that weren't part of an encoded word. if let Some(HeaderToken::Whitespace(_)) = &saved_token { result.push(saved_token.unwrap()); } else if let Some(HeaderToken::Newline(Some(_))) = &saved_token { result.push(saved_token.unwrap()); } // Also put the actual non-whitespace chars. result.push(tok); saved_token = None; } HeaderToken::Whitespace(_) => { // If the previous token was an encoded word, save the whitespace // as whitespace that's between two encoded words should be dropped. // We only know if this whitespace goes into `result` after parsing // the next token. if let Some(HeaderToken::DecodedWord(_)) = saved_token { saved_token = Some(tok); } else { result.push(tok); saved_token = None; } } HeaderToken::Newline(_) => { // If we saved whitespace at the end of the line, add an extra space // to it from the line unfolding. if let Some(HeaderToken::Whitespace(ws)) = saved_token { let new_ws = ws.to_owned() + " "; saved_token = Some(HeaderToken::Newline(Some(new_ws))); // If the end of the line had an encoded word, save the space from // line unfolding. 
} else if let Some(HeaderToken::DecodedWord(_)) = saved_token { saved_token = Some(HeaderToken::Newline(Some(" ".to_string()))); } else { result.push(HeaderToken::Newline(Some(" ".to_string()))); saved_token = None; } } HeaderToken::DecodedWord(_) => { // Note that saved_token might be a whitespace thing here. But we // throw it away because that means it fell between two adjacent // encoded words. saved_token = Some(HeaderToken::DecodedWord(String::new())); result.push(tok); } } } result } pub fn normalized_tokens(raw_value: &str) -> Vec { normalize_header_whitespace(tokenize_header(&raw_value)) } #[cfg(test)] mod tests { use super::*; #[test] fn test_is_boundary_multibyte() { // Bug #26, Incorrect unwrap() guard in is_boundary() // 6x'REPLACEMENT CHARACTER', but 18 bytes of data: let test = "\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}"; assert!(is_boundary(test, Some(8))); } } mailparse-0.14.0/src/headers.rs000064400000000000000000000076141046102023000144430ustar 00000000000000use crate::{MailHeader, MailHeaderMap}; use std::fmt; use std::slice; /// A struct that wrapps the header portion of a message and provides /// utility functions to look up specific headers. pub struct Headers<'a> { raw_bytes: &'a [u8], headers: &'a [MailHeader<'a>], } impl<'a> Headers<'a> { pub(crate) fn new(raw_bytes: &'a [u8], headers: &'a [MailHeader<'a>]) -> Headers<'a> { Headers { raw_bytes, headers } } /// Returns the raw, unparsed bytes that make up the header block of /// the message. This includes everything up to and including the empty /// line at the end of the header block. 
/// /// # Examples /// ``` /// use mailparse::{parse_mail, headers::Headers}; /// let mail = parse_mail(concat!( /// "SubJECT : foo\n", /// "\n", /// "Body starts here").as_bytes()) /// .unwrap(); /// assert_eq!(mail.get_headers().get_raw_bytes(), b"SubJECT : foo\n\n"); pub fn get_raw_bytes(&self) -> &'a [u8] { self.raw_bytes } } /// Allows iterating over the individual `MailHeader` items in this block of /// headers. /// /// # Examples /// ``` /// use mailparse::{parse_mail, headers::Headers}; /// let mail = parse_mail(concat!( /// "Subject: foo\n", /// "Another header: bar\n", /// "\n", /// "Body starts here").as_bytes()) /// .unwrap(); /// let mut iter = mail.get_headers().into_iter(); /// assert_eq!(iter.next().unwrap().get_key(), "Subject"); /// assert_eq!(iter.next().unwrap().get_key(), "Another header"); /// ``` impl<'a> IntoIterator for Headers<'a> { type Item = &'a MailHeader<'a>; type IntoIter = slice::Iter<'a, MailHeader<'a>>; fn into_iter(self) -> Self::IntoIter { self.headers.into_iter() } } /// Allows formatting and printing the `Headers` struct items. 
/// /// # Examples /// ``` /// use mailparse::parse_mail; /// let mail = parse_mail(concat!( /// "Subject: foo\n", /// "Another header: bar\n", /// "\n", /// "Body starts here").as_bytes()) /// .unwrap(); /// let mut headers = mail.get_headers(); /// assert_eq!(format!("{:?}", headers), "Headers { \ /// headers: [MailHeader { key: \"Subject\", value: \"foo\" }, \ /// MailHeader { key: \"Another header\", value: \"bar\" }] }"); /// ``` impl<'a> fmt::Debug for Headers<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Headers") .field("headers", &self.headers) .finish() } } impl<'a> MailHeaderMap for Headers<'a> { /// # Examples /// ``` /// use mailparse::{parse_mail, MailHeaderMap, headers::Headers}; /// let mail = parse_mail(concat!( /// "Subject: Test\n", /// "\n", /// "This is a test message").as_bytes()) /// .unwrap(); /// assert_eq!(mail.get_headers().get_first_value("Subject"), Some("Test".to_string())); /// ``` fn get_first_value(&self, key: &str) -> Option { self.headers.get_first_value(key) } fn get_first_header(&self, key: &str) -> Option<&MailHeader> { self.headers.get_first_header(key) } /// # Examples /// ``` /// use mailparse::{parse_mail, MailHeaderMap, headers::Headers}; /// let mail = parse_mail(concat!( /// "Key: Value1\n", /// "Key: Value2").as_bytes()) /// .unwrap(); /// assert_eq!(mail.get_headers().get_all_values("Key"), /// vec!["Value1".to_string(), "Value2".to_string()]); /// ``` fn get_all_values(&self, key: &str) -> Vec { self.headers.get_all_values(key) } fn get_all_headers(&self, key: &str) -> Vec<&MailHeader> { self.headers.get_all_headers(key) } } mailparse-0.14.0/src/lib.rs000064400000000000000000002536261046102023000136040ustar 00000000000000#![forbid(unsafe_code)] extern crate charset; extern crate data_encoding; extern crate quoted_printable; use std::borrow::Cow; use std::collections::{BTreeMap, HashMap}; use std::error; use std::fmt; use charset::{decode_latin1, Charset}; mod addrparse; pub mod body; 
mod dateparse; mod header; pub mod headers; mod msgidparse; pub use crate::addrparse::{ addrparse, addrparse_header, GroupInfo, MailAddr, MailAddrList, SingleInfo, }; use crate::body::Body; pub use crate::dateparse::dateparse; use crate::header::HeaderToken; use crate::headers::Headers; pub use crate::msgidparse::{msgidparse, MessageIdList}; /// An error type that represents the different kinds of errors that may be /// encountered during message parsing. #[derive(Debug)] pub enum MailParseError { /// Data that was specified as being in the quoted-printable transfer-encoding /// could not be successfully decoded as quoted-printable data. QuotedPrintableDecodeError(quoted_printable::QuotedPrintableError), /// Data that was specified as being in the base64 transfer-encoding could /// not be successfully decoded as base64 data. Base64DecodeError(data_encoding::DecodeError), /// An error occurred when converting the raw byte data to Rust UTF-8 string /// format using the charset specified in the message. EncodingError(std::borrow::Cow<'static, str>), /// Some other error occurred while parsing the message; the description string /// provides additional details. 
Generic(&'static str), } impl fmt::Display for MailParseError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { MailParseError::QuotedPrintableDecodeError(ref err) => { write!(f, "QuotedPrintable decode error: {}", err) } MailParseError::Base64DecodeError(ref err) => write!(f, "Base64 decode error: {}", err), MailParseError::EncodingError(ref err) => write!(f, "Encoding error: {}", err), MailParseError::Generic(ref description) => write!(f, "{}", description), } } } impl error::Error for MailParseError { fn cause(&self) -> Option<&dyn error::Error> { match *self { MailParseError::QuotedPrintableDecodeError(ref err) => Some(err), MailParseError::Base64DecodeError(ref err) => Some(err), _ => None, } } fn source(&self) -> Option<&(dyn error::Error + 'static)> { match *self { MailParseError::QuotedPrintableDecodeError(ref err) => Some(err), MailParseError::Base64DecodeError(ref err) => Some(err), _ => None, } } } impl From for MailParseError { fn from(err: quoted_printable::QuotedPrintableError) -> MailParseError { MailParseError::QuotedPrintableDecodeError(err) } } impl From for MailParseError { fn from(err: data_encoding::DecodeError) -> MailParseError { MailParseError::Base64DecodeError(err) } } impl From> for MailParseError { fn from(err: std::borrow::Cow<'static, str>) -> MailParseError { MailParseError::EncodingError(err) } } /// A struct that represents a single header in the message. /// It holds slices into the raw byte array passed to parse_mail, and so the /// lifetime of this struct must be contained within the lifetime of the raw /// input. There are additional accessor functions on this struct to extract /// the data as Rust strings. pub struct MailHeader<'a> { key: &'a [u8], value: &'a [u8], } /// Custom Debug trait for better formatting and printing of MailHeader items. 
impl<'a> fmt::Debug for MailHeader<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("MailHeader") .field("key", &String::from_utf8_lossy(&self.key)) .field("value", &String::from_utf8_lossy(&self.value)) .finish() } } pub(crate) fn find_from(line: &str, ix_start: usize, key: &str) -> Option { line[ix_start..].find(key).map(|v| ix_start + v) } fn find_from_u8(line: &[u8], ix_start: usize, key: &[u8]) -> Option { assert!(!key.is_empty()); assert!(ix_start < line.len()); if line.len() < key.len() { return None; } let ix_end = line.len() - key.len(); if ix_start <= ix_end { for i in ix_start..ix_end { if line[i] == key[0] { let mut success = true; for j in 1..key.len() { if line[i + j] != key[j] { success = false; break; } } if success { return Some(i); } } } } None } #[test] fn test_find_from_u8() { assert_eq!(find_from_u8(b"hello world", 0, b"hell"), Some(0)); assert_eq!(find_from_u8(b"hello world", 0, b"o"), Some(4)); assert_eq!(find_from_u8(b"hello world", 4, b"o"), Some(4)); assert_eq!(find_from_u8(b"hello world", 5, b"o"), Some(7)); assert_eq!(find_from_u8(b"hello world", 8, b"o"), None); assert_eq!(find_from_u8(b"hello world", 10, b"d"), None); } // Like find_from_u8, but additionally filters such that `key` is at the start // of a line (preceded by `\n`) or at the start of the search space. 
fn find_from_u8_line_prefix(line: &[u8], ix_start: usize, key: &[u8]) -> Option { let mut start = ix_start; while let Some(ix) = find_from_u8(line, start, key) { if ix == ix_start || line[ix - 1] == b'\n' { return Some(ix); } start = ix + 1; } None } #[test] fn test_find_from_u8_line_prefix() { assert_eq!(find_from_u8_line_prefix(b"hello world", 0, b"he"), Some(0)); assert_eq!(find_from_u8_line_prefix(b"hello\nhello", 0, b"he"), Some(0)); assert_eq!(find_from_u8_line_prefix(b"hello\nhello", 1, b"he"), Some(6)); assert_eq!(find_from_u8_line_prefix(b"hello world", 0, b"wo"), None); assert_eq!(find_from_u8_line_prefix(b"hello\nworld", 0, b"wo"), Some(6)); assert_eq!(find_from_u8_line_prefix(b"hello\nworld", 6, b"wo"), Some(6)); assert_eq!(find_from_u8_line_prefix(b"hello\nworld", 7, b"wo"), None); } impl<'a> MailHeader<'a> { /// Get the name of the header. Note that header names are case-insensitive. /// Prefer using get_key_ref where possible for better performance. pub fn get_key(&self) -> String { decode_latin1(self.key).into_owned() } /// Get the name of the header, borrowing if it's ASCII-only. /// Note that header names are case-insensitive. pub fn get_key_ref(&self) -> Cow { decode_latin1(self.key) } pub(crate) fn decode_utf8_or_latin1(&'a self) -> Cow<'a, str> { // RFC 6532 says that header values can be UTF-8. Let's try that first, and // fall back to latin1 if that fails, for better backwards-compatibility with // older versions of this library that didn't try UTF-8. match std::str::from_utf8(self.value) { Ok(s) => Cow::Borrowed(s), Err(_) => decode_latin1(self.value), } } /// Get the value of the header. Any sequences of newlines characters followed /// by whitespace are collapsed into a single space. In effect, header values /// wrapped across multiple lines are compacted back into one line, while /// discarding the extra whitespace required by the MIME format. Additionally, /// any quoted-printable words in the value are decoded. 
/// Note that this function attempts to decode the header value bytes as UTF-8 /// first, and falls back to Latin-1 if the UTF-8 decoding fails. This attempts /// to be compliant with both RFC 6532 as well as older versions of this library. /// To avoid the Latin-1 fallback decoding, which may end up returning "garbage", /// prefer using the get_value_utf8 function instead, which will fail and return /// an error instead of falling back to Latin-1. /// /// # Examples /// ``` /// use mailparse::parse_header; /// let (parsed, _) = parse_header(b"Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap(); /// assert_eq!(parsed.get_key(), "Subject"); /// assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!"); /// ``` pub fn get_value(&self) -> String { let chars = self.decode_utf8_or_latin1(); self.normalize_header(chars) } fn normalize_header(&'a self, chars: Cow<'a, str>) -> String { let mut result = String::new(); for tok in header::normalized_tokens(&chars) { match tok { HeaderToken::Text(t) => { result.push_str(t); } HeaderToken::Whitespace(ws) => { result.push_str(ws); } HeaderToken::Newline(Some(ws)) => { result.push_str(&ws); } HeaderToken::Newline(None) => {} HeaderToken::DecodedWord(dw) => { result.push_str(&dw); } } } result } /// Get the value of the header. Any sequences of newlines characters followed /// by whitespace are collapsed into a single space. In effect, header values /// wrapped across multiple lines are compacted back into one line, while /// discarding the extra whitespace required by the MIME format. Additionally, /// any quoted-printable words in the value are decoded. As per RFC 6532, this /// function assumes the raw header value is encoded as UTF-8, and does that /// decoding prior to tokenization and other processing. An EncodingError is /// returned if the raw header value cannot be decoded as UTF-8. 
/// /// # Examples /// ``` /// use mailparse::parse_header; /// let (parsed, _) = parse_header(b"Subject: \xC2\xA1Hola, se\xC3\xB1or!").unwrap(); /// assert_eq!(parsed.get_key(), "Subject"); /// assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!"); /// ``` pub fn get_value_utf8(&self) -> Result { let chars = std::str::from_utf8(self.value).map_err(|_| { MailParseError::EncodingError(Cow::Borrowed("Invalid UTF-8 in header value")) })?; Ok(self.normalize_header(Cow::Borrowed(chars))) } /// Get the raw, unparsed value of the header key. /// /// # Examples /// ``` /// use mailparse::parse_header; /// let (parsed, _) = parse_header(b"SuBJect : =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap(); /// assert_eq!(parsed.get_key_raw(), "SuBJect ".as_bytes()); /// ``` pub fn get_key_raw(&self) -> &[u8] { self.key } /// Get the raw, unparsed value of the header value. /// /// # Examples /// ``` /// use mailparse::parse_header; /// let (parsed, _) = parse_header(b"Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap(); /// assert_eq!(parsed.get_key(), "Subject"); /// assert_eq!(parsed.get_value_raw(), "=?iso-8859-1?Q?=A1Hola,_se=F1or!?=".as_bytes()); /// ``` pub fn get_value_raw(&self) -> &[u8] { self.value } } #[derive(Debug)] enum HeaderParseState { Initial, Key, PreValue, Value, ValueNewline, } /// Parse a single header from the raw data given. /// This function takes raw byte data, and starts parsing it, expecting there /// to be a MIME header key-value pair right at the beginning. It parses that /// header and returns it, along with the index at which the next header is /// expected to start. If you just want to parse a single header, you can ignore /// the second component of the tuple, which is the index of the next header. /// Error values are returned if the data could not be successfully interpreted /// as a MIME key-value pair. 
/// /// # Examples /// ``` /// use mailparse::parse_header; /// let (parsed, _) = parse_header(concat!( /// "Subject: Hello, sir,\n", /// " I am multiline\n", /// "Next:Header").as_bytes()) /// .unwrap(); /// assert_eq!(parsed.get_key(), "Subject"); /// assert_eq!(parsed.get_value(), "Hello, sir, I am multiline"); /// ``` pub fn parse_header(raw_data: &[u8]) -> Result<(MailHeader, usize), MailParseError> { let mut it = raw_data.iter(); let mut ix = 0; let mut c = match it.next() { None => return Err(MailParseError::Generic("Empty string provided")), Some(v) => *v, }; let mut ix_key_end = None; let mut ix_value_start = 0; let mut ix_value_end = 0; let mut state = HeaderParseState::Initial; loop { match state { HeaderParseState::Initial => { if c == b' ' { return Err(MailParseError::Generic( "Header cannot start with a space; it is \ likely an overhanging line from a \ previous header", )); }; state = HeaderParseState::Key; continue; } HeaderParseState::Key => { if c == b':' { ix_key_end = Some(ix); state = HeaderParseState::PreValue; } else if c == b'\n' { // Technically this is invalid. We'll handle it gracefully // since it does appear to happen in the wild and other // MTAs deal with it. Our handling is to just treat everything // encountered so far on this line as the header key, and // leave the value empty. 
ix_key_end = Some(ix); ix_value_start = ix; ix_value_end = ix; ix += 1; break; } } HeaderParseState::PreValue => { if c != b' ' { ix_value_start = ix; ix_value_end = ix; state = HeaderParseState::Value; continue; } } HeaderParseState::Value => { if c == b'\n' { state = HeaderParseState::ValueNewline; } else if c != b'\r' { ix_value_end = ix + 1; } } HeaderParseState::ValueNewline => { if c == b' ' || c == b'\t' { state = HeaderParseState::Value; continue; } else { break; } } } ix += 1; c = match it.next() { None => break, Some(v) => *v, }; } match ix_key_end { Some(v) => Ok(( MailHeader { key: &raw_data[0..v], value: &raw_data[ix_value_start..ix_value_end], }, ix, )), None => Ok(( // Technically this is invalid. We'll handle it gracefully // since we handle the analogous situation above. Our handling // is to just treat everything encountered on this line as // the header key, and leave the value empty. MailHeader { key: &raw_data[0..ix], value: &raw_data[ix..ix], }, ix, )), } } /// A trait that is implemented by the [MailHeader] slice. These functions are /// also available on Vec which is returned by the parse_headers /// function. It provides a map-like interface to look up header values by their /// name. pub trait MailHeaderMap { /// Look through the list of headers and return the value of the first one /// that matches the provided key. It returns Ok(None) if the no matching /// header was found. Header names are matched case-insensitively. /// /// # Examples /// ``` /// use mailparse::{parse_mail, MailHeaderMap}; /// let headers = parse_mail(concat!( /// "Subject: Test\n", /// "\n", /// "This is a test message").as_bytes()) /// .unwrap().headers; /// assert_eq!(headers.get_first_value("Subject"), Some("Test".to_string())); /// ``` fn get_first_value(&self, key: &str) -> Option; /// Similar to `get_first_value`, except it returns a reference to the /// MailHeader struct instead of just extracting the value. 
fn get_first_header(&self, key: &str) -> Option<&MailHeader>; /// Look through the list of headers and return the values of all headers /// matching the provided key. Returns an empty vector if no matching headers /// were found. The order of the returned values is the same as the order /// of the matching headers in the message. Header names are matched /// case-insensitively. /// /// # Examples /// ``` /// use mailparse::{parse_mail, MailHeaderMap}; /// let headers = parse_mail(concat!( /// "Key: Value1\n", /// "Key: Value2").as_bytes()) /// .unwrap().headers; /// assert_eq!(headers.get_all_values("Key"), /// vec!["Value1".to_string(), "Value2".to_string()]); /// ``` fn get_all_values(&self, key: &str) -> Vec; /// Similar to `get_all_values`, except it returns references to the /// MailHeader structs instead of just extracting the values. fn get_all_headers(&self, key: &str) -> Vec<&MailHeader>; } impl<'a> MailHeaderMap for [MailHeader<'a>] { fn get_first_value(&self, key: &str) -> Option { for x in self { if x.get_key_ref().eq_ignore_ascii_case(key) { return Some(x.get_value()); } } None } fn get_first_header(&self, key: &str) -> Option<&MailHeader> { for x in self { if x.get_key_ref().eq_ignore_ascii_case(key) { return Some(x); } } None } fn get_all_values(&self, key: &str) -> Vec { let mut values: Vec = Vec::new(); for x in self { if x.get_key_ref().eq_ignore_ascii_case(key) { values.push(x.get_value()); } } values } fn get_all_headers(&self, key: &str) -> Vec<&MailHeader> { let mut headers: Vec<&MailHeader> = Vec::new(); for x in self { if x.get_key_ref().eq_ignore_ascii_case(key) { headers.push(x); } } headers } } /// Parses all the headers from the raw data given. /// This function takes raw byte data, and starts parsing it, expecting there /// to be zero or more MIME header key-value pair right at the beginning, /// followed by two consecutive newlines (i.e. a blank line). It parses those /// headers and returns them in a vector. 
The normal vector functions can be /// used to access the headers linearly, or the MailHeaderMap trait can be used /// to access them in a map-like fashion. Along with this vector, the function /// returns the index at which the message body is expected to start. If you /// just care about the headers, you can ignore the second component of the /// returned tuple. /// Error values are returned if there was some sort of parsing error. /// /// # Examples /// ``` /// use mailparse::{parse_headers, MailHeaderMap}; /// let (headers, _) = parse_headers(concat!( /// "Subject: Test\n", /// "From: me@myself.com\n", /// "To: you@yourself.com").as_bytes()) /// .unwrap(); /// assert_eq!(headers[1].get_key(), "From"); /// assert_eq!(headers.get_first_value("To"), Some("you@yourself.com".to_string())); /// ``` pub fn parse_headers(raw_data: &[u8]) -> Result<(Vec, usize), MailParseError> { let mut headers: Vec = Vec::new(); let mut ix = 0; loop { if ix >= raw_data.len() { break; } else if raw_data[ix] == b'\n' { ix += 1; break; } else if raw_data[ix] == b'\r' { if ix + 1 < raw_data.len() && raw_data[ix + 1] == b'\n' { ix += 2; break; } else { return Err(MailParseError::Generic( "Headers were followed by an unexpected lone \ CR character!", )); } } let (header, ix_next) = parse_header(&raw_data[ix..])?; headers.push(header); ix += ix_next; } Ok((headers, ix)) } /// A struct to hold a more structured representation of the Content-Type header. /// This is provided mostly as a convenience since this metadata is usually /// needed to interpret the message body properly. #[derive(Debug)] pub struct ParsedContentType { /// The type of the data, for example "text/plain" or "application/pdf". pub mimetype: String, /// The charset used to decode the raw byte data, for example "iso-8859-1" /// or "utf-8". pub charset: String, /// The additional params of Content-Type, e.g. filename and boundary. 
The /// keys in the map will be lowercased, and the values will have any /// enclosing quotes stripped. pub params: BTreeMap, } impl Default for ParsedContentType { fn default() -> Self { ParsedContentType { mimetype: "text/plain".to_string(), charset: "us-ascii".to_string(), params: BTreeMap::new(), } } } impl ParsedContentType { fn default_conditional(in_multipart_digest: bool) -> Self { let mut default = Self::default(); if in_multipart_digest { default.mimetype = "message/rfc822".to_string(); } default } } /// Helper method to parse a header value as a Content-Type header. Note that /// the returned object's `params` map will contain a charset key if a charset /// was explicitly specified in the header; otherwise the `params` map will not /// contain a charset key. Regardless, the `charset` field will contain a /// charset - either the one explicitly specified or the default of "us-ascii". /// /// # Examples /// ``` /// use mailparse::{parse_header, parse_content_type}; /// let (parsed, _) = parse_header( /// b"Content-Type: text/html; charset=foo; boundary=\"quotes_are_removed\"") /// .unwrap(); /// let ctype = parse_content_type(&parsed.get_value()); /// assert_eq!(ctype.mimetype, "text/html"); /// assert_eq!(ctype.charset, "foo"); /// assert_eq!(ctype.params.get("boundary"), Some(&"quotes_are_removed".to_string())); /// assert_eq!(ctype.params.get("charset"), Some(&"foo".to_string())); /// ``` /// ``` /// use mailparse::{parse_header, parse_content_type}; /// let (parsed, _) = parse_header(b"Content-Type: bogus").unwrap(); /// let ctype = parse_content_type(&parsed.get_value()); /// assert_eq!(ctype.mimetype, "bogus"); /// assert_eq!(ctype.charset, "us-ascii"); /// assert_eq!(ctype.params.get("boundary"), None); /// assert_eq!(ctype.params.get("charset"), None); /// ``` /// ``` /// use mailparse::{parse_header, parse_content_type}; /// let (parsed, _) = parse_header(br#"Content-Type: 
application/octet-stream;name="=?utf8?B?6L+O5ai255m95a+M576O?=";charset="utf8""#).unwrap(); /// let ctype = parse_content_type(&parsed.get_value()); /// assert_eq!(ctype.mimetype, "application/octet-stream"); /// assert_eq!(ctype.charset, "utf8"); /// assert_eq!(ctype.params.get("boundary"), None); /// assert_eq!(ctype.params.get("name"), Some(&"迎娶白富美".to_string())); /// ``` pub fn parse_content_type(header: &str) -> ParsedContentType { let params = parse_param_content(header); let mimetype = params.value.to_lowercase(); let charset = params .params .get("charset") .cloned() .unwrap_or_else(|| "us-ascii".to_string()); ParsedContentType { mimetype, charset, params: params.params, } } /// The possible disposition types in a Content-Disposition header. A more /// comprehensive list of IANA-recognized types can be found at /// https://www.iana.org/assignments/cont-disp/cont-disp.xhtml. This library /// only enumerates the types most commonly found in email messages, and /// provides the `Extension` value for holding all other types. #[derive(Debug, Clone, PartialEq)] pub enum DispositionType { /// Default value, indicating the content is to be displayed inline as /// part of the enclosing document. Inline, /// A disposition indicating the content is not meant for inline display, /// but whose content can be accessed for use. Attachment, /// A disposition indicating the content contains a form submission. FormData, /// Extension type to hold any disposition not explicitly enumerated. Extension(String), } impl Default for DispositionType { fn default() -> Self { DispositionType::Inline } } /// Convert the string represented disposition type to enum. fn parse_disposition_type(disposition: &str) -> DispositionType { match &disposition.to_lowercase()[..] 
{ "inline" => DispositionType::Inline, "attachment" => DispositionType::Attachment, "form-data" => DispositionType::FormData, extension => DispositionType::Extension(extension.to_string()), } } /// A struct to hold a more structured representation of the Content-Disposition header. /// This is provided mostly as a convenience since this metadata is usually /// needed to interpret the message body properly. #[derive(Debug, Default)] pub struct ParsedContentDisposition { /// The disposition type of the Content-Disposition header. If this /// is an extension type, the string will be lowercased. pub disposition: DispositionType, /// The additional params of Content-Disposition, e.g. filename. The /// keys in the map will be lowercased, and the values will have any /// enclosing quotes stripped. pub params: BTreeMap, } /// Helper method to parse a header value as a Content-Disposition header. The disposition /// defaults to "inline" if no disposition parameter is provided in the header /// value. /// /// # Examples /// ``` /// use mailparse::{parse_header, parse_content_disposition, DispositionType}; /// let (parsed, _) = parse_header( /// b"Content-Disposition: attachment; filename=\"yummy dummy\"") /// .unwrap(); /// let dis = parse_content_disposition(&parsed.get_value()); /// assert_eq!(dis.disposition, DispositionType::Attachment); /// assert_eq!(dis.params.get("name"), None); /// assert_eq!(dis.params.get("filename"), Some(&"yummy dummy".to_string())); /// ``` pub fn parse_content_disposition(header: &str) -> ParsedContentDisposition { let params = parse_param_content(header); let disposition = parse_disposition_type(¶ms.value); ParsedContentDisposition { disposition, params: params.params, } } /// Struct that holds the structured representation of the message. Note that /// since MIME allows for nested multipart messages, a tree-like structure is /// necessary to represent it properly. 
This struct accomplishes that by holding /// a vector of other ParsedMail structures for the subparts. #[derive(Debug)] pub struct ParsedMail<'a> { /// The raw bytes that make up this message (or subpart). pub raw_bytes: &'a [u8], /// The raw bytes that make up the header block for this message (or subpart). header_bytes: &'a [u8], /// The headers for the message (or message subpart). pub headers: Vec>, /// The Content-Type information for the message (or message subpart). pub ctype: ParsedContentType, /// The raw bytes that make up the body of the message (or message subpart). body_bytes: &'a [u8], /// The subparts of this message or subpart. This vector is only non-empty /// if ctype.mimetype starts with "multipart/". pub subparts: Vec>, } impl<'a> ParsedMail<'a> { /// Get the body of the message as a Rust string. This function tries to /// unapply the Content-Transfer-Encoding if there is one, and then converts /// the result into a Rust UTF-8 string using the charset in the Content-Type /// (or "us-ascii" if the charset was missing or not recognized). Note that /// in some cases the body may be binary data that doesn't make sense as a /// Rust string - it is up to the caller to handle those cases gracefully. /// These cases may occur in particular when the body is of a "binary" /// Content-Transfer-Encoding (i.e. where `get_body_encoded()` returns a /// `Body::Binary` variant) but may also occur in other cases because of the /// messiness of the real world and non-compliant mail implementations. 
/// /// # Examples /// ``` /// use mailparse::parse_mail; /// let p = parse_mail(concat!( /// "Subject: test\n", /// "\n", /// "This is the body").as_bytes()) /// .unwrap(); /// assert_eq!(p.get_body().unwrap(), "This is the body"); /// ``` pub fn get_body(&self) -> Result { match self.get_body_encoded() { Body::Base64(body) | Body::QuotedPrintable(body) => body.get_decoded_as_string(), Body::SevenBit(body) | Body::EightBit(body) => body.get_as_string(), Body::Binary(body) => body.get_as_string(), } } /// Get the body of the message as a Rust Vec. This function tries to /// unapply the Content-Transfer-Encoding if there is one, but won't do /// any charset decoding. /// /// # Examples /// ``` /// use mailparse::parse_mail; /// let p = parse_mail(concat!( /// "Subject: test\n", /// "\n", /// "This is the body").as_bytes()) /// .unwrap(); /// assert_eq!(p.get_body_raw().unwrap(), b"This is the body"); /// ``` pub fn get_body_raw(&self) -> Result, MailParseError> { match self.get_body_encoded() { Body::Base64(body) | Body::QuotedPrintable(body) => body.get_decoded(), Body::SevenBit(body) | Body::EightBit(body) => Ok(Vec::::from(body.get_raw())), Body::Binary(body) => Ok(Vec::::from(body.get_raw())), } } /// Get the body of the message. /// This function returns the original body without attempting to /// unapply the Content-Transfer-Encoding. The returned object /// contains information that allows the caller to control decoding /// as desired. 
/// /// # Examples /// ``` /// use mailparse::parse_mail; /// use mailparse::body::Body; /// /// let mail = parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap(); /// /// match mail.get_body_encoded() { /// Body::Base64(body) => { /// assert_eq!(body.get_raw(), b"aGVsbG 8gd\r\n29ybGQ="); /// assert_eq!(body.get_decoded().unwrap(), b"hello world"); /// assert_eq!(body.get_decoded_as_string().unwrap(), "hello world"); /// }, /// _ => assert!(false), /// }; /// /// /// // An email whose body encoding is not known upfront /// let another_mail = parse_mail(b"").unwrap(); /// /// match another_mail.get_body_encoded() { /// Body::Base64(body) | Body::QuotedPrintable(body) => { /// println!("mail body encoded: {:?}", body.get_raw()); /// println!("mail body decoded: {:?}", body.get_decoded().unwrap()); /// println!("mail body decoded as string: {}", body.get_decoded_as_string().unwrap()); /// }, /// Body::SevenBit(body) | Body::EightBit(body) => { /// println!("mail body: {:?}", body.get_raw()); /// println!("mail body as string: {}", body.get_as_string().unwrap()); /// }, /// Body::Binary(body) => { /// println!("mail body binary: {:?}", body.get_raw()); /// } /// } /// ``` pub fn get_body_encoded(&'a self) -> Body<'a> { let transfer_encoding = self .headers .get_first_value("Content-Transfer-Encoding") .map(|s| s.to_lowercase()); Body::new(self.body_bytes, &self.ctype, &transfer_encoding) } /// Returns a struct that wraps the headers for this message. /// The struct provides utility methods to read the individual headers. pub fn get_headers(&'a self) -> Headers<'a> { Headers::new(&self.header_bytes, &self.headers) } /// Returns a struct containing a parsed representation of the /// Content-Disposition header. The first header with this name /// is used, if there are multiple. See the `parse_content_disposition` /// method documentation for more details on the semantics of the /// returned object. 
pub fn get_content_disposition(&self) -> ParsedContentDisposition { let disposition = self .headers .get_first_value("Content-Disposition") .map(|s| parse_content_disposition(&s)) .unwrap_or_default(); disposition } /// Returns a depth-first pre-order traversal of the subparts of /// this ParsedMail instance. The first item returned will be this /// ParsedMail itself. pub fn parts(&'a self) -> PartsIterator<'a> { PartsIterator { parts: vec![&self], index: 0, } } } pub struct PartsIterator<'a> { parts: Vec<&'a ParsedMail<'a>>, index: usize, } impl<'a> Iterator for PartsIterator<'a> { type Item = &'a ParsedMail<'a>; fn next(&mut self) -> Option { if self.index >= self.parts.len() { return None; } let cur = self.parts[self.index]; self.index += 1; self.parts .splice(self.index..self.index, cur.subparts.iter()); Some(cur) } } /// The main mail-parsing entry point. /// This function takes the raw data making up the message body and returns a /// structured version of it, which allows easily accessing the header and body /// information as needed. /// /// # Examples /// ``` /// use mailparse::*; /// let parsed = parse_mail(concat!( /// "Subject: This is a test email\n", /// "Content-Type: multipart/alternative; boundary=foobar\n", /// "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\n", /// "\n", /// "--foobar\n", /// "Content-Type: text/plain; charset=utf-8\n", /// "Content-Transfer-Encoding: quoted-printable\n", /// "\n", /// "This is the plaintext version, in utf-8. 
Proof by Euro: =E2=82=AC\n", /// "--foobar\n", /// "Content-Type: text/html\n", /// "Content-Transfer-Encoding: base64\n", /// "\n", /// "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \n", /// "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \n", /// "--foobar--\n", /// "After the final boundary stuff gets ignored.\n").as_bytes()) /// .unwrap(); /// assert_eq!(parsed.headers.get_first_value("Subject"), /// Some("This is a test email".to_string())); /// assert_eq!(parsed.subparts.len(), 2); /// assert_eq!(parsed.subparts[0].get_body().unwrap(), /// "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}"); /// assert_eq!(parsed.subparts[1].headers[1].get_value(), "base64"); /// assert_eq!(parsed.subparts[1].ctype.mimetype, "text/html"); /// assert!(parsed.subparts[1].get_body().unwrap().starts_with("")); /// assert_eq!(dateparse(parsed.headers.get_first_value("Date").unwrap().as_str()).unwrap(), 1475417182); /// ``` pub fn parse_mail(raw_data: &[u8]) -> Result { parse_mail_recursive(raw_data, false) } fn parse_mail_recursive( raw_data: &[u8], in_multipart_digest: bool, ) -> Result { let (headers, ix_body) = parse_headers(raw_data)?; let ctype = headers .get_first_value("Content-Type") .map(|s| parse_content_type(&s)) .unwrap_or_else(|| ParsedContentType::default_conditional(in_multipart_digest)); let mut result = ParsedMail { raw_bytes: raw_data, header_bytes: &raw_data[0..ix_body], headers, ctype, body_bytes: &raw_data[ix_body..], subparts: Vec::::new(), }; if result.ctype.mimetype.starts_with("multipart/") && result.ctype.params.get("boundary").is_some() && raw_data.len() > ix_body { let in_multipart_digest = result.ctype.mimetype == "multipart/digest"; let boundary = String::from("--") + &result.ctype.params["boundary"]; if let Some(ix_body_end) = find_from_u8_line_prefix(raw_data, ix_body, boundary.as_bytes()) { result.body_bytes = &raw_data[ix_body..ix_body_end]; let mut ix_boundary_end = ix_body_end + 
boundary.len(); while let Some(ix_part_start) = find_from_u8(raw_data, ix_boundary_end, b"\n").map(|v| v + 1) { // if there is no terminating boundary, assume the part end is the end of the email let ix_part_end = find_from_u8_line_prefix(raw_data, ix_part_start, boundary.as_bytes()) .unwrap_or_else(|| raw_data.len()); result.subparts.push(parse_mail_recursive( &raw_data[ix_part_start..ix_part_end], in_multipart_digest, )?); ix_boundary_end = ix_part_end + boundary.len(); if ix_boundary_end + 2 > raw_data.len() || (raw_data[ix_boundary_end] == b'-' && raw_data[ix_boundary_end + 1] == b'-') { break; } } } } Ok(result) } /// Used to store params for content-type and content-disposition struct ParamContent { value: String, params: BTreeMap, } /// Parse parameterized header values such as that for Content-Type /// e.g. `multipart/alternative; boundary=foobar` /// Note: this function is not made public as it may require /// significant changes to be fully correct. For instance, /// it does not handle quoted parameter values containing the /// semicolon (';') character. It also produces a BTreeMap, /// which implicitly does not support multiple parameters with /// the same key. Also, the parameter values may contain language /// information in a format specified by RFC 2184 which is thrown /// away. The format for parameterized header values doesn't /// appear to be strongly specified anywhere. fn parse_param_content(content: &str) -> ParamContent { let mut tokens = content.split(';'); // There must be at least one token produced by split, even if it's empty. let value = tokens.next().unwrap().trim(); let mut map: BTreeMap = tokens .filter_map(|kv| { kv.find('=').map(|idx| { let key = kv[0..idx].trim().to_lowercase(); let mut value = kv[idx + 1..].trim(); if value.starts_with('"') && value.ends_with('"') && value.len() > 1 { value = &value[1..value.len() - 1]; } (key, value.to_string()) }) }) .collect(); // Decode charset encoding, as described in RFC 2184, Section 4. 
let decode_key_list: Vec = map .keys() .filter_map(|k| k.strip_suffix("*")) .map(String::from) // Skip encoded keys where there is already an equivalent decoded key in the map .filter(|k| !map.contains_key(k)) .collect(); let encodings = compute_parameter_encodings(&map, &decode_key_list); // Note that when we get here, we might still have entries in `encodings` for continuation segments // that didn't have a *0 segment at all. These shouldn't exist per spec so we can do whatever we want, // as long as we don't panic. for (k, (e, strip)) in encodings { if let Some(charset) = Charset::for_label_no_replacement(e.as_bytes()) { let key = format!("{}*", k); let percent_encoded_value = map.remove(&key).unwrap(); let encoded_value = if strip { percent_decode(percent_encoded_value.splitn(3, '\'').nth(2).unwrap_or("")) } else { percent_decode(&percent_encoded_value) }; let decoded_value = charset.decode_without_bom_handling(&encoded_value).0; map.insert(k, decoded_value.to_string()); } } // Unwrap parameter value continuations, as described in RFC 2184, Section 3. let unwrap_key_list: Vec = map .keys() .filter_map(|k| k.strip_suffix("*0")) .map(String::from) // Skip wrapped keys where there is already an unwrapped equivalent in the map .filter(|k| !map.contains_key(k)) .collect(); for unwrap_key in unwrap_key_list { let mut unwrapped_value = String::new(); let mut index = 0; while let Some(wrapped_value_part) = map.remove(&format!("{}*{}", &unwrap_key, index)) { index = index + 1; unwrapped_value.push_str(&wrapped_value_part); } let old_value = map.insert(unwrap_key, unwrapped_value); assert!(old_value.is_none()); } ParamContent { value: value.into(), params: map, } } /// In the returned map, the key is one of the entries from the decode_key_list, /// (i.e. the parameter key with the trailing '*' stripped). 
The value is a tuple /// containing the encoding (or empty string for no encoding found) and a flag /// that indicates if the encoding needs to be stripped from the value. This is /// set to true for non-continuation parameter values. fn compute_parameter_encodings( map: &BTreeMap, decode_key_list: &Vec, ) -> HashMap { // To handle section 4.1 (combining encodings with continuations), we first // compute the encoding for each parameter value or parameter value segment // that is encoded. For continuation segments the encoding from the *0 segment // overwrites the continuation segment's encoding, if there is one. let mut encodings: HashMap = HashMap::new(); for decode_key in decode_key_list { if let Some(unwrap_key) = decode_key.strip_suffix("*0") { // Per spec, there should always be an encoding. If it's missing, handle that case gracefully // by setting it to an empty string that we handle specially later. let encoding = map .get(&format!("{}*", decode_key)) .unwrap() .split('\'') .next() .unwrap_or(""); let continuation_prefix = format!("{}*", unwrap_key); for continuation_key in decode_key_list { if continuation_key.starts_with(&continuation_prefix) { // This may (intentionally) overwite encodings previously found for the // continuation segments (which are bogus). In those cases, the flag // in the tuple should get updated from true to false. encodings.insert( continuation_key.clone(), (encoding.to_string(), continuation_key == decode_key), ); } } } else if !encodings.contains_key(decode_key) { let encoding = map .get(&format!("{}*", decode_key)) .unwrap() .split('\'') .next() .unwrap_or("") .to_string(); let old_value = encodings.insert(decode_key.clone(), (encoding, true)); assert!(old_value.is_none()); } // else this is a continuation segment and the encoding has already been populated // by the initial *0 segment, so we can ignore it. 
} encodings } fn percent_decode(encoded: &str) -> Vec { let mut decoded = Vec::with_capacity(encoded.len()); let mut bytes = encoded.bytes(); let mut next = bytes.next(); while next.is_some() { let b = next.unwrap(); if b != b'%' { decoded.push(b); next = bytes.next(); continue; } let top = match bytes.next() { Some(n) if n.is_ascii_hexdigit() => n, n @ _ => { decoded.push(b); next = n; continue; } }; let bottom = match bytes.next() { Some(n) if n.is_ascii_hexdigit() => n, n @ _ => { decoded.push(b); decoded.push(top); next = n; continue; } }; let decoded_byte = (hex_to_nybble(top) << 4) | hex_to_nybble(bottom); decoded.push(decoded_byte); next = bytes.next(); } decoded } fn hex_to_nybble(byte: u8) -> u8 { match byte { b'0'..=b'9' => byte - b'0', b'a'..=b'f' => byte - b'a' + 10, b'A'..=b'F' => byte - b'A' + 10, _ => panic!("Not a hex character!"), } } #[cfg(test)] mod tests { use super::*; #[test] fn parse_basic_header() { let (parsed, _) = parse_header(b"Key: Value").unwrap(); assert_eq!(parsed.key, b"Key"); assert_eq!(parsed.get_key(), "Key"); assert_eq!(parsed.get_key_ref(), "Key"); assert_eq!(parsed.value, b"Value"); assert_eq!(parsed.get_value(), "Value"); assert_eq!(parsed.get_value_raw(), "Value".as_bytes()); let (parsed, _) = parse_header(b"Key : Value ").unwrap(); assert_eq!(parsed.key, b"Key "); assert_eq!(parsed.value, b"Value "); assert_eq!(parsed.get_value(), "Value "); assert_eq!(parsed.get_value_raw(), "Value ".as_bytes()); let (parsed, _) = parse_header(b"Key:").unwrap(); assert_eq!(parsed.key, b"Key"); assert_eq!(parsed.value, b""); let (parsed, _) = parse_header(b":\n").unwrap(); assert_eq!(parsed.key, b""); assert_eq!(parsed.value, b""); let (parsed, _) = parse_header(b"Key:Multi-line\n value").unwrap(); assert_eq!(parsed.key, b"Key"); assert_eq!(parsed.value, b"Multi-line\n value"); assert_eq!(parsed.get_value(), "Multi-line value"); assert_eq!(parsed.get_value_raw(), "Multi-line\n value".as_bytes()); let (parsed, _) = parse_header(b"Key: 
Multi\n line\n value\n").unwrap(); assert_eq!(parsed.key, b"Key"); assert_eq!(parsed.value, b"Multi\n line\n value"); assert_eq!(parsed.get_value(), "Multi line value"); assert_eq!(parsed.get_value_raw(), "Multi\n line\n value".as_bytes()); let (parsed, _) = parse_header(b"Key: One\nKey2: Two").unwrap(); assert_eq!(parsed.key, b"Key"); assert_eq!(parsed.value, b"One"); let (parsed, _) = parse_header(b"Key: One\n\tOverhang").unwrap(); assert_eq!(parsed.key, b"Key"); assert_eq!(parsed.value, b"One\n\tOverhang"); assert_eq!(parsed.get_value(), "One Overhang"); assert_eq!(parsed.get_value_raw(), "One\n\tOverhang".as_bytes()); let (parsed, _) = parse_header(b"SPAM: VIAGRA \xAE").unwrap(); assert_eq!(parsed.key, b"SPAM"); assert_eq!(parsed.value, b"VIAGRA \xAE"); assert_eq!(parsed.get_value(), "VIAGRA \u{ae}"); assert_eq!(parsed.get_value_raw(), b"VIAGRA \xAE"); parse_header(b" Leading: Space").unwrap_err(); let (parsed, _) = parse_header(b"Just a string").unwrap(); assert_eq!(parsed.key, b"Just a string"); assert_eq!(parsed.value, b""); assert_eq!(parsed.get_value(), ""); assert_eq!(parsed.get_value_raw(), b""); let (parsed, _) = parse_header(b"Key\nBroken: Value").unwrap(); assert_eq!(parsed.key, b"Key"); assert_eq!(parsed.value, b""); assert_eq!(parsed.get_value(), ""); assert_eq!(parsed.get_value_raw(), b""); let (parsed, _) = parse_header(b"Key: With CRLF\r\n").unwrap(); assert_eq!(parsed.key, b"Key"); assert_eq!(parsed.value, b"With CRLF"); assert_eq!(parsed.get_value(), "With CRLF"); assert_eq!(parsed.get_value_raw(), b"With CRLF"); let (parsed, _) = parse_header(b"Key: With spurious CRs\r\r\r\n").unwrap(); assert_eq!(parsed.value, b"With spurious CRs"); assert_eq!(parsed.get_value(), "With spurious CRs"); assert_eq!(parsed.get_value_raw(), b"With spurious CRs"); let (parsed, _) = parse_header(b"Key: With \r mixed CR\r\n").unwrap(); assert_eq!(parsed.value, b"With \r mixed CR"); assert_eq!(parsed.get_value(), "With \r mixed CR"); assert_eq!(parsed.get_value_raw(), 
b"With \r mixed CR"); let (parsed, _) = parse_header(b"Key:\r\n Value after linebreak").unwrap(); assert_eq!(parsed.value, b"\r\n Value after linebreak"); assert_eq!(parsed.get_value(), " Value after linebreak"); assert_eq!(parsed.get_value_raw(), b"\r\n Value after linebreak"); } #[test] fn parse_encoded_headers() { let (parsed, _) = parse_header(b"Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap(); assert_eq!(parsed.get_key(), "Subject"); assert_eq!(parsed.get_key_ref(), "Subject"); assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!"); assert_eq!( parsed.get_value_raw(), "=?iso-8859-1?Q?=A1Hola,_se=F1or!?=".as_bytes() ); let (parsed, _) = parse_header( b"Subject: =?iso-8859-1?Q?=A1Hola,?=\n \ =?iso-8859-1?Q?_se=F1or!?=", ) .unwrap(); assert_eq!(parsed.get_key(), "Subject"); assert_eq!(parsed.get_key_ref(), "Subject"); assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!"); assert_eq!( parsed.get_value_raw(), "=?iso-8859-1?Q?=A1Hola,?=\n \ =?iso-8859-1?Q?_se=F1or!?=" .as_bytes() ); let (parsed, _) = parse_header(b"Euro: =?utf-8?Q?=E2=82=AC?=").unwrap(); assert_eq!(parsed.get_key(), "Euro"); assert_eq!(parsed.get_key_ref(), "Euro"); assert_eq!(parsed.get_value(), "\u{20ac}"); assert_eq!(parsed.get_value_raw(), "=?utf-8?Q?=E2=82=AC?=".as_bytes()); let (parsed, _) = parse_header(b"HelloWorld: =?utf-8?B?aGVsbG8gd29ybGQ=?=").unwrap(); assert_eq!(parsed.get_value(), "hello world"); assert_eq!( parsed.get_value_raw(), "=?utf-8?B?aGVsbG8gd29ybGQ=?=".as_bytes() ); let (parsed, _) = parse_header(b"Empty: =?utf-8?Q??=").unwrap(); assert_eq!(parsed.get_value(), ""); assert_eq!(parsed.get_value_raw(), "=?utf-8?Q??=".as_bytes()); let (parsed, _) = parse_header(b"Incomplete: =?").unwrap(); assert_eq!(parsed.get_value(), "=?"); assert_eq!(parsed.get_value_raw(), "=?".as_bytes()); let (parsed, _) = parse_header(b"BadEncoding: =?garbage?Q??=").unwrap(); assert_eq!(parsed.get_value(), "=?garbage?Q??="); assert_eq!(parsed.get_value_raw(), "=?garbage?Q??=".as_bytes()); let 
(parsed, _) = parse_header(b"Invalid: =?utf-8?Q?=E2=AC?=").unwrap(); assert_eq!(parsed.get_value(), "\u{fffd}"); let (parsed, _) = parse_header(b"LineBreak: =?utf-8?Q?=E2=82\n =AC?=").unwrap(); assert_eq!(parsed.get_value(), "=?utf-8?Q?=E2=82 =AC?="); let (parsed, _) = parse_header(b"NotSeparateWord: hello=?utf-8?Q?world?=").unwrap(); assert_eq!(parsed.get_value(), "hello=?utf-8?Q?world?="); let (parsed, _) = parse_header(b"NotSeparateWord2: =?utf-8?Q?hello?=world").unwrap(); assert_eq!(parsed.get_value(), "=?utf-8?Q?hello?=world"); let (parsed, _) = parse_header(b"Key: \"=?utf-8?Q?value?=\"").unwrap(); assert_eq!(parsed.get_value(), "\"value\""); let (parsed, _) = parse_header(b"Subject: =?utf-8?q?=5BOntario_Builder=5D_Understanding_home_shopping_=E2=80=93_a_q?=\n \ =?utf-8?q?uick_survey?=") .unwrap(); assert_eq!(parsed.get_key(), "Subject"); assert_eq!(parsed.get_key_ref(), "Subject"); assert_eq!( parsed.get_value(), "[Ontario Builder] Understanding home shopping \u{2013} a quick survey" ); let (parsed, _) = parse_header( b"Subject: =?utf-8?q?=5BOntario_Builder=5D?= non-qp words\n \ and the subject continues", ) .unwrap(); assert_eq!( parsed.get_value(), "[Ontario Builder] non-qp words and the subject continues" ); let (parsed, _) = parse_header( b"Subject: =?utf-8?q?=5BOntario_Builder=5D?= \n \ and the subject continues", ) .unwrap(); assert_eq!( parsed.get_value(), "[Ontario Builder] and the subject continues" ); assert_eq!( parsed.get_value_raw(), "=?utf-8?q?=5BOntario_Builder=5D?= \n \ and the subject continues" .as_bytes() ); let (parsed, _) = parse_header(b"Subject: =?ISO-2022-JP?B?GyRCRnwbKEI=?=\n\t=?ISO-2022-JP?B?GyRCS1wbKEI=?=\n\t=?ISO-2022-JP?B?GyRCOGwbKEI=?=") .unwrap(); assert_eq!(parsed.get_key(), "Subject"); assert_eq!(parsed.get_key_ref(), "Subject"); assert_eq!(parsed.get_key_raw(), "Subject".as_bytes()); assert_eq!(parsed.get_value(), "\u{65E5}\u{672C}\u{8A9E}"); assert_eq!(parsed.get_value_raw(), 
"=?ISO-2022-JP?B?GyRCRnwbKEI=?=\n\t=?ISO-2022-JP?B?GyRCS1wbKEI=?=\n\t=?ISO-2022-JP?B?GyRCOGwbKEI=?=".as_bytes()); let (parsed, _) = parse_header(b"Subject: =?ISO-2022-JP?Q?=1B\x24\x42\x46\x7C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x4B\x5C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x38\x6C=1B\x28\x42?=") .unwrap(); assert_eq!(parsed.get_key(), "Subject"); assert_eq!(parsed.get_key_ref(), "Subject"); assert_eq!(parsed.get_key_raw(), "Subject".as_bytes()); assert_eq!(parsed.get_value(), "\u{65E5}\u{672C}\u{8A9E}"); assert_eq!(parsed.get_value_raw(), "=?ISO-2022-JP?Q?=1B\x24\x42\x46\x7C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x4B\x5C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x38\x6C=1B\x28\x42?=".as_bytes()); let (parsed, _) = parse_header(b"Subject: =?UTF-7?Q?+JgM-?=").unwrap(); assert_eq!(parsed.get_key(), "Subject"); assert_eq!(parsed.get_key_ref(), "Subject"); assert_eq!(parsed.get_key_raw(), "Subject".as_bytes()); assert_eq!(parsed.get_value(), "\u{2603}"); assert_eq!(parsed.get_value_raw(), b"=?UTF-7?Q?+JgM-?="); let (parsed, _) = parse_header(b"Content-Type: image/jpeg; name=\"=?UTF-8?B?MDY2MTM5ODEuanBn?=\"") .unwrap(); assert_eq!(parsed.get_key(), "Content-Type"); assert_eq!(parsed.get_key_ref(), "Content-Type"); assert_eq!(parsed.get_key_raw(), "Content-Type".as_bytes()); assert_eq!(parsed.get_value(), "image/jpeg; name=\"06613981.jpg\""); assert_eq!( parsed.get_value_raw(), "image/jpeg; name=\"=?UTF-8?B?MDY2MTM5ODEuanBn?=\"".as_bytes() ); let (parsed, _) = parse_header( b"From: =?UTF-8?Q?\"Motorola_Owners=E2=80=99_Forums\"_?=", ) .unwrap(); assert_eq!(parsed.get_key(), "From"); assert_eq!(parsed.get_key_ref(), "From"); assert_eq!(parsed.get_key_raw(), "From".as_bytes()); assert_eq!( parsed.get_value(), "\"Motorola Owners\u{2019} Forums\" " ); } #[test] fn encoded_words_and_spaces() { let (parsed, _) = parse_header(b"K: an =?utf-8?q?encoded?=\n word").unwrap(); assert_eq!(parsed.get_value(), "an encoded word"); assert_eq!( parsed.get_value_raw(), 
"an =?utf-8?q?encoded?=\n word".as_bytes() ); let (parsed, _) = parse_header(b"K: =?utf-8?q?glue?= =?utf-8?q?these?= \n words").unwrap(); assert_eq!(parsed.get_value(), "gluethese words"); assert_eq!( parsed.get_value_raw(), "=?utf-8?q?glue?= =?utf-8?q?these?= \n words".as_bytes() ); let (parsed, _) = parse_header(b"K: =?utf-8?q?glue?= \n =?utf-8?q?again?=").unwrap(); assert_eq!(parsed.get_value(), "glueagain"); assert_eq!( parsed.get_value_raw(), "=?utf-8?q?glue?= \n =?utf-8?q?again?=".as_bytes() ); } #[test] fn parse_multiple_headers() { let (parsed, _) = parse_headers(b"Key: Value\nTwo: Second").unwrap(); assert_eq!(parsed.len(), 2); assert_eq!(parsed[0].key, b"Key"); assert_eq!(parsed[0].value, b"Value"); assert_eq!(parsed[1].key, b"Two"); assert_eq!(parsed[1].value, b"Second"); let (parsed, _) = parse_headers(b"Key: Value\n Overhang\nTwo: Second\nThree: Third").unwrap(); assert_eq!(parsed.len(), 3); assert_eq!(parsed[0].key, b"Key"); assert_eq!(parsed[0].value, b"Value\n Overhang"); assert_eq!(parsed[1].key, b"Two"); assert_eq!(parsed[1].value, b"Second"); assert_eq!(parsed[2].key, b"Three"); assert_eq!(parsed[2].value, b"Third"); let (parsed, _) = parse_headers(b"Key: Value\nTwo: Second\n\nBody").unwrap(); assert_eq!(parsed.len(), 2); assert_eq!(parsed[0].key, b"Key"); assert_eq!(parsed[0].value, b"Value"); assert_eq!(parsed[1].key, b"Two"); assert_eq!(parsed[1].value, b"Second"); let (parsed, _) = parse_headers( concat!( "Return-Path: \n", "X-Original-To: kats@baz.staktrace.com\n", "Delivered-To: kats@baz.staktrace.com\n", "Received: from foobar.staktrace.com (localhost [127.0.0.1])\n", " by foobar.staktrace.com (Postfix) with ESMTP id \ 139F711C1C34\n", " for ; Fri, 27 May 2016 02:34:26 \ -0400 (EDT)\n", "Date: Fri, 27 May 2016 02:34:25 -0400\n", "To: kats@baz.staktrace.com\n", "From: kats@foobar.staktrace.com\n", "Subject: test Fri, 27 May 2016 02:34:25 -0400\n", "X-Mailer: swaks v20130209.0 jetmore.org/john/code/swaks/\n", "Message-Id: \ 
<20160527063426.139F711C1C34@foobar.staktrace.com>\n", "\n", "This is a test mailing\n" ) .as_bytes(), ) .unwrap(); assert_eq!(parsed.len(), 10); assert_eq!(parsed[0].key, b"Return-Path"); assert_eq!(parsed[9].key, b"Message-Id"); let (parsed, _) = parse_headers(b"Key: Value\nAnotherKey: AnotherValue\nKey: Value2\nKey: Value3\n") .unwrap(); assert_eq!(parsed.len(), 4); assert_eq!(parsed.get_first_value("Key"), Some("Value".to_string())); assert_eq!( parsed.get_all_values("Key"), vec!["Value", "Value2", "Value3"] ); assert_eq!( parsed.get_first_value("AnotherKey"), Some("AnotherValue".to_string()) ); assert_eq!(parsed.get_all_values("AnotherKey"), vec!["AnotherValue"]); assert_eq!(parsed.get_first_value("NoKey"), None); assert_eq!(parsed.get_all_values("NoKey"), Vec::::new()); let (parsed, _) = parse_headers(b"Key: value\r\nWith: CRLF\r\n\r\nBody").unwrap(); assert_eq!(parsed.len(), 2); assert_eq!(parsed.get_first_value("Key"), Some("value".to_string())); assert_eq!(parsed.get_first_value("With"), Some("CRLF".to_string())); let (parsed, _) = parse_headers(b"Bad\nKey\n").unwrap(); assert_eq!(parsed.len(), 2); assert_eq!(parsed.get_first_value("Bad"), Some("".to_string())); assert_eq!(parsed.get_first_value("Key"), Some("".to_string())); let (parsed, _) = parse_headers(b"K:V\nBad\nKey").unwrap(); assert_eq!(parsed.len(), 3); assert_eq!(parsed.get_first_value("K"), Some("V".to_string())); assert_eq!(parsed.get_first_value("Bad"), Some("".to_string())); assert_eq!(parsed.get_first_value("Key"), Some("".to_string())); } #[test] fn test_parse_content_type() { let ctype = parse_content_type("text/html; charset=utf-8"); assert_eq!(ctype.mimetype, "text/html"); assert_eq!(ctype.charset, "utf-8"); assert_eq!(ctype.params.get("boundary"), None); let ctype = parse_content_type(" foo/bar; x=y; charset=\"fake\" ; x2=y2"); assert_eq!(ctype.mimetype, "foo/bar"); assert_eq!(ctype.charset, "fake"); assert_eq!(ctype.params.get("boundary"), None); let ctype = parse_content_type(" 
multipart/bar; boundary=foo "); assert_eq!(ctype.mimetype, "multipart/bar"); assert_eq!(ctype.charset, "us-ascii"); assert_eq!(ctype.params.get("boundary").unwrap(), "foo"); } #[test] fn test_parse_content_disposition() { let dis = parse_content_disposition("inline"); assert_eq!(dis.disposition, DispositionType::Inline); assert_eq!(dis.params.get("name"), None); assert_eq!(dis.params.get("filename"), None); let dis = parse_content_disposition( " attachment; x=y; charset=\"fake\" ; x2=y2; name=\"King Joffrey.death\"", ); assert_eq!(dis.disposition, DispositionType::Attachment); assert_eq!( dis.params.get("name"), Some(&"King Joffrey.death".to_string()) ); assert_eq!(dis.params.get("filename"), None); let dis = parse_content_disposition(" form-data"); assert_eq!(dis.disposition, DispositionType::FormData); assert_eq!(dis.params.get("name"), None); assert_eq!(dis.params.get("filename"), None); } #[test] fn test_parse_mail() { let mail = parse_mail(b"Key: value\r\n\r\nSome body stuffs").unwrap(); assert_eq!(mail.header_bytes, b"Key: value\r\n\r\n"); assert_eq!(mail.headers.len(), 1); assert_eq!(mail.headers[0].get_key(), "Key"); assert_eq!(mail.headers[0].get_key_ref(), "Key"); assert_eq!(mail.headers[0].get_value(), "value"); assert_eq!(mail.ctype.mimetype, "text/plain"); assert_eq!(mail.ctype.charset, "us-ascii"); assert_eq!(mail.ctype.params.get("boundary"), None); assert_eq!(mail.body_bytes, b"Some body stuffs"); assert_eq!(mail.get_body_raw().unwrap(), b"Some body stuffs"); assert_eq!(mail.get_body().unwrap(), "Some body stuffs"); assert_eq!(mail.subparts.len(), 0); let mail = parse_mail( concat!( "Content-Type: MULTIpart/alternative; bounDAry=myboundary\r\n\r\n", "--myboundary\r\n", "Content-Type: text/plain\r\n\r\n", "This is the plaintext version.\r\n", "--myboundary\r\n", "Content-Type: text/html;chARset=utf-8\r\n\r\n", "This is the HTML version with fake --MYBOUNDARY.\r\n", "--myboundary--" ) .as_bytes(), ) .unwrap(); assert_eq!(mail.headers.len(), 1); 
assert_eq!(mail.headers[0].get_key(), "Content-Type"); assert_eq!(mail.headers[0].get_key_ref(), "Content-Type"); assert_eq!(mail.ctype.mimetype, "multipart/alternative"); assert_eq!(mail.ctype.charset, "us-ascii"); assert_eq!(mail.ctype.params.get("boundary").unwrap(), "myboundary"); assert_eq!(mail.subparts.len(), 2); assert_eq!(mail.subparts[0].headers.len(), 1); assert_eq!(mail.subparts[0].ctype.mimetype, "text/plain"); assert_eq!(mail.subparts[0].ctype.charset, "us-ascii"); assert_eq!(mail.subparts[0].ctype.params.get("boundary"), None); assert_eq!(mail.subparts[1].ctype.mimetype, "text/html"); assert_eq!(mail.subparts[1].ctype.charset, "utf-8"); assert_eq!(mail.subparts[1].ctype.params.get("boundary"), None); let mail = parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap(); assert_eq!(mail.get_body_raw().unwrap(), b"hello world"); assert_eq!(mail.get_body().unwrap(), "hello world"); let mail = parse_mail(b"Content-Type: text/plain; charset=x-unknown\r\n\r\nhello world").unwrap(); assert_eq!(mail.get_body_raw().unwrap(), b"hello world"); assert_eq!(mail.get_body().unwrap(), "hello world"); let mail = parse_mail(b"ConTENT-tyPE: text/html\r\n\r\nhello world").unwrap(); assert_eq!(mail.ctype.mimetype, "text/html"); assert_eq!(mail.get_body_raw().unwrap(), b"hello world"); assert_eq!(mail.get_body().unwrap(), "hello world"); let mail = parse_mail( b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n+JgM-", ).unwrap(); assert_eq!(mail.get_body_raw().unwrap(), b"+JgM-"); assert_eq!(mail.get_body().unwrap(), "\u{2603}"); let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\n\r\n+JgM-").unwrap(); assert_eq!(mail.get_body_raw().unwrap(), b"+JgM-"); assert_eq!(mail.get_body().unwrap(), "\u{2603}"); } #[test] fn test_missing_terminating_boundary() { let mail = parse_mail( concat!( "Content-Type: multipart/alternative; boundary=myboundary\r\n\r\n", "--myboundary\r\n", "Content-Type: 
text/plain\r\n\r\n", "part0\r\n", "--myboundary\r\n", "Content-Type: text/html\r\n\r\n", "part1\r\n" ) .as_bytes(), ) .unwrap(); assert_eq!(mail.subparts[0].get_body().unwrap(), "part0\r\n"); assert_eq!(mail.subparts[1].get_body().unwrap(), "part1\r\n"); } #[test] fn test_missing_body() { let parsed = parse_mail("Content-Type: multipart/related; boundary=\"----=_\"\n".as_bytes()) .unwrap(); assert_eq!(parsed.headers[0].get_key(), "Content-Type"); assert_eq!(parsed.get_body_raw().unwrap(), b""); assert_eq!(parsed.get_body().unwrap(), ""); } #[test] fn test_no_headers_in_subpart() { let mail = parse_mail( concat!( "Content-Type: multipart/report; report-type=delivery-status;\n", "\tboundary=\"1404630116.22555.postech.q0.x.x.x\"\n", "\n", "--1404630116.22555.postech.q0.x.x.x\n", "\n", "--1404630116.22555.postech.q0.x.x.x--\n" ) .as_bytes(), ) .unwrap(); assert_eq!(mail.ctype.mimetype, "multipart/report"); assert_eq!(mail.subparts[0].headers.len(), 0); assert_eq!(mail.subparts[0].ctype.mimetype, "text/plain"); assert_eq!(mail.subparts[0].get_body_raw().unwrap(), b""); assert_eq!(mail.subparts[0].get_body().unwrap(), ""); } #[test] fn test_empty() { let mail = parse_mail("".as_bytes()).unwrap(); assert_eq!(mail.get_body_raw().unwrap(), b""); assert_eq!(mail.get_body().unwrap(), ""); } #[test] fn test_dont_panic_for_value_with_new_lines() { let parsed = parse_param_content(r#"application/octet-stream; name=""#); assert_eq!(parsed.params["name"], "\""); } #[test] fn test_parameter_value_continuations() { let parsed = parse_param_content("attachment;\n\tfilename*0=\"X\";\n\tfilename*1=\"Y.pdf\""); assert_eq!(parsed.value, "attachment"); assert_eq!(parsed.params["filename"], "XY.pdf"); assert_eq!(parsed.params.contains_key("filename*0"), false); assert_eq!(parsed.params.contains_key("filename*1"), false); let parsed = parse_param_content( "attachment;\n\tfilename=XX.pdf;\n\tfilename*0=\"X\";\n\tfilename*1=\"Y.pdf\"", ); assert_eq!(parsed.value, "attachment"); 
assert_eq!(parsed.params["filename"], "XX.pdf"); assert_eq!(parsed.params["filename*0"], "X"); assert_eq!(parsed.params["filename*1"], "Y.pdf"); let parsed = parse_param_content("attachment; filename*1=\"Y.pdf\""); assert_eq!(parsed.params["filename*1"], "Y.pdf"); assert_eq!(parsed.params.contains_key("filename"), false); } #[test] fn test_parameter_encodings() { let parsed = parse_param_content("attachment;\n\tfilename*0*=us-ascii''%28X%29%20801%20-%20X;\n\tfilename*1*=%20%E2%80%93%20X%20;\n\tfilename*2*=X%20X%2Epdf"); // Note this is a real-world case from mutt, but it's wrong. The original filename had an en dash \u{2013} but mutt // declared us-ascii as the encoding instead of utf-8 for some reason. assert_eq!( parsed.params["filename"], "(X) 801 - X \u{00E2}\u{20AC}\u{201C} X X X.pdf" ); assert_eq!(parsed.params.contains_key("filename*0*"), false); assert_eq!(parsed.params.contains_key("filename*0"), false); assert_eq!(parsed.params.contains_key("filename*1*"), false); assert_eq!(parsed.params.contains_key("filename*1"), false); assert_eq!(parsed.params.contains_key("filename*2*"), false); assert_eq!(parsed.params.contains_key("filename*2"), false); // Here is the corrected version. 
let parsed = parse_param_content("attachment;\n\tfilename*0*=utf-8''%28X%29%20801%20-%20X;\n\tfilename*1*=%20%E2%80%93%20X%20;\n\tfilename*2*=X%20X%2Epdf"); assert_eq!(parsed.params["filename"], "(X) 801 - X \u{2013} X X X.pdf"); assert_eq!(parsed.params.contains_key("filename*0*"), false); assert_eq!(parsed.params.contains_key("filename*0"), false); assert_eq!(parsed.params.contains_key("filename*1*"), false); assert_eq!(parsed.params.contains_key("filename*1"), false); assert_eq!(parsed.params.contains_key("filename*2*"), false); assert_eq!(parsed.params.contains_key("filename*2"), false); let parsed = parse_param_content("attachment; filename*=utf-8'en'%e2%80%A1.bin"); assert_eq!(parsed.params["filename"], "\u{2021}.bin"); assert_eq!(parsed.params.contains_key("filename*"), false); let parsed = parse_param_content("attachment; filename*='foo'%e2%80%A1.bin"); assert_eq!(parsed.params["filename*"], "'foo'%e2%80%A1.bin"); assert_eq!(parsed.params.contains_key("filename"), false); let parsed = parse_param_content("attachment; filename*=nonexistent'foo'%e2%80%a1.bin"); assert_eq!(parsed.params["filename*"], "nonexistent'foo'%e2%80%a1.bin"); assert_eq!(parsed.params.contains_key("filename"), false); let parsed = parse_param_content( "attachment; filename*0*=utf-8'en'%e2%80%a1; filename*1*=%e2%80%A1.bin", ); assert_eq!(parsed.params["filename"], "\u{2021}\u{2021}.bin"); assert_eq!(parsed.params.contains_key("filename*0*"), false); assert_eq!(parsed.params.contains_key("filename*0"), false); assert_eq!(parsed.params.contains_key("filename*1*"), false); assert_eq!(parsed.params.contains_key("filename*1"), false); let parsed = parse_param_content("attachment; filename*0*=utf-8'en'%e2%80%a1; filename*1=%20.bin"); assert_eq!(parsed.params["filename"], "\u{2021}%20.bin"); assert_eq!(parsed.params.contains_key("filename*0*"), false); assert_eq!(parsed.params.contains_key("filename*0"), false); assert_eq!(parsed.params.contains_key("filename*1*"), false); 
assert_eq!(parsed.params.contains_key("filename*1"), false); let parsed = parse_param_content("attachment; filename*0*=utf-8'en'%e2%80%a1; filename*2*=%20.bin"); assert_eq!(parsed.params["filename"], "\u{2021}"); assert_eq!(parsed.params["filename*2"], " .bin"); assert_eq!(parsed.params.contains_key("filename*0*"), false); assert_eq!(parsed.params.contains_key("filename*0"), false); assert_eq!(parsed.params.contains_key("filename*2*"), false); let parsed = parse_param_content("attachment; filename*0*=utf-8'en'%e2%80%a1; filename*0=foo.bin"); assert_eq!(parsed.params["filename"], "foo.bin"); assert_eq!(parsed.params["filename*0*"], "utf-8'en'%e2%80%a1"); assert_eq!(parsed.params.contains_key("filename*0"), false); } #[test] fn test_default_content_encoding() { let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\n\r\n+JgM-").unwrap(); let body = mail.get_body_encoded(); match body { Body::SevenBit(body) => { assert_eq!(body.get_raw(), b"+JgM-"); assert_eq!(body.get_as_string().unwrap(), "\u{2603}"); } _ => assert!(false), }; } #[test] fn test_7bit_content_encoding() { let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: 7bit\r\n\r\n+JgM-").unwrap(); let body = mail.get_body_encoded(); match body { Body::SevenBit(body) => { assert_eq!(body.get_raw(), b"+JgM-"); assert_eq!(body.get_as_string().unwrap(), "\u{2603}"); } _ => assert!(false), }; } #[test] fn test_8bit_content_encoding() { let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: 8bit\r\n\r\n+JgM-").unwrap(); let body = mail.get_body_encoded(); match body { Body::EightBit(body) => { assert_eq!(body.get_raw(), b"+JgM-"); assert_eq!(body.get_as_string().unwrap(), "\u{2603}"); } _ => assert!(false), }; } #[test] fn test_quoted_printable_content_encoding() { let mail = parse_mail( b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n+JgM-", ).unwrap(); match 
mail.get_body_encoded() { Body::QuotedPrintable(body) => { assert_eq!(body.get_raw(), b"+JgM-"); assert_eq!(body.get_decoded().unwrap(), b"+JgM-"); assert_eq!(body.get_decoded_as_string().unwrap(), "\u{2603}"); } _ => assert!(false), }; } #[test] fn test_base64_content_encoding() { let mail = parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap(); match mail.get_body_encoded() { Body::Base64(body) => { assert_eq!(body.get_raw(), b"aGVsbG 8gd\r\n29ybGQ="); assert_eq!(body.get_decoded().unwrap(), b"hello world"); assert_eq!(body.get_decoded_as_string().unwrap(), "hello world"); } _ => assert!(false), }; } #[test] fn test_base64_content_encoding_multiple_strings() { let mail = parse_mail( b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=\r\nZm9vCg==", ) .unwrap(); match mail.get_body_encoded() { Body::Base64(body) => { assert_eq!(body.get_raw(), b"aGVsbG 8gd\r\n29ybGQ=\r\nZm9vCg=="); assert_eq!(body.get_decoded().unwrap(), b"hello worldfoo\n"); assert_eq!(body.get_decoded_as_string().unwrap(), "hello worldfoo\n"); } _ => assert!(false), }; } #[test] fn test_binary_content_encoding() { let mail = parse_mail(b"Content-Transfer-Encoding: binary\r\n\r\n######").unwrap(); let body = mail.get_body_encoded(); match body { Body::Binary(body) => { assert_eq!(body.get_raw(), b"######"); } _ => assert!(false), }; } #[test] fn test_body_content_encoding_with_multipart() { let mail_filepath = "./tests/files/test_email_01.txt"; let mail = std::fs::read(mail_filepath) .expect(&format!("Unable to open the file [{}]", mail_filepath)); let mail = parse_mail(&mail).unwrap(); let subpart_0 = mail.subparts.get(0).unwrap(); match subpart_0.get_body_encoded() { Body::SevenBit(body) => { assert_eq!( body.get_as_string().unwrap().trim(), "Test with attachments" ); } _ => assert!(false), }; let subpart_1 = mail.subparts.get(1).unwrap(); match subpart_1.get_body_encoded() { Body::Base64(body) => { let pdf_filepath = 
"./tests/files/test_email_01_sample.pdf"; let original_pdf = std::fs::read(pdf_filepath) .expect(&format!("Unable to open the file [{}]", pdf_filepath)); assert_eq!(body.get_decoded().unwrap(), original_pdf); } _ => assert!(false), }; let subpart_2 = mail.subparts.get(2).unwrap(); match subpart_2.get_body_encoded() { Body::Base64(body) => { assert_eq!( body.get_decoded_as_string().unwrap(), "txt file context for email collector\n1234567890987654321\n" ); } _ => assert!(false), }; } #[test] fn test_fuzzer_testcase() { const INPUT: &'static str = "U3ViamVjdDplcy1UeXBlOiBtdW50ZW50LVV5cGU6IW11bAAAAAAAAAAAamVjdDplcy1UeXBlOiBtdW50ZW50LVV5cGU6IG11bAAAAAAAAAAAAAAAAABTTUFZdWJqZf86OiP/dCBTdWJqZWN0Ol8KRGF0ZTog/////////////////////wAAAAAAAAAAAHQgYnJmAHQgYnJmZXItRW5jeXBlOnY9NmU3OjA2OgAAAAAAAAAAAAAAADEAAAAAAP/8mAAAAAAAAAAA+f///wAAAAAAAP8AAAAAAAAAAAAAAAAAAAAAAAAAPT0/PzEAAAEAAA=="; if let Ok(parsed) = parse_mail(&data_encoding::BASE64.decode(INPUT.as_bytes()).unwrap()) { if let Some(date) = parsed.headers.get_first_value("Date") { let _ = dateparse(&date); } } } #[test] fn test_fuzzer_testcase_2() { const INPUT: &'static str = "U3ViamVjdDogVGhpcyBpcyBhIHRlc3QgZW1haWwKQ29udGVudC1UeXBlOiBtdWx0aXBhcnQvYWx0ZXJuYXRpdmU7IGJvdW5kYXJ5PczMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMZm9vYmFyCkRhdGU6IFN1biwgMDIgT2MKCi1TdWJqZWMtZm9vYmFydDo="; if let Ok(parsed) = parse_mail(&data_encoding::BASE64.decode(INPUT.as_bytes()).unwrap()) { if let Some(date) = parsed.headers.get_first_value("Date") { let _ = dateparse(&date); } } } #[test] fn test_header_split() { let mail = parse_mail( b"Content-Type: text/plain;\r\ncharset=\"utf-8\"\r\nContent-Transfer-Encoding: 8bit\r\n\r\n", ).unwrap(); assert_eq!(mail.ctype.mimetype, "text/plain"); assert_eq!(mail.ctype.charset, "us-ascii"); } #[test] fn test_percent_decoder() { assert_eq!(percent_decode("hi %0d%0A%%2A%zz%"), b"hi \r\n%*%zz%"); } #[test] fn 
test_default_content_type_in_multipart_digest() { // Per https://datatracker.ietf.org/doc/html/rfc2046#section-5.1.5 let mail = parse_mail( concat!( "Content-Type: multipart/digest; boundary=myboundary\r\n\r\n", "--myboundary\r\n\r\n", "blah blah blah\r\n\r\n", "--myboundary--\r\n" ) .as_bytes(), ) .unwrap(); assert_eq!(mail.headers.len(), 1); assert_eq!(mail.ctype.mimetype, "multipart/digest"); assert_eq!(mail.subparts[0].headers.len(), 0); assert_eq!(mail.subparts[0].ctype.mimetype, "message/rfc822"); let mail = parse_mail( concat!( "Content-Type: multipart/whatever; boundary=myboundary\n", "\n", "--myboundary\n", "\n", "blah blah blah\n", "--myboundary\n", "Content-Type: multipart/digest; boundary=nestedboundary\n", "\n", "--nestedboundary\n", "\n", "nested default part\n", "--nestedboundary\n", "Content-Type: text/html\n", "\n", "nested html part\n", "--nestedboundary\n", "Content-Type: multipart/insidedigest; boundary=insideboundary\n", "\n", "--insideboundary\n", "\n", "inside part\n", "--insideboundary--\n", "--nestedboundary--\n", "--myboundary--\n" ) .as_bytes(), ) .unwrap(); let mut parts = mail.parts(); let mut part = parts.next().unwrap(); // mail assert_eq!(part.headers.len(), 1); assert_eq!(part.ctype.mimetype, "multipart/whatever"); part = parts.next().unwrap(); // mail.subparts[0] assert_eq!(part.headers.len(), 0); assert_eq!(part.ctype.mimetype, "text/plain"); assert_eq!(part.get_body_raw().unwrap(), b"blah blah blah\n"); part = parts.next().unwrap(); // mail.subparts[1] assert_eq!(part.ctype.mimetype, "multipart/digest"); part = parts.next().unwrap(); // mail.subparts[1].subparts[0] assert_eq!(part.headers.len(), 0); assert_eq!(part.ctype.mimetype, "message/rfc822"); assert_eq!(part.get_body_raw().unwrap(), b"nested default part\n"); part = parts.next().unwrap(); // mail.subparts[1].subparts[1] assert_eq!(part.headers.len(), 1); assert_eq!(part.ctype.mimetype, "text/html"); assert_eq!(part.get_body_raw().unwrap(), b"nested html part\n"); part = 
parts.next().unwrap(); // mail.subparts[1].subparts[2] assert_eq!(part.headers.len(), 1); assert_eq!(part.ctype.mimetype, "multipart/insidedigest"); part = parts.next().unwrap(); // mail.subparts[1].subparts[2].subparts[0] assert_eq!(part.headers.len(), 0); assert_eq!(part.ctype.mimetype, "text/plain"); assert_eq!(part.get_body_raw().unwrap(), b"inside part\n"); assert!(parts.next().is_none()); } #[test] fn boundary_is_suffix_of_another_boundary() { // From https://github.com/staktrace/mailparse/issues/100 let mail = parse_mail( concat!( "Content-Type: multipart/mixed; boundary=\"section_boundary\"\n", "\n", "--section_boundary\n", "Content-Type: multipart/alternative; boundary=\"--section_boundary\"\n", "\n", "----section_boundary\n", "Content-Type: text/html;\n", "\n", "Good evening!\n", "----section_boundary\n", "Content-Type: text/plain;\n", "\n", "Good evening!\n", "----section_boundary\n", "--section_boundary\n" ) .as_bytes(), ) .unwrap(); let mut parts = mail.parts(); let mut part = parts.next().unwrap(); // mail assert_eq!(part.headers.len(), 1); assert_eq!(part.ctype.mimetype, "multipart/mixed"); assert_eq!(part.subparts.len(), 1); part = parts.next().unwrap(); // mail.subparts[0] assert_eq!(part.headers.len(), 1); assert_eq!(part.ctype.mimetype, "multipart/alternative"); assert_eq!(part.subparts.len(), 2); part = parts.next().unwrap(); // mail.subparts[0].subparts[0] assert_eq!(part.headers.len(), 1); assert_eq!(part.ctype.mimetype, "text/html"); assert_eq!(part.get_body_raw().unwrap(), b"Good evening!\n"); assert_eq!(part.subparts.len(), 0); part = parts.next().unwrap(); // mail.subparts[0].subparts[1] assert_eq!(part.headers.len(), 1); assert_eq!(part.ctype.mimetype, "text/plain"); assert_eq!(part.get_body_raw().unwrap(), b"Good evening!\n"); assert_eq!(part.subparts.len(), 0); assert!(parts.next().is_none()); } #[test] fn test_parts_iterator() { let mail = parse_mail( concat!( "Content-Type: multipart/mixed; boundary=\"top_boundary\"\n", "\n", 
"--top_boundary\n", "Content-Type: multipart/alternative; boundary=\"internal_boundary\"\n", "\n", "--internal_boundary\n", "Content-Type: text/html;\n", "\n", "Good evening!\n", "--internal_boundary\n", "Content-Type: text/plain;\n", "\n", "Good evening!\n", "--internal_boundary\n", "--top_boundary\n", "Content-Type: text/unknown;\n", "\n", "You read this?\n", "--top_boundary\n" ) .as_bytes(), ) .unwrap(); let mut parts = mail.parts(); assert_eq!(parts.next().unwrap().ctype.mimetype, "multipart/mixed"); assert_eq!( parts.next().unwrap().ctype.mimetype, "multipart/alternative" ); assert_eq!(parts.next().unwrap().ctype.mimetype, "text/html"); assert_eq!(parts.next().unwrap().ctype.mimetype, "text/plain"); assert_eq!(parts.next().unwrap().ctype.mimetype, "text/unknown"); assert!(parts.next().is_none()); let mail = parse_mail(concat!("Content-Type: text/plain\n").as_bytes()).unwrap(); let mut parts = mail.parts(); assert_eq!(parts.next().unwrap().ctype.mimetype, "text/plain"); assert!(parts.next().is_none()); } } mailparse-0.14.0/src/msgidparse.rs000064400000000000000000000113071046102023000151600ustar 00000000000000use std::fmt; use crate::MailParseError; /// A simple wrapper around `Vec`. This is primarily here so we can /// implement the Display trait on it, and allow user code to easily convert /// the return value from `msgidparse` back into a string. This also allows /// to add additional methods on this type in the future. 
#[derive(Clone, Debug, PartialEq)] pub struct MessageIdList(Vec); impl std::ops::Deref for MessageIdList { type Target = Vec; fn deref(&self) -> &Vec { &self.0 } } impl std::ops::DerefMut for MessageIdList { fn deref_mut(&mut self) -> &mut Vec { &mut self.0 } } impl fmt::Display for MessageIdList { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut first = true; for msgid in self.iter() { if !first { write!(f, " ")?; } write!(f, "<{}>", msgid)?; first = false; } Ok(()) } } /// Parse an email header into a structured type holding a list of message ids. /// This function can be used to parse headers containing message IDs, such as /// `Message-ID`, `In-Reply-To`, and `References`. /// This function is currently mostly trivial (splits on whitespace and strips /// angle-brackets) but may be enhanced in the future to strip comments (which /// are technically allowed by the RFCs but never really used in practice). /// /// # Examples /// ``` /// use mailparse::{msgidparse, MessageIdList}; /// let parsed_ids = msgidparse(" ").unwrap(); /// assert_eq!(parsed_ids[0], "msg_one@foo.com"); /// assert_eq!(parsed_ids[1], "msg_two@bar.com"); /// ``` pub fn msgidparse(ids: &str) -> Result { let mut msgids = Vec::new(); // The remaining section of the header, not yet chomped let mut remaining = ids.trim_start(); // While we have some value of the header remaining while remaining.len() > 0 { // The next character should be the start of a Message ID if !remaining.starts_with('<') { return Err(MailParseError::Generic("Message IDs must start with <")); } // The ID ends at the next '>' let end_index = remaining .find('>') .ok_or_else(|| MailParseError::Generic("Message IDs must end with >"))?; msgids.push(remaining[1..end_index].to_string()); // Chomp the part of the string we just processed, and any trailing whitespace remaining = &remaining[end_index + 1..].trim_start(); } Ok(MessageIdList(msgids)) } #[cfg(test)] mod tests { use super::*; #[test] fn parse_message_ids() 
{ assert_eq!( msgidparse("").expect("Empty string"), MessageIdList(Vec::new()) ); assert_eq!( msgidparse("").expect("Single reference"), MessageIdList(vec!["msg_one@foo.com".to_string()]) ); assert_eq!( msgidparse(" ").expect("Single reference, leading whitespace"), MessageIdList(vec!["msg_one@foo.com".to_string()]) ); assert_eq!( msgidparse(" ").expect("Single reference, trailing whitespace"), MessageIdList(vec!["msg_one@foo.com".to_string()]) ); assert_eq!( msgidparse(" ") .expect("Multiple references separated by space"), MessageIdList(vec![ "msg_one@foo.com".to_string(), "msg_two@bar.com".to_string(), ]) ); assert_eq!( msgidparse("\n \t\r ") .expect("Multiple references separated by various whitespace"), MessageIdList(vec![ "msg_one@foo.com".to_string(), "msg_two@bar.com".to_string(), "msg_three@qux.com".to_string(), ]) ); // Non whitespace separator tests assert_eq!( msgidparse("") .expect("Multiple references, no whitespace"), MessageIdList(vec![ "msg_one@foo.com".to_string(), "msg_two@bar.com".to_string(), ]) ); assert_eq!( msgidparse(" ") .expect("Mixed whitespace/non-whitespace separator"), MessageIdList(vec![ "msg_one@foo.com".to_string(), "msg_two@bar.com".to_string(), "msg_three@spam.com".to_string(), ]) ); } } mailparse-0.14.0/tests/files/test_email_01.txt000064400000000000000000000114621046102023000173220ustar 00000000000000Subject: Test with attachments Content-Type: multipart/mixed; boundary="------------E5401F4DD68F2F7A872C2A83" Content-Language: en-US This is a multi-part message in MIME format. 
--------------E5401F4DD68F2F7A872C2A83 Content-Type: text/html; charset=utf-8 Content-Transfer-Encoding: 7bit Test with attachments --------------E5401F4DD68F2F7A872C2A83 Content-Type: application/pdf; name="sample.pdf" Content-Transfer-Encoding: base64 Content-Disposition: attachment; filename="sample.pdf" JVBERi0xLjMNCiXi48/TDQoNCjEgMCBvYmoNCjw8DQovVHlwZSAvQ2F0YWxvZw0KL091dGxp bmVzIDIgMCBSDQovUGFnZXMgMyAwIFINCj4+DQplbmRvYmoNCg0KMiAwIG9iag0KPDwNCi9U eXBlIC9PdXRsaW5lcw0KL0NvdW50IDANCj4+DQplbmRvYmoNCg0KMyAwIG9iag0KPDwNCi9U eXBlIC9QYWdlcw0KL0NvdW50IDINCi9LaWRzIFsgNCAwIFIgNiAwIFIgXSANCj4+DQplbmRv YmoNCg0KNCAwIG9iag0KPDwNCi9UeXBlIC9QYWdlDQovUGFyZW50IDMgMCBSDQovUmVzb3Vy Y2VzIDw8DQovRm9udCA8PA0KL0YxIDkgMCBSIA0KPj4NCi9Qcm9jU2V0IDggMCBSDQo+Pg0K L01lZGlhQm94IFswIDAgNjEyLjAwMDAgNzkyLjAwMDBdDQovQ29udGVudHMgNSAwIFINCj4+ DQplbmRvYmoNCg0KNSAwIG9iag0KPDwgL0xlbmd0aCAxMDc0ID4+DQpzdHJlYW0NCjIgSg0K QlQNCjAgMCAwIHJnDQovRjEgMDAyNyBUZg0KNTcuMzc1MCA3MjIuMjgwMCBUZA0KKCBBIFNp bXBsZSBQREYgRmlsZSApIFRqDQpFVA0KQlQNCi9GMSAwMDEwIFRmDQo2OS4yNTAwIDY4OC42 MDgwIFRkDQooIFRoaXMgaXMgYSBzbWFsbCBkZW1vbnN0cmF0aW9uIC5wZGYgZmlsZSAtICkg VGoNCkVUDQpCVA0KL0YxIDAwMTAgVGYNCjY5LjI1MDAgNjY0LjcwNDAgVGQNCigganVzdCBm b3IgdXNlIGluIHRoZSBWaXJ0dWFsIE1lY2hhbmljcyB0dXRvcmlhbHMuIE1vcmUgdGV4dC4g QW5kIG1vcmUgKSBUag0KRVQNCkJUDQovRjEgMDAxMCBUZg0KNjkuMjUwMCA2NTIuNzUyMCBU ZA0KKCB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0 LiApIFRqDQpFVA0KQlQNCi9GMSAwMDEwIFRmDQo2OS4yNTAwIDYyOC44NDgwIFRkDQooIEFu ZCBtb3JlIHRleHQuIEFuZCBtb3JlIHRleHQuIEFuZCBtb3JlIHRleHQuIEFuZCBtb3JlIHRl eHQuIEFuZCBtb3JlICkgVGoNCkVUDQpCVA0KL0YxIDAwMTAgVGYNCjY5LjI1MDAgNjE2Ljg5 NjAgVGQNCiggdGV4dC4gQW5kIG1vcmUgdGV4dC4gQm9yaW5nLCB6enp6ei4gQW5kIG1vcmUg dGV4dC4gQW5kIG1vcmUgdGV4dC4gQW5kICkgVGoNCkVUDQpCVA0KL0YxIDAwMTAgVGYNCjY5 LjI1MDAgNjA0Ljk0NDAgVGQNCiggbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9y ZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiApIFRqDQpFVA0KQlQNCi9G MSAwMDEwIFRmDQo2OS4yNTAwIDU5Mi45OTIwIFRkDQooIEFuZCBtb3JlIHRleHQuIEFuZCBt 
b3JlIHRleHQuICkgVGoNCkVUDQpCVA0KL0YxIDAwMTAgVGYNCjY5LjI1MDAgNTY5LjA4ODAg VGQNCiggQW5kIG1vcmUgdGV4dC4gQW5kIG1vcmUgdGV4dC4gQW5kIG1vcmUgdGV4dC4gQW5k IG1vcmUgdGV4dC4gQW5kIG1vcmUgKSBUag0KRVQNCkJUDQovRjEgMDAxMCBUZg0KNjkuMjUw MCA1NTcuMTM2MCBUZA0KKCB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBF dmVuIG1vcmUuIENvbnRpbnVlZCBvbiBwYWdlIDIgLi4uKSBUag0KRVQNCmVuZHN0cmVhbQ0K ZW5kb2JqDQoNCjYgMCBvYmoNCjw8DQovVHlwZSAvUGFnZQ0KL1BhcmVudCAzIDAgUg0KL1Jl c291cmNlcyA8PA0KL0ZvbnQgPDwNCi9GMSA5IDAgUiANCj4+DQovUHJvY1NldCA4IDAgUg0K Pj4NCi9NZWRpYUJveCBbMCAwIDYxMi4wMDAwIDc5Mi4wMDAwXQ0KL0NvbnRlbnRzIDcgMCBS DQo+Pg0KZW5kb2JqDQoNCjcgMCBvYmoNCjw8IC9MZW5ndGggNjc2ID4+DQpzdHJlYW0NCjIg Sg0KQlQNCjAgMCAwIHJnDQovRjEgMDAyNyBUZg0KNTcuMzc1MCA3MjIuMjgwMCBUZA0KKCBT aW1wbGUgUERGIEZpbGUgMiApIFRqDQpFVA0KQlQNCi9GMSAwMDEwIFRmDQo2OS4yNTAwIDY4 OC42MDgwIFRkDQooIC4uLmNvbnRpbnVlZCBmcm9tIHBhZ2UgMS4gWWV0IG1vcmUgdGV4dC4g QW5kIG1vcmUgdGV4dC4gQW5kIG1vcmUgdGV4dC4gKSBUag0KRVQNCkJUDQovRjEgMDAxMCBU Zg0KNjkuMjUwMCA2NzYuNjU2MCBUZA0KKCBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0 LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSB0ZXh0LiBBbmQgbW9yZSApIFRqDQpFVA0KQlQN Ci9GMSAwMDEwIFRmDQo2OS4yNTAwIDY2NC43MDQwIFRkDQooIHRleHQuIE9oLCBob3cgYm9y aW5nIHR5cGluZyB0aGlzIHN0dWZmLiBCdXQgbm90IGFzIGJvcmluZyBhcyB3YXRjaGluZyAp IFRqDQpFVA0KQlQNCi9GMSAwMDEwIFRmDQo2OS4yNTAwIDY1Mi43NTIwIFRkDQooIHBhaW50 IGRyeS4gQW5kIG1vcmUgdGV4dC4gQW5kIG1vcmUgdGV4dC4gQW5kIG1vcmUgdGV4dC4gQW5k IG1vcmUgdGV4dC4gKSBUag0KRVQNCkJUDQovRjEgMDAxMCBUZg0KNjkuMjUwMCA2NDAuODAw MCBUZA0KKCBCb3JpbmcuICBNb3JlLCBhIGxpdHRsZSBtb3JlIHRleHQuIFRoZSBlbmQsIGFu ZCBqdXN0IGFzIHdlbGwuICkgVGoNCkVUDQplbmRzdHJlYW0NCmVuZG9iag0KDQo4IDAgb2Jq DQpbL1BERiAvVGV4dF0NCmVuZG9iag0KDQo5IDAgb2JqDQo8PA0KL1R5cGUgL0ZvbnQNCi9T dWJ0eXBlIC9UeXBlMQ0KL05hbWUgL0YxDQovQmFzZUZvbnQgL0hlbHZldGljYQ0KL0VuY29k aW5nIC9XaW5BbnNpRW5jb2RpbmcNCj4+DQplbmRvYmoNCg0KMTAgMCBvYmoNCjw8DQovQ3Jl YXRvciAoUmF2ZSBcKGh0dHA6Ly93d3cubmV2cm9uYS5jb20vcmF2ZVwpKQ0KL1Byb2R1Y2Vy IChOZXZyb25hIERlc2lnbnMpDQovQ3JlYXRpb25EYXRlIChEOjIwMDYwMzAxMDcyODI2KQ0K 
Pj4NCmVuZG9iag0KDQp4cmVmDQowIDExDQowMDAwMDAwMDAwIDY1NTM1IGYNCjAwMDAwMDAw MTkgMDAwMDAgbg0KMDAwMDAwMDA5MyAwMDAwMCBuDQowMDAwMDAwMTQ3IDAwMDAwIG4NCjAw MDAwMDAyMjIgMDAwMDAgbg0KMDAwMDAwMDM5MCAwMDAwMCBuDQowMDAwMDAxNTIyIDAwMDAw IG4NCjAwMDAwMDE2OTAgMDAwMDAgbg0KMDAwMDAwMjQyMyAwMDAwMCBuDQowMDAwMDAyNDU2 IDAwMDAwIG4NCjAwMDAwMDI1NzQgMDAwMDAgbg0KDQp0cmFpbGVyDQo8PA0KL1NpemUgMTEN Ci9Sb290IDEgMCBSDQovSW5mbyAxMCAwIFINCj4+DQoNCnN0YXJ0eHJlZg0KMjcxNA0KJSVF T0YNCg== --------------E5401F4DD68F2F7A872C2A83 Content-Type: text/plain; charset=UTF-8; name="sample.txt" Content-Transfer-Encoding: base64 Content-Disposition: attachment; filename="sample.txt" dHh0IGZpbGUgY29udGV4dCBmb3IgZW1haWwgY29sbGVjdG9yCjEyMzQ1Njc4OTA5ODc2NTQz MjEK --------------E5401F4DD68F2F7A872C2A83-- mailparse-0.14.0/tests/files/test_email_01_sample.pdf000064400000000000000000000057241046102023000206210ustar 00000000000000%PDF-1.3 % 1 0 obj << /Type /Catalog /Outlines 2 0 R /Pages 3 0 R >> endobj 2 0 obj << /Type /Outlines /Count 0 >> endobj 3 0 obj << /Type /Pages /Count 2 /Kids [ 4 0 R 6 0 R ] >> endobj 4 0 obj << /Type /Page /Parent 3 0 R /Resources << /Font << /F1 9 0 R >> /ProcSet 8 0 R >> /MediaBox [0 0 612.0000 792.0000] /Contents 5 0 R >> endobj 5 0 obj << /Length 1074 >> stream 2 J BT 0 0 0 rg /F1 0027 Tf 57.3750 722.2800 Td ( A Simple PDF File ) Tj ET BT /F1 0010 Tf 69.2500 688.6080 Td ( This is a small demonstration .pdf file - ) Tj ET BT /F1 0010 Tf 69.2500 664.7040 Td ( just for use in the Virtual Mechanics tutorials. More text. And more ) Tj ET BT /F1 0010 Tf 69.2500 652.7520 Td ( text. And more text. And more text. And more text. ) Tj ET BT /F1 0010 Tf 69.2500 628.8480 Td ( And more text. And more text. And more text. And more text. And more ) Tj ET BT /F1 0010 Tf 69.2500 616.8960 Td ( text. And more text. Boring, zzzzz. And more text. And more text. And ) Tj ET BT /F1 0010 Tf 69.2500 604.9440 Td ( more text. And more text. And more text. And more text. And more text. 
) Tj ET BT /F1 0010 Tf 69.2500 592.9920 Td ( And more text. And more text. ) Tj ET BT /F1 0010 Tf 69.2500 569.0880 Td ( And more text. And more text. And more text. And more text. And more ) Tj ET BT /F1 0010 Tf 69.2500 557.1360 Td ( text. And more text. And more text. Even more. Continued on page 2 ...) Tj ET endstream endobj 6 0 obj << /Type /Page /Parent 3 0 R /Resources << /Font << /F1 9 0 R >> /ProcSet 8 0 R >> /MediaBox [0 0 612.0000 792.0000] /Contents 7 0 R >> endobj 7 0 obj << /Length 676 >> stream 2 J BT 0 0 0 rg /F1 0027 Tf 57.3750 722.2800 Td ( Simple PDF File 2 ) Tj ET BT /F1 0010 Tf 69.2500 688.6080 Td ( ...continued from page 1. Yet more text. And more text. And more text. ) Tj ET BT /F1 0010 Tf 69.2500 676.6560 Td ( And more text. And more text. And more text. And more text. And more ) Tj ET BT /F1 0010 Tf 69.2500 664.7040 Td ( text. Oh, how boring typing this stuff. But not as boring as watching ) Tj ET BT /F1 0010 Tf 69.2500 652.7520 Td ( paint dry. And more text. And more text. And more text. And more text. ) Tj ET BT /F1 0010 Tf 69.2500 640.8000 Td ( Boring. More, a little more text. The end, and just as well. ) Tj ET endstream endobj 8 0 obj [/PDF /Text] endobj 9 0 obj << /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 10 0 obj << /Creator (Rave \(http://www.nevrona.com/rave\)) /Producer (Nevrona Designs) /CreationDate (D:20060301072826) >> endobj xref 0 11 0000000000 65535 f 0000000019 00000 n 0000000093 00000 n 0000000147 00000 n 0000000222 00000 n 0000000390 00000 n 0000001522 00000 n 0000001690 00000 n 0000002423 00000 n 0000002456 00000 n 0000002574 00000 n trailer << /Size 11 /Root 1 0 R /Info 10 0 R >> startxref 2714 %%EOF