rfc2047-decoder-0.2.2/.cargo_vcs_info.json
{
  "git": {
    "sha1": "a2054255712c28192abef234bdd35eb62969de3a"
  },
  "path_in_vcs": ""
}

rfc2047-decoder-0.2.2/.github/FUNDING.yml
github: soywod

rfc2047-decoder-0.2.2/.github/workflows/release.yaml
name: release

on:
  push:
    tags:
      - v*

jobs:
  create_release:
    runs-on: ubuntu-latest
    steps:
      - name: Create release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ github.ref }}
          release_name: ${{ github.ref }}
          draft: false
          prerelease: false

  publish:
    runs-on: ubuntu-latest
    needs: create_release
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Install Nix
        uses: cachix/install-nix-action@v15
        with:
          nix_path: nixpkgs=channel:nixos-21.11
          extra_nix_config: |
            experimental-features = nix-command flakes
      - name: Publish library to crates.io
        env:
          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
        run: nix develop -c cargo publish --token ${CARGO_REGISTRY_TOKEN}

rfc2047-decoder-0.2.2/.github/workflows/tests.yaml
name: tests

on:
  pull_request:
  push:
    branches:
      - master

jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Install Nix
        uses: cachix/install-nix-action@v15
        with:
          nix_path: nixpkgs=channel:nixos-21.11
          extra_nix_config: |
            experimental-features = nix-command flakes
      - name: Run test suite
        run: nix develop -c cargo test

rfc2047-decoder-0.2.2/.gitignore
/target
Cargo.lock
.idea/
.envrc
.direnv/

rfc2047-decoder-0.2.2/CHANGELOG.md
# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [0.2.2] - 2023-03-29

### Changed

- Bumped `base64` to `v0.21.0`.
- Bumped `chumsky` to `v0.9.2`.

## [0.2.1] - 2023-01-08

### Fixed

- Fixed discarded errors [#20].
## [0.2.0] - 2022-10-11

### Added

- Added Nix support
- Allowed decoding strings bigger than 76 chars [#15]

### Changed

- Renamed error variants to match the [Rust API guidelines](https://rust-lang.github.io/api-guidelines/naming.html#names-use-a-consistent-word-order-c-word-order):
  - `lexer::Error::EncodingIssue` becomes `ParseBytesError`
  - `lexer::Error::EncodedWordTooLong` becomes `ParseEncodedWordTooLongError`
  - `parser::Error::UnknownCharset` has been removed (unused)
  - `parser::Error::UnknownEncoding` becomes `ParseEncodingError`
  - `parser::Error::EncodedWordTooBig` becomes `ParseEncodingTooBigError`
  - `parser::Error::EmptyEncoding` becomes `ParseEncodingEmptyError`
  - `evaluator::Error::DecodeUtf8` becomes `DecodeUtf8Error`
  - `evaluator::Error::DecodeBase64` becomes `DecodeBase64Error`
  - `evaluator::Error::DecodeQuotedPrintable` becomes `DecodeQuotedPrintableError`

## [0.1.3] - 2022-10-10

### Fixed

- Max length of encoded words [#1]
- Manage tokens special chars [#3]

### Changed

- Refactored parser using chumsky [#7]

## [0.1.2] - 2020-12-30

### Fixed

- Multiple encoded words separator

## [0.1.1] - 2020-12-30

### Added

- Added evaluator with AST

### Changed

- The decode fn now accepts `&[u8]` instead of `&str`

### Fixed

- Removed space between encoded words [#2]

## [0.1.0] - 2020-12-28

First official release.

[unreleased]: https://github.com/soywod/rfc2047-decoder/compare/v0.2.2...HEAD
[0.2.2]: https://github.com/soywod/rfc2047-decoder/compare/v0.2.1...v0.2.2
[0.2.1]: https://github.com/soywod/rfc2047-decoder/compare/v0.2.0...v0.2.1
[0.2.0]: https://github.com/soywod/rfc2047-decoder/compare/v0.1.3...v0.2.0
[0.1.3]: https://github.com/soywod/rfc2047-decoder/compare/v0.1.2...v0.1.3
[0.1.2]: https://github.com/soywod/rfc2047-decoder/compare/v0.1.1...v0.1.2
[0.1.1]: https://github.com/soywod/rfc2047-decoder/compare/v0.1.0...v0.1.1
[0.1.0]: https://github.com/soywod/rfc2047-decoder/releases/tag/v0.1.0

[#1]: https://github.com/soywod/rfc2047-decoder/issues/1
[#2]: https://github.com/soywod/rfc2047-decoder/issues/2
[#3]: https://github.com/soywod/rfc2047-decoder/issues/3
[#7]: https://github.com/soywod/rfc2047-decoder/issues/7
[#15]: https://github.com/soywod/rfc2047-decoder/issues/15
[#20]: https://github.com/soywod/rfc2047-decoder/issues/20

rfc2047-decoder-0.2.2/Cargo.lock
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
 "getrandom",
 "once_cell",
 "version_check",
]

[[package]]
name = "base64"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"

[[package]]
name = "base64"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a"

[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"

[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

[[package]]
name = "charset"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18e9079d1a12a2cc2bffb5db039c43661836ead4082120d5844f02555aca2d46"
dependencies = [
 "base64 0.13.1",
 "encoding_rs",
]

[[package]]
name = "chumsky"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
dependencies = [
 "hashbrown",
 "stacker",
]

[[package]]
name = "encoding_rs"
version = "0.8.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394"
dependencies = [
 "cfg-if",
]

[[package]]
name = "getrandom"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
dependencies = [
 "cfg-if",
 "libc",
 "wasi",
]

[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
 "ahash",
]

[[package]]
name = "libc"
version = "0.2.140"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"

[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"

[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"

[[package]]
name = "proc-macro2"
version = "1.0.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
 "cc",
]

[[package]]
name = "quote"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "quoted_printable"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a24039f627d8285853cc90dcddf8c1ebfaa91f834566948872b225b9a28ed1b6"

[[package]]
name = "rfc2047-decoder"
version = "0.2.2"
dependencies = [
 "base64 0.21.0",
 "charset",
 "chumsky",
 "memchr",
 "quoted_printable",
 "thiserror",
]

[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
 "cc",
 "cfg-if",
 "libc",
 "psm",
 "winapi",
]

[[package]]
name = "syn"
version = "2.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21e3787bb71465627110e7d87ed4faaa36c1f61042ee67badb9e2ef173accc40"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "thiserror"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
dependencies = [
 "thiserror-impl",
]

[[package]]
name = "thiserror-impl"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "unicode-ident"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"

[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"

[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
 "winapi-i686-pc-windows-gnu",
 "winapi-x86_64-pc-windows-gnu",
]

[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"

[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

rfc2047-decoder-0.2.2/Cargo.toml
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2018"
name = "rfc2047-decoder"
version = "0.2.2"
authors = [
    "soywod",
    "TornaxO7",
]
description = "Rust library for decoding RFC 2047 MIME Message Headers."
homepage = "https://github.com/soywod/rfc2047-decoder" documentation = "https://docs.rs/rfc2047-decoder" readme = "README.md" keywords = [ "rfc2047", "mime", "header", "decoder", "decoding", ] categories = [ "email", "encoding", "parsing", "parser-implementations", ] license = "MIT" repository = "https://github.com/soywod/rfc2047-decoder" [lib] name = "rfc2047_decoder" [dependencies.base64] version = "0.21.0" [dependencies.charset] version = "0.1.2" [dependencies.chumsky] version = "0.9.2" [dependencies.memchr] version = "2.5.0" [dependencies.quoted_printable] version = "0.4.2" [dependencies.thiserror] version = "1.0.31" rfc2047-decoder-0.2.2/Cargo.toml.orig000064400000000000000000000013641046102023000152120ustar 00000000000000[package] name = "rfc2047-decoder" description = "Rust library for decoding RFC 2047 MIME Message Headers." version = "0.2.2" # do not forget html_root_url authors = ["soywod ", "TornaxO7 "] edition = "2018" repository = "https://github.com/soywod/rfc2047-decoder" documentation = "https://docs.rs/rfc2047-decoder" homepage = "https://github.com/soywod/rfc2047-decoder" categories = ["email", "encoding", "parsing", "parser-implementations"] keywords = ["rfc2047", "mime", "header", "decoder", "decoding"] license = "MIT" readme = "README.md" [lib] name = "rfc2047_decoder" [dependencies] base64 = "0.21.0" charset = "0.1.2" chumsky = "0.9.2" memchr = "2.5.0" quoted_printable = "0.4.2" thiserror = "1.0.31" rfc2047-decoder-0.2.2/LICENSE000064400000000000000000000021071046102023000133240ustar 00000000000000MIT License Copyright (c) 2020-2022 soywod Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. rfc2047-decoder-0.2.2/README.md000064400000000000000000000031521046102023000135770ustar 00000000000000# rfc2047-decoder [![Crates.io](https://img.shields.io/crates/v/rfc2047-decoder?style=flat-square)](https://crates.io/crates/rfc2047-decoder) [![Crates.io](https://img.shields.io/crates/d/rfc2047-decoder?style=flat-square)](https://crates.io/crates/rfc2047-decoder) Rust library for decoding [RFC 2047 MIME Message Headers](https://tools.ietf.org/html/rfc2047). 
```rust
use rfc2047_decoder;

fn main() {
    let encoded_str = "=?UTF-8?Q?str?=";
    let decoded_str = "str";

    // using the decode helper (default options)
    assert_eq!(
        rfc2047_decoder::decode(encoded_str.as_bytes()).unwrap(),
        decoded_str
    );

    // using the decoder builder (custom options)
    assert_eq!(
        rfc2047_decoder::Decoder::new()
            .skip_encoded_word_length(true)
            .decode(encoded_str.as_bytes())
            .unwrap(),
        decoded_str
    );
}
```

## Sponsoring

[![github](https://img.shields.io/badge/-GitHub%20Sponsors-fafbfc?logo=GitHub%20Sponsors&style=flat-square)](https://github.com/sponsors/soywod)
[![paypal](https://img.shields.io/badge/-PayPal-0079c1?logo=PayPal&logoColor=ffffff&style=flat-square)](https://www.paypal.com/paypalme/soywod)
[![ko-fi](https://img.shields.io/badge/-Ko--fi-ff5e5a?logo=Ko-fi&logoColor=ffffff&style=flat-square)](https://ko-fi.com/soywod)
[![buy-me-a-coffee](https://img.shields.io/badge/-Buy%20Me%20a%20Coffee-ffdd00?logo=Buy%20Me%20A%20Coffee&logoColor=000000&style=flat-square)](https://www.buymeacoffee.com/soywod)
[![liberapay](https://img.shields.io/badge/-Liberapay-f6c915?logo=Liberapay&logoColor=222222&style=flat-square)](https://liberapay.com/soywod)

rfc2047-decoder-0.2.2/examples/decode.rs
use rfc2047_decoder;

fn main() {
    let encoded_str = "=?UTF-8?Q?str?=";
    let decoded_str = "str";

    // using the decode helper (default options)
    assert_eq!(
        rfc2047_decoder::decode(encoded_str.as_bytes()).unwrap(),
        decoded_str
    );

    // using the decoder builder (custom options)
    assert_eq!(
        rfc2047_decoder::Decoder::new()
            .skip_encoded_word_length(true)
            .decode(encoded_str.as_bytes())
            .unwrap(),
        decoded_str
    );
}

rfc2047-decoder-0.2.2/flake.lock
{
  "nodes": {
    "flake-compat": {
      "flake": false,
      "locked": {
        "lastModified": 1650374568,
        "narHash": "sha256-Z+s0J8/r907g149rllvwhb4pKi8Wam5ij0st8PwAh+E=",
        "owner": "edolstra",
        "repo": "flake-compat",
        "rev": "b4a34015c698c7793d592d66adbab377907a2be8",
        "type": "github"
      },
      "original": {
        "owner": "edolstra",
        "repo": "flake-compat",
        "type": "github"
      }
    },
    "flake-utils": {
      "locked": {
        "lastModified": 1656928814,
        "narHash": "sha256-RIFfgBuKz6Hp89yRr7+NR5tzIAbn52h8vT6vXkYjZoM=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "7e2a3b3dfd9af950a856d66b0a7d01e3c18aa249",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1664356419,
        "narHash": "sha256-PD0hM9YWp2lepAJk7edh8g1VtzJip5rals1fpoQUlY0=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "46e8398474ac3b1b7bb198bf9097fc213bbf59b1",
        "type": "github"
      },
      "original": {
        "id": "nixpkgs",
        "type": "indirect"
      }
    },
    "nixpkgs_2": {
      "locked": {
        "lastModified": 1659102345,
        "narHash": "sha256-Vbzlz254EMZvn28BhpN8JOi5EuKqnHZ3ujFYgFcSGvk=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "11b60e4f80d87794a2a4a8a256391b37c59a1ea7",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixpkgs-unstable",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "flake-compat": "flake-compat",
        "nixpkgs": "nixpkgs",
        "rust-overlay": "rust-overlay",
        "utils": "utils"
      }
    },
    "rust-overlay": {
      "inputs": {
        "flake-utils": "flake-utils",
        "nixpkgs": "nixpkgs_2"
      },
      "locked": {
        "lastModified": 1664334084,
        "narHash": "sha256-cqP0TzDs3GDRprS6IgVQcWjQ0ynmjQFjYWvp+LE/s6I=",
        "owner": "oxalica",
        "repo": "rust-overlay",
        "rev": "70eab96a255ae9b4b82b38ea5ac5c8e5b57e0abd",
        "type": "github"
      },
      "original": {
        "owner": "oxalica",
        "repo": "rust-overlay",
        "type": "github"
      }
    },
    "utils": {
      "locked": {
        "lastModified": 1659877975,
        "narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    }
  },
  "root": "root",
  "version": 7
}

rfc2047-decoder-0.2.2/flake.nix
{
  description = "Rust library for decoding RFC 2047 MIME Message Headers.";

  inputs = {
    utils.url = "github:numtide/flake-utils";
    rust-overlay.url = "github:oxalica/rust-overlay";
    flake-compat = {
      url = "github:edolstra/flake-compat";
      flake = false;
    };
  };

  outputs = { self, nixpkgs, utils, rust-overlay, ... }:
    utils.lib.eachDefaultSystem (system:
      let
        overlays = [ (import rust-overlay) ];
        pkgs = import nixpkgs { inherit system overlays; };
        rust-bin = pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
      in
      {
        # nix develop
        devShell = pkgs.mkShell {
          nativeBuildInputs = with pkgs; [
            # Nix LSP + formatter
            rnix-lsp
            nixpkgs-fmt

            # Rust env
            openssl.dev
            pkg-config
            rust-bin
            rust-analyzer
            cargo-watch
          ];
        };
      }
    );
}

rfc2047-decoder-0.2.2/rust-toolchain.toml
[toolchain]
channel = "stable"

rfc2047-decoder-0.2.2/shell.nix
# This file exists for legacy nix-shell
# https://nixos.wiki/wiki/Flakes#Using_flakes_project_from_a_legacy_Nix
# You generally do *not* have to modify this ever.
(import
  (
    let lock = builtins.fromJSON (builtins.readFile ./flake.lock); in
    fetchTarball {
      url = "https://github.com/edolstra/flake-compat/archive/${lock.nodes.flake-compat.locked.rev}.tar.gz";
      sha256 = lock.nodes.flake-compat.locked.narHash;
    }
  )
  { src = ./.; }).shellNix

rfc2047-decoder-0.2.2/src/decoder.rs
use std::result;
use thiserror::Error;

use crate::{evaluator, lexer, parser};

#[derive(Error, Debug)]
pub enum Error {
    #[error(transparent)]
    Lexer(#[from] lexer::Error),
    #[error(transparent)]
    Parser(#[from] parser::Error),
    #[error(transparent)]
    Evaluator(#[from] evaluator::Error),
}

pub type Result<T> = result::Result<T, Error>;

/// Represents the decoder builder.
///
/// ```
/// let decoder = rfc2047_decoder::Decoder::new().skip_encoded_word_length(true);
/// let decoded_str = decoder.decode("=?UTF-8?B?c3Ry?=");
/// ```
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct Decoder {
    pub skip_encoded_word_length: bool,
}

impl Decoder {
    /// Creates a new decoder builder using default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the option to skip encoded word length verification.
    pub fn skip_encoded_word_length(mut self, b: bool) -> Self {
        self.skip_encoded_word_length = b;
        self
    }

    /// Decodes the given RFC 2047 MIME Message Header encoded string.
    pub fn decode<T: AsRef<[u8]>>(self, encoded_str: T) -> Result<String> {
        let text_tokens = lexer::run(encoded_str.as_ref(), self)?;
        let parsed_text = parser::run(text_tokens)?;
        let evaluated_string = evaluator::run(parsed_text)?;
        Ok(evaluated_string)
    }
}

#[cfg(test)]
mod tests {
    /// The main tests, taken from the examples listed here:
    /// https://datatracker.ietf.org/doc/html/rfc2047#section-8
    mod rfc_tests {
        use crate::decode;

        #[test]
        fn test_example_1() {
            assert_eq!(decode("=?ISO-8859-1?Q?a?=").unwrap(), "a");
        }

        #[test]
        fn test_example_2() {
            assert_eq!(decode("=?ISO-8859-1?Q?a?= b").unwrap(), "a b");
        }

        #[test]
        fn test_example_3() {
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=").unwrap(),
                "ab"
            );
        }

        #[test]
        fn test_example_4() {
            // multiple spaces between the encoded words
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=").unwrap(),
                "ab"
            );
        }

        #[test]
        fn test_example_5() {
            // encoded words separated by folding white space
            assert_eq!(
                decode(
                    "=?ISO-8859-1?Q?a?=
 =?ISO-8859-1?Q?b?="
                )
                .unwrap(),
                "ab"
            );
        }

        #[test]
        fn test_example_6() {
            assert_eq!(decode("=?ISO-8859-1?Q?a_b?=").unwrap(), "a b");
        }

        #[test]
        fn test_example_7() {
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=").unwrap(),
                "a b"
            );
        }
    }

    /// Some custom tests
    mod custom_tests {
        use crate::{decode, Decoder};

        #[test]
        fn clear_empty() {
            assert_eq!(decode("").unwrap(), "");
        }

        #[test]
        fn clear_with_spaces() {
            assert_eq!(decode("str with spaces").unwrap(), "str with spaces");
        }

        #[test]
        fn utf8_qs_empty() {
            assert_eq!(decode("").unwrap(), "");
        }

        #[test]
        fn utf8_qs_with_str() {
            assert_eq!(decode("=?UTF-8?Q?str?=").unwrap(), "str");
        }

        #[test]
        fn utf8_qs_with_spaces() {
            assert_eq!(
                decode("=?utf8?q?str_with_spaces?=").unwrap(),
                "str with spaces"
            );
        }

        #[test]
        fn utf8_qs_with_spec_chars() {
            assert_eq!(
                decode("=?utf8?q?str_with_special_=C3=A7h=C3=A0r=C3=9F?=").unwrap(),
                "str with special çhàrß"
            );
        }

        #[test]
        fn utf8_qs_double() {
            assert_eq!(
                decode("=?UTF-8?Q?str?=\r\n =?UTF-8?Q?str?=").unwrap(),
                "strstr"
            );
            assert_eq!(
                decode("=?UTF-8?Q?str?=\n =?UTF-8?Q?str?=").unwrap(),
                "strstr"
            );
            assert_eq!(decode("=?UTF-8?Q?str?= =?UTF-8?Q?str?=").unwrap(), "strstr");
            assert_eq!(decode("=?UTF-8?Q?str?==?UTF-8?Q?str?=").unwrap(), "strstr");
        }

        #[test]
        fn utf8_b64_empty() {
            assert_eq!(decode("=?UTF-8?B??=").unwrap(), "");
        }

        #[test]
        fn utf8_b64_with_str() {
            assert_eq!(decode("=?UTF-8?B?c3Ry?=").unwrap(), "str");
        }

        #[test]
        fn utf8_b64_with_spaces() {
            assert_eq!(
                decode("=?utf8?b?c3RyIHdpdGggc3BhY2Vz?=").unwrap(),
                "str with spaces"
            );
        }

        #[test]
        fn utf8_b64_with_spec_chars() {
            assert_eq!(
                decode("=?utf8?b?c3RyIHdpdGggc3BlY2lhbCDDp2jDoHLDnw==?=").unwrap(),
                "str with special çhàrß"
            );
        }

        #[test]
        fn utf8_b64_trailing_bit() {
            assert_eq!(
                decode("=?utf-8?B?UG9ydGFsZSBIYWNraW5nVGVhbW==?=").unwrap(),
                "Portale HackingTeam",
            );
        }

        #[test]
        fn utf8_b64_skip_encoded_word_length() {
            assert_eq!(
                Decoder::new().skip_encoded_word_length(true).decode("=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=").unwrap(),
                "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut interdum quam eu facilisis ornare.",
            );
        }
    }
}

rfc2047-decoder-0.2.2/src/evaluator.rs
use base64::{
    alphabet,
    engine::{GeneralPurpose, GeneralPurposeConfig},
    Engine,
};
use charset::Charset;
use std::{result, string};
use thiserror::Error;

use crate::parser::{ClearText, Encoding, ParsedEncodedWord, ParsedEncodedWords};

#[derive(Error, Debug)]
pub enum Error {
    #[error(transparent)]
    DecodeUtf8Error(#[from] string::FromUtf8Error),
    #[error(transparent)]
    DecodeBase64Error(#[from] base64::DecodeError),
    #[error(transparent)]
    DecodeQuotedPrintableError(#[from] quoted_printable::QuotedPrintableError),
}

type Result<T> = result::Result<T, Error>;

fn decode_base64(encoded_bytes: Vec<u8>) -> Result<Vec<u8>> {
    // tolerate encoded words whose last base64 chunk carries extra
    // trailing bits (see the utf8_b64_trailing_bit test)
    let base64_decoder = {
        let config = GeneralPurposeConfig::new().with_decode_allow_trailing_bits(true);
        GeneralPurpose::new(&alphabet::STANDARD, config)
    };

    let decoded_bytes = base64_decoder.decode(encoded_bytes)?;
    Ok(decoded_bytes)
}

fn decode_quoted_printable(encoded_bytes: Vec<u8>) -> Result<Vec<u8>> {
    let parse_mode = quoted_printable::ParseMode::Robust;

    const SPACE: u8 = b' ';
    const UNDERSCORE: u8 = b'_';

    // in the RFC 2047 "Q" encoding an underscore stands for a space,
    // so swap them before handing the bytes to the quoted-printable decoder
    let encoded_bytes = encoded_bytes
        .iter()
        .map(|b| if *b == UNDERSCORE { SPACE } else { *b })
        .collect::<Vec<u8>>();

    let decoded_bytes = quoted_printable::decode(encoded_bytes, parse_mode)?;
    Ok(decoded_bytes)
}

fn decode_with_encoding(encoding: Encoding, encoded_bytes: Vec<u8>) -> Result<Vec<u8>> {
    match encoding {
        Encoding::B => decode_base64(encoded_bytes),
        Encoding::Q => decode_quoted_printable(encoded_bytes),
    }
}

fn decode_with_charset(charset: Option<Charset>, decoded_bytes: Vec<u8>) -> Result<String> {
    let decoded_str = match charset {
        Some(charset) => charset.decode(&decoded_bytes).0,
        None => charset::decode_ascii(&decoded_bytes),
    };

    Ok(decoded_str.into_owned())
}

fn decode_utf8_string(clear_text: ClearText) -> Result<String> {
    let decoded_str = String::from_utf8(clear_text)?;
    Ok(decoded_str)
}

fn decode_parsed_encoded_word(
    charset: Option<Charset>,
    encoding: Encoding,
    encoded_text: Vec<u8>,
) -> Result<String> {
    let decoded_bytes = decode_with_encoding(encoding, encoded_text)?;
    let decoded_str = decode_with_charset(charset, decoded_bytes)?;
    Ok(decoded_str)
}

pub fn run(parsed_encoded_words: ParsedEncodedWords) -> Result<String> {
    parsed_encoded_words
        .into_iter()
        .map(|parsed_encoded_word| match parsed_encoded_word {
            ParsedEncodedWord::ClearText(clear_text) => decode_utf8_string(clear_text),
            ParsedEncodedWord::EncodedWord {
                charset,
                encoding,
                encoded_text,
            } => decode_parsed_encoded_word(charset, encoding, encoded_text),
        })
        .collect()
}

rfc2047-decoder-0.2.2/src/lexer.rs
use chumsky::{prelude::Simple, text::whitespace, Parser};
use std::{collections::HashSet, result};
use thiserror::Error;

use crate::Decoder;

#[derive(Error, Debug, Clone, PartialEq)]
pub enum Error {
    #[error("cannot parse bytes into tokens")]
    ParseBytesError(Vec<Simple<u8>>),
    #[error("cannot parse encoded word: encoded word too long")]
    ParseEncodedWordTooLongError(Vec<u8>),
}

type Result<T> = result::Result<T, Error>;

const QUESTION_MARK: u8 = b'?';
const SPACE: u8 = b' ';
const AMOUNT_DELIMITERS: usize = "=????=".len();

pub type Tokens = Vec<Token>;

#[derive(Debug, Clone, PartialEq, Hash, Eq)]
pub enum Token {
    ClearText(Vec<u8>),
    EncodedWord {
        charset: Vec<u8>,
        encoding: Vec<u8>,
        encoded_text: Vec<u8>,
    },
}

impl Token {
    pub const ENCODED_WORD_PREFIX: &'static [u8] = "=?".as_bytes();
    pub const ENCODED_WORD_SUFFIX: &'static [u8] = "?=".as_bytes();
    pub const MAX_ENCODED_WORD_LENGTH: usize = 75;

    /// Returns the length of the encoded word, including the delimiters.
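    ///
    /// For example, `=?UTF-8?Q?str?=` has length 5 (charset `UTF-8`)
    /// + 1 (encoding `Q`) + 3 (encoded text `str`) + 6 (delimiters) = 15.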
    pub fn len(&self) -> usize {
        match self {
            Token::ClearText(_) => 1,
            Token::EncodedWord {
                charset,
                encoding,
                encoded_text,
            } => charset.len() + encoding.len() + encoded_text.len() + AMOUNT_DELIMITERS,
        }
    }

    pub fn get_bytes(&self) -> Vec<u8> {
        match self {
            Token::ClearText(token) => (*token).clone(),
            Token::EncodedWord {
                charset,
                encoding,
                encoded_text,
            } => {
                let mut bytes = Vec::new();
                bytes.extend(charset);
                bytes.extend(encoding);
                bytes.extend(encoded_text);
                bytes
            }
        }
    }

    pub fn get_encoded_word(
        ((charset, encoding), encoded_text): ((Vec<u8>, Vec<u8>), Vec<u8>),
    ) -> Self {
        Self::EncodedWord {
            charset,
            encoding,
            encoded_text,
        }
    }
}

pub fn run(encoded_bytes: &[u8], decoder: Decoder) -> Result<Tokens> {
    get_parser(decoder)
        .parse(encoded_bytes)
        .map_err(Error::ParseBytesError)
}

fn get_parser(decoder: Decoder) -> impl Parser<u8, Tokens, Error = Simple<u8>> {
    use chumsky::prelude::*;

    let encoded_words_in_a_row = {
        let following_encoded_word =
            whitespace().ignore_then(encoded_word_parser(&decoder).rewind());
        encoded_word_parser(&decoder).then_ignore(following_encoded_word)
    };

    let single_encoded_word = encoded_word_parser(&decoder);
    let single_clear_text = clear_text_parser(&decoder);

    encoded_words_in_a_row
        .or(single_encoded_word)
        .or(single_clear_text)
        .repeated()
}

fn clear_text_parser(decoder: &Decoder) -> impl Parser<u8, Token, Error = Simple<u8>> {
    use chumsky::prelude::*;

    const DEFAULT_EMPTY_INPUT_ERROR_MESSAGE: &str = "got empty input";

    take_until(encoded_word_parser(decoder).rewind().ignored().or(end())).try_map(
        |(chars, ()), span| {
            if chars.is_empty() {
                Err(Simple::custom(span, DEFAULT_EMPTY_INPUT_ERROR_MESSAGE))
            } else {
                Ok(Token::ClearText(chars))
            }
        },
    )
}

fn encoded_word_parser(decoder: &Decoder) -> impl Parser<u8, Token, Error = Simple<u8>> {
    use chumsky::prelude::*;

    let skip_encoded_word_length = decoder.skip_encoded_word_length;
    let check_encoded_word_length = move |token: Token, span| {
        if !skip_encoded_word_length && token.len() > Token::MAX_ENCODED_WORD_LENGTH {
            Err(Simple::custom(
                span,
                Error::ParseEncodedWordTooLongError(token.get_bytes()),
            ))
        } else {
            Ok(token)
        }
    };

    let is_especial = |c: u8| get_especials().contains(&c);
    let token = filter(move |&c: &u8| c != SPACE && !c.is_ascii_control() && !is_especial(c));
    let charset = token.repeated().at_least(1).collect::<Vec<u8>>();
    let encoding = token.repeated().at_least(1).collect::<Vec<u8>>();
    let encoded_text = filter(|&c: &u8| c != QUESTION_MARK && c != SPACE)
        .repeated()
        .collect::<Vec<u8>>();

    just(Token::ENCODED_WORD_PREFIX)
        .ignore_then(charset)
        .then_ignore(just(QUESTION_MARK))
        .then(encoding)
        .then_ignore(just(QUESTION_MARK))
        .then(encoded_text)
        .then_ignore(just(Token::ENCODED_WORD_SUFFIX))
        .map(Token::get_encoded_word)
        .try_map(check_encoded_word_length)
}

fn get_especials() -> HashSet<u8> {
    "()<>@,;:/[]?.=".bytes().collect()
}

#[cfg(test)]
mod tests {
    use crate::{lexer::Token, Decoder};

    use super::get_parser;
    use chumsky::Parser;

    #[test]
    fn test_encoded_word() {
        let parser = get_parser(Decoder::new());
        let message = "=?ISO-8859-1?Q?Yeet?=".as_bytes();
        let parsed = parser.parse(message).unwrap();

        assert_eq!(
            parsed,
            vec![Token::EncodedWord {
                charset: "ISO-8859-1".as_bytes().to_vec(),
                encoding: "Q".as_bytes().to_vec(),
                encoded_text: "Yeet".as_bytes().to_vec(),
            }]
        );
    }

    #[test]
    fn test_clear_text() {
        let parser = get_parser(Decoder::new());
        let message = "I use Arch by the way".as_bytes();
        let parsed = parser.parse(message).unwrap();

        assert_eq!(
            parsed,
            vec![Token::ClearText(
                "I use Arch by the way".as_bytes().to_vec()
            )]
        );
    }

    // The following examples are from the encoded-form table in section 8:
    // https://datatracker.ietf.org/doc/html/rfc2047#section-8
    #[test]
    fn test_encoded_from_1() {
        let parser = get_parser(Decoder::new());
        let message = "=?ISO-8859-1?Q?a?=".as_bytes();
        let parsed = parser.parse(message).unwrap();

        assert_eq!(
            parsed,
            vec![Token::EncodedWord {
                charset: "ISO-8859-1".as_bytes().to_vec(),
                encoding: "Q".as_bytes().to_vec(),
                encoded_text: "a".as_bytes().to_vec()
            }]
        );
    }

    // see test_encoded_from_1
    #[test]
    fn test_encoded_from_2() {
        let parser = get_parser(Decoder::new());
        let message = "=?ISO-8859-1?Q?a?= b".as_bytes();
        let parsed = parser.parse(message).unwrap();

        assert_eq!(
            parsed,
            vec![
                Token::EncodedWord {
                    charset: "ISO-8859-1".as_bytes().to_vec(),
                    encoding: "Q".as_bytes().to_vec(),
                    encoded_text: "a".as_bytes().to_vec(),
                },
                Token::ClearText(" b".as_bytes().to_vec()),
            ]
        );
    }

    // see test_encoded_from_1
    #[test]
    fn test_encoded_from_3() {
        let parser = get_parser(Decoder::new());
        let message = "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=".as_bytes();
        let parsed = parser.parse(message).unwrap();

        assert_eq!(
            parsed,
            vec![
                Token::EncodedWord {
                    charset: "ISO-8859-1".as_bytes().to_vec(),
                    encoding: "Q".as_bytes().to_vec(),
                    encoded_text: "a".as_bytes().to_vec(),
                },
                Token::EncodedWord {
                    charset: "ISO-8859-1".as_bytes().to_vec(),
                    encoding: "Q".as_bytes().to_vec(),
                    encoded_text: "b".as_bytes().to_vec()
                }
            ]
        );
    }

    /// Test if parser can parse multiple encoded words in a row
    /// See: https://datatracker.ietf.org/doc/html/rfc2047#section-8
    #[test]
    fn test_multiple_encoded_words() {
        let parser = get_parser(Decoder::new());
        let message = "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?= =?ISO-8859-1?Q?c?=".as_bytes();
        let parsed = parser.parse(message).unwrap();

        assert_eq!(
            parsed,
            vec![
                Token::EncodedWord {
                    charset: "ISO-8859-1".as_bytes().to_vec(),
                    encoding: "Q".as_bytes().to_vec(),
                    encoded_text: "a".as_bytes().to_vec(),
                },
                Token::EncodedWord {
                    charset: "ISO-8859-1".as_bytes().to_vec(),
                    encoding: "Q".as_bytes().to_vec(),
                    encoded_text: "b".as_bytes().to_vec()
                },
                Token::EncodedWord {
                    charset: "ISO-8859-1".as_bytes().to_vec(),
                    encoding: "Q".as_bytes().to_vec(),
                    encoded_text: "c".as_bytes().to_vec()
                }
            ]
        );
    }

    #[test]
    fn test_ignore_multiple_spaces_between_encoded_words() {
        let parser = get_parser(Decoder::new());
        let message = "=?ISO-8859-1?Q?a?=    =?ISO-8859-1?Q?b?=".as_bytes();
        let parsed = parser.parse(message).unwrap();

        assert_eq!(
            parsed,
            vec![
                Token::EncodedWord {
                    charset: "ISO-8859-1".as_bytes().to_vec(),
                    encoding: "Q".as_bytes().to_vec(),
                    encoded_text: "a".as_bytes().to_vec(),
                },
                Token::EncodedWord {
                    charset: "ISO-8859-1".as_bytes().to_vec(),
                    encoding: "Q".as_bytes().to_vec(),
                    encoded_text: "b".as_bytes().to_vec()
                }
            ]
        );
    }

    /// An encoded word with more than 75 chars should be parsed as normal clear text
    #[test]
    fn test_too_long_encoded_word() {
        let parser = get_parser(Decoder::new());

        // "=?" (2) + "ISO-8859-1" (10) + "?" (1) + "Q" (1) + "?" (1) + 'a' (60) + "?=" (2)
        // = 2 + 10 + 1 + 1 + 1 + 60 + 2
        // = 77 => too long
        let message =
            "=?ISO-8859-1?Q?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa?="
                .as_bytes();
        let parsed = parser.parse(message).unwrap();

        assert_eq!(parsed, vec![Token::ClearText(message.to_vec())]);
    }

    #[test]
    fn test_encoded_word_has_especials() {
        let parser = get_parser(Decoder::new());
        let message = "=?ISO-8859-1(?Q?a?=".as_bytes();
        let parsed = parser.parse(message).unwrap();

        assert_eq!(parsed, vec![Token::ClearText(message.to_vec())]);
    }
}

rfc2047-decoder-0.2.2/src/lib.rs
#![doc(html_root_url = "https://docs.rs/rfc2047-decoder/0.2.2")]

mod decoder;
pub use decoder::{Decoder, Error, Result};

mod evaluator;
mod lexer;
mod parser;

/// Decodes the given RFC 2047 MIME Message Header encoded string
/// using a default decoder.
pub fn decode<T: AsRef<[u8]>>(encoded_str: T) -> Result<String> {
    Decoder::new().decode(encoded_str)
}

rfc2047-decoder-0.2.2/src/parser.rs
use charset::Charset;
use std::{convert::TryFrom, result};

use crate::lexer::{Token, Tokens};

#[derive(thiserror::Error, Debug, Clone)]
pub enum Error {
    #[error("cannot parse encoding: encoding is bigger than a char")]
    ParseEncodingTooBigError,
    #[error("cannot parse encoding: encoding is empty")]
    ParseEncodingEmptyError,
    #[error("cannot parse encoding {0}: B or Q is expected")]
    ParseEncodingError(char),
}

type Result<T> = result::Result<T, Error>;

pub type ClearText = Vec<u8>;
pub type ParsedEncodedWords = Vec<ParsedEncodedWord>;

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Encoding {
    B,
    Q,
}

impl Encoding {
    pub const B_CHAR: char = 'b';
    pub const Q_CHAR: char = 'q';
    pub const MAX_LENGTH: usize = 1;
}

impl TryFrom<Vec<u8>> for Encoding {
    type Error = Error;

    fn try_from(token: Vec<u8>) -> Result<Self> {
        if token.len() > Self::MAX_LENGTH {
            return Err(Error::ParseEncodingTooBigError);
        }

        let encoding = token.first().ok_or(Error::ParseEncodingEmptyError)?;
        let encoding = *encoding as char;

        match encoding.to_ascii_lowercase() {
            Encoding::Q_CHAR => Ok(Self::Q),
            Encoding::B_CHAR => Ok(Self::B),
            _ => Err(Error::ParseEncodingError(encoding)),
        }
    }
}

#[derive(Debug, Clone, PartialEq, Hash)]
pub enum ParsedEncodedWord {
    ClearText(ClearText),
    EncodedWord {
        charset: Option<Charset>,
        encoding: Encoding,
        encoded_text: Vec<u8>,
    },
}

impl ParsedEncodedWord {
    pub fn convert_encoded_word(
        charset: Vec<u8>,
        encoding: Vec<u8>,
        encoded_text: Vec<u8>,
    ) -> Result<Self> {
        let encoding = Encoding::try_from(encoding)?;
        let charset = Charset::for_label(&charset);
        Ok(Self::EncodedWord {
            charset,
            encoding,
            encoded_text,
        })
    }
}

pub fn run(tokens: Tokens) -> Result<ParsedEncodedWords> {
    let parsed_encoded_words = convert_tokens_to_encoded_words(tokens)?;
    Ok(parsed_encoded_words)
}

fn convert_tokens_to_encoded_words(tokens: Tokens) -> Result<ParsedEncodedWords> {
    tokens
        .into_iter()
        .map(|token: Token| match token {
            Token::ClearText(clear_text) => Ok(ParsedEncodedWord::ClearText(clear_text)),
            Token::EncodedWord {
                charset,
                encoding,
                encoded_text,
            } => ParsedEncodedWord::convert_encoded_word(charset, encoding, encoded_text),
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use charset::Charset;

    use crate::{
        lexer,
        parser::{self, Encoding, ParsedEncodedWord},
        Decoder,
    };

    /// Example taken from:
    /// https://datatracker.ietf.org/doc/html/rfc2047#section-8
    ///
    /// `From` field
    #[test]
    fn test_parse1() {
        let message = "=?US-ASCII?Q?Keith_Moore?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("US-ASCII".as_bytes()),
            encoding: Encoding::Q,
            encoded_text: "Keith_Moore".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }

    /// Example taken from:
    /// https://datatracker.ietf.org/doc/html/rfc2047#section-8
    ///
    /// `To` field
    #[test]
    fn test_parse2() {
        let message = "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("ISO-8859-1".as_bytes()),
            encoding: Encoding::Q,
            encoded_text: "Keld_J=F8rn_Simonsen".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }

    /// Example taken from:
    /// https://datatracker.ietf.org/doc/html/rfc2047#section-8
    ///
    /// `CC` field
    #[test]
    fn test_parse3() {
        let message = "=?ISO-8859-1?Q?Andr=E9?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("ISO-8859-1".as_bytes()),
            encoding: Encoding::Q,
            encoded_text: "Andr=E9".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }

    /// Example taken from:
    /// https://datatracker.ietf.org/doc/html/rfc2047#section-8
    ///
    /// `Subject` field
    #[test]
    fn test_parse4() {
        let message = "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("ISO-8859-1".as_bytes()),
            encoding: Encoding::B,
            encoded_text: "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }
}