rfc2047-decoder-1.0.6/.cargo_vcs_info.json0000644000000001360000000000100135320ustar { "git": { "sha1": "b97e11d610b96eca8eb7a6a3f0c4f0261d0f2db9" }, "path_in_vcs": "" }rfc2047-decoder-1.0.6/.envrc000064400000000000000000000000121046102023000134310ustar 00000000000000use flake rfc2047-decoder-1.0.6/.github/FUNDING.yml000064400000000000000000000000331046102023000154730ustar 00000000000000github: [soywod, TornaxO7] rfc2047-decoder-1.0.6/.github/dependabot.yml000064400000000000000000000010171046102023000165110ustar 00000000000000# To get started with Dependabot version updates, you'll need to specify which # package ecosystems to update and where the package manifests are located. # Please see the documentation for all configuration options: # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file version: 2 updates: - package-ecosystem: "cargo" # See documentation for possible values directory: "/" # Location of package manifests schedule: interval: "weekly" rfc2047-decoder-1.0.6/.github/workflows/ci.yml000064400000000000000000000013141046102023000170340ustar 00000000000000name: ci on: pull_request: push: branches: - main jobs: tests: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 - name: Install Nix uses: cachix/install-nix-action@v30 with: github_access_token: ${{ secrets.GITHUB_TOKEN }} - name: Run test suite run: nix develop -c cargo test clippy: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 - name: Install Nix uses: cachix/install-nix-action@v30 with: github_access_token: ${{ secrets.GITHUB_TOKEN }} - name: Run test suite run: nix develop -c cargo clippy rfc2047-decoder-1.0.6/.gitignore000064400000000000000000000000431046102023000143070ustar 00000000000000/target Cargo.lock .idea/ .direnv/ rfc2047-decoder-1.0.6/CHANGELOG.md000064400000000000000000000063501046102023000141370ustar 00000000000000# Changelog All notable changes to 
this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ## [1.0.3] - 2023-12-23 - Fix versioning ## [1.0.2] - 2023-12-23 - Bumping dependencies ## [1.0.1] - 2023-09-17 - extending docs about some structs - fixing `Display` implementation of `TooLongEncodedWord` ## [1.0.0] - 2023-09-16 ### Changed - `lexer::Error` gets the alias `LexerError` - `parser::Error` gets the alias `ParserError` - `evaluator::Error` gets the alias `EvaluatorError` - `Decoder.too_long_encoded_word` has a strategy instead of a `bool` ## [0.2.2] - 2023-03-29 ### Changed - Bumped `base64` to `v0.21.0`. - Bumped `chumsky` to `v0.9.2`. ## [0.2.1] - 2023-01-08 ### Fixed - Fixed discarded errors [#20]. ## [0.2.0] - 2022-10-11 ### Added - Added Nix support - Allowed decoding strings bigger than 76 chars [#15] ### Changed - Renamed error variants to match the [Rust API guidelines](https://rust-lang.github.io/api-guidelines/naming.html#names-use-a-consistent-word-order-c-word-order): - `lexer::Error::EncodingIssue` becomes `ParseBytesError` - `lexer::Error::EncodedWordTooLong` becomes `ParseEncodedWordTooLongError` - `parser::Error::UnknownCharset` becomes `ParseEncodingError` - `parser::Error::UnknownCharset` has been removed (unused) - `parser::Error::UnknownEncoding` becomes `ParseEncodingError` - `parser::Error::EncodedWordTooBig` becomes `ParseEncodingTooBigError` - `parser::Error::EmptyEncoding` becomes `ParseEncodingEmptyError` - `evaluator::Error::DecodeUtf8` becomes `DecodeUtf8Error` - `evaluator::Error::DecodeBase64` becomes `DecodeBase64Error` - `evaluator::Error::DecodeQuotedPrintable` becomes `DecodeQuotedPrintableError` ## [0.1.3] - 2022-10-10 ### Fixed - Max length of encoded words [#1] - Manage tokens special chars [#3] ### Changed - Refactored parser using chumsky [#7] ## [0.1.2] - 2020-12-30 ### Fixed - Multiple 
encoded words separator ## [0.1.1] - 2020-12-30 ### Added - Added evaluator with AST ### Changed - Decoded fn accepts now `&[u8]` instead of `&str` ### Fixed - Removed space between encoded words [#2] ## [0.1.0] - 2020-12-28 First official release. [unreleased]: https://github.com/soywod/rfc2047-decoder/compare/v0.2.2...HEAD [0.2.2]: https://github.com/soywod/rfc2047-decoder/compare/v0.2.1...v0.2.2 [0.2.1]: https://github.com/soywod/rfc2047-decoder/compare/v0.2.0...v0.2.1 [0.2.0]: https://github.com/soywod/rfc2047-decoder/compare/v0.1.3...v0.2.0 [0.1.3]: https://github.com/soywod/rfc2047-decoder/compare/v0.1.2...v0.1.3 [0.1.2]: https://github.com/soywod/rfc2047-decoder/compare/v0.1.1...v0.1.2 [0.1.1]: https://github.com/soywod/rfc2047-decoder/compare/v0.1.0...v0.1.1 [0.1.0]: https://github.com/soywod/rfc2047-decoder/releases/tag/v0.1.0 [#1]: https://github.com/soywod/rfc2047-decoder/issues/1 [#2]: https://github.com/soywod/rfc2047-decoder/issues/2 [#3]: https://github.com/soywod/rfc2047-decoder/issues/3 [#7]: https://github.com/soywod/rfc2047-decoder/issues/7 [#15]: https://github.com/soywod/rfc2047-decoder/issues/15 [#20]: https://github.com/soywod/rfc2047-decoder/issues/20 rfc2047-decoder-1.0.6/Cargo.lock0000644000000175670000000000100115250ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "ahash" version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "once_cell", "version_check", "zerocopy", ] [[package]] name = "allocator-api2" version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "cc" version = "1.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40545c26d092346d8a8dab71ee48e7685a7a9cba76e634790c215b41a4a7b4cf" dependencies = [ "shlex", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "charset" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1f927b07c74ba84c7e5fe4db2baeb3e996ab2688992e39ac68ce3220a677c7e" dependencies = [ "base64", "encoding_rs", ] [[package]] name = "chumsky" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" dependencies = [ "hashbrown", "stacker", ] [[package]] name = "encoding_rs" version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", "allocator-api2", ] 
[[package]] name = "libc" version = "0.2.162" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "once_cell" version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "proc-macro2" version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" dependencies = [ "unicode-ident", ] [[package]] name = "psm" version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" dependencies = [ "cc", ] [[package]] name = "quote" version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] [[package]] name = "quoted_printable" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "640c9bd8497b02465aeef5375144c26062e0dcd5939dfcbb0f5db76cb8c17c73" [[package]] name = "rfc2047-decoder" version = "1.0.6" dependencies = [ "base64", "charset", "chumsky", "memchr", "quoted_printable", "thiserror", ] [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "stacker" version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" dependencies = [ "cc", "cfg-if", 
"libc", "psm", "windows-sys", ] [[package]] name = "syn" version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "thiserror" version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "unicode-ident" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 
[[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "zerocopy" version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", "syn", ] rfc2047-decoder-1.0.6/Cargo.toml0000644000000027740000000000100115420ustar # THIS FILE IS 
AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "rfc2047-decoder" version = "1.0.6" authors = [ "soywod ", "TornaxO7 ", ] build = false autobins = false autoexamples = false autotests = false autobenches = false description = "Rust library for decoding RFC 2047 MIME Message Headers." homepage = "https://github.com/TornaxO7/rfc2047-decoder" documentation = "https://docs.rs/rfc2047-decoder" readme = "README.md" keywords = [ "rfc2047", "mime", "header", "decoder", "decoding", ] categories = [ "email", "encoding", "parsing", "parser-implementations", ] license = "MIT" repository = "https://github.com/TornaxO7/rfc2047-decoder" [lib] name = "rfc2047_decoder" path = "src/lib.rs" [[example]] name = "decode" path = "examples/decode.rs" [dependencies.base64] version = "0.22" [dependencies.charset] version = "0.1" [dependencies.chumsky] version = "0.9" [dependencies.memchr] version = "2.5" [dependencies.quoted_printable] version = "0.5" [dependencies.thiserror] version = "2.0" rfc2047-decoder-1.0.6/Cargo.toml.orig000064400000000000000000000013151046102023000152110ustar 00000000000000[package] name = "rfc2047-decoder" description = "Rust library for decoding RFC 2047 MIME Message Headers." 
version = "1.0.6" authors = ["soywod ", "TornaxO7 "] edition = "2018" repository = "https://github.com/TornaxO7/rfc2047-decoder" documentation = "https://docs.rs/rfc2047-decoder" homepage = "https://github.com/TornaxO7/rfc2047-decoder" categories = ["email", "encoding", "parsing", "parser-implementations"] keywords = ["rfc2047", "mime", "header", "decoder", "decoding"] license = "MIT" readme = "README.md" [lib] name = "rfc2047_decoder" [dependencies] base64 = "0.22" charset = "0.1" chumsky = "0.9" memchr = "2.5" quoted_printable = "0.5" thiserror = "2.0" rfc2047-decoder-1.0.6/LICENSE000064400000000000000000000021071046102023000133270ustar 00000000000000MIT License Copyright (c) 2020-2022 soywod Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
rfc2047-decoder-1.0.6/README.md000064400000000000000000000034431046102023000136050ustar 00000000000000# rfc2047-decoder [![Crates.io](https://img.shields.io/crates/v/rfc2047-decoder?style=flat-square)](https://crates.io/crates/rfc2047-decoder) [![Crates.io](https://img.shields.io/crates/d/rfc2047-decoder?style=flat-square)](https://crates.io/crates/rfc2047-decoder) # State This project is considered as finished, only bugs will be fixed so don't wonder, if the last commit is a long time ago. # Introduction Rust library for decoding [RFC 2047 MIME Message Headers](https://tools.ietf.org/html/rfc2047). ```rust use rfc2047_decoder; fn main() { let encoded_str = "=?UTF-8?Q?str?="; let decoded_str = "str"; // using the decode helper (default options) assert_eq!( rfc2047_decoder::decode(encoded_str.as_bytes()).unwrap(), decoded_str ); // using the decoder builder (custom options) assert_eq!( rfc2047_decoder::Decoder::new() .too_long_encoded_word_strategy(rfc2047_decoder::RecoverStrategy::Skip) .decode(encoded_str.as_bytes()) .unwrap(), decoded_str ); } ``` ## Sponsoring [![github](https://img.shields.io/badge/-GitHub%20Sponsors-fafbfc?logo=GitHub%20Sponsors&style=flat-square)](https://github.com/sponsors/soywod) [![paypal](https://img.shields.io/badge/-PayPal-0079c1?logo=PayPal&logoColor=ffffff&style=flat-square)](https://www.paypal.com/paypalme/soywod) [![ko-fi](https://img.shields.io/badge/-Ko--fi-ff5e5a?logo=Ko-fi&logoColor=ffffff&style=flat-square)](https://ko-fi.com/soywod) [![buy-me-a-coffee](https://img.shields.io/badge/-Buy%20Me%20a%20Coffee-ffdd00?logo=Buy%20Me%20A%20Coffee&logoColor=000000&style=flat-square)](https://www.buymeacoffee.com/soywod) [![liberapay](https://img.shields.io/badge/-Liberapay-f6c915?logo=Liberapay&logoColor=222222&style=flat-square)](https://liberapay.com/soywod) rfc2047-decoder-1.0.6/examples/decode.rs000064400000000000000000000010461046102023000157320ustar 00000000000000use rfc2047_decoder; fn main() { let encoded_str = 
"=?UTF-8?Q?str?="; let decoded_str = "str"; // using the decode helper (default options) assert_eq!( rfc2047_decoder::decode(encoded_str.as_bytes()).unwrap(), decoded_str ); // using the decoder builder (custom options) assert_eq!( rfc2047_decoder::Decoder::new() .too_long_encoded_word_strategy(rfc2047_decoder::RecoverStrategy::Skip) .decode(encoded_str.as_bytes()) .unwrap(), decoded_str ); } rfc2047-decoder-1.0.6/flake.lock000064400000000000000000000036671046102023000142720ustar 00000000000000{ "nodes": { "nixpkgs": { "locked": { "lastModified": 1731139594, "narHash": "sha256-IigrKK3vYRpUu+HEjPL/phrfh7Ox881er1UEsZvw9Q4=", "owner": "nixos", "repo": "nixpkgs", "rev": "76612b17c0ce71689921ca12d9ffdc9c23ce40b2", "type": "github" }, "original": { "owner": "nixos", "ref": "nixos-unstable", "repo": "nixpkgs", "type": "github" } }, "nixpkgs_2": { "locked": { "lastModified": 1728538411, "narHash": "sha256-f0SBJz1eZ2yOuKUr5CA9BHULGXVSn6miBuUWdTyhUhU=", "owner": "NixOS", "repo": "nixpkgs", "rev": "b69de56fac8c2b6f8fd27f2eca01dcda8e0a4221", "type": "github" }, "original": { "owner": "NixOS", "ref": "nixpkgs-unstable", "repo": "nixpkgs", "type": "github" } }, "root": { "inputs": { "nixpkgs": "nixpkgs", "rust-overlay": "rust-overlay", "systems": "systems" } }, "rust-overlay": { "inputs": { "nixpkgs": "nixpkgs_2" }, "locked": { "lastModified": 1731292155, "narHash": "sha256-fYVoUUtSadbOrH0z0epVQDsStBDS/S/fAK//0ECQAAI=", "owner": "oxalica", "repo": "rust-overlay", "rev": "7c4cd99ed7604b79e8cb721099ac99c66f656b3a", "type": "github" }, "original": { "owner": "oxalica", "repo": "rust-overlay", "type": "github" } }, "systems": { "locked": { "lastModified": 1681028828, "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", "owner": "nix-systems", "repo": "default", "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", "type": "github" }, "original": { "owner": "nix-systems", "repo": "default", "type": "github" } } }, "root": "root", "version": 7 } 
rfc2047-decoder-1.0.6/flake.nix000064400000000000000000000016151046102023000141270ustar 00000000000000{ description = "Rust library for decoding RFC 2047 MIME Message Headers."; inputs = { nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; rust-overlay.url = "github:oxalica/rust-overlay"; systems.url = "github:nix-systems/default"; }; outputs = { nixpkgs, rust-overlay, systems, ... }: let eachSystem = nixpkgs.lib.genAttrs (import systems); in { devShells = eachSystem (system: let pkgs = import nixpkgs { inherit system; overlays = [ rust-overlay.overlays.default ]; }; rust-toolchain = (pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml).override { extensions = [ "rust-src" "rust-analyzer" ]; }; in { default = pkgs.mkShell { packages = with pkgs; [ cargo-release ] ++ [ rust-toolchain ]; }; }); }; } rfc2047-decoder-1.0.6/rust-toolchain.toml000064400000000000000000000000371046102023000161720ustar 00000000000000[toolchain] channel = "stable" rfc2047-decoder-1.0.6/rustfmt.toml000064400000000000000000000000011046102023000147120ustar 00000000000000 rfc2047-decoder-1.0.6/shell.nix000064400000000000000000000007061046102023000141540ustar 00000000000000# This file exists for legacy nix-shell # https://nixos.wiki/wiki/Flakes#Using_flakes_project_from_a_legacy_Nix # You generally do *not* have to modify this ever. (import ( let lock = builtins.fromJSON (builtins.readFile ./flake.lock); in fetchTarball { url = "https://github.com/edolstra/flake-compat/archive/${lock.nodes.flake-compat.locked.rev}.tar.gz"; sha256 = lock.nodes.flake-compat.locked.narHash; } ) { src = ./.; }).shellNix rfc2047-decoder-1.0.6/src/decoder.rs000064400000000000000000000231771046102023000150760ustar 00000000000000use std::result; use thiserror::Error; use crate::{evaluator, lexer, parser}; /// The possible errors which can occur while parsing the string. #[derive(Error, Debug, PartialEq)] pub enum Error { /// Symbolises that an error occured in the lexer. 
#[error(transparent)] Lexer(#[from] lexer::Error), /// Symbolises that an error occured in the parser. #[error(transparent)] Parser(#[from] parser::Error), /// Symbolises that an error occured in the evaluator. #[error(transparent)] Evaluator(#[from] evaluator::Error), } /// Determines which strategy should be used if an encoded word isn't encoded as /// described in the RFC. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum RecoverStrategy { /// Decode the encoded word although it's incorrectly encoded. /// /// # Example /// Take a look to [Decoder#RecoveryStrategy::Decode](Decoder#recoverstrategydecode). Decode, /// Skip the incorrectly encoded encoded word. /// /// # Example /// Take a look to [Decoder#RecoveryStrategy::Skip](Decoder#recoverstrategyskip). Skip, /// Abort the string-parsing and return an error. /// /// # Example /// Take a look to [Decoder#RecoveryStrategy::Abort](Decoder#recoverstrategyabort-default). Abort, } type Result = result::Result; /// Represents the decoder builder. /// /// # Example /// ``` /// use rfc2047_decoder::{Decoder, RecoverStrategy}; /// /// let decoder = Decoder::new() /// .too_long_encoded_word_strategy(RecoverStrategy::Skip); /// let decoded_str = decoder.decode("=?UTF-8?B?c3Ry?=").unwrap(); /// /// assert_eq!(decoded_str, "str"); /// ``` #[derive(Debug, Clone, Eq, PartialEq)] pub struct Decoder { /// Determines which strategy should be used, if the parser encounters /// encoded words which are longer than allowed in the RFC (it's longer than 75 chars). pub too_long_encoded_word: RecoverStrategy, } impl Decoder { /// Equals [Decoder::default]. pub fn new() -> Self { Self::default() } /// Set the strategy if the decoder finds an encoded word which is too long. 
/// /// # Examples /// /// Each example uses the same encoded message: /// ```txt /// =?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?= /// ``` /// which exceeds the maximum length of 75 chars so it's actually invalid. /// /// ## RecoverStrategy::Skip /// Skips the invalid encoded word and parses it as clear text. /// /// ```rust /// use rfc2047_decoder::{Decoder, RecoverStrategy}; /// /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?="; /// let decoder = Decoder::new() /// .too_long_encoded_word_strategy(RecoverStrategy::Skip); /// /// let parsed = decoder.decode(message).unwrap(); /// /// // nothing changed! /// assert_eq!(parsed, message); /// ``` /// /// ## RecoverStrategy::Decode /// Although the encoded word is invalid, keep decoding it. /// /// ```rust /// use rfc2047_decoder::{Decoder, RecoverStrategy}; /// /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?="; /// let decoder = Decoder::new() /// .too_long_encoded_word_strategy(RecoverStrategy::Decode); /// /// let parsed = decoder.decode(message).unwrap(); /// /// // could you decode it? ;) /// let expected_result = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut interdum quam eu facilisis ornare."; /// /// assert_eq!(parsed, expected_result); /// ``` /// /// ## RecoverStrategy::Abort (default) /// The parser will return an `Err` and collects all encoded words which are /// too long. You can use them afterwards for error messages for example. 
/// /// ```rust /// use rfc2047_decoder::{Decoder, RecoverStrategy, Error::{self, Lexer}}; /// use rfc2047_decoder::LexerError::ParseEncodedWordTooLongError; /// use rfc2047_decoder::TooLongEncodedWords; /// /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?="; /// // `RecoverStrategy::Abort` is the default strategy /// let decoder = Decoder::new(); /// /// let parsed = decoder.decode(message); /// /// assert_eq!(parsed, Err(Lexer(ParseEncodedWordTooLongError(TooLongEncodedWords(vec!["=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=".to_string()]))))); /// ``` pub fn too_long_encoded_word_strategy(mut self, strategy: RecoverStrategy) -> Self { self.too_long_encoded_word = strategy; self } /// Decodes the given RFC 2047 MIME Message Header encoded string. pub fn decode>(self, encoded_str: T) -> Result { let text_tokens = lexer::run(encoded_str.as_ref(), self)?; let parsed_text = parser::run(text_tokens)?; let evaluated_string = evaluator::run(parsed_text)?; Ok(evaluated_string) } } impl Default for Decoder { /// Returns the decoder with the following default "settings": /// /// - `too_long_encoded_word`: [RecoverStrategy::Abort] fn default() -> Self { Self { too_long_encoded_word: RecoverStrategy::Abort, } } } #[cfg(test)] mod tests { /// Here are the main-tests which are listed here: /// https://datatracker.ietf.org/doc/html/rfc2047#section-8 /// Scroll down until you see the table. 
mod rfc_tests { use crate::decode; #[test] fn decode_encoded_word_single_char() { assert_eq!(decode("=?ISO-8859-1?Q?a?=").unwrap(), "a"); } #[test] fn decode_encoded_word_separated_by_whitespace() { assert_eq!(decode("=?ISO-8859-1?Q?a?= b").unwrap(), "a b"); } #[test] fn decode_two_encoded_chars() { assert_eq!( decode("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=").unwrap(), "ab" ); } #[test] fn whitespace_between_two_encoded_words_should_be_ignored() { assert_eq!( decode("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=").unwrap(), "ab" ); } #[test] fn whitespace_chars_between_two_encoded_words_should_be_ignored() { assert_eq!( decode( "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=" ) .unwrap(), "ab" ); } #[test] fn whitespace_encoded_in_encoded_word() { assert_eq!(decode("=?ISO-8859-1?Q?a_b?=").unwrap(), "a b"); } #[test] fn ignore_whitespace_between_two_encoded_words_but_not_the_encoded_whitespace() { assert_eq!( decode("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=").unwrap(), "a b" ); } } /// Those are some custom tests mod custom_tests { use crate::decode; #[test] fn clear_empty() { assert_eq!(decode("").unwrap(), ""); } #[test] fn clear_with_spaces() { assert_eq!(decode("str with spaces").unwrap(), "str with spaces"); } #[test] fn utf8_qs_empty() { assert_eq!(decode("").unwrap(), ""); } #[test] fn utf8_qs_with_str() { assert_eq!(decode("=?UTF-8?Q?str?=").unwrap(), "str"); } #[test] fn utf8_qs_with_spaces() { assert_eq!( decode("=?utf8?q?str_with_spaces?=").unwrap(), "str with spaces" ); } #[test] fn utf8_qs_with_spec_chars() { assert_eq!( decode("=?utf8?q?str_with_special_=C3=A7h=C3=A0r=C3=9F?=").unwrap(), "str with special çhàrß" ); } #[test] fn utf8_qs_double() { assert_eq!( decode("=?UTF-8?Q?str?=\r\n =?UTF-8?Q?str?=").unwrap(), "strstr" ); assert_eq!( decode("=?UTF-8?Q?str?=\n =?UTF-8?Q?str?=").unwrap(), "strstr" ); assert_eq!(decode("=?UTF-8?Q?str?= =?UTF-8?Q?str?=").unwrap(), "strstr"); assert_eq!(decode("=?UTF-8?Q?str?==?UTF-8?Q?str?=").unwrap(), "strstr"); } #[test] fn 
utf8_b64_empty() { assert_eq!(decode("=?UTF-8?B??=").unwrap(), ""); } #[test] fn utf8_b64_with_str() { assert_eq!(decode("=?UTF-8?B?c3Ry?=").unwrap(), "str"); } #[test] fn utf8_b64_with_spaces() { assert_eq!( decode("=?utf8?b?c3RyIHdpdGggc3BhY2Vz?=").unwrap(), "str with spaces" ); } #[test] fn utf8_b64_with_spec_chars() { assert_eq!( decode("=?utf8?b?c3RyIHdpdGggc3BlY2lhbCDDp2jDoHLDnw==?=").unwrap(), "str with special çhàrß" ); } #[test] fn utf8_b64_trailing_bit() { assert_eq!( decode("=?utf-8?B?UG9ydGFsZSBIYWNraW5nVGVhbW==?=").unwrap(), "Portale HackingTeam", ); } } } rfc2047-decoder-1.0.6/src/evaluator.rs000064400000000000000000000055251046102023000154700ustar 00000000000000use base64::{ alphabet, engine::{GeneralPurpose, GeneralPurposeConfig}, Engine, }; use charset::Charset; use std::{result, string}; use thiserror::Error; use crate::parser::{ClearText, Encoding, ParsedEncodedWord, ParsedEncodedWords}; /// All errors which the evaluator can throw. #[derive(Error, Debug, PartialEq)] pub enum Error { #[error(transparent)] DecodeUtf8Error(#[from] string::FromUtf8Error), #[error(transparent)] DecodeBase64Error(#[from] base64::DecodeError), #[error(transparent)] DecodeQuotedPrintableError(#[from] quoted_printable::QuotedPrintableError), } type Result = result::Result; fn decode_base64(encoded_bytes: Vec) -> Result> { let base64_decoder = { let config = GeneralPurposeConfig::new().with_decode_allow_trailing_bits(true); GeneralPurpose::new(&alphabet::STANDARD, config) }; let decoded_bytes = base64_decoder.decode(encoded_bytes)?; Ok(decoded_bytes) } fn decode_quoted_printable(encoded_bytes: Vec) -> Result> { let parse_mode = quoted_printable::ParseMode::Robust; const SPACE: u8 = b' '; const UNDERSCORE: u8 = b'_'; let encoded_bytes = encoded_bytes .iter() .map(|b| if *b == UNDERSCORE { SPACE } else { *b }) .collect::>(); let decoded_bytes = quoted_printable::decode(encoded_bytes, parse_mode)?; Ok(decoded_bytes) } fn decode_with_encoding(encoding: Encoding, 
encoded_bytes: Vec) -> Result> { match encoding { Encoding::B => decode_base64(encoded_bytes), Encoding::Q => decode_quoted_printable(encoded_bytes), } } fn decode_with_charset(charset: Option, decoded_bytes: Vec) -> Result { let decoded_str = match charset { Some(charset) => charset.decode(&decoded_bytes).0, None => charset::decode_ascii(&decoded_bytes), }; Ok(decoded_str.into_owned()) } fn decode_utf8_string(clear_text: ClearText) -> Result { let decoded_bytes = String::from_utf8(clear_text)?; Ok(decoded_bytes) } fn decode_parsed_encoded_word( charset: Option, encoding: Encoding, encoded_text: Vec, ) -> Result { let decoded_bytes = decode_with_encoding(encoding, encoded_text)?; let decoded_str = decode_with_charset(charset, decoded_bytes)?; Ok(decoded_str) } pub fn run(parsed_encoded_words: ParsedEncodedWords) -> Result { parsed_encoded_words .into_iter() .map(|parsed_encoded_word| match parsed_encoded_word { ParsedEncodedWord::ClearText(clear_text) => decode_utf8_string(clear_text), ParsedEncodedWord::EncodedWord { charset, encoding, encoded_text, } => decode_parsed_encoded_word(charset, encoding, encoded_text), }) .collect() } rfc2047-decoder-1.0.6/src/lexer/encoded_word.rs000064400000000000000000000034471046102023000172420ustar 00000000000000use std::fmt::Display; use super::QUESTION_MARK; pub const PREFIX: &[u8] = "=?".as_bytes(); pub const SUFFIX: &[u8] = "?=".as_bytes(); pub const MAX_LENGTH: usize = 75; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct EncodedWord { pub charset: Vec, pub encoding: Vec, pub encoded_text: Vec, } impl EncodedWord { pub fn new(charset: Vec, encoding: Vec, encoded_text: Vec) -> Self { Self { charset, encoding, encoded_text, } } pub fn from_parser(((charset, encoding), encoded_text): ((Vec, Vec), Vec)) -> Self { Self::new(charset, encoding, encoded_text) } /// Returns the amount of `char`s for this encoded word pub fn len(&self) -> usize { self.get_bytes(true).len() } pub fn get_bytes(&self, with_delimiters: bool) -> Vec { 
let mut bytes = Vec::new(); if with_delimiters { bytes.extend(PREFIX); bytes.extend(&self.charset); bytes.extend(&[QUESTION_MARK]); bytes.extend(&self.encoding); bytes.extend(&[QUESTION_MARK]); bytes.extend(&self.encoded_text); bytes.extend(SUFFIX); } else { bytes.extend(&self.charset); bytes.extend(&self.encoding); bytes.extend(&self.encoded_text); } bytes } } impl Display for EncodedWord { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let charset = String::from_utf8(self.charset.clone()).unwrap(); let encoding = String::from_utf8(self.encoding.clone()).unwrap(); let encoded_text = String::from_utf8(self.encoded_text.clone()).unwrap(); write!(f, "=?{}?{}?{}?=", charset, encoding, encoded_text) } } rfc2047-decoder-1.0.6/src/lexer/mod.rs000064400000000000000000000314601046102023000153610ustar 00000000000000pub mod encoded_word; use chumsky::{prelude::Simple, text::whitespace, Parser}; use std::{collections::HashSet, fmt::Display, result}; use thiserror::Error; use crate::{decoder::RecoverStrategy, Decoder}; use self::encoded_word::EncodedWord; pub const QUESTION_MARK: u8 = b'?'; const SPACE: u8 = b' '; /// A helper struct which implements [std::fmt::Display] for `Vec` and /// which contains the encoded words which are too long as a `String`. 
/// A helper struct which implements [std::fmt::Display] for `Vec<String>` and
/// which contains the encoded words which are too long as a `String`.
///
/// # Example
/// ```
/// use rfc2047_decoder::{self, decode, RecoverStrategy, LexerError};
///
/// // the first string and the third string are more than 75 characters, hence
/// // they are actually invalid encoded words
/// let message = concat![
///     "=?utf-8?B?bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb==?=",
///     "among us",
///     "=?utf-8?B?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa==?=",
/// ];
/// let result = decode(message).unwrap_err();
/// if let rfc2047_decoder::Error::Lexer(LexerError::ParseEncodedWordTooLongError(invalid_encoded_words)) = result {
///     assert_eq!(invalid_encoded_words.0[0], "=?utf-8?B?bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb==?=");
///     assert_eq!(invalid_encoded_words.0[1], "=?utf-8?B?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa==?=");
/// } else {
///     panic!("expected a ParseEncodedWordTooLongError");
/// }
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TooLongEncodedWords(pub Vec<String>);

impl TooLongEncodedWords {
    /// Wraps the textual form of every encoded word that was too long.
    pub fn new(encoded_words: Vec<String>) -> Self {
        Self(encoded_words)
    }
}

impl Display for TooLongEncodedWords {
    /// Writes the encoded words separated by `", "`; writes nothing when the
    /// list is empty.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut encoded_words = self.0.iter();

        // Stream straight into the formatter instead of assembling an
        // intermediate `String` first.
        if let Some(first) = encoded_words.next() {
            f.write_str(first)?;
            for encoded_word in encoded_words {
                f.write_str(", ")?;
                f.write_str(encoded_word)?;
            }
        }

        Ok(())
    }
}
#[derive(Error, Debug, Clone, PartialEq)] pub enum Error { #[error("cannot parse bytes into tokens")] ParseBytesError(Vec>), #[error("Cannot parse the following encoded words, because they are too long: {0}")] ParseEncodedWordTooLongError(TooLongEncodedWords), } type Result = result::Result; pub type Tokens = Vec; #[derive(Debug, Clone, PartialEq, Hash, Eq)] pub enum Token { ClearText(Vec), EncodedWord(EncodedWord), } impl Token { /// Returns the amount of bytes which the token holds pub fn len(&self) -> usize { match self { Self::ClearText(clear_text) => clear_text.len(), Self::EncodedWord(encoded_word) => encoded_word.len(), } } } pub fn run(encoded_bytes: &[u8], decoder: Decoder) -> Result { let tokens = get_parser(&decoder) .parse(encoded_bytes) .map_err(Error::ParseBytesError)?; validate_tokens(tokens, &decoder) } fn get_parser(decoder: &Decoder) -> impl Parser> { use chumsky::prelude::*; let encoded_words_in_a_row = { let following_encoded_word = whitespace().ignore_then(encoded_word_parser(decoder).rewind()); encoded_word_parser(decoder).then_ignore(following_encoded_word) }; let single_encoded_word = encoded_word_parser(decoder); let single_clear_text = clear_text_parser(decoder); encoded_words_in_a_row .or(single_encoded_word) .or(single_clear_text) .repeated() } fn clear_text_parser(decoder: &Decoder) -> impl Parser> { use chumsky::prelude::*; const DEFAULT_EMPTY_INPUT_ERROR_MESSAGE: &str = "got empty input"; take_until(encoded_word_parser(decoder).rewind().ignored().or(end())).try_map( |(chars, ()), span| { if chars.is_empty() { Err(Simple::custom(span, DEFAULT_EMPTY_INPUT_ERROR_MESSAGE)) } else { Ok(Token::ClearText(chars)) } }, ) } fn encoded_word_parser(decoder: &Decoder) -> impl Parser> { use chumsky::prelude::*; let skip_encoded_word_length = decoder.too_long_encoded_word; let convert_to_token = move |encoded_word: EncodedWord| { if encoded_word.len() > encoded_word::MAX_LENGTH && skip_encoded_word_length == RecoverStrategy::Skip { 
/// Returns the bytes that may not appear inside the charset or encoding
/// section of an encoded word.
///
/// This is called from the per-byte `token` filter predicate, so it hands out
/// a static slice instead of building a fresh set on every call. Callers can
/// keep using `get_especials().contains(&byte)`.
///
/// NOTE(review): RFC 2047 section 2 also lists `"` and `\` as especials; they
/// are not part of this set — confirm whether the omission is intentional.
fn get_especials() -> &'static [u8] {
    b"()<>@,;:/[]?.="
}
// Another example from the encoded-form table in section 8 of RFC 2047:
// https://datatracker.ietf.org/doc/html/rfc2047#section-8
//
// Two encoded words separated by a single space are expected to lex into
// exactly two `EncodedWord` tokens; the separating space yields no
// `ClearText` token.
#[test]
fn encoded_from_3() {
    let parser = get_parser(&Decoder::new());
    let message = "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=".as_bytes();

    let parsed = parser.parse(message).unwrap();

    assert_eq!(
        parsed,
        vec![
            Token::EncodedWord(EncodedWord {
                charset: "ISO-8859-1".as_bytes().to_vec(),
                encoding: "Q".as_bytes().to_vec(),
                encoded_text: "a".as_bytes().to_vec(),
            }),
            Token::EncodedWord(EncodedWord {
                charset: "ISO-8859-1".as_bytes().to_vec(),
                encoding: "Q".as_bytes().to_vec(),
                encoded_text: "b".as_bytes().to_vec()
            })
        ]
    );
}
/// An encoded word with more than 75 bytes must not be tokenized: with the
/// default decoder the lexer returns `ParseEncodedWordTooLongError`, carrying
/// the textual form of the offending encoded word. It is an `Err` value, not
/// a panic.
#[test]
fn err_on_too_long_encoded_word() {
    // Length of the message below:
    // "=?" (2) + "ISO-8859-1" (10) + "?" (1) + "Q" (1) + "?" (1) + 'a' (60) + "?=" (2)
    // = 2 + 10 + 1 + 1 + 1 + 60 + 2
    // = 77 => too long
    let message =
        "=?ISO-8859-1?Q?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa?="
            .as_bytes();
    let parsed = run(message, Decoder::new());

    assert_eq!(
        parsed,
        Err(Error::ParseEncodedWordTooLongError(
            TooLongEncodedWords::new(vec![EncodedWord {
                charset: "ISO-8859-1".as_bytes().to_vec(),
                encoding: "Q".as_bytes().to_vec(),
                encoded_text: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                    .as_bytes()
                    .to_vec()
            }
            .to_string()])
        ))
    );
}
equals this: /// assert_eq!(Decoder::new().decode(encoded_message).unwrap(), decoded_message); /// ``` pub fn decode>(encoded_str: T) -> Result { Decoder::new().decode(encoded_str) } rfc2047-decoder-1.0.6/src/parser.rs000064400000000000000000000122461046102023000147600ustar 00000000000000use charset::Charset; use std::{convert::TryFrom, result}; use crate::lexer::{encoded_word, Token, Tokens}; /// All errors which the parser can throw. #[derive(thiserror::Error, Debug, Clone, PartialEq)] pub enum Error { #[error("cannot parse encoding: encoding is bigger than a char")] ParseEncodingTooBigError, #[error("cannot parse encoding: encoding is empty")] ParseEncodingEmptyError, #[error("cannot parse encoding {0}: B or Q is expected")] ParseEncodingError(char), } type Result = result::Result; pub type ClearText = Vec; pub type ParsedEncodedWords = Vec; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Encoding { B, Q, } impl Encoding { pub const B_CHAR: char = 'b'; pub const Q_CHAR: char = 'q'; pub const MAX_LENGTH: usize = 1; } impl TryFrom> for Encoding { type Error = Error; fn try_from(token: Vec) -> Result { if token.len() > Self::MAX_LENGTH { return Err(Error::ParseEncodingTooBigError); } let encoding = token.first().ok_or(Error::ParseEncodingEmptyError)?; let encoding = *encoding as char; match encoding.to_ascii_lowercase() { Encoding::Q_CHAR => Ok(Self::Q), Encoding::B_CHAR => Ok(Self::B), _ => Err(Error::ParseEncodingError(encoding)), } } } #[derive(Debug, Clone, PartialEq, Hash)] pub enum ParsedEncodedWord { ClearText(ClearText), EncodedWord { charset: Option, encoding: Encoding, encoded_text: Vec, }, } impl ParsedEncodedWord { pub fn convert_encoded_word(encoded_word: encoded_word::EncodedWord) -> Result { let encoding = Encoding::try_from(encoded_word.encoding)?; let charset = Charset::for_label(&encoded_word.charset); Ok(Self::EncodedWord { charset, encoding, encoded_text: encoded_word.encoded_text, }) } } pub fn run(tokens: Tokens) -> Result { let 
parsed_encoded_words = convert_tokens_to_encoded_words(tokens)?; Ok(parsed_encoded_words) } fn convert_tokens_to_encoded_words(tokens: Tokens) -> Result { tokens .into_iter() .map(|token: Token| match token { Token::ClearText(clear_text) => Ok(ParsedEncodedWord::ClearText(clear_text)), Token::EncodedWord(encoded_word) => { ParsedEncodedWord::convert_encoded_word(encoded_word) } }) .collect() } #[cfg(test)] mod tests { use charset::Charset; use crate::{ lexer, parser::{self, Encoding, ParsedEncodedWord}, Decoder, }; /// Example taken from: /// https://datatracker.ietf.org/doc/html/rfc2047#section-8 /// /// `From` field #[test] fn test_parse1() { let message = "=?US-ASCII?Q?Keith_Moore?=".as_bytes(); let tokens = lexer::run(&message, Decoder::new()).unwrap(); let parsed = parser::run(tokens).unwrap(); let expected = vec![ParsedEncodedWord::EncodedWord { charset: Charset::for_label("US-ASCII".as_bytes()), encoding: Encoding::Q, encoded_text: "Keith_Moore".as_bytes().to_vec(), }]; assert_eq!(parsed, expected); } /// Example taken from: /// https://datatracker.ietf.org/doc/html/rfc2047#section-8 /// /// `To` field #[test] fn test_parse2() { let message = "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=".as_bytes(); let tokens = lexer::run(&message, Decoder::new()).unwrap(); let parsed = parser::run(tokens).unwrap(); let expected = vec![ParsedEncodedWord::EncodedWord { charset: Charset::for_label("ISO-8859-1".as_bytes()), encoding: Encoding::Q, encoded_text: "Keld_J=F8rn_Simonsen".as_bytes().to_vec(), }]; assert_eq!(parsed, expected); } /// Example taken from: /// https://datatracker.ietf.org/doc/html/rfc2047#section-8 /// /// `CC` field #[test] fn test_parse3() { let message = "=?ISO-8859-1?Q?Andr=E9?=".as_bytes(); let tokens = lexer::run(&message, Decoder::new()).unwrap(); let parsed = parser::run(tokens).unwrap(); let expected = vec![ParsedEncodedWord::EncodedWord { charset: Charset::for_label("ISO-8859-1".as_bytes()), encoding: Encoding::Q, encoded_text: 
"Andr=E9".as_bytes().to_vec(), }]; assert_eq!(parsed, expected); } /// Example taken from: /// https://datatracker.ietf.org/doc/html/rfc2047#section-8 /// /// `Subject` field #[test] fn test_parse4() { let message = "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=".as_bytes(); let tokens = lexer::run(&message, Decoder::new()).unwrap(); let parsed = parser::run(tokens).unwrap(); let expected = vec![ParsedEncodedWord::EncodedWord { charset: Charset::for_label("ISO-8859-1".as_bytes()), encoding: Encoding::B, encoded_text: "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=".as_bytes().to_vec(), }]; assert_eq!(parsed, expected); } }