urlencoding-2.1.3/.cargo_vcs_info.json0000644000000001360000000000100133500ustar { "git": { "sha1": "3cc277fc4e577a0bef99294d7ab9bc85232f0730" }, "path_in_vcs": "" }urlencoding-2.1.3/.gitignore000064400000000000000000000000221046102023000141220ustar 00000000000000target Cargo.lock urlencoding-2.1.3/Cargo.toml0000644000000021130000000000100113430ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "urlencoding" version = "2.1.3" authors = [ "Kornel ", "Bertram Truong ", ] description = "A Rust library for doing URL percentage encoding." homepage = "https://lib.rs/urlencoding" readme = "README.md" keywords = [ "url", "percent", "escape", "urlencode", "urldecode", ] categories = [ "encoding", "web-programming", ] license = "MIT" repository = "https://github.com/kornelski/rust_urlencoding" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] [badges.maintenance] status = "looking-for-maintainer" urlencoding-2.1.3/Cargo.toml.orig0000644000000011210000000000100123000ustar [package] name = "urlencoding" version = "2.1.3" authors = ["Kornel ", "Bertram Truong "] categories = ["encoding", "web-programming"] description = "A Rust library for doing URL percentage encoding." homepage = "https://lib.rs/urlencoding" keywords = ["url", "percent", "escape", "urlencode", "urldecode"] license = "MIT" readme = "README.md" repository = "https://github.com/kornelski/rust_urlencoding" edition = "2021" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] [badges] maintenance = { status = "looking-for-maintainer" } urlencoding-2.1.3/Cargo.toml.orig000064400000000000000000000011211046102023000150220ustar 00000000000000[package] name = "urlencoding" version = "2.1.3" authors = ["Kornel ", "Bertram Truong "] categories = ["encoding", "web-programming"] description = "A Rust library for doing URL percentage encoding." homepage = "https://lib.rs/urlencoding" keywords = ["url", "percent", "escape", "urlencode", "urldecode"] license = "MIT" readme = "README.md" repository = "https://github.com/kornelski/rust_urlencoding" edition = "2021" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] [badges] maintenance = { status = "looking-for-maintainer" } urlencoding-2.1.3/LICENSE000064400000000000000000000020601046102023000131430ustar 00000000000000© 2016 Bertram Truong © 2021 Kornel Lesiński Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. urlencoding-2.1.3/README.md000064400000000000000000000025011046102023000134150ustar 00000000000000# urlencoding [![Latest Version](https://img.shields.io/crates/v/urlencoding.svg)](https://lib.rs/crates/urlencoding) A tiny Rust library for doing URL percentage encoding and decoding. It percent-encodes everything except alphanumerics and `-`, `_`, `.`, `~`. When decoding `+` is not treated as a space. Error recovery from incomplete percent-escapes follows the [WHATWG URL standard](https://url.spec.whatwg.org/). ## Usage To encode a string, do the following: ```rust use urlencoding::encode; let encoded = encode("This string will be URL encoded."); println!("{}", encoded); // This%20string%20will%20be%20URL%20encoded. ``` To decode a string, it's only slightly different: ```rust use urlencoding::decode; let decoded = decode("%F0%9F%91%BE%20Exterminate%21")?; println!("{}", decoded); // 👾 Exterminate! ``` To decode allowing arbitrary bytes and invalid UTF-8: ```rust use urlencoding::decode_binary; let binary = decode_binary(b"%F1%F2%F3%C0%C1%C2"); let decoded = String::from_utf8_lossy(&binary); ``` This library returns [`Cow`](https://doc.rust-lang.org/stable/std/borrow/enum.Cow.html) to avoid allocating when decoding/encoding is not needed. Call `.into_owned()` on the `Cow` to get a `Vec` or `String`. ## License This project is licensed under the MIT license. For more information see the `LICENSE` file. urlencoding-2.1.3/src/dec.rs000064400000000000000000000066761046102023000140470ustar 00000000000000use std::borrow::Cow; use std::string::FromUtf8Error; #[inline] pub(crate) fn from_hex_digit(digit: u8) -> Option { match digit { b'0'..=b'9' => Some(digit - b'0'), b'A'..=b'F' => Some(digit - b'A' + 10), b'a'..=b'f' => Some(digit - b'a' + 10), _ => None, } } /// Decode percent-encoded string assuming UTF-8 encoding. /// /// If you need a `String`, call `.into_owned()` (not `.to_owned()`). /// /// Unencoded `+` is preserved literally, and _not_ changed to a space. pub fn decode(data: &str) -> Result, FromUtf8Error> { match decode_binary(data.as_bytes()) { Cow::Borrowed(_) => Ok(Cow::Borrowed(data)), Cow::Owned(s) => Ok(Cow::Owned(String::from_utf8(s)?)), } } /// Decode percent-encoded string as binary data, in any encoding. /// /// Unencoded `+` is preserved literally, and _not_ changed to a space. pub fn decode_binary(data: &[u8]) -> Cow<[u8]> { let offset = data.iter().take_while(|&&c| c != b'%').count(); if offset >= data.len() { return Cow::Borrowed(data) } let mut decoded: Vec = Vec::with_capacity(data.len()); let mut out = NeverRealloc(&mut decoded); let (ascii, mut data) = data.split_at(offset); out.extend_from_slice(ascii); loop { let mut parts = data.splitn(2, |&c| c == b'%'); // first the decoded non-% part let non_escaped_part = parts.next().unwrap(); let rest = parts.next(); if rest.is_none() && out.0.is_empty() { // if empty there were no '%' in the string return data.into(); } out.extend_from_slice(non_escaped_part); // then decode one %xx match rest { Some(rest) => match rest.get(0..2) { Some(&[first, second]) => match from_hex_digit(first) { Some(first_val) => match from_hex_digit(second) { Some(second_val) => { out.push((first_val << 4) | second_val); data = &rest[2..]; }, None => { out.extend_from_slice(&[b'%', first]); data = &rest[1..]; }, }, None => { out.push(b'%'); data = rest; }, }, _ => { // too short out.push(b'%'); out.extend_from_slice(rest); break; }, }, None => break, } } Cow::Owned(decoded) } struct NeverRealloc<'a, T>(pub &'a mut Vec); impl NeverRealloc<'_, T> { #[inline] pub fn push(&mut self, val: T) { // these branches only exist to remove redundant reallocation code // (the capacity is always sufficient) if self.0.len() != self.0.capacity() { self.0.push(val); } } #[inline] pub fn extend_from_slice(&mut self, val: &[T]) where T: Clone { if self.0.capacity() - self.0.len() >= val.len() { self.0.extend_from_slice(val); } } } #[test] fn dec_borrows() { assert!(matches!(decode("hello"), Ok(Cow::Borrowed("hello")))); assert!(matches!(decode("hello%20"), Ok(Cow::Owned(s)) if s == "hello ")); assert!(matches!(decode("%20hello"), Ok(Cow::Owned(s)) if s == " hello")); } urlencoding-2.1.3/src/enc.rs000064400000000000000000000102431046102023000140420ustar 00000000000000use std::borrow::Cow; use std::fmt; use std::io; use std::str; /// Wrapper type that implements `Display`. Encodes on the fly, without allocating. /// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding. /// /// ```rust /// use urlencoding::Encoded; /// format!("{}", Encoded("hello!")); /// ``` #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] #[repr(transparent)] pub struct Encoded(pub Str); impl> Encoded { /// Long way of writing `Encoded(data)` /// /// Takes any string-like type or a slice of bytes, either owned or borrowed. #[inline(always)] pub fn new(string: Str) -> Self { Self(string) } #[inline(always)] pub fn to_str(&self) -> Cow { encode_binary(self.0.as_ref()) } /// Perform urlencoding to a string #[inline] #[allow(clippy::inherent_to_string_shadow_display)] pub fn to_string(&self) -> String { self.to_str().into_owned() } /// Perform urlencoding into a writer #[inline] pub fn write(&self, writer: &mut W) -> io::Result<()> { encode_into(self.0.as_ref(), false, |s| writer.write_all(s.as_bytes()))?; Ok(()) } /// Perform urlencoding into a string #[inline] pub fn append_to(&self, string: &mut String) { append_string(self.0.as_ref(), string, false); } } impl<'a> Encoded<&'a str> { /// Same as new, but hints a more specific type, so you can avoid errors about `AsRef<[u8]>` not implemented /// on references-to-references. #[inline(always)] pub fn str(string: &'a str) -> Self { Self(string) } } impl> fmt::Display for Encoded { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { encode_into(self.0.as_ref(), false, |s| f.write_str(s))?; Ok(()) } } /// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding. /// /// Call `.into_owned()` if you need a `String` #[inline(always)] pub fn encode(data: &str) -> Cow { encode_binary(data.as_bytes()) } /// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. #[inline] pub fn encode_binary(data: &[u8]) -> Cow { // add maybe extra capacity, but try not to exceed allocator's bucket size let mut escaped = String::with_capacity(data.len() | 15); let unmodified = append_string(data, &mut escaped, true); if unmodified { return Cow::Borrowed(unsafe { // encode_into has checked it's ASCII str::from_utf8_unchecked(data) }); } Cow::Owned(escaped) } fn append_string(data: &[u8], escaped: &mut String, may_skip: bool) -> bool { encode_into(data, may_skip, |s| { escaped.push_str(s); Ok::<_, std::convert::Infallible>(()) }).unwrap() } fn encode_into(mut data: &[u8], may_skip_write: bool, mut push_str: impl FnMut(&str) -> Result<(), E>) -> Result { let mut pushed = false; loop { // Fast path to skip over safe chars at the beginning of the remaining string let ascii_len = data.iter() .take_while(|&&c| matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~')).count(); let (safe, rest) = if ascii_len >= data.len() { if !pushed && may_skip_write { return Ok(true); } (data, &[][..]) // redundatnt to optimize out a panic in split_at } else { data.split_at(ascii_len) }; pushed = true; if !safe.is_empty() { push_str(unsafe { str::from_utf8_unchecked(safe) })?; } if rest.is_empty() { break; } match rest.split_first() { Some((byte, rest)) => { let enc = &[b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)]; push_str(unsafe { str::from_utf8_unchecked(enc) })?; data = rest; } None => break, }; } Ok(false) } #[inline] fn to_hex_digit(digit: u8) -> u8 { match digit { 0..=9 => b'0' + digit, 10..=255 => b'A' - 10 + digit, } } urlencoding-2.1.3/src/lib.rs000064400000000000000000000105051046102023000140440ustar 00000000000000//! To encode a string, do the following: //! //! ```rust //! use urlencoding::encode; //! //! let encoded = encode("This string will be URL encoded."); //! println!("{}", encoded); //! // This%20string%20will%20be%20URL%20encoded. //! ``` //! //! To decode a string, it's only slightly different: //! //! ```rust //! use urlencoding::decode; //! //! let decoded = decode("%F0%9F%91%BE%20Exterminate%21").expect("UTF-8"); //! println!("{}", decoded); //! // 👾 Exterminate! //! ``` //! //! To decode allowing arbitrary bytes and invalid UTF-8: //! //! ```rust //! use urlencoding::decode_binary; //! //! let binary = decode_binary(b"%F1%F2%F3%C0%C1%C2"); //! let decoded = String::from_utf8_lossy(&binary); //! ``` //! //! This library returns [`Cow`](https://doc.rust-lang.org/stable/std/borrow/enum.Cow.html) to avoid allocating when decoding/encoding is not needed. Call `.into_owned()` on the `Cow` to get a `Vec` or `String`. mod enc; pub use enc::encode; pub use enc::encode_binary; pub use enc::Encoded; mod dec; pub use dec::decode; pub use dec::decode_binary; #[cfg(test)] mod tests { use super::*; use crate::dec::from_hex_digit; #[test] fn it_encodes_successfully() { let expected = "this%20that"; assert_eq!(expected, encode("this that")); } #[test] fn it_encodes_successfully_emoji() { let emoji_string = "👾 Exterminate!"; let expected = "%F0%9F%91%BE%20Exterminate%21"; assert_eq!(expected, encode(emoji_string)); } #[test] fn it_decodes_successfully() { let expected = String::from("this that"); let encoded = "this%20that"; assert_eq!(expected, decode(encoded).unwrap()); } #[test] fn it_decodes_successfully_emoji() { let expected = String::from("👾 Exterminate!"); let encoded = "%F0%9F%91%BE%20Exterminate%21"; assert_eq!(expected, decode(encoded).unwrap()); } #[test] fn it_decodes_unsuccessfully_emoji() { let bad_encoded_string = "👾 Exterminate!"; assert_eq!(bad_encoded_string, decode(bad_encoded_string).unwrap()); } #[test] fn misc() { assert_eq!(3, from_hex_digit(b'3').unwrap()); assert_eq!(10, from_hex_digit(b'a').unwrap()); assert_eq!(15, from_hex_digit(b'F').unwrap()); assert_eq!(None, from_hex_digit(b'G')); assert_eq!(None, from_hex_digit(9)); assert_eq!("pureascii", encode("pureascii")); assert_eq!("pureascii", decode("pureascii").unwrap()); assert_eq!("", encode("")); assert_eq!("", decode("").unwrap()); assert_eq!("%26a%25b%21c.d%3Fe", encode("&a%b!c.d?e")); assert_eq!("%00", encode("\0")); assert_eq!("%00x", encode("\0x")); assert_eq!("x%00", encode("x\0")); assert_eq!("x%00x", encode("x\0x")); assert_eq!("aa%00%00bb", encode("aa\0\0bb")); assert_eq!("\0", decode("\0").unwrap()); assert!(decode("%F0%0F%91%BE%20Hello%21").is_err()); assert_eq!("this that", decode("this%20that").unwrap()); assert_eq!("this that%", decode("this%20that%").unwrap()); assert_eq!("this that%2", decode("this%20that%2").unwrap()); assert_eq!("this that%%", decode("this%20that%%").unwrap()); assert_eq!("this that%2%", decode("this%20that%2%").unwrap()); assert_eq!("this%2that", decode("this%2that").unwrap()); assert_eq!("this%%2that", decode("this%%2that").unwrap()); assert_eq!("this%2x&that", decode("this%2x%26that").unwrap()); // assert_eq!("this%2&that", decode("this%2%26that").unwrap()); } #[test] fn lazy_writer() { let mut s = "he".to_string(); Encoded("llo").append_to(&mut s); assert_eq!("hello", s); assert_eq!("hello", Encoded("hello").to_string()); assert_eq!("hello", format!("{}", Encoded("hello"))); assert_eq!("hello", Encoded("hello").to_str()); assert!(matches!(Encoded("hello").to_str(), std::borrow::Cow::Borrowed(_))); } #[test] fn whatwg_examples() { assert_eq!(*decode_binary(b"%25%s%1G"), b"%%s%1G"[..]); assert_eq!(*decode_binary("‽%25%2E".as_bytes()), b"\xE2\x80\xBD\x25\x2E"[..]); assert_eq!(encode("≡"), "%E2%89%A1"); assert_eq!(encode("‽"), "%E2%80%BD"); assert_eq!(encode("Say what‽"), "Say%20what%E2%80%BD"); } }