compact_str-0.8.0/.cargo_vcs_info.json0000644000000001510000000000100133540ustar { "git": { "sha1": "5c8b856ab3d4596feec508f332dfbb3cbe0fa13a" }, "path_in_vcs": "compact_str" }compact_str-0.8.0/Cargo.toml0000644000000064730000000000100113670ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "compact_str" version = "0.8.0" authors = ["Parker Timmerman "] description = "A memory efficient string type that transparently stores strings on the stack, when possible" homepage = "https://github.com/ParkMyCar/compact_str" readme = "README.md" keywords = [ "string", "compact", "small", "memory", "mutable", ] categories = [ "encoding", "parsing", "memory-management", "text-processing", ] license = "MIT" repository = "https://github.com/ParkMyCar/compact_str" [package.metadata.docs.rs] all-features = true rustdoc-args = [ "--cfg", "docsrs", ] [dependencies.arbitrary] version = "1" optional = true default-features = false [dependencies.borsh] version = "1" optional = true [dependencies.bytes] version = "1" optional = true [dependencies.castaway] version = "0.2.3" features = ["alloc"] default-features = false [dependencies.cfg-if] version = "1" [dependencies.diesel] version = "2" optional = true default-features = false [dependencies.itoa] version = "1" [dependencies.markup] version = "0.13" optional = true default-features = false [dependencies.proptest] version = "1" features = ["std"] optional = true default-features = false [dependencies.quickcheck] version = "1" optional = true default-features = false [dependencies.rkyv] 
version = "0.7" features = ["size_32"] optional = true default-features = false [dependencies.rustversion] version = "1" [dependencies.ryu] version = "1" [dependencies.serde] version = "1" features = [ "derive", "alloc", ] optional = true default-features = false [dependencies.smallvec] version = "1" features = ["union"] optional = true [dependencies.sqlx] version = "0.7" optional = true default-features = false [dependencies.static_assertions] version = "1" [dev-dependencies.cfg-if] version = "1" [dev-dependencies.proptest] version = "1" features = ["std"] default-features = false [dev-dependencies.quickcheck] version = "1" default-features = false [dev-dependencies.quickcheck_macros] version = "1" [dev-dependencies.rayon] version = "1" [dev-dependencies.rkyv] version = "0.7" features = [ "alloc", "size_32", ] default-features = false [dev-dependencies.serde] version = "1" features = ["derive"] [dev-dependencies.serde_json] version = "1" [dev-dependencies.test-case] version = "3" [dev-dependencies.test-strategy] version = "0.3" [features] arbitrary = ["dep:arbitrary"] borsh = ["dep:borsh"] bytes = ["dep:bytes"] default = ["std"] diesel = ["dep:diesel"] markup = ["dep:markup"] proptest = ["dep:proptest"] quickcheck = ["dep:quickcheck"] rkyv = ["dep:rkyv"] serde = ["dep:serde"] smallvec = ["dep:smallvec"] sqlx = [ "dep:sqlx", "std", ] sqlx-mysql = [ "sqlx", "sqlx/mysql", ] sqlx-postgres = [ "sqlx", "sqlx/postgres", ] sqlx-sqlite = [ "sqlx", "sqlx/sqlite", ] std = [] compact_str-0.8.0/Cargo.toml.orig000064400000000000000000000045511046102023000150430ustar 00000000000000[package] name = "compact_str" description = "A memory efficient string type that transparently stores strings on the stack, when possible" version = "0.8.0" authors = ["Parker Timmerman "] edition = "2021" license = "MIT" homepage = "https://github.com/ParkMyCar/compact_str" repository = "https://github.com/ParkMyCar/compact_str" readme = "../README.md" keywords = ["string", "compact", "small", 
"memory", "mutable"] categories = ["encoding", "parsing", "memory-management", "text-processing"] [features] default = ["std"] std = [] arbitrary = ["dep:arbitrary"] borsh = ["dep:borsh"] bytes = ["dep:bytes"] diesel = ["dep:diesel"] markup = ["dep:markup"] proptest = ["dep:proptest"] quickcheck = ["dep:quickcheck"] rkyv = ["dep:rkyv"] serde = ["dep:serde"] smallvec = ["dep:smallvec"] sqlx = ["dep:sqlx", "std"] sqlx-mysql = ["sqlx", "sqlx/mysql"] sqlx-postgres = ["sqlx", "sqlx/postgres"] sqlx-sqlite = ["sqlx", "sqlx/sqlite"] [dependencies] arbitrary = { version = "1", optional = true, default-features = false } borsh = { version = "1", optional = true } bytes = { version = "1", optional = true } diesel = { version = "2", optional = true, default-features = false } markup = { version = "0.13", optional = true, default-features = false } proptest = { version = "1", optional = true, default-features = false, features = ["std"] } quickcheck = { version = "1", optional = true, default-features = false } rkyv = { version = "0.7", optional = true, default-features = false, features = ["size_32"] } serde = { version = "1", optional = true, default-features = false, features = ["derive", "alloc"] } smallvec = { version = "1", optional = true, features = ["union"] } sqlx = { version = "0.7", optional = true, default-features = false } castaway = { version = "0.2.3", default-features = false, features = ["alloc"] } cfg-if = "1" itoa = "1" rustversion = "1" ryu = "1" static_assertions = "1" [dev-dependencies] cfg-if = "1" proptest = { version = "1", default-features = false, features = ["std"] } quickcheck = { version = "1", default-features = false } quickcheck_macros = "1" rayon = "1" rkyv = { version = "0.7", default-features = false, features = ["alloc", "size_32"] } serde = { version = "1", features = ["derive"] } serde_json = "1" test-case = "3" test-strategy = "0.3" [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] 
compact_str-0.8.0/LICENSE000064400000000000000000000020611046102023000131530ustar 00000000000000MIT License Copyright (c) 2021 Parker Timmerman Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. compact_str-0.8.0/README.md000064400000000000000000000301331046102023000134260ustar 00000000000000

compact_str

A memory efficient string type that can store up to 24* bytes on the stack.

version on crates.io Minimum supported Rust Version: 1.60 mit license
Continuous Integration Status Cross Platform Status Minimum Supported Rust Version Status Clippy Status

* 12 bytes for 32-bit architectures


### About A `CompactString` is a more memory efficient string type, that can store smaller strings on the stack, and transparently stores longer strings on the heap (aka a small string optimization). It can mostly be used as a drop in replacement for `String` and is particularly useful in parsing, deserializing, or any other application where you may have smaller strings. ### Properties A `CompactString` specifically has the following properties: * `size_of::<CompactString>() == size_of::<String>()` * Stores up to 24 bytes on the stack * 12 bytes if running on a 32 bit architecture * Strings longer than 24 bytes are stored on the heap * `Clone` is `O(n)` * `From<String>` or `From<Box<str>>` re-uses underlying buffer * Eagerly inlines small strings * `O(1)` creation from `&'static str` with `CompactString::const_new` * Heap based string grows at a rate of 1.5x * The std library `String` grows at a rate of 2x * Space optimized for `Option<_>` * `size_of::<CompactString>() == size_of::<Option<CompactString>>()` * Uses [branchless instructions](https://en.algorithmica.org/hpc/pipelining/branchless/) for string accesses * Supports `no_std` environments ### Traits This crate exposes two traits, `ToCompactString` and `CompactStringExt`. #### `ToCompactString` Provides the `to_compact_string(&self)` method for converting types into a `CompactString`. This trait is automatically implemented for all types that are `std::fmt::Display`, with specialized higher performance impls for: * `u8`, `u16`, `u32`, `u64`, `usize`, `u128` * `i8`, `i16`, `i32`, `i64`, `isize`, `i128` * `f32`, `f64` * `bool`, `char` * `NonZeroU*`, `NonZeroI*` * `String`, `CompactString` #### `CompactStringExt` Provides two methods `join_compact(separator: impl AsRef<str>)` and `concat_compact()`. This trait is automatically implemented for all types that can be converted into an iterator and yield types that `impl AsRef<str>`. This allows you to join `Vec`s, slices, and any other collection to form `CompactString`s. 
### Macros This crate exposes one macro `format_compact!` that can be used to create `CompactString`s from arguments, just as you can create `String`s with the `std::format!` macro. ### Features `compact_str` has the following optional features: * `serde`, which implements [`Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html) and [`Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) from the popular [`serde`](https://docs.rs/serde/1/serde/) crate, for `CompactString` * `bytes`, which provides two methods `from_utf8_buf<B: Buf>(buf: &mut B)` and `from_utf8_buf_unchecked<B: Buf>(buf: &mut B)`, which allow for the creation of a `CompactString` from a [`bytes::Buf`](https://docs.rs/bytes/1/bytes/trait.Buf.html) * `markup`, which implements the [`Render`](https://docs.rs/markup/0.13/markup/trait.Render.html) trait, so `CompactString`s can be used in templates as HTML escaped strings * `diesel`, which allows using `CompactString`s in [`diesel`](https://diesel.rs/) text columns * `sqlx-mysql` / `sqlx-postgres` / `sqlx-sqlite`, which allow using `CompactString`s in [`sqlx`](https://github.com/launchbadge/sqlx) text columns * `arbitrary`, which implements the [`arbitrary::Arbitrary`](https://docs.rs/arbitrary/1/arbitrary/trait.Arbitrary.html) trait for fuzzing * `proptest`, which implements the [`proptest::arbitrary::Arbitrary`](https://docs.rs/proptest/1/proptest/arbitrary/trait.Arbitrary.html) trait for fuzzing * `quickcheck`, which implements the [`quickcheck::Arbitrary`](https://docs.rs/quickcheck/1/quickcheck/trait.Arbitrary.html) trait for fuzzing * `rkyv`, which implements [`rkyv::Archive`](https://docs.rs/rkyv/0.7/rkyv/trait.Archive.html), [`rkyv::Serialize`](https://docs.rs/rkyv/0.7/rkyv/trait.Serialize.html) and [`rkyv::Deserialize`](https://docs.rs/rkyv/0.7/rkyv/trait.Deserialize.html) for fast zero-copy serialization, interchangeable with serialized `String`s * `smallvec`, which provides the `into_bytes()` method which enables you to convert a `CompactString` into a byte 
vector, using [`smallvec::SmallVec`](https://docs.rs/smallvec/latest/smallvec/struct.SmallVec.html) ### How it works Note: this explanation assumes a 64-bit architecture, for 32-bit architectures generally divide any number by 2. Normally strings are stored on the heap since they're dynamically sized. In Rust a `String` consists of three fields, each of which is the size of a `usize`. For example, its layout is something like the following: `String: [ ptr<8> | len<8> | cap<8> ]` 1. `ptr` is a pointer to a location on the heap that stores the string 2. `len` is the length of the string 3. `cap` is the total capacity of the buffer being pointed to This results in 24 bytes being stored on the stack, 8 bytes for each field. Then the actual string is stored on the heap, usually with additional memory allocated to prevent re-allocating if the string is mutated. The idea of `CompactString` is instead of storing metadata on the stack, just store the string itself. This way for smaller strings we save a bit of memory, and we don't have to heap allocate so it's more performant. A `CompactString` is limited to 24 bytes (aka `size_of::<String>()`) so it won't ever use more memory than a `String` would. The memory layout of a `CompactString` looks something like: `CompactString: [ buffer<23> | len<1> ]` #### Memory Layout Internally a `CompactString` has two variants: 1. **Inline**, a string <= 24 bytes long 2. **Heap** allocated, a string > 24 bytes long We define a discriminant (aka track which variant we are) *within* the last byte, specifically: 1. `0b11011000` (`216`) - indicates **heap** allocated 2. `0b11XXXXXX` - Two leading 1s, indicates **inline**, with the trailing 6 bits used to store the length (`0` through `23`, i.e. byte values `192` through `215`) and the overall memory layout of a `CompactString` is: 1. `heap: { ptr: NonNull<u8>, len: usize, cap: Capacity }` 2. 
`inline: { buffer: [u8; 24] }` Both variants are 24 bytes long. For **heap** allocated strings we use a custom `HeapBuffer` which normally stores the capacity of the string on the stack, but also optionally allows us to store it on the heap. Since we use the last byte to track our discriminant, we only have 7 bytes to store the capacity, or 3 bytes on a 32-bit architecture. 7 bytes allows us to store a value up to `2^56`, aka 64 petabytes, while 3 bytes only allows us to store a value up to `2^24`, aka 16 megabytes. For 64-bit architectures we always inline the capacity, because we can safely assume our strings will never be larger than 64 petabytes, but on 32-bit architectures, when creating or growing a `CompactString`, if the text is larger than 16MB then we move the capacity onto the heap. We handle the capacity in this way for two reasons: 1. Users shouldn't have to pay for what they don't use. Meaning, in the _majority_ of cases the capacity of the buffer could easily fit into 7 or 3 bytes, so the user shouldn't have to pay the memory cost of storing the capacity on the heap, if they don't need to. 2. Allows us to convert `From<String>` in `O(1)` time, by taking the parts of a `String` (i.e. `ptr`, `len`, and `cap`) and using those to create a `CompactString`, without having to do any heap allocations. This is important when using `CompactString` in large codebases where you might have `CompactString` working alongside `String`. For **inline** strings we only have a 24 byte buffer on the stack. This might make you wonder how we can store a 24 byte long string inline. Don't we also need to store the length somewhere? To do this, we utilize the fact that the last byte of our string could only ever have a value in the range `[0, 192)`. 
We know this because all strings in Rust are valid [UTF-8](https://en.wikipedia.org/wiki/UTF-8), and the only valid byte patterns for the last byte of a UTF-8 character (and thus the possible last byte of a string) are `0b0XXXXXXX` aka `[0, 128)` or `0b10XXXXXX` aka `[128, 192)`. This leaves all values in `[192, 255]` as unused in our last byte. Therefore, we can use values in the range of `[192, 215]` to represent a length in the range of `[0, 23]`, and if our last byte has a value `< 192`, we know that's a UTF-8 character, and can interpret the length of our string as `24`. Specifically, the last byte on the stack for a `CompactString` has the following uses: * `[0, 191]` - Is the last byte of a UTF-8 char, the `CompactString` is stored on the stack and implicitly has a length of `24` * `[192, 215]` - Denotes a length in the range of `[0, 23]`, this `CompactString` is stored on the stack. * `216` - Denotes this `CompactString` is stored on the heap * `217` - Denotes this `CompactString` stores a `&'static str`. * `[218, 255]` - Unused, denotes e.g. the `None` variant for `Option<CompactString>` ### Testing Strings and Unicode can be quite messy; on top of that, we're working with things at the bit level. `compact_str` has an _extensive_ test suite comprised of unit testing, property testing, and fuzz testing, to ensure our invariants are upheld. We test across all major OSes (Windows, macOS, and Linux), architectures (64-bit and 32-bit), and endian-ness (big endian and little endian). Fuzz testing is run with `libFuzzer`, `AFL++`, *and* `honggfuzz`, with `AFL++` running on both `x86_64` and `ARMv7` architectures. We test with [`miri`](https://github.com/rust-lang/miri) to catch cases of undefined behavior, and run all tests on every Rust compiler since `v1.60` to ensure support for our minimum supported Rust version (MSRV). 
### `unsafe` code `CompactString` uses a bit of unsafe code because we manually define what variant we are, so unlike an enum, the compiler can't guarantee what value is actually stored. We also have some manually implemented heap data structures, i.e. `HeapBuffer`, and mess with bytes at a bit level, to make the most out of our resources. That being said, uses of unsafe code in this library are constrained to only where *absolutely* necessary, and always documented with `// SAFETY: <reason>`. ### Similar Crates Storing strings on the stack is not a new idea; in fact, there are a few other crates in the Rust ecosystem that do similar things. An incomplete list: 1. [`smol_str`](https://crates.io/crates/smol_str) - Can inline 22 bytes, `Clone` is `O(1)`, doesn't adjust for 32-bit archs 2. [`smartstring`](https://crates.io/crates/smartstring) - Can inline 23 bytes, `Clone` is `O(n)`, is mutable 3. [`kstring`](https://crates.io/crates/kstring) - Can inline 15 or 22 bytes dependent on crate features, `Clone` is `O(1)`, can also store `&'static str`s 4. [`flexstr`](https://crates.io/crates/flexstr) - Can inline 22 bytes, `Clone` is `O(1)`, can also store `&'static str`s
Thanks for readingme! compact_str-0.8.0/src/features/arbitrary.rs000064400000000000000000000026451046102023000171300ustar 00000000000000//! Implements the [`arbitrary::Arbitrary`] trait for [`CompactString`] use arbitrary::{ Arbitrary, Result, Unstructured, }; use crate::CompactString; #[cfg_attr(docsrs, doc(cfg(feature = "arbitrary")))] impl<'a> Arbitrary<'a> for CompactString { fn arbitrary(u: &mut Unstructured<'a>) -> Result { <&str as Arbitrary>::arbitrary(u).map(CompactString::new) } fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { <&str as Arbitrary>::arbitrary_take_rest(u).map(CompactString::new) } #[inline] fn size_hint(depth: usize) -> (usize, Option) { <&str as Arbitrary>::size_hint(depth) } } #[cfg(test)] mod test { use arbitrary::{ Arbitrary, Unstructured, }; use crate::CompactString; #[test] fn arbitrary_sanity() { let mut data = Unstructured::new(&[42; 50]); let compact = CompactString::arbitrary(&mut data).expect("generate a CompactString"); // we don't really care what the content of the CompactString is, just that one's generated assert!(!compact.is_empty()); } #[test] fn arbitrary_inlines_strings() { let mut data = Unstructured::new(&[42; 20]); let compact = CompactString::arbitrary(&mut data).expect("generate a CompactString"); // running this manually, we generate the string "**" assert!(!compact.is_heap_allocated()); } } compact_str-0.8.0/src/features/borsh.rs000064400000000000000000000071631046102023000162460ustar 00000000000000#![cfg_attr(docsrs, doc(cfg(feature = "borsh")))] use alloc::string::String; use alloc::vec::Vec; use core::str; use borsh::io::{ Error, ErrorKind, Read, Result, Write, }; use borsh::{ BorshDeserialize, BorshSerialize, }; use crate::repr::MAX_SIZE; use crate::CompactString; impl BorshSerialize for CompactString { fn serialize(&self, writer: &mut W) -> Result<()> { self.as_str().serialize(writer) } } impl BorshDeserialize for CompactString { fn deserialize_reader(reader: &mut R) -> Result { let len = 
u32::deserialize_reader(&mut *reader)? as usize; if len <= MAX_SIZE { let mut buf = [0u8; MAX_SIZE]; reader.read_exact(&mut buf[..len])?; let s = str::from_utf8(&buf[..len]) .map_err(|err| Error::new(ErrorKind::InvalidData, err))?; Ok(CompactString::from(s)) } else { // We can't just deserialize `Vec` because we have already read the length // TODO: replace with `read_buf` when (if) it stabilizes let buf = vec_from_reader(len, reader)?; let s = String::from_utf8(buf).map_err(|err| Error::new(ErrorKind::InvalidData, err))?; Ok(CompactString::from(s)) } } } // A copy of hidden `u8::vec_from_reader`(https://docs.rs/borsh/1.5.1/src/borsh/de/mod.rs.html#156-184) fn vec_from_reader(len: usize, reader: &mut R) -> Result> { // Avoid OOM by limiting the size of allocation. This makes the read // less efficient (since we need to loop and reallocate) but it protects // us from someone sending us [0xff, 0xff, 0xff, 0xff] and forcing us to // allocate 4GiB of memory. let mut vec = vec![0u8; len.min(1024 * 1024)]; let mut pos = 0; while pos < len { if pos == vec.len() { vec.resize(vec.len().saturating_mul(2).min(len), 0) } // TODO(mina86): Convert this to read_buf once that stabilises. match reader.read(&mut vec.as_mut_slice()[pos..])? 
{ 0 => { return Err(Error::new( ErrorKind::InvalidData, "Unexpected length of input", )) } read => { pos += read; } } } Ok(vec) } #[cfg(test)] mod tests { use alloc::string::String; use test_strategy::proptest; use crate::repr::{ HEAP_MASK, MAX_SIZE, }; use crate::CompactString; fn assert_roundtrip(s: &str) { let bytes_compact = borsh::to_vec(&CompactString::from(s)).unwrap(); let bytes_control = borsh::to_vec(&String::from(s)).unwrap(); assert_eq!(&*bytes_compact, &*bytes_control); let compact: CompactString = borsh::from_slice(&bytes_compact).unwrap(); let control: String = borsh::from_slice(&bytes_control).unwrap(); assert_eq!(compact, s); assert_eq!(control, s); } #[test] fn test_deserialize_invalid_utf8() { let bytes = borsh::to_vec(&[HEAP_MASK; MAX_SIZE] as &[u8]).unwrap(); borsh::from_slice::(&bytes).unwrap_err(); } #[test] fn test_deserialize_unexpected_eof() { let s = core::str::from_utf8(&[b'a'; 55]).unwrap(); let mut bytes = borsh::to_vec(s).unwrap(); bytes.pop(); borsh::from_slice::(&bytes).unwrap_err(); } #[test] fn test_roundtrip() { assert_roundtrip("Hello, ๐ŸŒ!"); } #[cfg_attr(miri, ignore)] #[proptest] fn proptest_roundtrip(s: String) { assert_roundtrip(&s); } } compact_str-0.8.0/src/features/bytes.rs000064400000000000000000000100221046102023000162430ustar 00000000000000use core::str::Utf8Error; use bytes::Buf; use crate::{ CompactString, Repr, }; impl CompactString { /// Converts a buffer of bytes to a [`CompactString`] /// /// # Examples /// ### Basic usage /// ``` /// # use compact_str::CompactString; /// # use std::collections::VecDeque; /// /// // `bytes::Buf` is implemented for `VecDeque` /// let mut sparkle_heart = VecDeque::from(vec![240, 159, 146, 150]); /// // We know these bytes are valid, so we can `.unwrap()` or `.expect(...)` here /// let compact_str = CompactString::from_utf8_buf(&mut sparkle_heart).expect("valid utf-8"); /// /// assert_eq!(compact_str, "๐Ÿ’–"); /// ``` /// /// ### With invalid/non-UTF8 bytes /// ``` /// # use 
compact_str::CompactString; /// # use std::io; /// /// // `bytes::Buf` is implemented for `std::io::Cursor<&[u8]>` /// let mut invalid = io::Cursor::new(&[0, 159]); /// /// // The provided buffer is invalid, so trying to create a `CompactString` will fail /// assert!(CompactString::from_utf8_buf(&mut invalid).is_err()); /// ``` #[cfg_attr(docsrs, doc(cfg(feature = "bytes")))] pub fn from_utf8_buf(buf: &mut B) -> Result { Repr::from_utf8_buf(buf).map(CompactString) } /// Converts a buffer of bytes to a [`CompactString`], without checking that the provided buffer /// is valid UTF-8. /// /// # Safety /// This function is unsafe because it does not check that the provided bytes are valid UTF-8. /// If this constraint is violated, it may cause memory safety issues with future uses of the /// `CompactString`, as the rest of the library assumes that `CompactString`s are valid UTF-8. /// /// # Examples /// ``` /// # use compact_str::CompactString; /// # use std::io; /// /// let word = "hello world"; /// // `bytes::Buf` is implemented for `std::io::Cursor<&[u8]>` /// let mut buffer = io::Cursor::new(word.as_bytes()); /// let compact_str = unsafe { CompactString::from_utf8_buf_unchecked(&mut buffer) }; /// /// assert_eq!(compact_str, word); /// ``` #[cfg_attr(docsrs, doc(cfg(feature = "bytes")))] pub unsafe fn from_utf8_buf_unchecked(buf: &mut B) -> Self { let repr = Repr::from_utf8_buf_unchecked(buf); CompactString(repr) } } #[cfg(test)] mod test { use alloc::string::String; use alloc::vec::Vec; #[cfg(feature = "std")] use std::io::Cursor; use proptest::prelude::*; use test_strategy::proptest; use crate::tests::{ rand_bytes, rand_unicode, }; use crate::CompactString; const MAX_SIZE: usize = core::mem::size_of::(); #[proptest] #[cfg_attr(miri, ignore)] fn proptest_buffers_roundtrip(#[strategy(rand_unicode())] word: String) { let mut buf = Cursor::new(word.as_bytes()); let compact = CompactString::from_utf8_buf(&mut buf).unwrap(); proptest::prop_assert_eq!(&word, &compact); } 
#[proptest] #[cfg_attr(miri, ignore)] fn proptest_allocated_properly(#[strategy(rand_unicode())] word: String) { let mut buf = Cursor::new(word.as_bytes()); let compact = CompactString::from_utf8_buf(&mut buf).unwrap(); if word.len() <= MAX_SIZE { proptest::prop_assert!(!compact.is_heap_allocated()) } else { proptest::prop_assert!(compact.is_heap_allocated()) } } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_only_accept_valid_utf8(#[strategy(rand_bytes())] bytes: Vec) { let mut buf = Cursor::new(bytes.as_slice()); let compact_result = CompactString::from_utf8_buf(&mut buf); let str_result = core::str::from_utf8(bytes.as_slice()); match (compact_result, str_result) { (Ok(c), Ok(s)) => prop_assert_eq!(c, s), (Err(c_err), Err(s_err)) => prop_assert_eq!(c_err, s_err), _ => panic!("CompactString and core::str read UTF-8 differently?"), } } } compact_str-0.8.0/src/features/diesel.rs000064400000000000000000000026111046102023000163670ustar 00000000000000#![cfg_attr(docsrs, doc(cfg(feature = "diesel")))] // Copied and adapted from // use diesel::{ backend, deserialize, expression, serialize, sql_types, }; use crate::CompactString; #[derive(expression::AsExpression, deserialize::FromSqlRow)] #[diesel(foreign_derive)] #[diesel(sql_type = sql_types::Text)] #[allow(dead_code)] struct CompactStringProxy(CompactString); impl deserialize::FromSql for CompactString where DB: backend::Backend, *const str: deserialize::FromSql, { fn from_sql(bytes: DB::RawValue<'_>) -> deserialize::Result { let str_ptr = <*const str as deserialize::FromSql>::from_sql(bytes)?; if !str_ptr.is_null() { // SAFETY: We just checked that `str_ptr` is not null, and `from_sql()` should return // a valid pointer to an `str`. 
let string = unsafe { &*str_ptr }; Ok(string.into()) } else { Ok(CompactString::new("")) } } } impl serialize::ToSql for CompactString where DB: backend::Backend, str: serialize::ToSql, { fn to_sql<'b>(&'b self, out: &mut serialize::Output<'b, '_, DB>) -> serialize::Result { self.as_str().to_sql(out) } } compact_str-0.8.0/src/features/markup.rs000064400000000000000000000016321046102023000164230ustar 00000000000000#[cfg(test)] use alloc::string::String; use markup::Render; use crate::CompactString; #[cfg_attr(docsrs, doc(cfg(feature = "markup")))] impl Render for CompactString { #[inline] fn render(&self, writer: &mut impl core::fmt::Write) -> core::fmt::Result { self.as_str().render(writer) } } #[cfg(test)] #[test] fn test_markup() { const TEXT: &str = ""; markup::define!(Template(msg: M) { textarea { @msg } }); let compact = Template { msg: CompactString::from(TEXT), }; let control = Template { msg: String::from(TEXT), }; assert_eq!( compact.to_string(), "", ); assert_eq!( control.to_string(), "", ); } compact_str-0.8.0/src/features/mod.rs000064400000000000000000000010301046102023000156730ustar 00000000000000//! A module that contains the implementations for optional features. For example `serde` support #[cfg(feature = "arbitrary")] mod arbitrary; #[cfg(feature = "borsh")] mod borsh; #[cfg(feature = "bytes")] mod bytes; #[cfg(feature = "diesel")] mod diesel; #[cfg(feature = "markup")] mod markup; #[cfg(feature = "proptest")] mod proptest; #[cfg(feature = "quickcheck")] mod quickcheck; #[cfg(feature = "rkyv")] mod rkyv; #[cfg(feature = "serde")] mod serde; #[cfg(feature = "smallvec")] mod smallvec; #[cfg(feature = "sqlx")] mod sqlx; compact_str-0.8.0/src/features/proptest.rs000064400000000000000000000027661046102023000170150ustar 00000000000000//! 
Implements the [`proptest::arbitrary::Arbitrary`] trait for [`CompactString`] use alloc::string::String; use proptest::arbitrary::StrategyFor; use proptest::prelude::*; use proptest::strategy::MapInto; use proptest::string::StringParam; use crate::CompactString; #[cfg_attr(docsrs, doc(cfg(feature = "proptest")))] impl Arbitrary for CompactString { type Parameters = StringParam; type Strategy = MapInto, Self>; fn arbitrary_with(a: Self::Parameters) -> Self::Strategy { any_with::(a).prop_map_into() } } #[cfg(test)] mod test { use alloc::string::String; use proptest::prelude::*; use crate::CompactString; const MAX_SIZE: usize = core::mem::size_of::(); proptest! { #[test] #[cfg_attr(miri, ignore)] fn proptest_sanity(compact: CompactString) { let control: String = compact.clone().into(); assert_eq!(control, compact); } /// We rely on [`proptest`]'s `String` strategy for generating a `CompactString`. When /// converting from a `String` into a `CompactString`, if it's short enough we should /// eagerly inline strings #[test] #[cfg_attr(miri, ignore)] fn proptest_does_not_inline_strings(compact: CompactString) { if compact.len() <= MAX_SIZE { assert!(!compact.is_heap_allocated()); } else { assert!(compact.is_heap_allocated()); } } } } compact_str-0.8.0/src/features/quickcheck.rs000064400000000000000000000030411046102023000172320ustar 00000000000000//! Implements the [`quickcheck::Arbitrary`] trait for [`CompactString`] use alloc::boxed::Box; use alloc::vec::Vec; use quickcheck::{ Arbitrary, Gen, }; use crate::CompactString; #[cfg_attr(docsrs, doc(cfg(feature = "quickcheck")))] impl Arbitrary for CompactString { fn arbitrary(g: &mut Gen) -> CompactString { let max = g.size(); // pick some value in [0, max] let x = usize::arbitrary(g); let ratio = (x as f64) / (usize::MAX as f64); let size = (ratio * max as f64) as usize; (0..size).map(|_| char::arbitrary(g)).collect() } fn shrink(&self) -> Box> { // Shrink a string by shrinking a vector of its characters. 
let chars: Vec = self.chars().collect(); Box::new( chars .shrink() .map(|x| x.into_iter().collect::()), ) } } #[cfg(test)] mod test { use alloc::string::String; use quickcheck_macros::quickcheck; use crate::CompactString; #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_sanity(compact: CompactString) { let control: String = compact.clone().into(); assert_eq!(control, compact); } #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_inlines_strings(compact: CompactString) { if compact.len() <= core::mem::size_of::() { assert!(!compact.is_heap_allocated()) } else { assert!(compact.is_heap_allocated()) } } } compact_str-0.8.0/src/features/rkyv.rs000064400000000000000000000073321046102023000161220ustar 00000000000000#![cfg_attr(docsrs, doc(cfg(feature = "rkyv")))] use rkyv::string::{ ArchivedString, StringResolver, }; use rkyv::{ Archive, Deserialize, DeserializeUnsized, Fallible, Serialize, SerializeUnsized, }; use crate::CompactString; impl Archive for CompactString { type Archived = ArchivedString; type Resolver = StringResolver; #[inline] unsafe fn resolve(&self, pos: usize, resolver: Self::Resolver, out: *mut Self::Archived) { ArchivedString::resolve_from_str(self.as_str(), pos, resolver, out); } } impl Serialize for CompactString where str: SerializeUnsized, { #[inline] fn serialize(&self, serializer: &mut S) -> Result { ArchivedString::serialize_from_str(self.as_str(), serializer) } } impl Deserialize for ArchivedString where str: DeserializeUnsized, { #[inline] fn deserialize(&self, _: &mut D) -> Result { Ok(self.as_str().into()) } } impl PartialEq for ArchivedString { #[inline] fn eq(&self, other: &CompactString) -> bool { PartialEq::eq(self.as_str(), other.as_str()) } } impl PartialOrd for ArchivedString { #[inline] fn partial_cmp(&self, other: &CompactString) -> Option { PartialOrd::partial_cmp(self.as_str(), other.as_str()) } } #[cfg(test)] mod tests { use alloc::string::String; use rkyv::Deserialize; use test_strategy::proptest; use 
crate::CompactString; #[cfg_attr(miri, ignore)] // https://github.com/rust-lang/unsafe-code-guidelines/issues/134 #[test] fn test_roundtrip() { const VALUE: &str = "Hello, ๐ŸŒ!"; let bytes_compact = rkyv::to_bytes::<_, 32>(&CompactString::from(VALUE)).unwrap(); let bytes_control = rkyv::to_bytes::<_, 32>(&String::from(VALUE)).unwrap(); assert_eq!(&*bytes_compact, &*bytes_control); let archived = unsafe { rkyv::archived_root::(&bytes_compact) }; let compact: CompactString = archived.deserialize(&mut rkyv::Infallible).unwrap(); let control: String = archived.deserialize(&mut rkyv::Infallible).unwrap(); assert_eq!(archived, VALUE); assert_eq!(compact, VALUE); assert_eq!(control, VALUE); let archived = unsafe { rkyv::archived_root::(&bytes_compact) }; let compact: CompactString = archived.deserialize(&mut rkyv::Infallible).unwrap(); let control: String = archived.deserialize(&mut rkyv::Infallible).unwrap(); assert_eq!(archived, VALUE); assert_eq!(compact, VALUE); assert_eq!(control, VALUE); } #[cfg_attr(miri, ignore)] #[proptest] fn proptest_roundtrip(s: String) { let bytes_compact = rkyv::to_bytes::<_, 32>(&CompactString::from(&s)).unwrap(); let bytes_control = rkyv::to_bytes::<_, 32>(&s).unwrap(); assert_eq!(&*bytes_compact, &*bytes_control); let archived = unsafe { rkyv::archived_root::(&bytes_compact) }; let compact: CompactString = archived.deserialize(&mut rkyv::Infallible).unwrap(); let control: String = archived.deserialize(&mut rkyv::Infallible).unwrap(); assert_eq!(archived, &s); assert_eq!(compact, s); assert_eq!(control, s); let archived = unsafe { rkyv::archived_root::(&bytes_compact) }; let compact: CompactString = archived.deserialize(&mut rkyv::Infallible).unwrap(); let control: String = archived.deserialize(&mut rkyv::Infallible).unwrap(); assert_eq!(archived, &s); assert_eq!(compact, s); assert_eq!(control, s); } } compact_str-0.8.0/src/features/serde.rs000064400000000000000000000123551046102023000162320ustar 00000000000000use alloc::string::String; 
use alloc::vec::Vec; use serde::de::{ Deserializer, Error, Unexpected, Visitor, }; use crate::CompactString; fn compact_string<'de: 'a, 'a, D: Deserializer<'de>>( deserializer: D, ) -> Result { struct CompactStringVisitor; impl<'a> Visitor<'a> for CompactStringVisitor { type Value = CompactString; fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result { formatter.write_str("a string") } fn visit_str(self, v: &str) -> Result { Ok(CompactString::from(v)) } fn visit_borrowed_str(self, v: &'a str) -> Result { Ok(CompactString::from(v)) } fn visit_string(self, v: String) -> Result { Ok(CompactString::from(v)) } fn visit_bytes(self, v: &[u8]) -> Result { match core::str::from_utf8(v) { Ok(s) => Ok(CompactString::from(s)), Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), } } fn visit_borrowed_bytes(self, v: &'a [u8]) -> Result { match core::str::from_utf8(v) { Ok(s) => Ok(CompactString::from(s)), Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), } } fn visit_byte_buf(self, v: Vec) -> Result { match String::from_utf8(v) { Ok(s) => Ok(CompactString::from(s)), Err(e) => Err(Error::invalid_value( Unexpected::Bytes(&e.into_bytes()), &self, )), } } } deserializer.deserialize_str(CompactStringVisitor) } #[cfg_attr(docsrs, doc(cfg(feature = "serde")))] impl serde::Serialize for CompactString { fn serialize(&self, serializer: S) -> Result { self.as_str().serialize(serializer) } } #[cfg_attr(docsrs, doc(cfg(feature = "serde")))] impl<'de> serde::Deserialize<'de> for CompactString { fn deserialize>(deserializer: D) -> Result { compact_string(deserializer) } } #[cfg(test)] mod tests { use alloc::string::{ String, ToString, }; use alloc::vec::Vec; use serde::{ Deserialize, Serialize, }; use test_strategy::proptest; use crate::CompactString; #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] struct PersonString { name: String, phones: Vec, address: Option, } #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] struct 
PersonCompactString { name: CompactString, phones: Vec, address: Option, } #[test] fn test_roundtrip() { let name = "Ferris the Crab"; let phones = vec!["1-800-111-1111", "2-222-222-2222"]; let address = Some("123 Sesame Street"); let std = PersonString { name: name.to_string(), phones: phones.iter().map(|s| s.to_string()).collect(), address: address.as_ref().map(|s| s.to_string()), }; let compact = PersonCompactString { name: name.into(), phones: phones.iter().map(|s| CompactString::from(*s)).collect(), address: address.as_ref().map(|s| CompactString::from(*s)), }; let std_json = serde_json::to_string(&std).unwrap(); let compact_json = serde_json::to_string(&compact).unwrap(); // the serialized forms should be the same assert_eq!(std_json, compact_json); let std_de_compact: PersonString = serde_json::from_str(&compact_json).unwrap(); let compact_de_std: PersonCompactString = serde_json::from_str(&std_json).unwrap(); // we should be able to deserailze from the opposite, serialized, source assert_eq!(std_de_compact, std); assert_eq!(compact_de_std, compact); } #[cfg_attr(miri, ignore)] #[proptest] fn proptest_roundtrip(name: String, phones: Vec, address: Option) { let std = PersonString { name: name.clone(), phones: phones.iter().map(|s| s.clone()).collect(), address: address.clone(), }; let compact = PersonCompactString { name: name.into(), phones: phones.iter().map(|s| CompactString::from(s)).collect(), address: address.map(|s| CompactString::from(s)), }; let std_json = serde_json::to_string(&std).unwrap(); let compact_json = serde_json::to_string(&compact).unwrap(); // the serialized forms should be the same assert_eq!(std_json, compact_json); let std_de_compact: PersonString = serde_json::from_str(&compact_json).unwrap(); let compact_de_std: PersonCompactString = serde_json::from_str(&std_json).unwrap(); // we should be able to deserailze from the opposite, serialized, source assert_eq!(std_de_compact, std); assert_eq!(compact_de_std, compact); } } 
compact_str-0.8.0/src/features/smallvec.rs000064400000000000000000000053261046102023000167360ustar 00000000000000use smallvec::SmallVec; use crate::repr::MAX_SIZE; use crate::CompactString; impl CompactString { /// Converts a [`CompactString`] into a byte vector /// /// This consumes the [`CompactString`] and returns a [`SmallVec`], so we do not need to copy /// contents /// /// Note: [`SmallVec`] is an inline-able version [`Vec`](alloc::vec::Vec), just like /// [`CompactString`] is an inline-able version of [`String`](alloc::string::String). /// /// # Example /// ``` /// use compact_str::CompactString; /// /// let c = CompactString::new("hello"); /// let bytes = c.into_bytes(); /// /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]); /// ``` #[cfg_attr(docsrs, doc(cfg(feature = "smallvec")))] pub fn into_bytes(self) -> SmallVec<[u8; MAX_SIZE]> { self.0.into_bytes() } } #[cfg(test)] mod tests { use alloc::string::String; use proptest::prelude::*; use test_strategy::proptest; use crate::repr::MAX_SIZE; use crate::tests::rand_unicode; use crate::CompactString; /// generates random unicode strings, that are at least MAX_SIZE bytes long pub fn rand_long_unicode() -> impl Strategy { proptest::collection::vec(proptest::char::any(), (MAX_SIZE + 1)..80) .prop_map(|v| v.into_iter().collect()) } #[test] fn test_buffer_reuse() { let c = CompactString::from("I am a longer string that will be on the heap"); let c_ptr = c.as_ptr(); let bytes = c.into_bytes(); let b_ptr = bytes.as_ptr(); // Note: inlined CompactStrings also get their buffers re-used, but we can't assert their // re-use the same way we do for longer strings, because the underlying array may move on // the callstack, whereas for longer strings the buffer is not moving on the heap // converting into_bytes should _always_ re-use the underlying buffer assert_eq!(c_ptr, b_ptr); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_buffer_reuse(#[strategy(rand_long_unicode())] s: String) { let c = 
CompactString::from(s); let c_ptr = c.as_ptr(); let bytes = c.into_bytes(); let b_ptr = bytes.as_ptr(); // converting into_bytes should _always_ re-use the underlying buffer prop_assert_eq!(c_ptr, b_ptr); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_roundtrip(#[strategy(rand_unicode())] s: String) { let og_compact = CompactString::from(s.clone()); prop_assert_eq!(&og_compact, &s); let bytes = og_compact.into_bytes(); let ex_compact = CompactString::from_utf8(bytes).unwrap(); prop_assert_eq!(&ex_compact, &s); } } compact_str-0.8.0/src/features/sqlx.rs000064400000000000000000000060341046102023000161140ustar 00000000000000use sqlx::database::HasValueRef; use sqlx::error::BoxDynError; #[cfg(any( feature = "sqlx-mysql", feature = "sqlx-postgres", feature = "sqlx-sqlite" ))] use sqlx::{ database::HasArguments, encode::IsNull, Encode, }; use sqlx::{ Database, Decode, Type, Value, ValueRef, }; use crate::{ CompactString, ToCompactString, }; #[cfg_attr(docsrs, doc(cfg(feature = "sqlx")))] impl Type for CompactString where DB: Database, for<'x> &'x str: Type, { #[inline] fn type_info() -> ::TypeInfo { <&str as Type>::type_info() } } #[cfg_attr(docsrs, doc(cfg(feature = "sqlx")))] impl<'r, DB> Decode<'r, DB> for CompactString where DB: Database, for<'x> &'x str: Decode<'x, DB> + Type, { fn decode(value: >::ValueRef) -> Result { let value = value.to_owned(); let value: &str = value.try_decode()?; Ok(value.try_to_compact_string()?) 
} } #[cfg(feature = "sqlx-mysql")] #[cfg_attr(docsrs, doc(cfg(feature = "sqlx-mysql")))] impl<'q> Encode<'q, sqlx::MySql> for CompactString { fn encode_by_ref(&self, buf: &mut >::ArgumentBuffer) -> IsNull { Encode::<'_, sqlx::MySql>::encode_by_ref(&self.as_str(), buf) } #[inline] fn produces(&self) -> Option<::TypeInfo> { <&str as Encode<'_, sqlx::MySql>>::produces(&self.as_str()) } #[inline] fn size_hint(&self) -> usize { <&str as Encode<'_, sqlx::MySql>>::size_hint(&self.as_str()) } } #[cfg(feature = "sqlx-postgres")] #[cfg_attr(docsrs, doc(cfg(feature = "sqlx-postgres")))] impl<'q> Encode<'q, sqlx::Postgres> for CompactString { fn encode_by_ref( &self, buf: &mut >::ArgumentBuffer, ) -> IsNull { Encode::<'_, sqlx::Postgres>::encode_by_ref(&self.as_str(), buf) } #[inline] fn produces(&self) -> Option<::TypeInfo> { <&str as Encode<'_, sqlx::Postgres>>::produces(&self.as_str()) } #[inline] fn size_hint(&self) -> usize { <&str as Encode<'_, sqlx::Postgres>>::size_hint(&self.as_str()) } } #[cfg(feature = "sqlx-sqlite")] #[cfg_attr(docsrs, doc(cfg(feature = "sqlx-sqlite")))] impl<'q> Encode<'q, sqlx::Sqlite> for CompactString { fn encode(self, buf: &mut >::ArgumentBuffer) -> IsNull { Encode::<'_, sqlx::Sqlite>::encode(self.into_string(), buf) } fn encode_by_ref( &self, buf: &mut >::ArgumentBuffer, ) -> IsNull { Encode::<'_, sqlx::Sqlite>::encode(alloc::string::String::from(self.as_str()), buf) } #[inline] fn produces(&self) -> Option<::TypeInfo> { <&str as Encode<'_, sqlx::Sqlite>>::produces(&self.as_str()) } #[inline] fn size_hint(&self) -> usize { <&str as Encode<'_, sqlx::Sqlite>>::size_hint(&self.as_str()) } } compact_str-0.8.0/src/lib.rs000064400000000000000000002447011046102023000140620ustar 00000000000000#![doc = include_str!("../README.md")] #![cfg_attr(docsrs, feature(doc_cfg))] #![no_std] #[cfg(feature = "std")] #[macro_use] extern crate std; #[cfg_attr(test, macro_use)] extern crate alloc; use alloc::borrow::Cow; use alloc::boxed::Box; use 
alloc::string::String; #[doc(hidden)] pub use core; use core::borrow::{ Borrow, BorrowMut, }; use core::cmp::Ordering; use core::hash::{ Hash, Hasher, }; use core::iter::FusedIterator; use core::ops::{ Add, AddAssign, Bound, Deref, DerefMut, RangeBounds, }; use core::str::{ FromStr, Utf8Error, }; use core::{ fmt, mem, slice, }; #[cfg(feature = "std")] use std::ffi::OsStr; mod features; mod macros; mod unicode_data; mod repr; use repr::Repr; mod traits; pub use traits::{ CompactStringExt, ToCompactString, }; #[cfg(test)] mod tests; /// A [`CompactString`] is a compact string type that can be used almost anywhere a /// [`String`] or [`str`] can be used. /// /// ## Using `CompactString` /// ``` /// use compact_str::CompactString; /// # use std::collections::HashMap; /// /// // CompactString auto derefs into a str so you can use all methods from `str` /// // that take a `&self` /// if CompactString::new("hello world!").is_ascii() { /// println!("we're all ASCII") /// } /// /// // You can use a CompactString in collections like you would a String or &str /// let mut map: HashMap = HashMap::new(); /// /// // directly construct a new `CompactString` /// map.insert(CompactString::new("nyc"), CompactString::new("empire state building")); /// // create a `CompactString` from a `&str` /// map.insert("sf".into(), "transamerica pyramid".into()); /// // create a `CompactString` from a `String` /// map.insert(String::from("sea").into(), String::from("space needle").into()); /// /// fn wrapped_print>(text: T) { /// println!("{}", text.as_ref()); /// } /// /// // CompactString impls AsRef and Borrow, so it can be used anywhere /// // that expects a generic string /// if let Some(building) = map.get("nyc") { /// wrapped_print(building); /// } /// /// // CompactString can also be directly compared to a String or &str /// assert_eq!(CompactString::new("chicago"), "chicago"); /// assert_eq!(CompactString::new("houston"), String::from("houston")); /// ``` /// /// # Converting from a 
`String` /// It's important that a `CompactString` interops well with `String`, so you can easily use both in /// your code base. /// /// `CompactString` implements `From` and operates in the following manner: /// - Eagerly inlines the string, possibly dropping excess capacity /// - Otherwise re-uses the same underlying buffer from `String` /// /// ``` /// use compact_str::CompactString; /// /// // eagerly inlining /// let short = String::from("hello world"); /// let short_c = CompactString::from(short); /// assert!(!short_c.is_heap_allocated()); /// /// // dropping excess capacity /// let mut excess = String::with_capacity(256); /// excess.push_str("abc"); /// /// let excess_c = CompactString::from(excess); /// assert!(!excess_c.is_heap_allocated()); /// assert!(excess_c.capacity() < 256); /// /// // re-using the same buffer /// let long = String::from("this is a longer string that will be heap allocated"); /// /// let long_ptr = long.as_ptr(); /// let long_len = long.len(); /// let long_cap = long.capacity(); /// /// let mut long_c = CompactString::from(long); /// assert!(long_c.is_heap_allocated()); /// /// let cpt_ptr = long_c.as_ptr(); /// let cpt_len = long_c.len(); /// let cpt_cap = long_c.capacity(); /// /// // the original String and the CompactString point to the same place in memory, buffer re-use! /// assert_eq!(cpt_ptr, long_ptr); /// assert_eq!(cpt_len, long_len); /// assert_eq!(cpt_cap, long_cap); /// ``` /// /// ### Prevent Eagerly Inlining /// A consequence of eagerly inlining is you then need to de-allocate the existing buffer, which /// might not always be desirable if you're converting a very large amount of `String`s. If your /// code is very sensitive to allocations, consider the [`CompactString::from_string_buffer`] API. #[repr(transparent)] pub struct CompactString(Repr); impl CompactString { /// Creates a new [`CompactString`] from any type that implements `AsRef`. /// If the string is short enough, then it will be inlined on the stack! 
///
    /// In a `static` or `const` context you can use the method [`CompactString::const_new()`].
    ///
    /// # Examples
    ///
    /// ### Inlined
    /// ```
    /// # use compact_str::CompactString;
    /// // We can inline strings up to 12 characters long on 32-bit architectures...
    /// #[cfg(target_pointer_width = "32")]
    /// let s = "i'm 12 chars";
    /// // ...and up to 24 characters on 64-bit architectures!
    /// #[cfg(target_pointer_width = "64")]
    /// let s = "i am 24 characters long!";
    ///
    /// let compact = CompactString::new(&s);
    ///
    /// assert_eq!(compact, s);
    /// // we are not allocated on the heap!
    /// assert!(!compact.is_heap_allocated());
    /// ```
    ///
    /// ### Heap
    /// ```
    /// # use compact_str::CompactString;
    /// // For longer strings though, we get allocated on the heap
    /// let long = "I am a longer string that will be allocated on the heap";
    /// let compact = CompactString::new(long);
    ///
    /// assert_eq!(compact, long);
    /// // we are allocated on the heap!
    /// assert!(compact.is_heap_allocated());
    /// ```
    ///
    /// ### Creation
    /// ```
    /// use compact_str::CompactString;
    ///
    /// // Using a `&'static str`
    /// let s = "hello world!";
    /// let hello = CompactString::new(&s);
    ///
    /// // Using a `String`
    /// let u = String::from("🦄🌈");
    /// let unicorn = CompactString::new(u);
    ///
    /// // Using a `Box<str>`
    /// let b: Box<str> = String::from("📦📦📦").into_boxed_str();
    /// let boxed = CompactString::new(&b);
    /// ```
    #[inline]
    #[track_caller]
    pub fn new<T: AsRef<str>>(text: T) -> Self {
        Self::try_new(text).unwrap_with_msg()
    }

    /// Fallible version of [`CompactString::new()`]
    ///
    /// This method won't panic if the system is out-of-memory, but return an [`ReserveError`].
    /// Otherwise it behaves the same as [`CompactString::new()`].
    #[inline]
    pub fn try_new<T: AsRef<str>>(text: T) -> Result<Self, ReserveError> {
        Repr::new(text.as_ref()).map(CompactString)
    }

    /// Creates a new inline [`CompactString`] from `&'static str` at compile time.
    /// Complexity: O(1). As an optimization, short strings get inlined.
///
    /// In a dynamic context you can use the method [`CompactString::new()`].
    ///
    /// # Examples
    /// ```
    /// use compact_str::CompactString;
    ///
    /// const DEFAULT_NAME: CompactString = CompactString::const_new("untitled");
    /// ```
    #[inline]
    pub const fn const_new(text: &'static str) -> Self {
        CompactString(Repr::const_new(text))
    }

    /// Creates a new inline [`CompactString`] at compile time.
    #[deprecated(
        since = "0.8.0",
        note = "replaced by CompactString::const_new, will be removed in 0.9.0"
    )]
    #[inline]
    pub const fn new_inline(text: &'static str) -> Self {
        CompactString::const_new(text)
    }

    /// Creates a new inline [`CompactString`] from `&'static str` at compile time.
    #[deprecated(
        since = "0.8.0",
        note = "replaced by CompactString::const_new, will be removed in 0.9.0"
    )]
    #[inline]
    pub const fn from_static_str(text: &'static str) -> Self {
        CompactString::const_new(text)
    }

    /// Get back the `&'static str` constructed by [`CompactString::const_new`].
    ///
    /// If the string was short enough that it could be inlined, then it was inline, and
    /// this method will return `None`.
    ///
    /// # Examples
    /// ```
    /// use compact_str::CompactString;
    ///
    /// const DEFAULT_NAME: CompactString =
    ///     CompactString::const_new("That is not dead which can eternal lie.");
    /// assert_eq!(
    ///     DEFAULT_NAME.as_static_str().unwrap(),
    ///     "That is not dead which can eternal lie.",
    /// );
    /// ```
    #[inline]
    #[rustversion::attr(since(1.64), const)]
    pub fn as_static_str(&self) -> Option<&'static str> {
        self.0.as_static_str()
    }

    /// Creates a new empty [`CompactString`] with the capacity to fit at least `capacity` bytes.
    ///
    /// A `CompactString` will inline strings on the stack, if they're small enough. Specifically,
    /// if the string has a length less than or equal to `std::mem::size_of::<String>` bytes
    /// then it will be inlined. This also means that `CompactString`s have a minimum capacity
    /// of `std::mem::size_of::<String>`.
    ///
    /// # Panics
    ///
    /// This method panics if the system is out-of-memory.
/// Use [`CompactString::try_with_capacity()`] if you want to handle such a problem manually.
    ///
    /// # Examples
    ///
    /// ### "zero" Capacity
    /// ```
    /// # use compact_str::CompactString;
    /// // Creating a CompactString with a capacity of 0 will create
    /// // one with capacity of std::mem::size_of::<String>();
    /// let empty = CompactString::with_capacity(0);
    /// let min_size = std::mem::size_of::<String>();
    ///
    /// assert_eq!(empty.capacity(), min_size);
    /// assert_ne!(0, min_size);
    /// assert!(!empty.is_heap_allocated());
    /// ```
    ///
    /// ### Max Inline Size
    /// ```
    /// # use compact_str::CompactString;
    /// // Creating a CompactString with a capacity of std::mem::size_of::<String>()
    /// // will not heap allocate.
    /// let str_size = std::mem::size_of::<String>();
    /// let empty = CompactString::with_capacity(str_size);
    ///
    /// assert_eq!(empty.capacity(), str_size);
    /// assert!(!empty.is_heap_allocated());
    /// ```
    ///
    /// ### Heap Allocating
    /// ```
    /// # use compact_str::CompactString;
    /// // If you create a `CompactString` with a capacity greater than
    /// // `std::mem::size_of::<String>`, it will heap allocated. For heap
    /// // allocated strings we have a minimum capacity
    ///
    /// // NOTE(review): the type argument below was lost in extraction; `usize` assumed
    /// const MIN_HEAP_CAPACITY: usize = std::mem::size_of::<usize>() * 4;
    ///
    /// let heap_size = std::mem::size_of::<String>() + 1;
    /// let empty = CompactString::with_capacity(heap_size);
    ///
    /// assert_eq!(empty.capacity(), MIN_HEAP_CAPACITY);
    /// assert!(empty.is_heap_allocated());
    /// ```
    #[inline]
    #[track_caller]
    pub fn with_capacity(capacity: usize) -> Self {
        Self::try_with_capacity(capacity).unwrap_with_msg()
    }

    /// Fallible version of [`CompactString::with_capacity()`]
    ///
    /// This method won't panic if the system is out-of-memory, but return an [`ReserveError`].
    /// Otherwise it behaves the same as [`CompactString::with_capacity()`].
    #[inline]
    pub fn try_with_capacity(capacity: usize) -> Result<Self, ReserveError> {
        Repr::with_capacity(capacity).map(CompactString)
    }

    /// Convert a slice of bytes into a [`CompactString`].
///
    /// A [`CompactString`] is a contiguous collection of bytes (`u8`s) that is valid [`UTF-8`](https://en.wikipedia.org/wiki/UTF-8).
    /// This method converts from an arbitrary contiguous collection of bytes into a
    /// [`CompactString`], failing if the provided bytes are not `UTF-8`.
    ///
    /// Note: If you want to create a [`CompactString`] from a non-contiguous collection of bytes,
    /// enable the `bytes` feature of this crate, and see `CompactString::from_utf8_buf`
    ///
    /// # Examples
    /// ### Valid UTF-8
    /// ```
    /// # use compact_str::CompactString;
    /// let bytes = vec![240, 159, 166, 128, 240, 159, 146, 175];
    /// let compact = CompactString::from_utf8(bytes).expect("valid UTF-8");
    ///
    /// assert_eq!(compact, "🦀💯");
    /// ```
    ///
    /// ### Invalid UTF-8
    /// ```
    /// # use compact_str::CompactString;
    /// let bytes = vec![255, 255, 255];
    /// let result = CompactString::from_utf8(bytes);
    ///
    /// assert!(result.is_err());
    /// ```
    #[inline]
    pub fn from_utf8<B: AsRef<[u8]>>(buf: B) -> Result<Self, Utf8Error> {
        Repr::from_utf8(buf).map(CompactString)
    }

    /// Converts a vector of bytes to a [`CompactString`] without checking that the string contains
    /// valid UTF-8.
    ///
    /// See the safe version, [`CompactString::from_utf8`], for more details.
    ///
    /// # Safety
    ///
    /// This function is unsafe because it does not check that the bytes passed to it are valid
    /// UTF-8. If this constraint is violated, it may cause memory unsafety issues with future users
    /// of the [`CompactString`], as the rest of the standard library assumes that
    /// [`CompactString`]s are valid UTF-8.
/// /// # Examples /// /// Basic usage: /// /// ``` /// # use compact_str::CompactString; /// // some bytes, in a vector /// let sparkle_heart = vec![240, 159, 146, 150]; /// /// let sparkle_heart = unsafe { /// CompactString::from_utf8_unchecked(sparkle_heart) /// }; /// /// assert_eq!("๐Ÿ’–", sparkle_heart); /// ``` #[inline] #[must_use] #[track_caller] pub unsafe fn from_utf8_unchecked>(buf: B) -> Self { Repr::from_utf8_unchecked(buf) .map(CompactString) .unwrap_with_msg() } /// Decode a [`UTF-16`](https://en.wikipedia.org/wiki/UTF-16) slice of bytes into a /// [`CompactString`], returning an [`Err`] if the slice contains any invalid data. /// /// # Examples /// ### Valid UTF-16 /// ``` /// # use compact_str::CompactString; /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063]; /// let compact = CompactString::from_utf16(buf).unwrap(); /// /// assert_eq!(compact, "๐„žmusic"); /// ``` /// /// ### Invalid UTF-16 /// ``` /// # use compact_str::CompactString; /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063]; /// let res = CompactString::from_utf16(buf); /// /// assert!(res.is_err()); /// ``` #[inline] pub fn from_utf16>(buf: B) -> Result { // Note: we don't use collect::>() because that fails to pre-allocate a buffer, // even though the size of our iterator, `buf`, is known ahead of time. // // rustlang issue #48994 is tracking the fix let buf = buf.as_ref(); let mut ret = CompactString::with_capacity(buf.len()); for c in core::char::decode_utf16(buf.iter().copied()) { if let Ok(c) = c { ret.push(c); } else { return Err(Utf16Error(())); } } Ok(ret) } /// Decode a UTF-16โ€“encoded slice `v` into a `CompactString`, replacing invalid data with /// the replacement character (`U+FFFD`), ๏ฟฝ. 
///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// # use compact_str::CompactString;
    /// // 𝄞mus<invalid>ic<invalid>
    /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
    ///           0x0073, 0xDD1E, 0x0069, 0x0063,
    ///           0xD834];
    ///
    /// assert_eq!(CompactString::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
    ///            CompactString::from_utf16_lossy(v));
    /// ```
    #[inline]
    pub fn from_utf16_lossy<B: AsRef<[u16]>>(buf: B) -> Self {
        let buf = buf.as_ref();
        let mut ret = CompactString::with_capacity(buf.len());
        for c in core::char::decode_utf16(buf.iter().copied()) {
            match c {
                Ok(c) => ret.push(c),
                // unpaired surrogate: substitute U+FFFD REPLACEMENT CHARACTER (written as an
                // escape so the literal survives re-encoding of this file)
                Err(_) => ret.push_str("\u{FFFD}"),
            }
        }
        ret
    }

    /// Returns the length of the [`CompactString`] in `bytes`, not [`char`]s or graphemes.
    ///
    /// When using `UTF-8` encoding (which all strings in Rust do) a single character will be 1 to 4
    /// bytes long, therefore the return value of this method might not be what a human considers
    /// the length of the string.
    ///
    /// # Examples
    /// ```
    /// # use compact_str::CompactString;
    /// let ascii = CompactString::new("hello world");
    /// assert_eq!(ascii.len(), 11);
    ///
    /// let emoji = CompactString::new("👱");
    /// assert_eq!(emoji.len(), 4);
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` if the [`CompactString`] has a length of 0, `false` otherwise
    ///
    /// # Examples
    /// ```
    /// # use compact_str::CompactString;
    /// let mut msg = CompactString::new("");
    /// assert!(msg.is_empty());
    ///
    /// // add some characters
    /// msg.push_str("hello reader!");
    /// assert!(!msg.is_empty());
    /// ```
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Returns the capacity of the [`CompactString`], in bytes.
///
    /// # Note
    /// * A `CompactString` will always have a capacity of at least `std::mem::size_of::<String>()`
    ///
    /// # Examples
    /// ### Minimum Size
    /// ```
    /// # use compact_str::CompactString;
    /// let min_size = std::mem::size_of::<String>();
    /// let compact = CompactString::new("");
    ///
    /// assert!(compact.capacity() >= min_size);
    /// ```
    ///
    /// ### Heap Allocated
    /// ```
    /// # use compact_str::CompactString;
    /// let compact = CompactString::with_capacity(128);
    /// assert_eq!(compact.capacity(), 128);
    /// ```
    #[inline]
    pub fn capacity(&self) -> usize {
        self.0.capacity()
    }

    /// Ensures that this [`CompactString`]'s capacity is at least `additional` bytes longer than
    /// its length. The capacity may be increased by more than `additional` bytes if it chooses,
    /// to prevent frequent reallocations.
    ///
    /// # Note
    /// * A `CompactString` will always have at least a capacity of `std::mem::size_of::<String>()`
    /// * Reserving additional bytes may cause the `CompactString` to become heap allocated
    ///
    /// # Panics
    /// This method panics if the new capacity overflows `usize` or if the system is out-of-memory.
    /// Use [`CompactString::try_reserve()`] if you want to handle such a problem manually.
    ///
    /// # Examples
    /// ```
    /// # use compact_str::CompactString;
    ///
    /// const WORD: usize = std::mem::size_of::<usize>();
    /// let mut compact = CompactString::default();
    /// assert!(compact.capacity() >= (WORD * 3) - 1);
    ///
    /// compact.reserve(200);
    /// assert!(compact.is_heap_allocated());
    /// assert!(compact.capacity() >= 200);
    /// ```
    #[inline]
    #[track_caller]
    pub fn reserve(&mut self, additional: usize) {
        self.try_reserve(additional).unwrap_with_msg()
    }

    /// Fallible version of [`CompactString::reserve()`]
    ///
    /// This method won't panic if the system is out-of-memory, but return an [`ReserveError`]
    /// Otherwise it behaves the same as [`CompactString::reserve()`].
#[inline]
    pub fn try_reserve(&mut self, additional: usize) -> Result<(), ReserveError> {
        self.0.reserve(additional)
    }

    /// Returns a string slice containing the entire [`CompactString`].
    ///
    /// # Examples
    /// ```
    /// # use compact_str::CompactString;
    /// let s = CompactString::new("hello");
    ///
    /// assert_eq!(s.as_str(), "hello");
    /// ```
    #[inline]
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Returns a mutable string slice containing the entire [`CompactString`].
    ///
    /// # Examples
    /// ```
    /// # use compact_str::CompactString;
    /// let mut s = CompactString::new("hello");
    /// s.as_mut_str().make_ascii_uppercase();
    ///
    /// assert_eq!(s.as_str(), "HELLO");
    /// ```
    #[inline]
    pub fn as_mut_str(&mut self) -> &mut str {
        let len = self.len();
        // SAFETY: the first `len` bytes of the buffer are the string's contents, which are
        // always valid UTF-8
        unsafe { core::str::from_utf8_unchecked_mut(&mut self.0.as_mut_buf()[..len]) }
    }

    /// Returns the part of the buffer that lies past the string's current length, as
    /// possibly-uninitialized bytes.
    ///
    /// # Safety
    /// NOTE(review): the exact contract is not visible from here; the body relies on
    /// `Repr::as_mut_buf` returning the whole buffer with `len() <= buf.len()` - confirm.
    unsafe fn spare_capacity_mut(&mut self) -> &mut [mem::MaybeUninit<u8>] {
        let buf = self.0.as_mut_buf();
        let ptr = buf.as_mut_ptr();
        let cap = buf.len();
        let len = self.len();

        slice::from_raw_parts_mut(ptr.add(len) as *mut mem::MaybeUninit<u8>, cap - len)
    }

    /// Returns a byte slice of the [`CompactString`]'s contents.
    ///
    /// # Examples
    /// ```
    /// # use compact_str::CompactString;
    /// let s = CompactString::new("hello");
    ///
    /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        &self.0.as_slice()[..self.len()]
    }

    // TODO: Implement a `try_as_mut_slice(...)` that will fail if it results in cloning?
    //
    /// Provides a mutable reference to the underlying buffer of bytes.
    ///
    /// # Safety
    /// * All Rust strings, including `CompactString`, must be valid UTF-8. The caller must
    ///   guarantee that any modifications made to the underlying buffer are valid UTF-8.
///
    /// # Examples
    /// ```
    /// # use compact_str::CompactString;
    /// let mut s = CompactString::new("hello");
    ///
    /// let slice = unsafe { s.as_mut_bytes() };
    /// // copy bytes into our string
    /// slice[5..11].copy_from_slice(" world".as_bytes());
    /// // set the len of the string
    /// unsafe { s.set_len(11) };
    ///
    /// assert_eq!(s, "hello world");
    /// ```
    #[inline]
    pub unsafe fn as_mut_bytes(&mut self) -> &mut [u8] {
        self.0.as_mut_buf()
    }

    /// Appends the given [`char`] to the end of this [`CompactString`].
    ///
    /// # Examples
    /// ```
    /// # use compact_str::CompactString;
    /// let mut s = CompactString::new("foo");
    ///
    /// s.push('b');
    /// s.push('a');
    /// s.push('r');
    ///
    /// assert_eq!("foobar", s);
    /// ```
    pub fn push(&mut self, ch: char) {
        // a `char` encodes to at most 4 UTF-8 bytes, so a 4-byte scratch buffer always fits
        self.push_str(ch.encode_utf8(&mut [0; 4]));
    }

    /// Removes the last character from the [`CompactString`] and returns it.
    /// Returns `None` if this [`CompactString`] is empty.
    ///
    /// # Examples
    /// ```
    /// # use compact_str::CompactString;
    /// let mut s = CompactString::new("abc");
    ///
    /// assert_eq!(s.pop(), Some('c'));
    /// assert_eq!(s.pop(), Some('b'));
    /// assert_eq!(s.pop(), Some('a'));
    ///
    /// assert_eq!(s.pop(), None);
    /// ```
    #[inline]
    pub fn pop(&mut self) -> Option<char> {
        self.0.pop()
    }

    /// Appends a given string slice onto the end of this [`CompactString`]
    ///
    /// # Examples
    /// ```
    /// # use compact_str::CompactString;
    /// let mut s = CompactString::new("abc");
    ///
    /// s.push_str("123");
    ///
    /// assert_eq!("abc123", s);
    /// ```
    #[inline]
    pub fn push_str(&mut self, s: &str) {
        self.0.push_str(s)
    }

    /// Removes a [`char`] from this [`CompactString`] at a byte position and returns it.
    ///
    /// This is an *O*(*n*) operation, as it requires copying every element in the
    /// buffer.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is larger than or equal to the [`CompactString`]'s length,
    /// or if it does not lie on a [`char`] boundary.
///
/// # Examples
///
/// ### Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// let mut c = CompactString::from("hello world");
///
/// assert_eq!(c.remove(0), 'h');
/// assert_eq!(c, "ello world");
///
/// assert_eq!(c.remove(5), 'w');
/// assert_eq!(c, "ello orld");
/// ```
///
/// ### Past total length:
///
/// ```should_panic
/// # use compact_str::CompactString;
/// let mut c = CompactString::from("hello there!");
/// c.remove(100);
/// ```
///
/// ### Not on char boundary:
///
/// ```should_panic
/// # use compact_str::CompactString;
/// let mut c = CompactString::from("🦄");
/// c.remove(1);
/// ```
#[inline]
pub fn remove(&mut self, idx: usize) -> char {
    let len = self.len();
    // Slicing panics if `idx` is out of bounds or not on a char boundary.
    let substr = &mut self.as_mut_str()[idx..];

    // get the char we want to remove
    let ch = substr
        .chars()
        .next()
        .expect("cannot remove a char from the end of a string");
    let ch_len = ch.len_utf8();

    // shift everything back one character
    let num_bytes = substr.len() - ch_len;
    let ptr = substr.as_mut_ptr();

    // SAFETY: `ptr` points at `substr`, which is `ch_len + num_bytes` bytes long, so both
    // the source (`ptr + ch_len`) and the destination (`ptr`) are valid for `num_bytes`
    // bytes. The ranges may overlap, hence `copy` and not `copy_nonoverlapping`. The new
    // length is smaller than the old one, so no uninitialized bytes are exposed.
    unsafe {
        core::ptr::copy(ptr.add(ch_len) as *const u8, ptr, num_bytes);
        self.set_len(len - ch_len);
    }

    ch
}

/// Forces the length of the [`CompactString`] to `new_len`.
///
/// This is a low-level operation that maintains none of the normal invariants for
/// `CompactString`. If you want to modify the `CompactString` you should use methods like
/// `push`, `push_str` or `pop`.
///
/// # Safety
/// * `new_len` must be less than or equal to `capacity()`
/// * The elements at `old_len..new_len` must be initialized
#[inline]
pub unsafe fn set_len(&mut self, new_len: usize) {
    self.0.set_len(new_len)
}

/// Returns whether or not the [`CompactString`] is heap allocated.
/// /// # Examples /// ### Inlined /// ``` /// # use compact_str::CompactString; /// let hello = CompactString::new("hello world"); /// /// assert!(!hello.is_heap_allocated()); /// ``` /// /// ### Heap Allocated /// ``` /// # use compact_str::CompactString; /// let msg = CompactString::new("this message will self destruct in 5, 4, 3, 2, 1 ๐Ÿ’ฅ"); /// /// assert!(msg.is_heap_allocated()); /// ``` #[inline] pub fn is_heap_allocated(&self) -> bool { self.0.is_heap_allocated() } /// Ensure that the given range is inside the set data, and that no codepoints are split. /// /// Returns the range `start..end` as a tuple. #[inline] fn ensure_range(&self, range: impl RangeBounds) -> (usize, usize) { #[cold] #[inline(never)] fn illegal_range() -> ! { panic!("illegal range"); } let start = match range.start_bound() { Bound::Included(&n) => n, Bound::Excluded(&n) => match n.checked_add(1) { Some(n) => n, None => illegal_range(), }, Bound::Unbounded => 0, }; let end = match range.end_bound() { Bound::Included(&n) => match n.checked_add(1) { Some(n) => n, None => illegal_range(), }, Bound::Excluded(&n) => n, Bound::Unbounded => self.len(), }; if end < start { illegal_range(); } let s = self.as_str(); if !s.is_char_boundary(start) || !s.is_char_boundary(end) { illegal_range(); } (start, end) } /// Removes the specified range in the [`CompactString`], /// and replaces it with the given string. /// The given string doesn't need to be the same length as the range. /// /// # Panics /// /// Panics if the starting point or end point do not lie on a [`char`] /// boundary, or if they're out of bounds. /// /// # Examples /// /// Basic usage: /// /// ``` /// # use compact_str::CompactString; /// let mut s = CompactString::new("Hello, world!"); /// /// s.replace_range(7..12, "WORLD"); /// assert_eq!(s, "Hello, WORLD!"); /// /// s.replace_range(7..=11, "you"); /// assert_eq!(s, "Hello, you!"); /// /// s.replace_range(5.., "! Is it me you're looking for?"); /// assert_eq!(s, "Hello! 
Is it me you're looking for?"); /// ``` #[inline] pub fn replace_range(&mut self, range: impl RangeBounds, replace_with: &str) { let (start, end) = self.ensure_range(range); let dest_len = end - start; match dest_len.cmp(&replace_with.len()) { Ordering::Equal => unsafe { self.replace_range_same_size(start, end, replace_with) }, Ordering::Greater => unsafe { self.replace_range_shrink(start, end, replace_with) }, Ordering::Less => unsafe { self.replace_range_grow(start, end, replace_with) }, } } /// Replace into the same size. unsafe fn replace_range_same_size(&mut self, start: usize, end: usize, replace_with: &str) { core::ptr::copy_nonoverlapping( replace_with.as_ptr(), self.as_mut_ptr().add(start), end - start, ); } /// Replace, so self.len() gets smaller. unsafe fn replace_range_shrink(&mut self, start: usize, end: usize, replace_with: &str) { let total_len = self.len(); let dest_len = end - start; let new_len = total_len - (dest_len - replace_with.len()); let amount = total_len - end; let data = self.as_mut_ptr(); // first insert the replacement string, overwriting the current content core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len()); // then move the tail of the CompactString forward to its new place, filling the gap core::ptr::copy( data.add(total_len - amount), data.add(new_len - amount), amount, ); // and lastly we set the new length self.set_len(new_len); } /// Replace, so self.len() gets bigger. 
unsafe fn replace_range_grow(&mut self, start: usize, end: usize, replace_with: &str) { let dest_len = end - start; self.reserve(replace_with.len() - dest_len); let total_len = self.len(); let new_len = total_len + (replace_with.len() - dest_len); let amount = total_len - end; // first grow the string, so MIRI knows that the full range is usable self.set_len(new_len); let data = self.as_mut_ptr(); // then move the tail of the CompactString back to its new place core::ptr::copy( data.add(total_len - amount), data.add(new_len - amount), amount, ); // and lastly insert the replacement string core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len()); } /// Creates a new [`CompactString`] by repeating a string `n` times. /// /// # Panics /// /// This function will panic if the capacity would overflow. /// /// # Examples /// /// Basic usage: /// /// ``` /// use compact_str::CompactString; /// assert_eq!(CompactString::new("abc").repeat(4), CompactString::new("abcabcabcabc")); /// ``` /// /// A panic upon overflow: /// /// ```should_panic /// use compact_str::CompactString; /// /// // this will panic at runtime /// let huge = CompactString::new("0123456789abcdef").repeat(usize::MAX); /// ``` #[must_use] pub fn repeat(&self, n: usize) -> Self { if n == 0 || self.is_empty() { Self::const_new("") } else if n == 1 { self.clone() } else { let mut out = Self::with_capacity(self.len() * n); (0..n).for_each(|_| out.push_str(self)); out } } /// Truncate the [`CompactString`] to a shorter length. /// /// If the length of the [`CompactString`] is less or equal to `new_len`, the call is a no-op. /// /// Calling this function does not change the capacity of the [`CompactString`]. /// /// # Panics /// /// Panics if the new end of the string does not lie on a [`char`] boundary. 
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// let mut s = CompactString::new("Hello, world!");
/// s.truncate(5);
/// assert_eq!(s, "Hello");
/// ```
pub fn truncate(&mut self, new_len: usize) {
    let s = self.as_str();
    if new_len >= s.len() {
        return;
    }

    assert!(
        s.is_char_boundary(new_len),
        "new_len must lie on char boundary",
    );
    // SAFETY: `new_len` is smaller than the current length and lies on a char boundary,
    // so the remaining bytes are initialized, valid UTF-8.
    unsafe { self.set_len(new_len) };
}

/// Converts a [`CompactString`] to a raw pointer.
#[inline]
pub fn as_ptr(&self) -> *const u8 {
    self.0.as_slice().as_ptr()
}

/// Converts a mutable [`CompactString`] to a raw pointer.
#[inline]
pub fn as_mut_ptr(&mut self) -> *mut u8 {
    // SAFETY: the buffer is only used to obtain a raw pointer; nothing is written
    // through it here.
    unsafe { self.0.as_mut_buf().as_mut_ptr() }
}

/// Inserts a string slice at a byte index.
///
/// # Panics
///
/// Panics if `idx` does not lie on a [`char`] boundary.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// let mut s = CompactString::new("Hello!");
/// s.insert_str(5, ", world");
/// assert_eq!(s, "Hello, world!");
/// ```
pub fn insert_str(&mut self, idx: usize, string: &str) {
    assert!(self.is_char_boundary(idx), "idx must lie on char boundary");

    let old_len = self.len();
    let new_len = old_len + string.len();
    self.reserve(string.len());

    // SAFETY: We just checked that we may split self at idx.
    //         We set the length only after reserving the memory.
    //         We fill the gap with valid UTF-8 data.
    unsafe {
        // first move the tail (`old_len - idx` bytes) to the new back
        let data = self.as_mut_ptr();
        core::ptr::copy(
            data.add(idx),
            data.add(idx + string.len()),
            old_len - idx,
        );

        // then insert the new bytes
        core::ptr::copy_nonoverlapping(string.as_ptr(), data.add(idx), string.len());

        // and lastly resize the string
        self.set_len(new_len);
    }
}

/// Insert a character at an index.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// let mut s = CompactString::new("Hello world!");
/// s.insert(5, ',');
/// assert_eq!(s, "Hello, world!");
/// ```
pub fn insert(&mut self, idx: usize, ch: char) {
    // A `char` encodes to at most four UTF-8 bytes, so a stack buffer is enough.
    let mut utf8 = [0; 4];
    self.insert_str(idx, ch.encode_utf8(&mut utf8));
}

/// Reduces the length of the [`CompactString`] to zero.
///
/// Calling this function does not change the capacity of the [`CompactString`].
///
/// ```
/// # use compact_str::CompactString;
/// let mut s = CompactString::new("Rust is the most loved language on Stackoverflow!");
/// assert_eq!(s.capacity(), 49);
///
/// s.clear();
///
/// assert_eq!(s, "");
/// assert_eq!(s.capacity(), 49);
/// ```
pub fn clear(&mut self) {
    // SAFETY: a length of zero never exceeds the capacity and exposes no bytes.
    unsafe { self.set_len(0) };
}

/// Splits the [`CompactString`] into two at the given byte index.
///
/// Returns a new [`CompactString`] holding the bytes from `at` to the end; `self` keeps
/// the bytes before `at`.
///
/// Calling this function does not change the capacity of the [`CompactString`], unless the
/// [`CompactString`] is backed by a `&'static str`.
///
/// # Panics
///
/// Panics if `at` does not lie on a [`char`] boundary.
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// let mut s = CompactString::const_new("Hello, world!");
/// let w = s.split_off(5);
///
/// assert_eq!(w, ", world!");
/// assert_eq!(s, "Hello");
/// ```
pub fn split_off(&mut self, at: usize) -> Self {
    if let Some(s) = self.as_static_str() {
        // The tail of a `&'static str` is itself `'static`, so it needs no copy.
        let result = Self::const_new(&s[at..]);
        // SAFETY: the previous line `s[at..]` would have panicked if `at` was invalid
        unsafe { self.set_len(at) };
        result
    } else {
        let result = self[at..].into();
        // SAFETY: the previous line `self[at..]` would have panicked if `at` was invalid
        unsafe { self.set_len(at) };
        result
    }
}

/// Remove a range from the [`CompactString`], and return it as an iterator.
///
/// Calling this function does not change the capacity of the [`CompactString`].
///
/// # Panics
///
/// Panics if the start or end of the range does not lie on a [`char`] boundary.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// let mut s = CompactString::new("Hello, world!");
///
/// let mut d = s.drain(5..12);
/// assert_eq!(d.next(), Some(',')); // iterate over the extracted data
/// assert_eq!(d.as_str(), " world"); // or get the whole data as &str
///
/// // The iterator keeps a reference to `s`, so you have to drop() the iterator,
/// // before you can access `s` again.
/// drop(d);
/// assert_eq!(s, "Hello!");
/// ```
pub fn drain(&mut self, range: impl RangeBounds<usize>) -> Drain<'_> {
    let (start, end) = self.ensure_range(range);
    Drain {
        // The raw pointer is taken before `self` is re-borrowed for `chars`.
        compact_string: self as *mut Self,
        start,
        end,
        chars: self[start..end].chars(),
    }
}

/// Shrinks the capacity of this [`CompactString`] with a lower bound.
///
/// The resulting capacity is never less than the size of 3×[`usize`],
/// i.e. the capacity that can be inlined.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// let mut s = CompactString::with_capacity(100);
/// assert_eq!(s.capacity(), 100);
///
/// // if the capacity was already bigger than the argument, the call is a no-op
/// s.shrink_to(100);
/// assert_eq!(s.capacity(), 100);
///
/// s.shrink_to(50);
/// assert_eq!(s.capacity(), 50);
///
/// // if the string can be inlined, it is
/// s.shrink_to(10);
/// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
/// ```
#[inline]
pub fn shrink_to(&mut self, min_capacity: usize) {
    self.0.shrink_to(min_capacity);
}

/// Shrinks the capacity of this [`CompactString`] to match its length.
///
/// The resulting capacity is never less than the size of 3×[`usize`],
/// i.e. the capacity that can be inlined.
///
/// This method is effectively the same as calling [`string.shrink_to(0)`].
/// /// # Examples /// /// Basic usage: /// /// ``` /// # use compact_str::CompactString; /// let mut s = CompactString::from("This is a string with more than 24 characters."); /// /// s.reserve(100); /// assert!(s.capacity() >= 100); /// /// s.shrink_to_fit(); /// assert_eq!(s.len(), s.capacity()); /// ``` /// /// ``` /// # use compact_str::CompactString; /// let mut s = CompactString::from("short string"); /// /// s.reserve(100); /// assert!(s.capacity() >= 100); /// /// s.shrink_to_fit(); /// assert_eq!(s.capacity(), 3 * std::mem::size_of::()); /// ``` #[inline] pub fn shrink_to_fit(&mut self) { self.0.shrink_to(0); } /// Retains only the characters specified by the predicate. /// /// The method iterates over the characters in the string and calls the `predicate`. /// /// If the `predicate` returns `false`, then the character gets removed. /// If the `predicate` returns `true`, then the character is kept. /// /// # Examples /// /// ``` /// # use compact_str::CompactString; /// let mut s = CompactString::from("รคb๐„ždโ‚ฌ"); /// /// let keep = [false, true, true, false, true]; /// let mut iter = keep.iter(); /// s.retain(|_| *iter.next().unwrap()); /// /// assert_eq!(s, "b๐„žโ‚ฌ"); /// ``` pub fn retain(&mut self, mut predicate: impl FnMut(char) -> bool) { // We iterate over the string, and copy character by character. let s = self.as_mut_str(); let mut dest_idx = 0; let mut src_idx = 0; while let Some(ch) = s[src_idx..].chars().next() { let ch_len = ch.len_utf8(); if predicate(ch) { // SAFETY: We know that both indices are valid, and that we don't split a char. unsafe { let p = s.as_mut_ptr(); core::ptr::copy(p.add(src_idx), p.add(dest_idx), ch_len); } dest_idx += ch_len; } src_idx += ch_len; } // SAFETY: We know that the index is a valid position to break the string. 
unsafe { self.set_len(dest_idx) }; } /// Decode a bytes slice as UTF-8 string, replacing any illegal codepoints /// /// # Examples /// /// ``` /// # use compact_str::CompactString; /// let chess_knight = b"\xf0\x9f\xa8\x84"; /// /// assert_eq!( /// "๐Ÿจ„", /// CompactString::from_utf8_lossy(chess_knight), /// ); /// /// // For valid UTF-8 slices, this is the same as: /// assert_eq!( /// "๐Ÿจ„", /// CompactString::new(std::str::from_utf8(chess_knight).unwrap()), /// ); /// ``` /// /// Incorrect bytes: /// /// ``` /// # use compact_str::CompactString; /// let broken = b"\xf0\x9f\xc8\x84"; /// /// assert_eq!( /// "๏ฟฝศ„", /// CompactString::from_utf8_lossy(broken), /// ); /// /// // For invalid UTF-8 slices, this is an optimized implemented for: /// assert_eq!( /// "๏ฟฝศ„", /// CompactString::from(String::from_utf8_lossy(broken)), /// ); /// ``` pub fn from_utf8_lossy(v: &[u8]) -> Self { fn next_char<'a>( iter: &mut <&[u8] as IntoIterator>::IntoIter, buf: &'a mut [u8; 4], ) -> Option<&'a [u8]> { const REPLACEMENT: &[u8] = "\u{FFFD}".as_bytes(); macro_rules! ensure_range { ($idx:literal, $range:pat) => {{ let mut i = iter.clone(); match i.next() { Some(&c) if matches!(c, $range) => { buf[$idx] = c; *iter = i; } _ => return Some(REPLACEMENT), } }}; } macro_rules! 
ensure_cont { ($idx:literal) => {{ ensure_range!($idx, 0x80..=0xBF); }}; } let c = *iter.next()?; buf[0] = c; match c { 0x00..=0x7F => { // simple ASCII: push as is Some(&buf[..1]) } 0xC2..=0xDF => { // two bytes ensure_cont!(1); Some(&buf[..2]) } 0xE0..=0xEF => { // three bytes match c { // 0x80..=0x9F encodes surrogate half 0xE0 => ensure_range!(1, 0xA0..=0xBF), // 0xA0..=0xBF encodes surrogate half 0xED => ensure_range!(1, 0x80..=0x9F), // all UTF-8 continuation bytes are valid _ => ensure_cont!(1), } ensure_cont!(2); Some(&buf[..3]) } 0xF0..=0xF4 => { // four bytes match c { // 0x80..=0x8F encodes overlong three byte codepoint 0xF0 => ensure_range!(1, 0x90..=0xBF), // 0x90..=0xBF encodes codepoint > U+10FFFF 0xF4 => ensure_range!(1, 0x80..=0x8F), // all UTF-8 continuation bytes are valid _ => ensure_cont!(1), } ensure_cont!(2); ensure_cont!(3); Some(&buf[..4]) } | 0x80..=0xBF // unicode continuation, invalid | 0xC0..=0xC1 // overlong one byte character | 0xF5..=0xF7 // four bytes that encode > U+10FFFF | 0xF8..=0xFB // five bytes, invalid | 0xFC..=0xFD // six bytes, invalid | 0xFE..=0xFF => Some(REPLACEMENT), // always invalid } } let mut buf = [0; 4]; let mut result = Self::with_capacity(v.len()); let mut iter = v.iter(); while let Some(s) = next_char(&mut iter, &mut buf) { // SAFETY: next_char() only returns valid strings let s = unsafe { core::str::from_utf8_unchecked(s) }; result.push_str(s); } result } fn from_utf16x( v: &[u8], from_int: impl Fn(u16) -> u16, from_bytes: impl Fn([u8; 2]) -> u16, ) -> Result { if v.len() % 2 != 0 { // Input had an odd number of bytes. return Err(Utf16Error(())); } // Note: we don't use collect::>() because that fails to pre-allocate a buffer, // even though the size of our iterator, `v`, is known ahead of time. 
// // rustlang issue #48994 is tracking the fix let mut result = CompactString::with_capacity(v.len() / 2); // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a // `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right. match unsafe { v.align_to::() } { (&[], v, &[]) => { // Input is correctly aligned. for c in core::char::decode_utf16(v.iter().copied().map(from_int)) { result.push(c.map_err(|_| Utf16Error(()))?); } } _ => { // Input's alignment is off. // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]` let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) }; for c in core::char::decode_utf16(v.iter().copied().map(from_bytes)) { result.push(c.map_err(|_| Utf16Error(()))?); } } } Ok(result) } fn from_utf16x_lossy( v: &[u8], from_int: impl Fn(u16) -> u16, from_bytes: impl Fn([u8; 2]) -> u16, ) -> Self { // Notice: We write the string "๏ฟฝ" instead of the character '๏ฟฝ', so the character does not // have to be formatted before it can be appended. let (trailing_extra_byte, v) = match v.len() % 2 != 0 { true => (true, &v[..v.len() - 1]), false => (false, v), }; let mut result = CompactString::with_capacity(v.len() / 2); // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a // `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right. match unsafe { v.align_to::() } { (&[], v, &[]) => { // Input is correctly aligned. for c in core::char::decode_utf16(v.iter().copied().map(from_int)) { match c { Ok(c) => result.push(c), Err(_) => result.push_str("๏ฟฝ"), } } } _ => { // Input's alignment is off. 
// SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]` let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) }; for c in core::char::decode_utf16(v.iter().copied().map(from_bytes)) { match c { Ok(c) => result.push(c), Err(_) => result.push_str("๏ฟฝ"), } } } } if trailing_extra_byte { result.push_str("๏ฟฝ"); } result } /// Decode a slice of bytes as UTF-16 encoded string, in little endian. /// /// # Errors /// /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters, /// a [`Utf16Error`] is returned. /// /// # Examples /// /// ``` /// # use compact_str::CompactString; /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xdc\x0d\x20\x42\x26\x0f\xfe"; /// let dancing_men = CompactString::from_utf16le(DANCING_MEN).unwrap(); /// assert_eq!(dancing_men, "๐Ÿ‘ฏโ€โ™‚๏ธ"); /// ``` #[inline] pub fn from_utf16le(v: impl AsRef<[u8]>) -> Result { CompactString::from_utf16x(v.as_ref(), u16::from_le, u16::from_le_bytes) } /// Decode a slice of bytes as UTF-16 encoded string, in big endian. /// /// # Errors /// /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters, /// a [`Utf16Error`] is returned. /// /// # Examples /// /// ``` /// # use compact_str::CompactString; /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xfe\x0f"; /// let dancing_women = CompactString::from_utf16be(DANCING_WOMEN).unwrap(); /// assert_eq!(dancing_women, "๐Ÿ‘ฏโ€โ™€๏ธ"); /// ``` #[inline] pub fn from_utf16be(v: impl AsRef<[u8]>) -> Result { CompactString::from_utf16x(v.as_ref(), u16::from_be, u16::from_be_bytes) } /// Lossy decode a slice of bytes as UTF-16 encoded string, in little endian. /// /// In this context "lossy" means that any broken characters in the input are replaced by the /// \ `'๏ฟฝ'`. Please notice that, unlike UTF-8, UTF-16 is not self /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken. 
/// /// # Examples /// /// ``` /// # use compact_str::CompactString; /// // A "random" bit was flipped in the 4th byte: /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xfc\x0d\x20\x42\x26\x0f\xfe"; /// let dancing_men = CompactString::from_utf16le_lossy(DANCING_MEN); /// assert_eq!(dancing_men, "๏ฟฝ\u{fc6f}\u{200d}โ™‚๏ธ"); /// ``` #[inline] pub fn from_utf16le_lossy(v: impl AsRef<[u8]>) -> Self { CompactString::from_utf16x_lossy(v.as_ref(), u16::from_le, u16::from_le_bytes) } /// Lossy decode a slice of bytes as UTF-16 encoded string, in big endian. /// /// In this context "lossy" means that any broken characters in the input are replaced by the /// \ `'๏ฟฝ'`. Please notice that, unlike UTF-8, UTF-16 is not self /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken. /// /// # Examples /// /// ``` /// # use compact_str::CompactString; /// // A "random" bit was flipped in the 9th byte: /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xde\x0f"; /// let dancing_women = CompactString::from_utf16be_lossy(DANCING_WOMEN); /// assert_eq!(dancing_women, "๐Ÿ‘ฏ\u{200d}โ™€๏ฟฝ"); /// ``` #[inline] pub fn from_utf16be_lossy(v: impl AsRef<[u8]>) -> Self { CompactString::from_utf16x_lossy(v.as_ref(), u16::from_be, u16::from_be_bytes) } /// Convert the [`CompactString`] into a [`String`]. /// /// # Examples /// /// ``` /// # use compact_str::CompactString; /// let s = CompactString::new("Hello world"); /// let s = s.into_string(); /// assert_eq!(s, "Hello world"); /// ``` pub fn into_string(self) -> String { self.0.into_string() } /// Convert a [`String`] into a [`CompactString`] _without inlining_. /// /// Note: You probably don't need to use this method, instead you should use `From` /// which is implemented for [`CompactString`]. /// /// This method exists incase your code is very sensitive to memory allocations. Normally when /// converting a [`String`] to a [`CompactString`] we'll inline short strings onto the stack. 
/// But this results in [`Drop`]-ing the original [`String`], which causes memory it owned on /// the heap to be deallocated. Instead when using this method, we always reuse the buffer that /// was previously owned by the [`String`], so no trips to the allocator are needed. /// /// # Examples /// /// ### Short Strings /// ``` /// use compact_str::CompactString; /// /// let short = "hello world".to_string(); /// let c_heap = CompactString::from_string_buffer(short); /// /// // using CompactString::from_string_buffer, we'll re-use the String's underlying buffer /// assert!(c_heap.is_heap_allocated()); /// /// // note: when Clone-ing a short heap allocated string, we'll eagerly inline at that point /// let c_inline = c_heap.clone(); /// assert!(!c_inline.is_heap_allocated()); /// /// assert_eq!(c_heap, c_inline); /// ``` /// /// ### Longer Strings /// ``` /// use compact_str::CompactString; /// /// let x = "longer string that will be on the heap".to_string(); /// let c1 = CompactString::from(x); /// /// let y = "longer string that will be on the heap".to_string(); /// let c2 = CompactString::from_string_buffer(y); /// /// // for longer strings, we re-use the underlying String's buffer in both cases /// assert!(c1.is_heap_allocated()); /// assert!(c2.is_heap_allocated()); /// ``` /// /// ### Buffer Re-use /// ``` /// use compact_str::CompactString; /// /// let og = "hello world".to_string(); /// let og_addr = og.as_ptr(); /// /// let mut c = CompactString::from_string_buffer(og); /// let ex_addr = c.as_ptr(); /// /// // When converting to/from String and CompactString with from_string_buffer we always re-use /// // the same underlying allocated memory/buffer /// assert_eq!(og_addr, ex_addr); /// /// let long = "this is a long string that will be on the heap".to_string(); /// let long_addr = long.as_ptr(); /// /// let mut long_c = CompactString::from(long); /// let long_ex_addr = long_c.as_ptr(); /// /// // When converting to/from String and CompactString with From, 
we'll also re-use the /// // underlying buffer, if the string is long, otherwise when converting to CompactString we /// // eagerly inline /// assert_eq!(long_addr, long_ex_addr); /// ``` #[inline] #[track_caller] pub fn from_string_buffer(s: String) -> Self { let repr = Repr::from_string(s, false).unwrap_with_msg(); CompactString(repr) } /// Returns a copy of this string where each character is mapped to its /// ASCII lower case equivalent. /// /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', /// but non-ASCII letters are unchanged. /// /// To lowercase the value in-place, use [`str::make_ascii_lowercase`]. /// /// To lowercase ASCII characters in addition to non-ASCII characters, use /// [`CompactString::to_lowercase`]. /// /// # Examples /// /// ``` /// use compact_str::CompactString; /// let s = CompactString::new("GrรผรŸe, Jรผrgen โค"); /// /// assert_eq!("grรผรŸe, jรผrgen โค", s.to_ascii_lowercase()); /// ``` #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"] #[inline] pub fn to_ascii_lowercase(&self) -> Self { let mut s = self.clone(); s.make_ascii_lowercase(); s } /// Returns a copy of this string where each character is mapped to its /// ASCII upper case equivalent. /// /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', /// but non-ASCII letters are unchanged. /// /// To uppercase the value in-place, use [`str::make_ascii_uppercase`]. /// /// To uppercase ASCII characters in addition to non-ASCII characters, use /// [`CompactString::to_uppercase`]. /// /// # Examples /// /// ``` /// use compact_str::CompactString; /// let s = CompactString::new("GrรผรŸe, Jรผrgen โค"); /// /// assert_eq!("GRรผรŸE, JรผRGEN โค", s.to_ascii_uppercase()); /// ``` #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"] #[inline] pub fn to_ascii_uppercase(&self) -> Self { let mut s = self.clone(); s.make_ascii_uppercase(); s } /// Returns the lowercase equivalent of this string slice, as a new [`CompactString`]. 
///
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
/// `Lowercase`.
///
/// Since some characters can expand into multiple characters when changing
/// the case, this function returns a [`CompactString`] instead of modifying the
/// parameter in-place.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use compact_str::CompactString;
/// let s = CompactString::new("HELLO");
///
/// assert_eq!("hello", s.to_lowercase());
/// ```
///
/// A tricky example, with sigma:
///
/// ```
/// use compact_str::CompactString;
/// let sigma = CompactString::new("Σ");
///
/// assert_eq!("σ", sigma.to_lowercase());
///
/// // but at the end of a word, it's ς, not σ:
/// let odysseus = CompactString::new("ὈΔΥΣΣΕΎΣ");
///
/// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
/// ```
///
/// Languages without case are not changed:
///
/// ```
/// use compact_str::CompactString;
/// let new_year = CompactString::new("农历新年");
///
/// assert_eq!(new_year, new_year.to_lowercase());
/// ```
#[must_use = "this returns the lowercase string as a new CompactString, \
              without modifying the original"]
pub fn to_lowercase(&self) -> Self {
    Self::from_str_to_lowercase(self.as_str())
}

/// Returns the lowercase equivalent of this string slice, as a new [`CompactString`].
///
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
/// `Lowercase`.
///
/// Since some characters can expand into multiple characters when changing
/// the case, this function returns a [`CompactString`] instead of modifying the
/// parameter in-place.
/// /// # Examples /// /// Basic usage: /// /// ``` /// use compact_str::CompactString; /// /// assert_eq!("hello", CompactString::from_str_to_lowercase("HELLO")); /// ``` /// /// A tricky example, with sigma: /// /// ``` /// use compact_str::CompactString; /// /// assert_eq!("ฯƒ", CompactString::from_str_to_lowercase("ฮฃ")); /// /// // but at the end of a word, it's ฯ‚, not ฯƒ: /// assert_eq!("แฝ€ฮดฯ…ฯƒฯƒฮตฯฯ‚", CompactString::from_str_to_lowercase("แฝˆฮ”ฮฅฮฃฮฃฮ•ฮŽฮฃ")); /// ``` /// /// Languages without case are not changed: /// /// ``` /// use compact_str::CompactString; /// /// let new_year = "ๅ†œๅކๆ–ฐๅนด"; /// assert_eq!(new_year, CompactString::from_str_to_lowercase(new_year)); /// ``` #[must_use = "this returns the lowercase string as a new CompactString, \ without modifying the original"] pub fn from_str_to_lowercase(input: &str) -> Self { let mut s = convert_while_ascii(input.as_bytes(), u8::to_ascii_lowercase); // Safety: we know this is a valid char boundary since // out.len() is only progressed if ascii bytes are found let rest = unsafe { input.get_unchecked(s.len()..) }; for (i, c) in rest.char_indices() { if c == 'ฮฃ' { // ฮฃ maps to ฯƒ, except at the end of a word where it maps to ฯ‚. // This is the only conditional (contextual) but language-independent mapping // in `SpecialCasing.txt`, // so hard-code it rather than have a generic "condition" mechanism. // See https://github.com/rust-lang/rust/issues/26035 map_uppercase_sigma(rest, i, &mut s) } else { s.extend(c.to_lowercase()); } } return s; fn map_uppercase_sigma(from: &str, i: usize, to: &mut CompactString) { // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992 // for the definition of `Final_Sigma`. 
debug_assert!('ฮฃ'.len_utf8() == 2); let is_word_final = case_ignorable_then_cased(from[..i].chars().rev()) && !case_ignorable_then_cased(from[i + 2..].chars()); to.push_str(if is_word_final { "ฯ‚" } else { "ฯƒ" }); } fn case_ignorable_then_cased>(mut iter: I) -> bool { use unicode_data::case_ignorable::lookup as Case_Ignorable; use unicode_data::cased::lookup as Cased; match iter.find(|&c| !Case_Ignorable(c)) { Some(c) => Cased(c), None => false, } } } /// Returns the uppercase equivalent of this string slice, as a new [`CompactString`]. /// /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property /// `Uppercase`. /// /// Since some characters can expand into multiple characters when changing /// the case, this function returns a [`CompactString`] instead of modifying the /// parameter in-place. /// /// # Examples /// /// Basic usage: /// /// ``` /// use compact_str::CompactString; /// let s = CompactString::new("hello"); /// /// assert_eq!("HELLO", s.to_uppercase()); /// ``` /// /// Scripts without case are not changed: /// /// ``` /// use compact_str::CompactString; /// let new_year = CompactString::new("ๅ†œๅކๆ–ฐๅนด"); /// /// assert_eq!(new_year, new_year.to_uppercase()); /// ``` /// /// One character can become multiple: /// ``` /// use compact_str::CompactString; /// let s = CompactString::new("tschรผรŸ"); /// /// assert_eq!("TSCHรœSS", s.to_uppercase()); /// ``` #[must_use = "this returns the uppercase string as a new CompactString, \ without modifying the original"] pub fn to_uppercase(&self) -> Self { Self::from_str_to_uppercase(self.as_str()) } /// Returns the uppercase equivalent of this string slice, as a new [`CompactString`]. /// /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property /// `Uppercase`. /// /// Since some characters can expand into multiple characters when changing /// the case, this function returns a [`CompactString`] instead of modifying the /// parameter in-place. 
/// /// # Examples /// /// Basic usage: /// /// ``` /// use compact_str::CompactString; /// /// assert_eq!("HELLO", CompactString::from_str_to_uppercase("hello")); /// ``` /// /// Scripts without case are not changed: /// /// ``` /// use compact_str::CompactString; /// /// let new_year = "ๅ†œๅކๆ–ฐๅนด"; /// assert_eq!(new_year, CompactString::from_str_to_uppercase(new_year)); /// ``` /// /// One character can become multiple: /// ``` /// use compact_str::CompactString; /// /// assert_eq!("TSCHรœSS", CompactString::from_str_to_uppercase("tschรผรŸ")); /// ``` #[must_use = "this returns the uppercase string as a new CompactString, \ without modifying the original"] pub fn from_str_to_uppercase(input: &str) -> Self { let mut out = convert_while_ascii(input.as_bytes(), u8::to_ascii_uppercase); // Safety: we know this is a valid char boundary since // out.len() is only progressed if ascii bytes are found let rest = unsafe { input.get_unchecked(out.len()..) }; for c in rest.chars() { out.extend(c.to_uppercase()); } out } } /// Converts the bytes while the bytes are still ascii. /// For better average performance, this is happens in chunks of `2*size_of::()`. /// Returns a vec with the converted bytes. 
/// /// Copied from https://doc.rust-lang.org/nightly/src/alloc/str.rs.html#623-666 #[inline] fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> CompactString { let mut out = CompactString::with_capacity(b.len()); const USIZE_SIZE: usize = mem::size_of::(); const MAGIC_UNROLL: usize = 2; const N: usize = USIZE_SIZE * MAGIC_UNROLL; const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; USIZE_SIZE]); let mut i = 0; unsafe { while i + N <= b.len() { // Safety: we have checks the sizes `b` and `out` to know that our let in_chunk = b.get_unchecked(i..i + N); let out_chunk = out.spare_capacity_mut().get_unchecked_mut(i..i + N); let mut bits = 0; for j in 0..MAGIC_UNROLL { // read the bytes 1 usize at a time (unaligned since we haven't checked the // alignment) safety: in_chunk is valid bytes in the range bits |= in_chunk.as_ptr().cast::().add(j).read_unaligned(); } // if our chunks aren't ascii, then return only the prior bytes as init if bits & NONASCII_MASK != 0 { break; } // perform the case conversions on N bytes (gets heavily autovec'd) for j in 0..N { // safety: in_chunk and out_chunk is valid bytes in the range let out = out_chunk.get_unchecked_mut(j); out.write(convert(in_chunk.get_unchecked(j))); } // mark these bytes as initialised i += N; } out.set_len(i); } out } impl Clone for CompactString { #[inline] fn clone(&self) -> Self { Self(self.0.clone()) } #[inline] fn clone_from(&mut self, source: &Self) { self.0.clone_from(&source.0) } } impl Default for CompactString { #[inline] fn default() -> Self { CompactString::new("") } } impl Deref for CompactString { type Target = str; #[inline] fn deref(&self) -> &str { self.as_str() } } impl DerefMut for CompactString { #[inline] fn deref_mut(&mut self) -> &mut str { self.as_mut_str() } } impl AsRef for CompactString { #[inline] fn as_ref(&self) -> &str { self.as_str() } } #[cfg(feature = "std")] impl AsRef for CompactString { #[inline] fn as_ref(&self) -> &OsStr { OsStr::new(self.as_str()) } } impl 
AsRef<[u8]> for CompactString { #[inline] fn as_ref(&self) -> &[u8] { self.as_bytes() } } impl Borrow for CompactString { #[inline] fn borrow(&self) -> &str { self.as_str() } } impl BorrowMut for CompactString { #[inline] fn borrow_mut(&mut self) -> &mut str { self.as_mut_str() } } impl Eq for CompactString {} impl + ?Sized> PartialEq for CompactString { fn eq(&self, other: &T) -> bool { self.as_str() == other.as_ref() } } impl PartialEq for &CompactString { fn eq(&self, other: &CompactString) -> bool { self.as_str() == other.as_str() } } impl PartialEq for String { fn eq(&self, other: &CompactString) -> bool { self.as_str() == other.as_str() } } impl<'a> PartialEq<&'a CompactString> for String { fn eq(&self, other: &&CompactString) -> bool { self.as_str() == other.as_str() } } impl PartialEq for &String { fn eq(&self, other: &CompactString) -> bool { self.as_str() == other.as_str() } } impl PartialEq for str { fn eq(&self, other: &CompactString) -> bool { self == other.as_str() } } impl<'a> PartialEq<&'a CompactString> for str { fn eq(&self, other: &&CompactString) -> bool { self == other.as_str() } } impl PartialEq for &str { fn eq(&self, other: &CompactString) -> bool { *self == other.as_str() } } impl PartialEq for &&str { fn eq(&self, other: &CompactString) -> bool { **self == other.as_str() } } impl<'a> PartialEq for Cow<'a, str> { fn eq(&self, other: &CompactString) -> bool { *self == other.as_str() } } impl<'a> PartialEq for &Cow<'a, str> { fn eq(&self, other: &CompactString) -> bool { *self == other.as_str() } } impl PartialEq for &CompactString { fn eq(&self, other: &String) -> bool { self.as_str() == other.as_str() } } impl<'a> PartialEq> for &CompactString { fn eq(&self, other: &Cow<'a, str>) -> bool { self.as_str() == other } } impl Ord for CompactString { fn cmp(&self, other: &Self) -> Ordering { self.as_str().cmp(other.as_str()) } } impl PartialOrd for CompactString { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Hash 
for CompactString { fn hash(&self, state: &mut H) { self.as_str().hash(state) } } impl<'a> From<&'a str> for CompactString { #[inline] #[track_caller] fn from(s: &'a str) -> Self { CompactString::new(s) } } impl From for CompactString { #[inline] #[track_caller] fn from(s: String) -> Self { let repr = Repr::from_string(s, true).unwrap_with_msg(); CompactString(repr) } } impl<'a> From<&'a String> for CompactString { #[inline] #[track_caller] fn from(s: &'a String) -> Self { CompactString::new(s) } } impl<'a> From> for CompactString { fn from(cow: Cow<'a, str>) -> Self { match cow { Cow::Borrowed(s) => s.into(), // we separate these two so we can re-use the underlying buffer in the owned case Cow::Owned(s) => s.into(), } } } impl From> for CompactString { #[inline] #[track_caller] fn from(b: Box) -> Self { let s = b.into_string(); let repr = Repr::from_string(s, true).unwrap_with_msg(); CompactString(repr) } } impl From for String { #[inline] fn from(s: CompactString) -> Self { s.into_string() } } impl From for Cow<'_, str> { #[inline] fn from(s: CompactString) -> Self { if let Some(s) = s.as_static_str() { Self::Borrowed(s) } else { Self::Owned(s.into_string()) } } } impl<'a> From<&'a CompactString> for Cow<'a, str> { #[inline] fn from(s: &'a CompactString) -> Self { Self::Borrowed(s) } } #[cfg(target_has_atomic = "ptr")] impl From for alloc::sync::Arc { fn from(value: CompactString) -> Self { Self::from(value.as_str()) } } impl From for alloc::rc::Rc { fn from(value: CompactString) -> Self { Self::from(value.as_str()) } } #[cfg(feature = "std")] impl From for Box { fn from(value: CompactString) -> Self { struct StringError(CompactString); impl std::error::Error for StringError { #[allow(deprecated)] fn description(&self) -> &str { &self.0 } } impl fmt::Display for StringError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.0, f) } } // Purposefully skip printing "StringError(..)" impl fmt::Debug for StringError { fn fmt(&self, 
f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Debug::fmt(&self.0, f) } } Box::new(StringError(value)) } } #[cfg(feature = "std")] impl From for Box { fn from(value: CompactString) -> Self { let err1: Box = From::from(value); let err2: Box = err1; err2 } } impl From for Box { fn from(value: CompactString) -> Self { if value.is_heap_allocated() { value.into_string().into_boxed_str() } else { Box::from(value.as_str()) } } } #[cfg(feature = "std")] impl From for std::ffi::OsString { fn from(value: CompactString) -> Self { Self::from(value.into_string()) } } #[cfg(feature = "std")] impl From for std::path::PathBuf { fn from(value: CompactString) -> Self { Self::from(std::ffi::OsString::from(value)) } } #[cfg(feature = "std")] impl AsRef for CompactString { fn as_ref(&self) -> &std::path::Path { std::path::Path::new(self.as_str()) } } impl From for alloc::vec::Vec { fn from(value: CompactString) -> Self { if value.is_heap_allocated() { value.into_string().into_bytes() } else { value.as_bytes().to_vec() } } } impl FromStr for CompactString { type Err = core::convert::Infallible; fn from_str(s: &str) -> Result { Ok(CompactString::from(s)) } } impl fmt::Debug for CompactString { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Debug::fmt(self.as_str(), f) } } impl fmt::Display for CompactString { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl FromIterator for CompactString { fn from_iter>(iter: T) -> Self { let repr = iter.into_iter().collect(); CompactString(repr) } } impl<'a> FromIterator<&'a char> for CompactString { fn from_iter>(iter: T) -> Self { let repr = iter.into_iter().collect(); CompactString(repr) } } impl<'a> FromIterator<&'a str> for CompactString { fn from_iter>(iter: T) -> Self { let repr = iter.into_iter().collect(); CompactString(repr) } } impl FromIterator> for CompactString { fn from_iter>>(iter: T) -> Self { let repr = iter.into_iter().collect(); CompactString(repr) } } 
impl<'a> FromIterator> for CompactString { fn from_iter>>(iter: T) -> Self { let repr = iter.into_iter().collect(); CompactString(repr) } } impl FromIterator for CompactString { fn from_iter>(iter: T) -> Self { let repr = iter.into_iter().collect(); CompactString(repr) } } impl FromIterator for CompactString { fn from_iter>(iter: T) -> Self { let repr = iter.into_iter().collect(); CompactString(repr) } } impl FromIterator for String { fn from_iter>(iter: T) -> Self { let mut iterator = iter.into_iter(); match iterator.next() { None => String::new(), Some(buf) => { let mut buf = buf.into_string(); buf.extend(iterator); buf } } } } impl FromIterator for Cow<'_, str> { fn from_iter>(iter: T) -> Self { String::from_iter(iter).into() } } impl Extend for CompactString { fn extend>(&mut self, iter: T) { self.0.extend(iter) } } impl<'a> Extend<&'a char> for CompactString { fn extend>(&mut self, iter: T) { self.0.extend(iter) } } impl<'a> Extend<&'a str> for CompactString { fn extend>(&mut self, iter: T) { self.0.extend(iter) } } impl Extend> for CompactString { fn extend>>(&mut self, iter: T) { self.0.extend(iter) } } impl<'a> Extend> for CompactString { fn extend>>(&mut self, iter: T) { iter.into_iter().for_each(move |s| self.push_str(&s)); } } impl Extend for CompactString { fn extend>(&mut self, iter: T) { self.0.extend(iter) } } impl Extend for String { fn extend>(&mut self, iter: T) { for s in iter { self.push_str(&s); } } } impl Extend for CompactString { fn extend>(&mut self, iter: T) { for s in iter { self.push_str(&s); } } } impl<'a> Extend for Cow<'a, str> { fn extend>(&mut self, iter: T) { self.to_mut().extend(iter); } } impl fmt::Write for CompactString { fn write_str(&mut self, s: &str) -> fmt::Result { self.push_str(s); Ok(()) } fn write_fmt(mut self: &mut Self, args: fmt::Arguments<'_>) -> fmt::Result { match args.as_str() { Some(s) => { if self.is_empty() && !self.is_heap_allocated() { // Since self is currently an empty inline variant or // an empty 
`StaticStr` variant, constructing a new one // with `Self::const_new` is more efficient since // it is guaranteed to be O(1). *self = Self::const_new(s); } else { self.push_str(s); } Ok(()) } None => fmt::write(&mut self, args), } } } impl Add<&str> for CompactString { type Output = Self; fn add(mut self, rhs: &str) -> Self::Output { self.push_str(rhs); self } } impl AddAssign<&str> for CompactString { fn add_assign(&mut self, rhs: &str) { self.push_str(rhs); } } /// A possible error value when converting a [`CompactString`] from a UTF-16 byte slice. /// /// This type is the error type for the [`from_utf16`] method on [`CompactString`]. /// /// [`from_utf16`]: CompactString::from_utf16 /// # Examples /// /// Basic usage: /// /// ``` /// # use compact_str::CompactString; /// // ๐„žmuic /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, /// 0xD800, 0x0069, 0x0063]; /// /// assert!(CompactString::from_utf16(v).is_err()); /// ``` #[derive(Copy, Clone, Debug)] pub struct Utf16Error(()); impl fmt::Display for Utf16Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt("invalid utf-16: lone surrogate found", f) } } /// An iterator over the exacted data by [`CompactString::drain()`]. #[must_use = "iterators are lazy and do nothing unless consumed"] pub struct Drain<'a> { compact_string: *mut CompactString, start: usize, end: usize, chars: core::str::Chars<'a>, } // SAFETY: Drain keeps the lifetime of the CompactString it belongs to. 
unsafe impl Send for Drain<'_> {} unsafe impl Sync for Drain<'_> {} impl fmt::Debug for Drain<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("Drain").field(&self.as_str()).finish() } } impl fmt::Display for Drain<'_> { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(self.as_str()) } } impl Drop for Drain<'_> { #[inline] fn drop(&mut self) { // SAFETY: Drain keeps a mutable reference to compact_string, so one one else can access // the CompactString, but this function right now. CompactString::drain() ensured // that the new extracted range does not split a UTF-8 character. unsafe { (*self.compact_string).replace_range_shrink(self.start, self.end, "") }; } } impl Drain<'_> { /// The remaining, unconsumed characters of the extracted substring. #[inline] pub fn as_str(&self) -> &str { self.chars.as_str() } } impl Deref for Drain<'_> { type Target = str; #[inline] fn deref(&self) -> &Self::Target { self.as_str() } } impl Iterator for Drain<'_> { type Item = char; #[inline] fn next(&mut self) -> Option { self.chars.next() } #[inline] fn count(self) -> usize { // ::count() is specialized, and cloning is trivial. self.chars.clone().count() } fn size_hint(&self) -> (usize, Option) { self.chars.size_hint() } #[inline] fn last(mut self) -> Option { self.chars.next_back() } } impl DoubleEndedIterator for Drain<'_> { #[inline] fn next_back(&mut self) -> Option { self.chars.next_back() } } impl FusedIterator for Drain<'_> {} /// A possible error value if allocating or resizing a [`CompactString`] failed. 
#[derive(Debug, Clone, Copy, PartialEq)] pub struct ReserveError(()); impl fmt::Display for ReserveError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str("Cannot allocate memory to hold CompactString") } } #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl std::error::Error for ReserveError {} /// A possible error value if [`ToCompactString::try_to_compact_string()`] failed. #[derive(Debug, Clone, Copy, PartialEq)] #[non_exhaustive] pub enum ToCompactStringError { /// Cannot allocate memory to hold CompactString Reserve(ReserveError), /// [`Display::fmt()`][core::fmt::Display::fmt] returned an error Fmt(fmt::Error), } impl fmt::Display for ToCompactStringError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ToCompactStringError::Reserve(err) => err.fmt(f), ToCompactStringError::Fmt(err) => err.fmt(f), } } } impl From for ToCompactStringError { #[inline] fn from(value: ReserveError) -> Self { Self::Reserve(value) } } impl From for ToCompactStringError { #[inline] fn from(value: fmt::Error) -> Self { Self::Fmt(value) } } #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl std::error::Error for ToCompactStringError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { ToCompactStringError::Reserve(err) => Some(err), ToCompactStringError::Fmt(err) => Some(err), } } } trait UnwrapWithMsg { type T; fn unwrap_with_msg(self) -> Self::T; } impl UnwrapWithMsg for Result { type T = T; #[inline(always)] #[track_caller] fn unwrap_with_msg(self) -> T { match self { Ok(value) => value, Err(err) => unwrap_with_msg_fail(err), } } } #[inline(never)] #[cold] #[track_caller] fn unwrap_with_msg_fail(error: E) -> ! 
{ panic!("{error}") } static_assertions::assert_eq_size!(CompactString, String); compact_str-0.8.0/src/macros.rs000064400000000000000000000030161046102023000145700ustar 00000000000000/// Creates a `CompactString` using interpolation of runtime expressions. /// /// The first argument `format_compact!` receives is a format string. /// This must be a string literal. /// The power of the formatting string is in the `{}`s contained. /// /// Additional parameters passed to `format_compact!` replace the `{}`s within /// the formatting string in the order given unless named or /// positional parameters are used; see [`std::fmt`] for more information. /// /// A common use for `format_compact!` is concatenation and interpolation /// of strings. /// The same convention is used with [`print!`] and [`write!`] macros, /// depending on the intended destination of the string. /// /// To convert a single value to a string, use the /// `ToCompactString::to_compact_string` method, which uses /// the [`std::fmt::Display`] formatting trait. /// /// # Panics /// /// `format_compact!` panics if a formatting trait implementation returns /// an error. /// /// This indicates an incorrect implementation since /// `ToCompactString::to_compact_string` never returns an error itself. #[macro_export] macro_rules! 
format_compact { ($($arg:tt)*) => { $crate::ToCompactString::to_compact_string(&$crate::core::format_args!($($arg)*)) } } #[cfg(test)] mod tests { #[test] fn test_macros() { assert_eq!(format_compact!("2"), "2"); assert_eq!(format_compact!("{}", 2), "2"); assert!(!format_compact!("2").is_heap_allocated()); assert!(!format_compact!("{}", 2).is_heap_allocated()); } } compact_str-0.8.0/src/repr/bytes.rs000064400000000000000000000144551046102023000154130ustar 00000000000000use core::str::Utf8Error; use bytes::Buf; use super::{ Repr, MAX_SIZE, }; use crate::UnwrapWithMsg; impl Repr { /// Converts a [`Buf`] of bytes to a [`Repr`], checking that the provided bytes are valid UTF-8 pub fn from_utf8_buf(buf: &mut B) -> Result { // SAFETY: We check below to make sure the provided buffer is valid UTF-8 let (repr, bytes_written) = unsafe { Self::collect_buf(buf) }; // Check to make sure the provided bytes are valid UTF-8, return the Repr if they are! match core::str::from_utf8(&repr.as_slice()[..bytes_written]) { Ok(_) => Ok(repr), Err(e) => Err(e), } } /// Converts a [`Buf`] of bytes to a [`Repr`], without checking for valid UTF-8 /// /// # Safety /// * The provided buffer must be valid UTF-8 pub unsafe fn from_utf8_buf_unchecked(buf: &mut B) -> Self { let (repr, _bytes_written) = Self::collect_buf(buf); repr } /// Collects the bytes from a [`Buf`] into a [`Repr`] /// /// # Safety /// * The caller must guarantee that `buf` is valid UTF-8 unsafe fn collect_buf(buf: &mut B) -> (Self, usize) { // Get an empty Repr we can write into let mut repr = super::EMPTY; let mut bytes_written = 0; debug_assert_eq!(repr.len(), bytes_written); while buf.has_remaining() { let chunk = buf.chunk(); let chunk_len = chunk.len(); // There's an edge case where the final byte of this buffer == `HEAP_MASK`, which is // invalid UTF-8, but would result in us creating an inline variant, that identifies as // a heap variant. 
If a user ever tried to reference the data at all, we'd incorrectly // try and read data from an invalid memory address, causing undefined behavior. if bytes_written < MAX_SIZE && bytes_written + chunk_len == MAX_SIZE { let last_byte = chunk[chunk_len - 1]; // If we hit the edge case, reserve additional space to make the repr becomes heap // allocated, which prevents us from writing this last byte inline if last_byte >= 0b11000000 { repr.reserve(MAX_SIZE + 1).unwrap_with_msg(); } } // reserve at least enough space to fit this chunk repr.reserve(chunk_len).unwrap_with_msg(); // SAFETY: The caller is responsible for making sure the provided buffer is UTF-8. This // invariant is documented in the public API let slice = repr.as_mut_buf(); // write the chunk into the Repr slice[bytes_written..bytes_written + chunk_len].copy_from_slice(chunk); // Set the length of the Repr // SAFETY: We just wrote an additional `chunk_len` bytes into the Repr bytes_written += chunk_len; repr.set_len(bytes_written); // advance the pointer of the buffer buf.advance(chunk_len); } (repr, bytes_written) } } #[cfg(test)] mod test { #[cfg(feature = "std")] use std::io::Cursor; use test_case::test_case; use super::Repr; #[test_case(""; "empty")] #[test_case("hello world"; "short")] #[test_case("hello, this is a long string which should be heap allocated"; "long")] fn test_from_utf8_buf(word: &'static str) { let mut buf = Cursor::new(word.as_bytes()); let repr = Repr::from_utf8_buf(&mut buf).unwrap(); assert_eq!(repr.as_str(), word); assert_eq!(repr.len(), word.len()); } #[test] fn test_from_utf8_packed() { cfg_if::cfg_if! 
{ if #[cfg(target_pointer_width = "64")] { let packed = "this string is 24 chars!"; } else if #[cfg(target_pointer_width = "32")] { let packed = "i am 12 char"; } else { compile_error!("unsupported architecture!") } } let mut buf = Cursor::new(packed.as_bytes()); let repr = Repr::from_utf8_buf(&mut buf).unwrap(); assert_eq!(repr.as_str(), packed); // This repr should __not__ be heap allocated assert!(!repr.is_heap_allocated()); } #[test] fn test_fuzz_panic() { let bytes = &[ 255, 255, 255, 255, 255, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 12, 0, 0, 96, ]; let mut buf: Cursor<&[u8]> = Cursor::new(bytes); assert!(Repr::from_utf8_buf(&mut buf).is_err()); } #[test] fn test_valid_repr_but_invalid_utf8() { let bytes = &[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, ]; let mut buf: Cursor<&[u8]> = Cursor::new(bytes); assert!(Repr::from_utf8_buf(&mut buf).is_err()); } #[test] fn test_fake_heap_variant() { let bytes = &[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, ]; let mut buf: Cursor<&[u8]> = Cursor::new(bytes); assert!(Repr::from_utf8_buf(&mut buf).is_err()); } #[test] fn test_from_non_contiguous() { let data = [ 211, 247, 211, 247, 121, 135, 151, 255, 126, 205, 255, 204, 211, 51, 51, 0, 52, 55, 247, 204, 45, 37, 44, 210, 132, 50, 206, 121, 135, 151, 255, 126, 205, 255, 204, 211, 51, 51, 0, 52, 55, 247, 204, 45, 44, 210, 132, 50, 206, 51, ]; let (front, back) = data.split_at(data.len() / 2 + 1); let mut queue = alloc::collections::VecDeque::with_capacity(data.len()); // create a non-contiguous slice of memory in queue front.into_iter().copied().for_each(|x| queue.push_back(x)); back.into_iter().copied().for_each(|x| queue.push_front(x)); // make sure it's non-contiguous let (a, b) = queue.as_slices(); assert!(data.is_empty() || !a.is_empty()); assert!(data.is_empty() || !b.is_empty()); assert_eq!(data.len(), queue.len()); assert!(Repr::from_utf8_buf(&mut queue).is_err()); } #[test] #[should_panic(expected = 
"Utf8Error")] fn test_invalid_utf8() { let invalid = &[0, 159]; let mut buf: Cursor<&[u8]> = Cursor::new(invalid); Repr::from_utf8_buf(&mut buf).unwrap(); } } compact_str-0.8.0/src/repr/capacity.rs000064400000000000000000000131241046102023000160520ustar 00000000000000use core::fmt; use crate::repr::LastUtf8Char; // how many bytes a `usize` occupies const USIZE_SIZE: usize = core::mem::size_of::(); /// Mask of bits in [`Capacity`] that encode the value. const VALID_MASK: usize = { let mut bytes = [255; USIZE_SIZE]; bytes[USIZE_SIZE - 1] = 0; usize::from_ne_bytes(bytes) }; /// Mask of bits that are set in [`Capacity`] if the string data is stored on the heap. const HEAP_MARKER: usize = { let mut bytes = [0; USIZE_SIZE]; bytes[USIZE_SIZE - 1] = LastUtf8Char::Heap as u8; usize::from_ne_bytes(bytes) }; /// State that describes the capacity as being stored on the heap. /// /// All bytes `255`, with the last being [`LastUtf8Char::Heap`], using the same amount of bytes /// as `usize`. Example (64-bit): `[255, 255, 255, 255, 255, 255, 255, 216]` const CAPACITY_IS_ON_THE_HEAP: Capacity = Capacity(VALID_MASK | HEAP_MARKER); /// The maximum value we're able to store, e.g. on 64-bit arch this is 2^56 - 2. pub const MAX_VALUE: usize = { let mut bytes = [255; USIZE_SIZE]; bytes[USIZE_SIZE - 1] = 0; usize::from_le_bytes(bytes) - 1 }; /// An integer type that uses `core::mem::size_of::() - 1` bytes to store the capacity of /// a heap buffer. /// /// Assuming a 64-bit arch, a [`super::BoxString`] uses 8 bytes for a pointer, 8 bytes for a /// length, and then needs 1 byte for a discriminant. We need to store the capacity somewhere, and /// we could store it on the heap, but we also have 7 unused bytes. [`Capacity`] handles storing a /// value in these 7 bytes, returning an error if it's not possible, at which point we'll store the /// capacity on the heap. 
/// /// # Max Values /// * __64-bit:__ `(2 ^ (7 * 8)) - 2 = 72_057_594_037_927_934 ~= 64 petabytes` /// * __32-bit:__ `(2 ^ (3 * 8)) - 2 = 16_777_214 ~= 16 megabytes` /// /// Practically speaking, on a 64-bit architecture we'll never need to store the capacity on the /// heap, because with it's impossible to create a string that is 64 petabytes or larger. But for /// 32-bit architectures we need to be able to store a capacity larger than 16 megabytes, since a /// string larger than 16 megabytes probably isn't that uncommon. #[derive(Copy, Clone, PartialEq, Eq)] #[repr(transparent)] pub struct Capacity(usize); static_assertions::assert_eq_size!(Capacity, usize); static_assertions::assert_eq_align!(Capacity, usize); impl fmt::Debug for Capacity { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Capacity(0x{:x})", usize::from_le(self.0)) } } impl Capacity { #[inline] pub const fn new(capacity: usize) -> Self { cfg_if::cfg_if! { if #[cfg(target_pointer_width = "64")] { // on 64-bit arches we can always fit the capacity inline debug_assert!(capacity <= MAX_VALUE); Capacity(capacity.to_le() | HEAP_MARKER) } else if #[cfg(target_pointer_width = "32")] { // on 32-bit arches we might need to store the capacity on the heap if capacity > MAX_VALUE { // if we need the last byte to encode this capacity then we need to put the capacity on // the heap. return an Error so `BoxString` can do the right thing CAPACITY_IS_ON_THE_HEAP } else { // otherwise, we can store this capacity inline! 
Set the last byte to be our `LastUtf8Char::Heap as u8` // for our discriminant, using the leading bytes to store the actual value Capacity(capacity.to_le() | HEAP_MARKER) } } else { compile_error!("Unsupported target_pointer_width"); } } } /// Re-interprets a [`Capacity`] as a `usize` /// /// # SAFETY: /// * `self` must be less than or equal to [`MAX_VALUE`] #[inline(always)] pub unsafe fn as_usize(self) -> usize { usize::from_le(self.0 & VALID_MASK) } /// Returns whether or not this [`Capacity`] has a value that indicates the capacity is being /// stored on the heap #[inline(always)] pub fn is_heap(self) -> bool { self == CAPACITY_IS_ON_THE_HEAP } } #[cfg(test)] mod tests { use super::Capacity; #[test] fn test_zero_roundtrips() { let og = 0; let cap = Capacity::new(og); let after = unsafe { cap.as_usize() }; assert_eq!(og, after); } #[test] fn test_max_value() { let available_bytes = (core::mem::size_of::() - 1) as u32; let max_value = 2usize.pow(available_bytes * 8) - 2; #[cfg(target_pointer_width = "64")] assert_eq!(max_value, 72057594037927934); #[cfg(target_pointer_width = "32")] assert_eq!(max_value, 16777214); let cap = Capacity::new(max_value); let after = unsafe { cap.as_usize() }; assert_eq!(max_value, after); } #[cfg(target_pointer_width = "32")] #[test] fn test_invalid_value() { let invalid_val = usize::MAX; let cap = Capacity::new(invalid_val); let after = unsafe { cap.as_usize() }; // anything greater than or equal to 16777215, should "resolve" to 16777215 assert_eq!(16777215, after); } #[test] #[cfg_attr(miri, ignore)] fn test_all_valid_32bit_values() { #[cfg(target_pointer_width = "32")] assert_eq!(16_777_214, super::MAX_VALUE); for i in 0..=16_777_214 { let cap = Capacity::new(i); let val = unsafe { cap.as_usize() }; assert_eq!(val, i, "value roundtriped to wrong value?"); } } } compact_str-0.8.0/src/repr/heap.rs000064400000000000000000000473001046102023000151750ustar 00000000000000use core::alloc::Layout; use core::{ cmp, mem, ptr, }; use 
super::capacity::Capacity; use super::{ Repr, MAX_SIZE, }; use crate::{ ReserveError, UnwrapWithMsg, }; /// The minimum size we'll allocate on the heap is one usize larger than our max inline size const MIN_HEAP_SIZE: usize = MAX_SIZE + mem::size_of::(); const UNKNOWN: usize = 0; pub type StrBuffer = [u8; UNKNOWN]; /// [`HeapBuffer`] grows at an amortized rates of 1.5x /// /// Note: this is different than [`std::string::String`], which grows at a rate of 2x. It's debated /// which is better, for now we'll stick with a rate of 1.5x #[inline(always)] pub fn amortized_growth(cur_len: usize, additional: usize) -> usize { let required = cur_len.saturating_add(additional); let amortized = cur_len.saturating_mul(3) / 2; amortized.max(required) } #[repr(C)] pub struct HeapBuffer { pub ptr: ptr::NonNull, pub len: usize, pub cap: Capacity, } static_assertions::assert_eq_size!(HeapBuffer, Repr); static_assertions::assert_eq_align!(HeapBuffer, Repr); impl HeapBuffer { /// Create a [`HeapBuffer`] with the provided text #[inline] pub fn new(text: &str) -> Result { let len = text.len(); let (cap, ptr) = allocate_ptr(len)?; // copy our string into the buffer we just allocated // // SAFETY: We know both `src` and `dest` are valid for respectively reads and writes of // length `len` because `len` comes from `src`, and `dest` was allocated to be at least that // length. 
We also know they're non-overlapping because `dest` is newly allocated unsafe { ptr.as_ptr().copy_from_nonoverlapping(text.as_ptr(), len) }; Ok(HeapBuffer { ptr, len, cap }) } /// Create an empty [`HeapBuffer`] with a specific capacity #[inline] pub fn with_capacity(capacity: usize) -> Result { let len = 0; let (cap, ptr) = allocate_ptr(capacity)?; Ok(HeapBuffer { ptr, len, cap }) } /// Create a [`HeapBuffer`] with `text` that has _at least_ `additional` bytes of capacity /// /// To prevent frequent re-allocations, this method will create a [`HeapBuffer`] with a capacity /// of `text.len() + additional` or `text.len() * 1.5`, whichever is greater #[inline] pub fn with_additional(text: &str, additional: usize) -> Result { let len = text.len(); let new_capacity = amortized_growth(len, additional); let (cap, ptr) = allocate_ptr(new_capacity)?; // copy our string into the buffer we just allocated // // SAFETY: We know both `src` and `dest` are valid for respectively reads and writes of // length `len` because `len` comes from `src`, and `dest` was allocated to be at least that // length. 
We also know they're non-overlapping because `dest` is newly allocated unsafe { ptr.as_ptr().copy_from_nonoverlapping(text.as_ptr(), len) }; Ok(HeapBuffer { ptr, len, cap }) } /// Return the capacity of the [`HeapBuffer`] #[inline] pub fn capacity(&self) -> usize { #[cold] fn read_capacity_from_heap(this: &HeapBuffer) -> usize { // re-adjust the pointer to include the capacity that's on the heap let adj_ptr: *const u8 = this.ptr.as_ptr().wrapping_sub(mem::size_of::()); let mut buf = [0u8; mem::size_of::()]; // SAFETY: `src` and `dst` don't overlap, and are valid for usize number of bytes unsafe { ptr::copy_nonoverlapping(adj_ptr, buf.as_mut_ptr(), mem::size_of::()); } usize::from_ne_bytes(buf) } if self.cap.is_heap() { read_capacity_from_heap(self) } else { // SAFETY: Checked above that the capacity is on the stack unsafe { self.cap.as_usize() } } } /// Try to grow the [`HeapBuffer`] by reallocating, returning an error if we fail pub fn realloc(&mut self, new_capacity: usize) -> Result { let new_cap = Capacity::new(new_capacity); // We can't reallocate to a size less than our length, or else we'd clip the string if new_capacity < self.len { return Err(()); } // HeapBuffer doesn't support 0 byte heap sizes if new_capacity == 0 { return Err(()); } // Always allocate at least MIN_HEAP_SIZE let new_capacity = cmp::max(new_capacity, MIN_HEAP_SIZE); let (new_cap, new_ptr) = match (self.cap.is_heap(), new_cap.is_heap()) { // both current and new capacity can be stored inline (false, false) => { // SAFETY: checked above that our capacity is valid let cap = unsafe { self.cap.as_usize() }; // current capacity is the same as the new, nothing to do! 
if cap == new_capacity { return Ok(new_capacity); } let cur_layout = inline_capacity::layout(cap); let new_layout = inline_capacity::layout(new_capacity); let new_size = new_layout.size(); // It's possible `new_size` could overflow since inline_capacity::layout pads for // alignment if new_size < new_capacity { return Err(()); } // SAFETY: // * We're using the same allocator that we used for `ptr` // * The layout is the same because we checked that the capacity is inline // * `new_size` will be > 0, we return early if the requested capacity is 0 // * Checked above if `new_size` overflowed when rounding to alignment match ptr::NonNull::new(unsafe { ::alloc::alloc::realloc(self.ptr.as_ptr(), cur_layout, new_size) }) { Some(ptr) => (new_cap, ptr), None => return Err(()), } } // both current and new capacity need to be stored on the heap (true, true) => { let cur_layout = heap_capacity::layout(self.capacity()); let new_layout = heap_capacity::layout(new_capacity); let new_size = new_layout.size(); // alloc::realloc requires that size > 0 debug_assert!(new_size > 0); // It's possible `new_size` could overflow since heap_capacity::layout requires a // few additional bytes if new_size < new_capacity { return Err(()); } // move our pointer back one WORD since our capacity is behind it let raw_ptr = self.ptr.as_ptr(); let adj_ptr = raw_ptr.wrapping_sub(mem::size_of::()); // SAFETY: // * We're using the same allocator that we used for `ptr` // * The layout is the same because we checked that the capacity is on the heap // * `new_size` will be > 0, we return early if the requested capacity is 0 // * Checked above if `new_size` overflowed when rounding to alignment let cap_ptr = unsafe { alloc::alloc::realloc(adj_ptr, cur_layout, new_size) }; // Check if reallocation succeeded if cap_ptr.is_null() { return Err(()); } // Our allocation succeeded! 
Write the new capacity // // SAFETY: // * `src` and `dst` are both valid for reads of `usize` number of bytes // * `src` and `dst` don't overlap because we created `src` unsafe { ptr::copy_nonoverlapping( new_capacity.to_ne_bytes().as_ptr(), cap_ptr, mem::size_of::(), ) }; // Finally, adjust our pointer backup so it points at the string content let str_ptr = cap_ptr.wrapping_add(mem::size_of::()); // SAFETY: We checked above to make sure the pointer was non-null let ptr = unsafe { ptr::NonNull::new_unchecked(str_ptr) }; (new_cap, ptr) } // capacity is currently inline or on the heap, but needs to move, can't realloc because // we'd need to change the layout! (false, true) | (true, false) => return Err(()), }; // set our new pointer and new capacity self.ptr = new_ptr; self.cap = new_cap; Ok(new_capacity) } /// Set's the length of the content for this [`HeapBuffer`] /// /// # SAFETY: /// * The caller must guarantee that `len` bytes in the buffer are valid UTF-8 #[inline] pub unsafe fn set_len(&mut self, len: usize) { self.len = len; } /// Deallocates the memory owned by the provided [`HeapBuffer`] #[inline] pub fn dealloc(&mut self) { deallocate_ptr(self.ptr, self.cap); } } impl Clone for HeapBuffer { fn clone(&self) -> Self { // Create a new HeapBuffer with the same capacity as the original let mut new = Self::with_capacity(self.capacity()).unwrap_with_msg(); // SAFETY: // * `src` and `dst` don't overlap because we just created `dst` // * `src` and `dst` are both valid for `self.len` bytes because self.len < capacity unsafe { new.ptr .as_ptr() .copy_from_nonoverlapping(self.ptr.as_ptr(), self.len) }; // SAFETY: // * We copied the text from self, which is valid UTF-8 unsafe { new.set_len(self.len) }; new } } impl Drop for HeapBuffer { fn drop(&mut self) { self.dealloc() } } /// Allocates a buffer on the heap that we can use to store a string, optionally stores the capacity /// of said buffer on the heap. 
/// /// Returns a [`Capacity`] that either indicates the capacity is stored on the heap, or is stored /// in the `Capacity` itself. #[inline] pub fn allocate_ptr(capacity: usize) -> Result<(Capacity, ptr::NonNull), ReserveError> { // We allocate at least MIN_HEAP_SIZE bytes because we need to allocate at least one byte let capacity = capacity.max(MIN_HEAP_SIZE); let cap = Capacity::new(capacity); // HeapBuffer doesn't support 0 sized allocations, we should always allocate at least // MIN_HEAP_SIZE bytes debug_assert!(capacity > 0); #[cold] fn allocate_with_capacity_on_heap(capacity: usize) -> Result, ReserveError> { // write our capacity onto the heap // SAFETY: we know that the capacity is not zero let ptr = unsafe { heap_capacity::alloc(capacity)? }; // SAFETY: // * `src` and `dst` don't overlap and are both valid for `usize` bytes unsafe { ptr::copy_nonoverlapping( capacity.to_ne_bytes().as_ptr(), ptr.as_ptr(), mem::size_of::(), ) }; let raw_ptr = ptr.as_ptr().wrapping_add(core::mem::size_of::()); // SAFETY: We know `raw_ptr` is non-null because we just created it Ok(unsafe { ptr::NonNull::new_unchecked(raw_ptr) }) } let ptr = if cap.is_heap() { allocate_with_capacity_on_heap(capacity) } else { unsafe { inline_capacity::alloc(capacity) } }; Ok((cap, ptr?)) } /// Deallocates a buffer on the heap, handling when the capacity is also stored on the heap #[inline] pub fn deallocate_ptr(ptr: ptr::NonNull, cap: Capacity) { #[cold] fn deallocate_with_capacity_on_heap(ptr: ptr::NonNull) { // re-adjust the pointer to include the capacity that's on the heap let adj_ptr = ptr.as_ptr().wrapping_sub(mem::size_of::()); // read the capacity from the heap so we know how much to deallocate let mut buf = [0u8; mem::size_of::()]; // SAFETY: `src` and `dst` don't overlap, and are valid for usize number of bytes unsafe { ptr::copy_nonoverlapping(adj_ptr, buf.as_mut_ptr(), mem::size_of::()); } let capacity = usize::from_ne_bytes(buf); // SAFETY: We know the pointer is not null since we 
got it as a NonNull let ptr = unsafe { ptr::NonNull::new_unchecked(adj_ptr) }; // SAFETY: We checked above that our capacity is on the heap, and we readjusted the // pointer to reference the capacity unsafe { heap_capacity::dealloc(ptr, capacity) } } if cap.is_heap() { deallocate_with_capacity_on_heap(ptr); } else { // SAFETY: Our capacity is always inline on 64-bit archs unsafe { inline_capacity::dealloc(ptr, cap.as_usize()) } } } /// SAFETY: `layout` must not be zero sized #[inline] pub unsafe fn do_alloc(layout: Layout) -> Result, ReserveError> { debug_assert!(layout.size() > 0); // SAFETY: `alloc(...)` has undefined behavior if the layout is zero-sized. We specify that // `capacity` must be > 0 as a constraint to uphold the safety of this method. If capacity // is greater than 0, then our layout will be non-zero-sized. let raw_ptr = ::alloc::alloc::alloc(layout); // Check to make sure our pointer is non-null. // Implementations are encouraged to return null on memory exhaustion rather than aborting. ptr::NonNull::new(raw_ptr).ok_or(ReserveError(())) } mod heap_capacity { use core::{ alloc, ptr, }; use super::{ do_alloc, StrBuffer, }; use crate::ReserveError; /// SAFETY: `capacity` must not be zero pub unsafe fn alloc(capacity: usize) -> Result, ReserveError> { do_alloc(layout(capacity)) } /// Deallocates a pointer which references a `HeapBuffer` whose capacity is on the heap /// /// # Safety /// * `ptr` must point to the start of a `HeapBuffer` whose capacity is on the heap. i.e. 
we /// must have `ptr -> [cap ; string]` pub unsafe fn dealloc(ptr: ptr::NonNull, capacity: usize) { let layout = layout(capacity); ::alloc::alloc::dealloc(ptr.as_ptr(), layout); } #[repr(C)] struct HeapBufferInnerHeapCapacity { capacity: usize, buffer: StrBuffer, } #[inline(always)] pub fn layout(capacity: usize) -> alloc::Layout { let buffer_layout = alloc::Layout::array::(capacity).expect("valid capacity"); alloc::Layout::new::() .extend(buffer_layout) .expect("valid layout") .0 .pad_to_align() } } mod inline_capacity { use core::{ alloc, ptr, }; use super::{ do_alloc, StrBuffer, }; use crate::ReserveError; /// # SAFETY: /// * `capacity` must be > 0 pub unsafe fn alloc(capacity: usize) -> Result, ReserveError> { do_alloc(layout(capacity)) } /// Deallocates a pointer which references a `HeapBuffer` whose capacity is stored inline /// /// # Safety /// * `ptr` must point to the start of a `HeapBuffer` whose capacity is on the inline pub unsafe fn dealloc(ptr: ptr::NonNull, capacity: usize) { let layout = layout(capacity); ::alloc::alloc::dealloc(ptr.as_ptr(), layout); } #[repr(C)] struct HeapBufferInnerInlineCapacity { buffer: StrBuffer, } #[inline(always)] pub fn layout(capacity: usize) -> alloc::Layout { let buffer_layout = alloc::Layout::array::(capacity).expect("valid capacity"); alloc::Layout::new::() .extend(buffer_layout) .expect("valid layout") .0 .pad_to_align() } } #[cfg(test)] mod test { use test_case::test_case; use super::{ HeapBuffer, MIN_HEAP_SIZE, }; const EIGHTEEN_MB: usize = 18 * 1024 * 1024; #[test] fn test_min_capacity() { let h = HeapBuffer::new("short").unwrap(); assert_eq!(h.capacity(), MIN_HEAP_SIZE); } #[test_case(&[42; 8]; "short")] #[test_case(&[42; 50]; "long")] #[test_case(&[42; EIGHTEEN_MB]; "huge")] fn test_capacity(buf: &[u8]) { // we know the buffer is valid UTF-8 let s = unsafe { core::str::from_utf8_unchecked(buf) }; let h = HeapBuffer::new(s).unwrap(); assert_eq!(h.capacity(), core::cmp::max(s.len(), MIN_HEAP_SIZE)); } 
#[test_case(&[42; 0], 0, Err(MIN_HEAP_SIZE); "empty_empty")] #[test_case(&[42; 64], 0, Err(64); "short_empty")] #[test_case(&[42; 64], 32, Err(64); "short_to_shorter")] #[test_case(&[42; 64], 128, Ok(128); "short_to_longer")] #[test_case(&[42; EIGHTEEN_MB], EIGHTEEN_MB + 128, Ok(EIGHTEEN_MB + 128); "heap_to_heap")] fn test_realloc(buf: &[u8], realloc: usize, result: Result) { // we know the buffer is valid UTF-8 let s = unsafe { core::str::from_utf8_unchecked(buf) }; let mut h = HeapBuffer::new(s).unwrap(); // reallocate, asserting our result let expected_cap = match result { Ok(c) | Err(c) => c, }; let expected_res = result.map_err(|_| ()); assert_eq!(h.realloc(realloc), expected_res); assert_eq!(h.capacity(), expected_cap); } #[test] fn test_realloc_inline_to_heap() { // we know the buffer is valid UTF-8 let s = unsafe { core::str::from_utf8_unchecked(&[42; 128]) }; let mut h = HeapBuffer::new(s).unwrap(); cfg_if::cfg_if! { if #[cfg(target_pointer_width = "64")] { let expected_result = Ok(EIGHTEEN_MB); let expected_capacity = EIGHTEEN_MB; } else if #[cfg(target_pointer_width = "32")] { // on 32-bit architectures we'd need to change the layout from capacity being inline // to the capacity being on the heap, which isn't possible let expected_result = Err(()); let expected_capacity = 128; } else { compile_error!("Unsupported pointer width!"); } } assert_eq!(h.realloc(EIGHTEEN_MB), expected_result); assert_eq!(h.capacity(), expected_capacity); } #[test_case(&[42; 64], 128, 100, Ok(100); "sanity")] fn test_realloc_shrink( buf: &[u8], realloc_one: usize, realloc_two: usize, exp_result: Result, ) { // we know the buffer is valid UTF-8 let s = unsafe { core::str::from_utf8_unchecked(buf) }; let mut h = HeapBuffer::new(s).unwrap(); assert!( realloc_one > realloc_two, "we have to grow before we can shrink" ); // grow our allocation assert_eq!(h.realloc(realloc_one), Ok(realloc_one)); // shrink our allocation, asserting our result let expected_cap = match exp_result { Ok(c) 
| Err(c) => c, }; let expected_res = exp_result.map_err(|_| ()); assert_eq!(h.realloc(realloc_two), expected_res); assert_eq!(h.capacity(), expected_cap); } #[test] fn test_realloc_shrink_heap_to_inline() { // TODO: test this case assert_eq!(1, 1); } #[test_case(&[42; 0]; "empty")] #[test_case(&[42; 3]; "short")] #[test_case(&[42; 64]; "long")] #[test_case(&[42; EIGHTEEN_MB]; "huge")] fn test_clone(buf: &[u8]) { let s = unsafe { core::str::from_utf8_unchecked(buf) }; let h_a = HeapBuffer::new(s).unwrap(); let h_b = h_a.clone(); assert_eq!(h_a.capacity(), h_b.capacity()); } } compact_str-0.8.0/src/repr/inline.rs000064400000000000000000000151621046102023000155370ustar 00000000000000use core::ptr; use super::{ Repr, LENGTH_MASK, MAX_SIZE, }; /// A buffer stored on the stack whose size is equal to the stack size of `String` #[cfg(target_pointer_width = "64")] #[repr(C, align(8))] pub struct InlineBuffer(pub [u8; MAX_SIZE]); #[cfg(target_pointer_width = "32")] #[repr(C, align(4))] pub struct InlineBuffer(pub [u8; MAX_SIZE]); static_assertions::assert_eq_size!(InlineBuffer, Repr); static_assertions::assert_eq_align!(InlineBuffer, Repr); impl InlineBuffer { /// Construct a new [`InlineString`]. 
A string that lives in a small buffer on the stack /// /// SAFETY: /// * The caller must guarantee that the length of `text` is less than [`MAX_SIZE`] #[inline] pub unsafe fn new(text: &str) -> Self { debug_assert!(text.len() <= MAX_SIZE); let len = text.len(); let mut buffer = InlineBuffer([0u8; MAX_SIZE]); // set the length in the last byte buffer.0[MAX_SIZE - 1] = len as u8 | LENGTH_MASK; // copy the string into our buffer // // note: in the case where len == MAX_SIZE, we'll overwrite the len, but that's okay because // when reading the length we can detect that the last byte is part of UTF-8 and return a // length of MAX_SIZE // // SAFETY: // * src (`text`) is valid for `len` bytes because `len` comes from `text` // * dst (`buffer`) is valid for `len` bytes because we assert src is less than MAX_SIZE // * src and dst don't overlap because we created dst // ptr::copy_nonoverlapping(text.as_ptr(), buffer.0.as_mut_ptr(), len); buffer } #[inline] pub const fn new_const(text: &str) -> Self { if text.len() > MAX_SIZE { panic!("Provided string has a length greater than our MAX_SIZE"); } let len = text.len(); let mut buffer = [0u8; MAX_SIZE]; // set the length buffer[MAX_SIZE - 1] = len as u8 | LENGTH_MASK; // Note: for loops aren't allowed in `const fn`, hence the while. // Note: Iterating forward results in badly optimized code, because the compiler tries to // unroll the loop. 
let text = text.as_bytes(); let mut i = len; while i > 0 { buffer[i - 1] = text[i - 1]; i -= 1; } InlineBuffer(buffer) } /// Returns an empty [`InlineBuffer`] #[inline(always)] pub const fn empty() -> Self { Self::new_const("") } /// Consumes the [`InlineBuffer`] returning the entire underlying array and the length of the /// string that it contains #[inline] #[cfg(feature = "smallvec")] pub fn into_array(self) -> ([u8; MAX_SIZE], usize) { let mut buffer = self.0; let length = core::cmp::min( (buffer[MAX_SIZE - 1].wrapping_sub(LENGTH_MASK)) as usize, MAX_SIZE, ); let last_byte_ref = &mut buffer[MAX_SIZE - 1]; // unset the last byte of the buffer if it's just storing the length of the string // // Note: we should never add an `else` statement here, keeping the conditional simple allows // the compiler to optimize this to a conditional-move instead of a branch if length < MAX_SIZE { *last_byte_ref = 0; } (buffer, length) } /// Set's the length of the content for this [`InlineBuffer`] /// /// # SAFETY: /// * The caller must guarantee that `len` bytes in the buffer are valid UTF-8 #[inline] pub unsafe fn set_len(&mut self, len: usize) { debug_assert!(len <= MAX_SIZE); // If `length` == MAX_SIZE, then we infer the length to be the capacity of the buffer. 
We // can infer this because the way we encode length doesn't overlap with any valid UTF-8 // bytes if len < MAX_SIZE { self.0[MAX_SIZE - 1] = len as u8 | LENGTH_MASK; } } } #[cfg(test)] mod tests { #[rustversion::since(1.63)] #[test] #[ignore] // we run this in CI, but unless you're compiling in release, this takes a while fn test_unused_utf8_bytes() { use rayon::prelude::*; // test to validate for all char the first and last bytes are never within a specified range // note: according to the UTF-8 spec it shouldn't be, but we double check that here (0..u32::MAX).into_par_iter().for_each(|i| { if let Ok(c) = char::try_from(i) { let mut buf = [0_u8; 4]; c.encode_utf8(&mut buf); // check ranges for first byte match buf[0] { x @ 128..=191 => panic!("first byte within 128..=191, {}", x), x @ 248..=255 => panic!("first byte within 248..=255, {}", x), _ => (), } // check ranges for last byte if let x @ 192..=255 = buf[c.len_utf8() - 1] { panic!("last byte within 192..=255, {}", x) } } }) } #[cfg(feature = "smallvec")] mod smallvec { use alloc::string::String; use quickcheck_macros::quickcheck; use crate::repr::{ InlineBuffer, MAX_SIZE, }; #[test] fn test_into_array() { let s = "hello world!"; let inline = unsafe { InlineBuffer::new(s) }; let (array, length) = inline.into_array(); assert_eq!(s.len(), length); // all bytes after the length should be 0 assert!(array[length..].iter().all(|b| *b == 0)); // taking a string slice should give back the same string as the original let ex_s = unsafe { core::str::from_utf8_unchecked(&array[..length]) }; assert_eq!(s, ex_s); } #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_into_array(s: String) { let mut total_length = 0; let s: String = s .chars() .take_while(|c| { total_length += c.len_utf8(); total_length < MAX_SIZE }) .collect(); let inline = unsafe { InlineBuffer::new(&s) }; let (array, length) = inline.into_array(); assert_eq!(s.len(), length); // all bytes after the length should be 0 
assert!(array[length..].iter().all(|b| *b == 0)); // taking a string slice should give back the same string as the original let ex_s = unsafe { core::str::from_utf8_unchecked(&array[..length]) }; assert_eq!(s, ex_s); } } } compact_str-0.8.0/src/repr/iter.rs000064400000000000000000000127211046102023000152220ustar 00000000000000//! Implementations of the [`FromIterator`] trait to make building [`Repr`]s more ergonomic use alloc::borrow::Cow; use alloc::boxed::Box; use alloc::string::String; use super::{ InlineBuffer, Repr, EMPTY, MAX_SIZE, }; use crate::{ CompactString, UnwrapWithMsg, }; impl FromIterator for Repr { #[inline] fn from_iter>(iter: T) -> Self { let iter = iter.into_iter(); let (lower_bound, _) = iter.size_hint(); let mut this = match Repr::with_capacity(lower_bound) { Ok(this) => this, Err(_) => EMPTY, // Ignore the error and hope that the lower_bound is incorrect. }; for c in iter { this.push_str(c.encode_utf8(&mut [0; 4])); } this } } impl<'a> FromIterator<&'a char> for Repr { fn from_iter>(iter: T) -> Self { iter.into_iter().copied().collect() } } fn from_as_ref_str_iterator(mut iter: I) -> Repr where S: AsRef, I: Iterator, String: core::iter::Extend, String: FromIterator, { // Note: We don't check the lower bound here like we do in the character iterator because it's // possible for the iterator to be full of empty strings! In which case checking the lower bound // could cause us to heap allocate when there's no need. 
// Continuously pull strings from the iterator let mut curr_len = 0; let mut inline_buf = InlineBuffer::new_const(""); while let Some(s) = iter.next() { let str_slice = s.as_ref(); let bytes_len = str_slice.len(); // this new string is too large to fit into our inline buffer, so heap allocate the rest if bytes_len + curr_len > MAX_SIZE { let (min_remaining, _) = iter.size_hint(); let mut string = String::with_capacity(bytes_len + curr_len + min_remaining); // push existing strings onto the heap // SAFETY: `inline_buf` has been filled with `&str`s which are valid UTF-8 string.push_str(unsafe { core::str::from_utf8_unchecked(&inline_buf.0[..curr_len]) }); // push current string onto the heap string.push_str(str_slice); // extend heap with remaining strings string.extend(iter); return Repr::from_string(string, true).unwrap_with_msg(); } // write the current string into a slice of the unoccupied space inline_buf.0[curr_len..][..bytes_len].copy_from_slice(str_slice.as_bytes()); curr_len += bytes_len; } // SAFETY: Everything we just pushed onto the buffer is a `str` which is valid UTF-8 unsafe { inline_buf.set_len(curr_len) } Repr::from_inline(inline_buf) } impl<'a> FromIterator<&'a str> for Repr { fn from_iter>(iter: T) -> Self { from_as_ref_str_iterator(iter.into_iter()) } } impl FromIterator> for Repr { fn from_iter>>(iter: T) -> Self { from_as_ref_str_iterator(iter.into_iter()) } } impl FromIterator for Repr { fn from_iter>(iter: T) -> Self { from_as_ref_str_iterator(iter.into_iter()) } } impl FromIterator for Repr { fn from_iter>(iter: T) -> Self { from_as_ref_str_iterator(iter.into_iter()) } } impl<'a> FromIterator> for Repr { fn from_iter>>(iter: T) -> Self { from_as_ref_str_iterator(iter.into_iter()) } } #[cfg(test)] mod tests { use alloc::string::String; use super::Repr; #[test] fn short_char_iter() { let chars = ['a', 'b', 'c']; let repr: Repr = chars.iter().collect(); assert_eq!(repr.as_str(), "abc"); assert!(!repr.is_heap_allocated()); } #[test] fn 
short_char_ref_iter() { let chars = ['a', 'b', 'c']; let repr: Repr = chars.iter().collect(); assert_eq!(repr.as_str(), "abc"); assert!(!repr.is_heap_allocated()); } #[test] #[cfg_attr(target_pointer_width = "32", ignore)] fn packed_char_iter() { let chars = [ '\u{92f01}', '\u{81515}', '\u{81515}', '\u{81515}', '\u{81515}', '\u{41515}', ]; let repr: Repr = chars.iter().collect(); let s: String = chars.iter().collect(); assert_eq!(repr.as_str(), s.as_str()); assert!(!repr.is_heap_allocated()); } #[test] fn long_char_iter() { let long = "This is supposed to be a really long string"; let repr: Repr = long.chars().collect(); assert_eq!(repr.as_str(), "This is supposed to be a really long string"); assert!(repr.is_heap_allocated()); } #[test] fn short_string_iter() { let strings = vec!["hello", "world"]; let repr: Repr = strings.into_iter().collect(); assert_eq!(repr.as_str(), "helloworld"); assert!(!repr.is_heap_allocated()); } #[test] fn long_short_string_iter() { let strings = vec![ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", ]; let repr: Repr = strings.into_iter().collect(); assert_eq!(repr.as_str(), "1234567891011121314151617181920"); assert!(repr.is_heap_allocated()); } } compact_str-0.8.0/src/repr/last_utf8_char.rs000064400000000000000000000101341046102023000171610ustar 00000000000000use alloc::string::String; /// [`LastUtf8Char`] is an unsigned 8-bit integer data type that has a valid range of `[0, 216]`. /// Excluding `[217, 255]` allows the Rust compiler to use these values as niches. 
/// /// Specifically the compiler can use a value in this range to encode the `None` variant of /// `Option` allowing /// `std::mem::size_of:: == std::mem::size_of::>()` #[allow(dead_code)] #[derive(Copy, Clone, Debug)] #[repr(u8)] pub enum LastUtf8Char { // single character, ASCII: V0 = 0, V1 = 1, V2 = 2, V3 = 3, V4 = 4, V5 = 5, V6 = 6, V7 = 7, V8 = 8, V9 = 9, V10 = 10, V11 = 11, V12 = 12, V13 = 13, V14 = 14, V15 = 15, V16 = 16, V17 = 17, V18 = 18, V19 = 19, V20 = 20, V21 = 21, V22 = 22, V23 = 23, V24 = 24, V25 = 25, V26 = 26, V27 = 27, V28 = 28, V29 = 29, V30 = 30, V31 = 31, V32 = 32, V33 = 33, V34 = 34, V35 = 35, V36 = 36, V37 = 37, V38 = 38, V39 = 39, V40 = 40, V41 = 41, V42 = 42, V43 = 43, V44 = 44, V45 = 45, V46 = 46, V47 = 47, V48 = 48, V49 = 49, V50 = 50, V51 = 51, V52 = 52, V53 = 53, V54 = 54, V55 = 55, V56 = 56, V57 = 57, V58 = 58, V59 = 59, V60 = 60, V61 = 61, V62 = 62, V63 = 63, V64 = 64, V65 = 65, V66 = 66, V67 = 67, V68 = 68, V69 = 69, V70 = 70, V71 = 71, V72 = 72, V73 = 73, V74 = 74, V75 = 75, V76 = 76, V77 = 77, V78 = 78, V79 = 79, V80 = 80, V81 = 81, V82 = 82, V83 = 83, V84 = 84, V85 = 85, V86 = 86, V87 = 87, V88 = 88, V89 = 89, V90 = 90, V91 = 91, V92 = 92, V93 = 93, V94 = 94, V95 = 95, V96 = 96, V97 = 97, V98 = 98, V99 = 99, V100 = 100, V101 = 101, V102 = 102, V103 = 103, V104 = 104, V105 = 105, V106 = 106, V107 = 107, V108 = 108, V109 = 109, V110 = 110, V111 = 111, V112 = 112, V113 = 113, V114 = 114, V115 = 115, V116 = 116, V117 = 117, V118 = 118, V119 = 119, V120 = 120, V121 = 121, V122 = 122, V123 = 123, V124 = 124, V125 = 125, V126 = 126, V127 = 127, // trailing byte in a multi-byte UTF-8 sequence V128 = 128, V129 = 129, V130 = 130, V131 = 131, V132 = 132, V133 = 133, V134 = 134, V135 = 135, V136 = 136, V137 = 137, V138 = 138, V139 = 139, V140 = 140, V141 = 141, V142 = 142, V143 = 143, V144 = 144, V145 = 145, V146 = 146, V147 = 147, V148 = 148, V149 = 149, V150 = 150, V151 = 151, V152 = 152, V153 = 153, V154 = 154, V155 = 155, V156 = 156, 
V157 = 157, V158 = 158, V159 = 159, V160 = 160, V161 = 161, V162 = 162, V163 = 163, V164 = 164, V165 = 165, V166 = 166, V167 = 167, V168 = 168, V169 = 169, V170 = 170, V171 = 171, V172 = 172, V173 = 173, V174 = 174, V175 = 175, V176 = 176, V177 = 177, V178 = 178, V179 = 179, V180 = 180, V181 = 181, V182 = 182, V183 = 183, V184 = 184, V185 = 185, V186 = 186, V187 = 187, V188 = 188, V189 = 189, V190 = 190, V191 = 191, // Cannot be last character of a UTF-8 sequence (leading byte of a sequence) // V192 .. V244, // Can never occur in UTF-8 (start for a codepoint > U+10FFFF) // V245 .. 255, // length marker: L0 = 192, L1 = 193, L2 = 194, L3 = 195, L4 = 196, L5 = 197, L6 = 198, L7 = 199, L8 = 200, L9 = 201, L10 = 202, L11 = 203, L12 = 204, L13 = 205, L14 = 206, L15 = 207, L16 = 208, L17 = 209, L18 = 210, L19 = 211, L20 = 212, L21 = 213, L22 = 214, L23 = 215, Heap = 216, Static = 217, } static_assertions::assert_eq_size!(LastUtf8Char, Option, u8); static_assertions::const_assert!(core::mem::size_of::() <= 24); compact_str-0.8.0/src/repr/mod.rs000064400000000000000000001210151046102023000150330ustar 00000000000000use alloc::borrow::Cow; use alloc::boxed::Box; use core::str::Utf8Error; use core::{ mem, ptr, }; #[cfg(feature = "bytes")] mod bytes; #[cfg(feature = "smallvec")] mod smallvec; mod capacity; mod heap; mod inline; mod iter; mod last_utf8_char; mod num; mod static_str; mod traits; use alloc::string::String; use capacity::Capacity; use heap::HeapBuffer; use inline::InlineBuffer; use last_utf8_char::LastUtf8Char; use static_str::StaticStr; pub(crate) use traits::IntoRepr; use crate::{ ReserveError, UnwrapWithMsg, }; /// The max size of a string we can fit inline pub const MAX_SIZE: usize = core::mem::size_of::(); /// Used as a discriminant to identify different variants pub const HEAP_MASK: u8 = LastUtf8Char::Heap as u8; /// Used for `StaticStr` variant pub const STATIC_STR_MASK: u8 = LastUtf8Char::Static as u8; /// When our string is stored inline, we represent the 
length of the string in the last byte, offset /// by `LENGTH_MASK` pub const LENGTH_MASK: u8 = 0b11000000; const EMPTY: Repr = Repr::const_new(""); #[repr(C)] pub struct Repr( // We have a pointer in the representation to properly carry provenance *const (), // Then we need two `usize`s (aka WORDs) of data, for the first we just define a `usize`... usize, // ...but the second we breakup into multiple pieces... #[cfg(target_pointer_width = "64")] u32, u16, u8, // ...so that the last byte can be a NonMax, which allows the compiler to see a niche value LastUtf8Char, ); static_assertions::assert_eq_size!([u8; MAX_SIZE], Repr); unsafe impl Send for Repr {} unsafe impl Sync for Repr {} impl Repr { #[inline] pub fn new(text: &str) -> Result { let len = text.len(); if len == 0 { Ok(EMPTY) } else if len <= MAX_SIZE { // SAFETY: We checked that the length of text is less than or equal to MAX_SIZE let inline = unsafe { InlineBuffer::new(text) }; Ok(Repr::from_inline(inline)) } else { HeapBuffer::new(text).map(Repr::from_heap) } } #[inline] pub const fn const_new(text: &'static str) -> Self { if text.len() <= MAX_SIZE { let inline = InlineBuffer::new_const(text); Repr::from_inline(inline) } else { let repr = StaticStr::new(text); Repr::from_static(repr) } } /// Create a [`Repr`] with the provided `capacity` #[inline] pub fn with_capacity(capacity: usize) -> Result { if capacity <= MAX_SIZE { Ok(EMPTY) } else { HeapBuffer::with_capacity(capacity).map(Repr::from_heap) } } /// Create a [`Repr`] from a slice of bytes that is UTF-8 #[inline] pub fn from_utf8>(buf: B) -> Result { // Get a &str from the Vec, failing if it's not valid UTF-8 let s = core::str::from_utf8(buf.as_ref())?; // Construct a Repr from the &str Ok(Self::new(s).unwrap_with_msg()) } /// Create a [`Repr`] from a slice of bytes that is UTF-8, without validating that it is indeed /// UTF-8 /// /// # Safety /// * The caller must guarantee that `buf` is valid UTF-8 #[inline] pub unsafe fn from_utf8_unchecked>(buf: B) 
-> Result { let bytes = buf.as_ref(); let bytes_len = bytes.len(); // Create a Repr with enough capacity for the entire buffer let mut repr = Repr::with_capacity(bytes_len)?; // There's an edge case where the final byte of this buffer == `HEAP_MASK`, which is // invalid UTF-8, but would result in us creating an inline variant, that identifies as // a heap variant. If a user ever tried to reference the data at all, we'd incorrectly // try and read data from an invalid memory address, causing undefined behavior. if bytes_len == MAX_SIZE { let last_byte = bytes[bytes_len - 1]; // If we hit the edge case, reserve additional space to make the repr becomes heap // allocated, which prevents us from writing this last byte inline if last_byte >= 0b11000000 { repr.reserve(MAX_SIZE + 1)?; } } // SAFETY: The caller is responsible for making sure the provided buffer is UTF-8. This // invariant is documented in the public API let slice = repr.as_mut_buf(); // write the chunk into the Repr slice[..bytes_len].copy_from_slice(bytes); // Set the length of the Repr // SAFETY: We just wrote the entire `buf` into the Repr repr.set_len(bytes_len); Ok(repr) } /// Create a [`Repr`] from a [`String`], in `O(1)` time. We'll attempt to inline the string /// if `should_inline` is `true` /// /// Note: If the provided [`String`] is >16 MB and we're on a 32-bit arch, we'll copy the /// `String`. #[inline] pub fn from_string(s: String, should_inline: bool) -> Result { let og_cap = s.capacity(); let cap = Capacity::new(og_cap); #[cold] fn capacity_on_heap(s: String) -> Result { HeapBuffer::new(s.as_str()).map(Repr::from_heap) } #[cold] fn empty() -> Result { Ok(EMPTY) } if cap.is_heap() { // We only hit this case if the provided String is > 16MB and we're on a 32-bit arch. 
/// Converts a [`Repr`] into a [`String`], in `O(1)` time, if possible
///
/// * Heap allocated with the capacity stored inline: the existing allocation is handed to the
///   returned `String`, no bytes are copied.
/// * Heap allocated with the capacity stored on the heap (`cap.is_heap()`): the bytes are
///   copied into a new `String`.
/// * Not heap allocated: the bytes are copied into a new `String`.
#[inline]
pub fn into_string(self) -> String {
    /// Slow path: builds the `String` by copying the bytes out of the buffer.
    #[cold]
    fn into_string_heap(this: HeapBuffer) -> String {
        // SAFETY: We know pointer is valid for `length` bytes
        let slice = unsafe { core::slice::from_raw_parts(this.ptr.as_ptr(), this.len) };
        // SAFETY: A `Repr` contains valid UTF-8
        let s = unsafe { core::str::from_utf8_unchecked(slice) };

        String::from(s)
    }

    if self.is_heap_allocated() {
        // SAFETY: we just checked that the discriminant indicates we're a HeapBuffer
        let heap_buffer = unsafe { self.into_heap() };

        if heap_buffer.cap.is_heap() {
            // We don't expect capacity to be on the heap often, so we mark it as cold
            into_string_heap(heap_buffer)
        } else {
            // Wrap the HeapBuffer in a ManuallyDrop so the underlying buffer doesn't get freed;
            // ownership of the allocation moves into the returned String.
            let this = mem::ManuallyDrop::new(heap_buffer);

            // SAFETY: We checked above to make sure capacity is valid
            let cap = unsafe { this.cap.as_usize() };

            // SAFETY:
            // * The memory in `ptr` was previously allocated by the same allocator the standard
            //   library uses, with a required alignment of exactly 1.
            // * `length` is less than or equal to capacity, due to internal invariants.
            // * `capacity` is correctly maintained internally.
            // * A `HeapBuffer` only ever contains valid UTF-8.
            unsafe { String::from_raw_parts(this.ptr.as_ptr(), this.len, cap) }
        }
    } else {
        String::from(self.as_str())
    }
}

/// Reserves at least `additional` bytes. If there is already enough capacity to store
/// `additional` bytes this is a no-op
///
/// # Errors
/// Returns a [`ReserveError`] when `len + additional` overflows `usize`, or when creating the
/// replacement `HeapBuffer` fails.
#[inline]
pub fn reserve(&mut self, additional: usize) -> Result<(), ReserveError> {
    let len = self.len();
    let needed_capacity = len.checked_add(additional).ok_or(ReserveError(()))?;

    if !self.is_static_str() && needed_capacity <= self.capacity() {
        // we already have enough space, no-op
        // If self.is_static_str() is true, then we would have to convert
        // it to other variants since static_str variant cannot be modified.
        Ok(())
    } else if needed_capacity <= MAX_SIZE {
        // It's possible to have a `Repr` that is heap allocated with a capacity less than
        // MAX_SIZE, if that `Repr` was created From a String or Box<str>
        //
        // SAFETY: Our needed_capacity is >= our length, which is <= than MAX_SIZE
        let inline = unsafe { InlineBuffer::new(self.as_str()) };
        // The bytes were copied into `inline` above, so replacing `self` is fine here.
        *self = Repr::from_inline(inline);
        Ok(())
    } else if !self.is_heap_allocated() {
        // We're not heap allocated, but need to be, create a HeapBuffer
        let heap = HeapBuffer::with_additional(self.as_str(), additional)?;
        *self = Repr::from_heap(heap);
        Ok(())
    } else {
        // We're already heap allocated, but we need more capacity
        //
        // SAFETY: We checked above to see if we're heap allocated
        let heap_buffer = unsafe { self.as_mut_heap() };

        // To reduce allocations, we amortize our growth
        let amortized_capacity = heap::amortized_growth(len, additional);
        // Attempt to grow our capacity, allocating a new HeapBuffer on failure
        if heap_buffer.realloc(amortized_capacity).is_err() {
            // Create a new HeapBuffer
            let heap = HeapBuffer::with_additional(self.as_str(), additional)?;
            *self = Repr::from_heap(heap);
        }

        Ok(())
    }
}

/// Shrinks the capacity towards `max(len, min_capacity)`.
///
/// Only heap allocated strings are affected. If the target capacity fits within `MAX_SIZE`
/// the content is moved into an inline buffer; otherwise the heap buffer is shrunk in place
/// or, failing that, copied into a smaller allocation. Allocation failures are ignored and
/// leave the capacity unchanged.
pub fn shrink_to(&mut self, min_capacity: usize) {
    // Note: We can't shrink the inline variant since its buffer is a fixed size
    // or the static str variant since it is just a pointer, so we only
    // take action here if our string is heap allocated
    if !self.is_heap_allocated() {
        return;
    }

    // SAFETY: We just checked the discriminant to make sure we're heap allocated
    let heap = unsafe { self.as_mut_heap() };

    let old_capacity = heap.capacity();
    let new_capacity = heap.len.max(min_capacity);

    if new_capacity <= MAX_SIZE {
        // Inline string if possible.

        let mut inline = InlineBuffer::empty();
        // SAFETY: Our src is on the heap, so it does not overlap with our new inline
        // buffer, and the src is a `Repr` so we can assume it's valid UTF-8
        unsafe {
            inline
                .0
                .as_mut_ptr()
                .copy_from_nonoverlapping(heap.ptr.as_ptr(), heap.len)
        };
        // SAFETY: The src we wrote from was a `Repr` which we can assume is valid UTF-8
        unsafe { inline.set_len(heap.len) }
        *self = Repr::from_inline(inline);
        return;
    }

    // Return if the string cannot be shrunk.
    if new_capacity >= old_capacity {
        return;
    }

    // Try to shrink in-place.
    if heap.realloc(new_capacity).is_ok() {
        return;
    }

    // Otherwise try to allocate a new, smaller chunk.
    // We can ignore the error. The string keeps its old capacity, but that's okay.
    if let Ok(mut new_this) = Repr::with_capacity(new_capacity) {
        new_this.push_str(self.as_str());
        *self = new_this;
    }
}
if s.is_empty() { return; } let len = self.len(); let str_len = s.len(); // Reserve at least enough space to fit `s` self.reserve(str_len).unwrap_with_msg(); // SAFETY: `s` which we're appending to the buffer, is valid UTF-8 let slice = unsafe { self.as_mut_buf() }; let push_buffer = &mut slice[len..len + str_len]; debug_assert_eq!(push_buffer.len(), s.as_bytes().len()); // Copy the string into our buffer push_buffer.copy_from_slice(s.as_bytes()); // Increment the length of our string // // SAFETY: We appended `s` which is valid UTF-8, and if our size became greater than // MAX_SIZE, our call to reserve would make us heap allocated unsafe { self.set_len(len + str_len) }; } #[inline] pub fn pop(&mut self) -> Option { let ch = self.as_str().chars().next_back()?; // SAFETY: We know this is is a valid length which falls on a char boundary unsafe { self.set_len(self.len() - ch.len_utf8()) }; Some(ch) } /// Returns the string content, and only the string content, as a slice of bytes. #[inline] pub fn as_slice(&self) -> &[u8] { // initially has the value of the stack pointer, conditionally becomes the heap pointer let mut pointer = self as *const Self as *const u8; let heap_pointer = self.0 as *const u8; if self.last_byte() >= HEAP_MASK { pointer = heap_pointer; } // initially has the value of the stack length, conditionally becomes the heap length let mut length = core::cmp::min( self.last_byte().wrapping_sub(LENGTH_MASK) as usize, MAX_SIZE, ); let heap_length = self.1; if self.last_byte() >= HEAP_MASK { length = heap_length; } // SAFETY: We know the data is valid, aligned, and part of the same contiguous allocated // chunk. 
It's also valid for the lifetime of self unsafe { core::slice::from_raw_parts(pointer, length) } } #[inline] pub fn as_str(&self) -> &str { // SAFETY: A `Repr` contains valid UTF-8 unsafe { core::str::from_utf8_unchecked(self.as_slice()) } } /// Returns the length of the string that we're storing #[inline] pub fn len(&self) -> usize { // This ugly looking code results in two conditional moves and only one comparison, without // branching. The outcome of a comparison is a tristate `{lt, eq, gt}`, but the compiler // won't use this optimization if you match on `len_inline.cmp(&MAX_SIZE)`, so we have to // do it manually. // Force the compiler to read the variable, so it won't put the reading in a branch. let len_heap = ensure_read(self.1); let last_byte = self.last_byte(); // Extending the variable early results in fewer instructions, because loading and // extending can be done in one instruction. let mut len = (last_byte as usize) .wrapping_sub(LENGTH_MASK as usize) .min(MAX_SIZE); // our discriminant is stored in the last byte and denotes stack vs heap // // Note: We should never add an `else` statement here, keeping the conditional simple allows // the compiler to optimize this to a conditional-move instead of a branch if last_byte >= HEAP_MASK { len = len_heap; } len } /// Returns `true` if the length is 0, `false` otherwise #[inline] pub fn is_empty(&self) -> bool { let len_heap = ensure_read(self.1); let last_byte = self.last_byte() as usize; let mut len = last_byte.wrapping_sub(LastUtf8Char::L0 as u8 as usize); if last_byte >= LastUtf8Char::Heap as u8 as usize { len = len_heap; } len == 0 } /// Returns the overall capacity of the underlying buffer #[inline] pub fn capacity(&self) -> usize { #[cold] fn heap_capacity(this: &Repr) -> usize { // SAFETY: We just checked the discriminant to make sure we're heap allocated let heap_buffer = unsafe { this.as_heap() }; heap_buffer.capacity() } if let Some(s) = self.as_static_str() { s.len() } else if 
self.is_heap_allocated() { heap_capacity(self) } else { MAX_SIZE } } #[inline(always)] pub fn is_heap_allocated(&self) -> bool { let last_byte = self.last_byte(); last_byte == HEAP_MASK } #[inline(always)] const fn is_static_str(&self) -> bool { let last_byte = self.last_byte(); last_byte == STATIC_STR_MASK } #[inline] #[rustversion::attr(since(1.64), const)] pub fn as_static_str(&self) -> Option<&'static str> { if self.is_static_str() { // SAFETY: A `Repr` is transmuted from `StaticStr` let s: &StaticStr = unsafe { &*(self as *const Self as *const StaticStr) }; Some(s.get_text()) } else { None } } #[inline] fn as_static_variant_mut(&mut self) -> Option<&mut StaticStr> { if self.is_static_str() { // SAFETY: A `Repr` is transmuted from `StaticStr` let s: &mut StaticStr = unsafe { &mut *(self as *mut Self as *mut StaticStr) }; Some(s) } else { None } } /// Return a mutable reference to the entirely underlying buffer /// /// # Safety /// * Callers must guarantee that any modifications made to the buffer are valid UTF-8 pub unsafe fn as_mut_buf(&mut self) -> &mut [u8] { #[cold] fn inline_static_str(this: &mut Repr) { if let Some(s) = this.as_static_str() { *this = Repr::new(s).unwrap_with_msg(); } } if self.is_static_str() { inline_static_str(self); } // the last byte stores our discriminant and stack length let last_byte = self.last_byte(); let (ptr, cap) = if last_byte == HEAP_MASK { // SAFETY: We just checked the discriminant to make sure we're heap allocated let heap_buffer = self.as_heap(); let ptr = heap_buffer.ptr.as_ptr(); let cap = heap_buffer.capacity(); (ptr, cap) } else { let ptr = self as *mut Self as *mut u8; (ptr, MAX_SIZE) }; // SAFETY: Our data is valid for `cap` bytes, and is initialized core::slice::from_raw_parts_mut(ptr, cap) } /// Sets the length of the string that our underlying buffer contains /// /// # Safety /// * `len` bytes in the buffer must be valid UTF-8 /// * If the underlying buffer is stored inline, `len` must be <= MAX_SIZE pub unsafe 
fn set_len(&mut self, len: usize) { if let Some(s) = self.as_static_variant_mut() { s.set_len(len); } else if self.is_heap_allocated() { // SAFETY: We just checked the discriminant to make sure we're heap allocated let heap_buffer = self.as_mut_heap(); // SAFETY: The caller guarantees that `len` bytes is valid UTF-8 heap_buffer.set_len(len); } else { // SAFETY: We just checked the discriminant to make sure we're an InlineBuffer let inline_buffer = self.as_mut_inline(); // SAFETY: The caller guarantees that len <= MAX_SIZE, and `len` bytes is valid UTF-8 inline_buffer.set_len(len); } } /// Returns the last byte that's on the stack. /// /// The last byte stores the discriminant that indicates whether the string is on the stack or /// on the heap. When the string is on the stack the last byte also stores the length #[inline(always)] const fn last_byte(&self) -> u8 { cfg_if::cfg_if! { if #[cfg(target_pointer_width = "64")] { let last_byte = self.5; } else if #[cfg(target_pointer_width = "32")] { let last_byte = self.4; } else { compile_error!("Unsupported target_pointer_width"); } }; last_byte as u8 } /// Reinterprets an [`InlineBuffer`] into a [`Repr`] /// /// Note: This is safe because [`InlineBuffer`] and [`Repr`] are the same size. We used to /// define [`Repr`] as a `union` which implicitly transmuted between the two types, but that /// prevented us from defining a "niche" value to make `Option` the same size as /// just `CompactString` #[inline(always)] const fn from_inline(inline: InlineBuffer) -> Self { // SAFETY: An `InlineBuffer` and `Repr` have the same size unsafe { core::mem::transmute(inline) } } /// Reinterprets a [`HeapBuffer`] into a [`Repr`] /// /// Note: This is safe because [`HeapBuffer`] and [`Repr`] are the same size. 
/// Reinterprets a [`HeapBuffer`] into a [`Repr`]
///
/// Note: This is safe because [`HeapBuffer`] and [`Repr`] are the same size. We used to define
/// [`Repr`] as a `union` which implicitly transmuted between the two types, but that prevented
/// us from defining a "niche" value to make `Option<CompactString>` the same size as just
/// `CompactString`
#[inline(always)]
const fn from_heap(heap: HeapBuffer) -> Self {
    // SAFETY: A `HeapBuffer` and `Repr` have the same size
    unsafe { core::mem::transmute(heap) }
}

/// Reinterprets a [`StaticStr`] into a [`Repr`]
///
/// Note: This is safe because [`StaticStr`] and [`Repr`] are the same size. We used to define
/// [`Repr`] as a `union` which implicitly transmuted between the two types, but that prevented
/// us from defining a "niche" value to make `Option<CompactString>` the same size as just
/// `CompactString`
#[inline(always)]
// NOTE(review): the parameter is named `heap` but carries a `StaticStr`.
const fn from_static(heap: StaticStr) -> Self {
    // SAFETY: A `StaticStr` and `Repr` have the same size
    unsafe { core::mem::transmute(heap) }
}

/// Reinterprets a [`Repr`] as a [`HeapBuffer`]
///
/// # Safety
/// * The caller must guarantee that the provided [`Repr`] is actually a [`HeapBuffer`] by
///   checking the discriminant.
///
/// Note: We used to define [`Repr`] as a `union` which implicitly transmuted between the two
/// types, but that prevented us from defining a "niche" value to make `Option<CompactString>`
/// the same size as just `CompactString`
#[inline(always)]
const unsafe fn into_heap(self) -> HeapBuffer {
    core::mem::transmute(self)
}

/// Reinterprets a `&mut Repr` as a `&mut HeapBuffer`
///
/// # Safety
/// * The caller must guarantee that the provided [`Repr`] is actually a [`HeapBuffer`] by
///   checking the discriminant.
///
/// Note: We used to define [`Repr`] as a `union` which implicitly transmuted between the two
/// types, but that prevented us from defining a "niche" value to make `Option<CompactString>`
/// the same size as just `CompactString`
#[inline(always)]
unsafe fn as_mut_heap(&mut self) -> &mut HeapBuffer {
    // SAFETY: A `HeapBuffer` and `Repr` have the same size
    &mut *(self as *mut _ as *mut HeapBuffer)
}

/// Reinterprets a `&Repr` as a `&HeapBuffer`
///
/// # Safety
/// * The caller must guarantee that the provided [`Repr`] is actually a [`HeapBuffer`] by
///   checking the discriminant.
///
/// Note: We used to define [`Repr`] as a `union` which implicitly transmuted between the two
/// types, but that prevented us from defining a "niche" value to make `Option<CompactString>`
/// the same size as just `CompactString`
#[inline(always)]
unsafe fn as_heap(&self) -> &HeapBuffer {
    // SAFETY: A `HeapBuffer` and `Repr` have the same size
    &*(self as *const _ as *const HeapBuffer)
}

/// Reinterprets a [`Repr`] as an [`InlineBuffer`]
///
/// Only compiled when the `smallvec` feature is enabled.
///
/// # Safety
/// * The caller must guarantee that the provided [`Repr`] is actually an [`InlineBuffer`] by
///   checking the discriminant.
///
/// Note: We used to define [`Repr`] as a `union` which implicitly transmuted between the two
/// types, but that prevented us from defining a "niche" value to make `Option<CompactString>`
/// the same size as just `CompactString`
#[inline(always)]
#[cfg(feature = "smallvec")]
const unsafe fn into_inline(self) -> InlineBuffer {
    core::mem::transmute(self)
}
/// /// Note: We used to define [`Repr`] as a `union` which implicitly transmuted between the two /// types, but that prevented us from defining a "niche" value to make `Option` /// the same size as just `CompactString` #[inline(always)] unsafe fn as_mut_inline(&mut self) -> &mut InlineBuffer { // SAFETY: An `InlineBuffer` and `Repr` have the same size &mut *(self as *mut _ as *mut InlineBuffer) } } impl Clone for Repr { #[inline] fn clone(&self) -> Self { #[inline(never)] fn clone_heap(this: &Repr) -> Repr { Repr::new(this.as_str()).unwrap_with_msg() } // There are only two cases we need to care about: If the string is allocated on the heap // or not. If it is, then the data must be cloned properly, otherwise we can simply copy // the `Repr`. if self.is_heap_allocated() { clone_heap(self) } else { // SAFETY: We just checked that `self` can be copied because it is an inline string or // a reference to a `&'static str`. unsafe { core::ptr::read(self) } } } #[inline] fn clone_from(&mut self, source: &Self) { #[inline(never)] fn clone_from_heap(this: &mut Repr, source: &Repr) { unsafe { this.set_len(0) }; this.push_str(source.as_str()); } // There are only two cases we need to care about: If the string is allocated on the heap // or not. If it is, then the data must be cloned proberly, otherwise we can simply copy // the `Repr`. if source.is_heap_allocated() { clone_from_heap(self, source) } else { // SAFETY: We just checked that `source` can be copied because it is an inline string or // a reference to a `&'static str`. *self = unsafe { core::ptr::read(source) } } } } impl Drop for Repr { #[inline] fn drop(&mut self) { // By "outlining" the actual Drop code and only calling it if we're a heap variant, it // allows dropping an inline variant to be as cheap as possible. 
if self.is_heap_allocated() { outlined_drop(self) } #[cold] fn outlined_drop(this: &mut Repr) { // SAFETY: We just checked the discriminant to make sure we're heap allocated let heap_buffer = unsafe { this.as_mut_heap() }; heap_buffer.dealloc(); } } } impl Extend for Repr { #[inline] fn extend>(&mut self, iter: T) { let iter = iter.into_iter(); let (lower_bound, _) = iter.size_hint(); if lower_bound > 0 { // Ignore the error and hope that the lower_bound is incorrect. let _: Result<(), ReserveError> = self.reserve(lower_bound); } for c in iter { self.push_str(c.encode_utf8(&mut [0; 4])); } } } impl<'a> Extend<&'a char> for Repr { fn extend>(&mut self, iter: T) { self.extend(iter.into_iter().copied()); } } impl<'a> Extend<&'a str> for Repr { fn extend>(&mut self, iter: T) { iter.into_iter().for_each(|s| self.push_str(s)); } } impl Extend> for Repr { fn extend>>(&mut self, iter: T) { iter.into_iter().for_each(move |s| self.push_str(&s)); } } impl<'a> Extend> for Repr { fn extend>>(&mut self, iter: T) { iter.into_iter().for_each(move |s| self.push_str(&s)); } } impl Extend for Repr { fn extend>(&mut self, iter: T) { iter.into_iter().for_each(move |s| self.push_str(&s)); } } /// Returns the supplied value, and ensures that the value is eagerly loaded into a register. #[inline(always)] fn ensure_read(value: usize) -> usize { // SAFETY: This assembly instruction is a noop that only affects the instruction ordering. // // TODO(parkmycar): Re-add loongarch and riscv once we have CI coverage for them. 
#[cfg(all( not(miri), any( target_arch = "x86", target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64", ) ))] unsafe { core::arch::asm!( "/* {value} */", value = in(reg) value, options(nomem, nostack), ); }; value } #[cfg(test)] mod tests { use alloc::string::{ String, ToString, }; use alloc::vec::Vec; use quickcheck_macros::quickcheck; use test_case::test_case; use super::{ Repr, MAX_SIZE, }; use crate::ReserveError; const EIGHTEEN_MB: usize = 18 * 1024 * 1024; const EIGHTEEN_MB_STR: &str = unsafe { core::str::from_utf8_unchecked(&[42; EIGHTEEN_MB]) }; #[test_case("hello world!"; "inline")] #[test_case("this is a long string that should be stored on the heap"; "heap")] fn test_create(s: &'static str) { let repr = Repr::new(s).unwrap(); assert_eq!(repr.as_str(), s); assert_eq!(repr.len(), s.len()); // test StaticStr variant let repr = Repr::const_new(s); assert_eq!(repr.as_str(), s); assert_eq!(repr.len(), s.len()); } #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_create(s: String) { let repr = Repr::new(&s).unwrap(); assert_eq!(repr.as_str(), s); assert_eq!(repr.len(), s.len()); } #[test_case(0; "empty")] #[test_case(10; "short")] #[test_case(64; "long")] #[test_case(EIGHTEEN_MB; "huge")] fn test_with_capacity(cap: usize) { let r = Repr::with_capacity(cap).unwrap(); assert!(r.capacity() >= MAX_SIZE); assert_eq!(r.len(), 0); } #[test_case(""; "empty")] #[test_case("abc"; "short")] #[test_case("hello world! 
I am a longer string ๐Ÿฆ€"; "long")] fn test_from_utf8_valid(s: &'static str) { let bytes = s.as_bytes(); let r = Repr::from_utf8(bytes).expect("valid UTF-8"); assert_eq!(r.as_str(), s); assert_eq!(r.len(), s.len()); } #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_from_utf8(buf: Vec) { match (core::str::from_utf8(&buf), Repr::from_utf8(&buf)) { (Ok(s), Ok(r)) => { assert_eq!(r.as_str(), s); assert_eq!(r.len(), s.len()); } (Err(e), Err(r)) => assert_eq!(e, r), _ => panic!("core::str and Repr differ on what is valid UTF-8!"), } } #[test_case(String::new(), true; "empty should inline")] #[test_case(String::new(), false; "empty not inline")] #[test_case(String::with_capacity(10), true ; "empty with small capacity inline")] #[test_case(String::with_capacity(10), false ; "empty with small capacity not inline")] #[test_case(String::with_capacity(128), true ; "empty with large capacity inline")] #[test_case(String::with_capacity(128), false ; "empty with large capacity not inline")] #[test_case(String::from("nyc ๐Ÿ—ฝ"), true; "short should inline")] #[test_case(String::from("nyc ๐Ÿ—ฝ"), false ; "short not inline")] #[test_case(String::from("this is a really long string, which is intended"), true; "long")] #[test_case(String::from("this is a really long string, which is intended"), false; "long not inline")] #[test_case(EIGHTEEN_MB_STR.to_string(), true ; "huge should inline")] #[test_case(EIGHTEEN_MB_STR.to_string(), false ; "huge not inline")] fn test_from_string(s: String, try_to_inline: bool) { // note: when cloning a String it truncates capacity, which is why we measure these values // before cloning the string let s_len = s.len(); let s_cap = s.capacity(); let s_str = s.clone(); let r = Repr::from_string(s, try_to_inline).unwrap(); assert_eq!(r.len(), s_len); assert_eq!(r.as_str(), s_str.as_str()); if s_cap == 0 || (try_to_inline && s_len <= MAX_SIZE) { // we should inline the string, if we were asked to, and the length of the string would // fit inline, 
meaning we would truncate capacity assert!(!r.is_heap_allocated()); } else { assert!(r.is_heap_allocated()); } } #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_from_string(s: String, try_to_inline: bool) { let r = Repr::from_string(s.clone(), try_to_inline).unwrap(); assert_eq!(r.len(), s.len()); assert_eq!(r.as_str(), s.as_str()); if s.capacity() == 0 { // we should always inline the string, if the length of the source string is 0 assert!(!r.is_heap_allocated()); } else if s.capacity() <= MAX_SIZE { // we should inline the string, if we were asked to assert_eq!(!r.is_heap_allocated(), try_to_inline); } else { assert!(r.is_heap_allocated()); } } #[test_case(""; "empty")] #[test_case("nyc ๐Ÿ—ฝ"; "short")] #[test_case("this is a really long string, which is intended"; "long")] fn test_into_string(control: &'static str) { let r = Repr::new(control).unwrap(); let s = r.into_string(); assert_eq!(control.len(), s.len()); assert_eq!(control, s.as_str()); // test StaticStr variant let r = Repr::const_new(control); let s = r.into_string(); assert_eq!(control.len(), s.len()); assert_eq!(control, s.as_str()); } #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_into_string(control: String) { let r = Repr::new(&control).unwrap(); let s = r.into_string(); assert_eq!(control.len(), s.len()); assert_eq!(control, s.as_str()); } #[test_case("", "a", false; "empty")] #[test_case("", "๐Ÿ—ฝ", false; "empty_emoji")] #[test_case("abc", "๐Ÿ—ฝ๐Ÿ™‚๐Ÿฆ€๐ŸŒˆ๐Ÿ‘๐Ÿถ", true; "inline_to_heap")] #[test_case("i am a long string that will be on the heap", "extra", true; "heap_to_heap")] fn test_push_str(control: &'static str, append: &'static str, is_heap: bool) { let mut r = Repr::new(control).unwrap(); let mut c = String::from(control); r.push_str(append); c.push_str(append); assert_eq!(r.as_str(), c.as_str()); assert_eq!(r.len(), c.len()); assert_eq!(r.is_heap_allocated(), is_heap); // test StaticStr variant let mut r = Repr::const_new(control); let mut c = String::from(control); 
r.push_str(append); c.push_str(append); assert_eq!(r.as_str(), c.as_str()); assert_eq!(r.len(), c.len()); assert_eq!(r.is_heap_allocated(), is_heap); } #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_push_str(control: String, append: String) { let mut r = Repr::new(&control).unwrap(); let mut c = control; r.push_str(&append); c.push_str(&append); assert_eq!(r.as_str(), c.as_str()); assert_eq!(r.len(), c.len()); } #[test_case(&[42; 0], &[42; EIGHTEEN_MB]; "empty_to_heap_capacity")] #[test_case(&[42; 8], &[42; EIGHTEEN_MB]; "inline_to_heap_capacity")] #[test_case(&[42; 128], &[42; EIGHTEEN_MB]; "heap_inline_to_heap_capacity")] #[test_case(&[42; EIGHTEEN_MB], &[42; 64]; "heap_capacity_to_heap_capacity")] fn test_push_str_from_buf(buf: &[u8], append: &[u8]) { // The goal of this test is to exercise the scenario when our capacity is stored on the heap let control = unsafe { core::str::from_utf8_unchecked(buf) }; let append = unsafe { core::str::from_utf8_unchecked(append) }; let mut r = Repr::new(control).unwrap(); let mut c = String::from(control); r.push_str(append); c.push_str(append); assert_eq!(r.as_str(), c.as_str()); assert_eq!(r.len(), c.len()); assert!(r.is_heap_allocated()); } #[test_case("", 0, false; "empty_zero")] #[test_case("", 10, false; "empty_small")] #[test_case("", 64, true; "empty_large")] #[test_case("abc", 0, false; "short_zero")] #[test_case("abc", 8, false; "short_small")] #[test_case("abc", 64, true; "short_large")] #[test_case("I am a long string that will be on the heap", 0, true; "large_zero")] #[test_case("I am a long string that will be on the heap", 10, true; "large_small")] #[test_case("I am a long string that will be on the heap", EIGHTEEN_MB, true; "large_huge")] fn test_reserve(initial: &'static str, additional: usize, is_heap: bool) { let mut r = Repr::new(initial).unwrap(); r.reserve(additional).unwrap(); assert!(r.capacity() >= initial.len() + additional); assert_eq!(r.is_heap_allocated(), is_heap); // Test static_str variant 
let mut r = Repr::const_new(initial); r.reserve(additional).unwrap(); assert!(r.capacity() >= initial.len() + additional); assert_eq!(r.is_heap_allocated(), is_heap); } #[test] fn test_reserve_overflow() { let mut r = Repr::new("abc").unwrap(); let err = r.reserve(usize::MAX).unwrap_err(); assert_eq!(err, ReserveError(())); } #[test_case(""; "empty")] #[test_case("abc"; "short")] #[test_case("i am a longer string that will be on the heap"; "long")] #[test_case(EIGHTEEN_MB_STR; "huge")] fn test_clone(initial: &'static str) { let r_a = Repr::new(initial).unwrap(); let r_b = r_a.clone(); assert_eq!(r_a.as_str(), initial); assert_eq!(r_a.len(), initial.len()); assert_eq!(r_a.as_str(), r_b.as_str()); assert_eq!(r_a.len(), r_b.len()); assert_eq!(r_a.capacity(), r_b.capacity()); assert_eq!(r_a.is_heap_allocated(), r_b.is_heap_allocated()); // test StaticStr variant let r_a = Repr::const_new(initial); let r_b = r_a.clone(); assert_eq!(r_a.as_str(), initial); assert_eq!(r_a.len(), initial.len()); assert_eq!(r_a.as_str(), r_b.as_str()); assert_eq!(r_a.len(), r_b.len()); assert_eq!(r_a.capacity(), r_b.capacity()); assert_eq!(r_a.is_heap_allocated(), r_b.is_heap_allocated()); } #[test_case(Repr::const_new(""), Repr::const_new(""); "empty clone from static")] #[test_case(Repr::const_new("abc"), Repr::const_new("efg"); "short clone from static")] #[test_case(Repr::new("i am a longer string that will be on the heap").unwrap(), Repr::const_new(EIGHTEEN_MB_STR); "long clone from static")] #[test_case(Repr::const_new(""), Repr::const_new(""); "empty clone from inline")] #[test_case(Repr::const_new("abc"), Repr::const_new("efg"); "short clone from inline")] #[test_case(Repr::new("i am a longer string that will be on the heap").unwrap(), Repr::const_new("small"); "long clone from inline")] #[test_case(Repr::const_new(""), Repr::new(EIGHTEEN_MB_STR).unwrap(); "empty clone from heap")] #[test_case(Repr::const_new("abc"), Repr::new(EIGHTEEN_MB_STR).unwrap(); "short clone from heap")] 
#[test_case(Repr::new("i am a longer string that will be on the heap").unwrap(), Repr::new(EIGHTEEN_MB_STR).unwrap(); "long clone from heap")] fn test_clone_from(mut initial: Repr, source: Repr) { initial.clone_from(&source); assert_eq!(initial.as_str(), source.as_str()); assert_eq!(initial.is_heap_allocated(), source.is_heap_allocated()); } #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_clone(initial: String) { let r_a = Repr::new(&initial).unwrap(); let r_b = r_a.clone(); assert_eq!(r_a.as_str(), initial); assert_eq!(r_a.len(), initial.len()); assert_eq!(r_a.as_str(), r_b.as_str()); assert_eq!(r_a.len(), r_b.len()); assert_eq!(r_a.capacity(), r_b.capacity()); assert_eq!(r_a.is_heap_allocated(), r_b.is_heap_allocated()); } } compact_str-0.8.0/src/repr/num.rs000064400000000000000000000402731046102023000150610ustar 00000000000000//! Implementations for efficiently converting a number into a [`Repr`] //! //! Adapted from the implementation in the `std` library at //! use core::{ mem, num, ptr, }; use super::traits::IntoRepr; use super::Repr; use crate::{ ToCompactStringError, UnwrapWithMsg, }; const DEC_DIGITS_LUT: &[u8] = b"\ 0001020304050607080910111213141516171819\ 2021222324252627282930313233343536373839\ 4041424344454647484950515253545556575859\ 6061626364656667686970717273747576777879\ 8081828384858687888990919293949596979899"; /// Defines the implementation of [`IntoRepr`] for integer types macro_rules! 
impl_IntoRepr { ($t:ident, $conv_ty:ident) => { impl IntoRepr for $t { fn into_repr(self) -> Result { // Determine the number of digits in this value // // Note: this considers the `-` symbol let num_digits = NumChars::num_chars(self); let mut repr = Repr::with_capacity(num_digits).unwrap_with_msg(); #[allow(unused_comparisons)] let is_nonnegative = self >= 0; let mut n = if is_nonnegative { self as $conv_ty } else { // convert the negative num to positive by summing 1 to it's 2 complement (!(self as $conv_ty)).wrapping_add(1) }; let mut curr = num_digits as isize; // our string will end up being num_digits long unsafe { repr.set_len(num_digits) }; // get mutable pointer to our buffer let buf_ptr = unsafe { repr.as_mut_buf().as_mut_ptr() }; let lut_ptr = DEC_DIGITS_LUT.as_ptr(); unsafe { // need at least 16 bits for the 4-characters-at-a-time to work. if mem::size_of::<$t>() >= 2 { // eagerly decode 4 characters at a time while n >= 10000 { let rem = (n % 10000) as isize; n /= 10000; let d1 = (rem / 100) << 1; let d2 = (rem % 100) << 1; curr -= 4; ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); ptr::copy_nonoverlapping( lut_ptr.offset(d2), buf_ptr.offset(curr + 2), 2, ); } } // if we reach here numbers are <= 9999, so at most 4 chars long let mut n = n as isize; // possibly reduce 64bit math // decode 2 more chars, if > 2 chars if n >= 100 { let d1 = (n % 100) << 1; n /= 100; curr -= 2; ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); } // decode last 1 or 2 chars if n < 10 { curr -= 1; *buf_ptr.offset(curr) = (n as u8) + b'0'; } else { let d1 = n << 1; curr -= 2; ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); } if !is_nonnegative { curr -= 1; *buf_ptr.offset(curr) = b'-'; } } // we should have moved all the way down our buffer debug_assert_eq!(curr, 0); Ok(repr) } } }; } impl_IntoRepr!(u8, u32); impl_IntoRepr!(i8, u32); impl_IntoRepr!(u16, u32); impl_IntoRepr!(i16, u32); impl_IntoRepr!(u32, u32); 
impl_IntoRepr!(i32, u32); impl_IntoRepr!(u64, u64); impl_IntoRepr!(i64, u64); #[cfg(target_pointer_width = "32")] impl_IntoRepr!(usize, u32); #[cfg(target_pointer_width = "32")] impl_IntoRepr!(isize, u32); #[cfg(target_pointer_width = "64")] impl_IntoRepr!(usize, u64); #[cfg(target_pointer_width = "64")] impl_IntoRepr!(isize, u64); /// For 128-bit integer types we use the [`itoa`] crate because writing into a buffer, and then /// copying the amount of characters we've written, is faster than determining the number of /// characters and then writing. impl IntoRepr for u128 { #[inline] fn into_repr(self) -> Result { let mut buffer = itoa::Buffer::new(); Ok(Repr::new(buffer.format(self))?) } } impl IntoRepr for i128 { #[inline] fn into_repr(self) -> Result { let mut buffer = itoa::Buffer::new(); Ok(Repr::new(buffer.format(self))?) } } /// Defines the implementation of [`IntoRepr`] for NonZero integer types macro_rules! impl_NonZero_IntoRepr { ($t:path) => { impl IntoRepr for $t { #[inline] fn into_repr(self) -> Result { self.get().into_repr() } } }; } impl_NonZero_IntoRepr!(num::NonZeroU8); impl_NonZero_IntoRepr!(num::NonZeroI8); impl_NonZero_IntoRepr!(num::NonZeroU16); impl_NonZero_IntoRepr!(num::NonZeroI16); impl_NonZero_IntoRepr!(num::NonZeroU32); impl_NonZero_IntoRepr!(num::NonZeroI32); impl_NonZero_IntoRepr!(num::NonZeroU64); impl_NonZero_IntoRepr!(num::NonZeroI64); impl_NonZero_IntoRepr!(num::NonZeroUsize); impl_NonZero_IntoRepr!(num::NonZeroIsize); impl_NonZero_IntoRepr!(num::NonZeroU128); impl_NonZero_IntoRepr!(num::NonZeroI128); /// All of these `num_chars(...)` methods are kind of crazy, but they are necessary. /// /// An alternate way to calculate the number of digits in a value is to do: /// ``` /// let val = 42; /// let num_digits = ((val as f32).log10().floor()) as usize + 1; /// assert_eq!(num_digits, 2); /// ``` /// But there are two problems with this approach: /// 1. floating point math is slow /// 2. 
/// Number of characters needed to print an integer in base 10, including a leading `-` for
/// negative values.
///
/// The lookup tables below look heavy-handed, but the obvious alternative,
/// `(val as f32).log10().floor() as usize + 1`, has two problems:
/// 1. floating point math is slow
/// 2. results depend on floating point precision, which is too inaccurate for larger values
///
/// With `f32`, `9999995` is reported as 8 digits although it has 7. `f64` gets that value
/// right, but reports `9999999999999999999_u64` as 20 digits although it has 19.
trait NumChars {
    fn num_chars(val: Self) -> usize;
}

impl NumChars for u8 {
    #[inline(always)]
    fn num_chars(val: u8) -> usize {
        match val {
            0..=9 => 1,
            10..=99 => 2,
            100..=u8::MAX => 3,
        }
    }
}

impl NumChars for i8 {
    /// One character for the sign (if negative) plus the width of the magnitude.
    #[inline(always)]
    fn num_chars(val: i8) -> usize {
        usize::from(val < 0) + u8::num_chars(val.unsigned_abs())
    }
}

impl NumChars for u16 {
    #[inline(always)]
    fn num_chars(val: u16) -> usize {
        match val {
            0..=9 => 1,
            10..=99 => 2,
            100..=999 => 3,
            1_000..=9_999 => 4,
            10_000..=u16::MAX => 5,
        }
    }
}

impl NumChars for i16 {
    /// One character for the sign (if negative) plus the width of the magnitude.
    #[inline(always)]
    fn num_chars(val: i16) -> usize {
        usize::from(val < 0) + u16::num_chars(val.unsigned_abs())
    }
}

impl NumChars for u32 {
    #[inline(always)]
    fn num_chars(val: u32) -> usize {
        match val {
            0..=9 => 1,
            10..=99 => 2,
            100..=999 => 3,
            1_000..=9_999 => 4,
            10_000..=99_999 => 5,
            100_000..=999_999 => 6,
            1_000_000..=9_999_999 => 7,
            10_000_000..=99_999_999 => 8,
            100_000_000..=999_999_999 => 9,
            1_000_000_000..=u32::MAX => 10,
        }
    }
}

impl NumChars for i32 {
    /// One character for the sign (if negative) plus the width of the magnitude.
    #[inline(always)]
    fn num_chars(val: i32) -> usize {
        usize::from(val < 0) + u32::num_chars(val.unsigned_abs())
    }
}

impl NumChars for u64 {
    #[inline(always)]
    fn num_chars(val: u64) -> usize {
        match val {
            0..=9 => 1,
            10..=99 => 2,
            100..=999 => 3,
            1_000..=9_999 => 4,
            10_000..=99_999 => 5,
            100_000..=999_999 => 6,
            1_000_000..=9_999_999 => 7,
            10_000_000..=99_999_999 => 8,
            100_000_000..=999_999_999 => 9,
            1_000_000_000..=9_999_999_999 => 10,
            10_000_000_000..=99_999_999_999 => 11,
            100_000_000_000..=999_999_999_999 => 12,
            1_000_000_000_000..=9_999_999_999_999 => 13,
            10_000_000_000_000..=99_999_999_999_999 => 14,
            100_000_000_000_000..=999_999_999_999_999 => 15,
            1_000_000_000_000_000..=9_999_999_999_999_999 => 16,
            10_000_000_000_000_000..=99_999_999_999_999_999 => 17,
            100_000_000_000_000_000..=999_999_999_999_999_999 => 18,
            1_000_000_000_000_000_000..=9_999_999_999_999_999_999 => 19,
            10_000_000_000_000_000_000..=u64::MAX => 20,
        }
    }
}

impl NumChars for i64 {
    /// One character for the sign (if negative) plus the width of the magnitude.
    #[inline(always)]
    fn num_chars(val: i64) -> usize {
        usize::from(val < 0) + u64::num_chars(val.unsigned_abs())
    }
}

impl NumChars for usize {
    /// Forwards to the fixed-width impl that matches the target's pointer width.
    fn num_chars(val: usize) -> usize {
        #[cfg(target_pointer_width = "32")]
        {
            u32::num_chars(val as u32)
        }

        #[cfg(target_pointer_width = "64")]
        {
            u64::num_chars(val as u64)
        }
    }
}

impl NumChars for isize {
    /// Forwards to the fixed-width impl that matches the target's pointer width.
    fn num_chars(val: isize) -> usize {
        #[cfg(target_pointer_width = "32")]
        {
            i32::num_chars(val as i32)
        }

        #[cfg(target_pointer_width = "64")]
        {
            i64::num_chars(val as i64)
        }
    }
}
{ let repr = u32::into_repr(*x).unwrap(); assert_eq!(repr.as_str(), x.to_string()); } } #[test] fn test_from_i32_sanity() { let vals = [i32::MIN, i32::MIN + 1, 0, 42, i32::MAX - 1, i32::MAX]; for x in &vals { let repr = i32::into_repr(*x).unwrap(); assert_eq!(repr.as_str(), x.to_string()); } } #[test] fn test_from_u64_sanity() { let vals = [u64::MIN, u64::MIN + 1, 0, 42, u64::MAX - 1, u64::MAX]; for x in &vals { let repr = u64::into_repr(*x).unwrap(); assert_eq!(repr.as_str(), x.to_string()); } } #[test] fn test_from_i64_sanity() { let vals = [i64::MIN, i64::MIN + 1, 0, 42, i64::MAX - 1, i64::MAX]; for x in &vals { let repr = i64::into_repr(*x).unwrap(); assert_eq!(repr.as_str(), x.to_string()); } } #[test] fn test_from_usize_sanity() { let vals = [ usize::MIN, usize::MIN + 1, 0, 42, usize::MAX - 1, usize::MAX, ]; for x in &vals { let repr = usize::into_repr(*x).unwrap(); assert_eq!(repr.as_str(), x.to_string()); } } #[test] fn test_from_isize_sanity() { let vals = [ isize::MIN, isize::MIN + 1, 0, 42, isize::MAX - 1, isize::MAX, ]; for x in &vals { let repr = isize::into_repr(*x).unwrap(); assert_eq!(repr.as_str(), x.to_string()); } } #[test] fn test_from_u128_sanity() { let vals = [u128::MIN, u128::MIN + 1, 0, 42, u128::MAX - 1, u128::MAX]; for x in &vals { let repr = u128::into_repr(*x).unwrap(); assert_eq!(repr.as_str(), x.to_string()); } } #[test] fn test_from_i128_sanity() { let vals = [i128::MIN, i128::MIN + 1, 0, 42, i128::MAX - 1, i128::MAX]; for x in &vals { let repr = i128::into_repr(*x).unwrap(); assert_eq!(repr.as_str(), x.to_string()); } } } compact_str-0.8.0/src/repr/smallvec.rs000064400000000000000000000031611046102023000160630ustar 00000000000000use smallvec::SmallVec; use super::{ Repr, MAX_SIZE, }; impl Repr { /// Consumes the [`Repr`] returning a byte vector in a [`SmallVec`] /// /// Note: both for the inlined case and the heap case, the buffers are re-used #[inline] pub fn into_bytes(self) -> SmallVec<[u8; MAX_SIZE]> { if let Some(s) = 
self.as_static_str() { SmallVec::from(s.as_bytes()) } else if self.is_heap_allocated() { let string = self.into_string(); let bytes = string.into_bytes(); SmallVec::from_vec(bytes) } else { // SAFETY: We just checked the discriminant to make sure we're an InlineBuffer let inline = unsafe { self.into_inline() }; let (array, length) = inline.into_array(); SmallVec::from_buf_and_len(array, length) } } } #[cfg(test)] mod tests { use test_case::test_case; use crate::CompactString; #[test_case("" ; "empty")] #[test_case("abc" ; "short")] #[test_case("I am a long string ๐Ÿ˜Š๐Ÿ˜Š๐Ÿ˜Š๐Ÿ˜Š๐Ÿ˜Š" ; "long")] fn proptest_roundtrip(s: &'static str) { let og_compact = CompactString::from(s); assert_eq!(og_compact, s); let bytes = og_compact.into_bytes(); let ex_compact = CompactString::from_utf8(bytes).unwrap(); assert_eq!(ex_compact, s); // test `StaticStr` variant let og_compact = CompactString::const_new(s); assert_eq!(og_compact, s); let bytes = og_compact.into_bytes(); let ex_compact = CompactString::from_utf8(bytes).unwrap(); assert_eq!(ex_compact, s); } } compact_str-0.8.0/src/repr/static_str.rs000064400000000000000000000031621046102023000164350ustar 00000000000000use core::{ mem, ptr, slice, str, }; use super::{ Repr, MAX_SIZE, STATIC_STR_MASK, }; pub(super) const DISCRIMINANT_SIZE: usize = MAX_SIZE - mem::size_of::<&'static str>(); /// A buffer stored on the stack whose size is equal to the stack size of `String` /// The last byte is set to 0. 
#[derive(Copy, Clone)] #[repr(C)] pub struct StaticStr { ptr: ptr::NonNull, len: usize, #[allow(unused)] discriminant: [u8; DISCRIMINANT_SIZE], } static_assertions::assert_eq_size!(StaticStr, Repr); static_assertions::assert_eq_align!(StaticStr, Repr); static_assertions::assert_eq_size!(&'static str, (*const u8, usize)); impl StaticStr { #[inline] pub const fn new(text: &'static str) -> Self { let mut discriminant = [0; DISCRIMINANT_SIZE]; discriminant[DISCRIMINANT_SIZE - 1] = STATIC_STR_MASK; Self { // SAFETY: `&'static str` must have a non-null, properly aligned // address ptr: unsafe { ptr::NonNull::new_unchecked(text.as_ptr() as *mut _) }, len: text.len(), discriminant, } } #[rustversion::attr(since(1.64), const)] pub(super) fn get_text(&self) -> &'static str { // SAFETY: `StaticStr` invariants requires it to be a valid str unsafe { str::from_utf8_unchecked(slice::from_raw_parts(self.ptr.as_ptr(), self.len)) } } /// # Safety /// * `len` bytes in the buffer must be valid UTF-8 and /// * `len` must be <= `self.get_text().len()` pub(super) unsafe fn set_len(&mut self, len: usize) { *self = Self::new(&self.get_text()[..len]); } } compact_str-0.8.0/src/repr/traits.rs000064400000000000000000000110131046102023000155560ustar 00000000000000use core::hint::unreachable_unchecked; use super::Repr; use crate::ToCompactStringError; const FALSE: Repr = Repr::const_new("false"); const TRUE: Repr = Repr::const_new("true"); /// Defines how to _efficiently_ create a [`Repr`] from `self` pub(crate) trait IntoRepr { fn into_repr(self) -> Result; } impl IntoRepr for f32 { #[inline] fn into_repr(self) -> Result { let mut buf = ryu::Buffer::new(); let s = buf.format(self); Ok(Repr::new(s)?) } } impl IntoRepr for f64 { #[inline] fn into_repr(self) -> Result { let mut buf = ryu::Buffer::new(); let s = buf.format(self); Ok(Repr::new(s)?) 
} } impl IntoRepr for bool { #[inline] fn into_repr(self) -> Result { if self { Ok(TRUE) } else { Ok(FALSE) } } } impl IntoRepr for char { #[inline] fn into_repr(self) -> Result { let mut buf = [0_u8; 4]; let s = self.encode_utf8(&mut buf); // This match is just a hint for the compiler. match s.len() { 1..=4 => (), // SAFETY: a UTF-8 character is 1 to 4 bytes. _ => unsafe { unreachable_unchecked() }, } Ok(Repr::new(s)?) } } #[cfg(test)] mod tests { use alloc::string::ToString; use quickcheck_macros::quickcheck; use super::IntoRepr; #[test] fn test_into_repr_bool() { let t = true; let repr = t.into_repr().unwrap(); assert_eq!(repr.as_str(), t.to_string()); let f = false; let repr = f.into_repr().unwrap(); assert_eq!(repr.as_str(), f.to_string()); } #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_into_repr_char(val: char) { let repr = char::into_repr(val).unwrap(); assert_eq!(repr.as_str(), val.to_string()); } #[test] fn test_into_repr_f64_sanity() { let vals = [ f64::MIN, f64::MIN_POSITIVE, f64::MAX, f64::NEG_INFINITY, f64::INFINITY, ]; for x in &vals { let repr = f64::into_repr(*x).unwrap(); let roundtrip = repr.as_str().parse::().unwrap(); assert_eq!(*x, roundtrip); } } #[test] fn test_into_repr_f64_nan() { let repr = f64::into_repr(f64::NAN).unwrap(); let roundtrip = repr.as_str().parse::().unwrap(); assert!(roundtrip.is_nan()); } #[quickcheck] #[cfg_attr(miri, ignore)] fn quickcheck_into_repr_f64(val: f64) { let repr = f64::into_repr(val).unwrap(); let roundtrip = repr.as_str().parse::().unwrap(); // Note: The formatting of floats by `ryu` sometimes differs from that of `std`, so instead // of asserting equality with `std` we just make sure the value roundtrips if val.is_nan() != roundtrip.is_nan() { assert_eq!(val, roundtrip); } } // `f32` formatting is broken on powerpc64le, not only in `ryu` but also `std` // // See: https://github.com/rust-lang/rust/issues/96306 #[test] #[cfg_attr(all(target_arch = "powerpc64", target_pointer_width = "64"), ignore)] fn 
test_into_repr_f32_sanity() { let vals = [ f32::MIN, f32::MIN_POSITIVE, f32::MAX, f32::NEG_INFINITY, f32::INFINITY, ]; for x in &vals { let repr = f32::into_repr(*x).unwrap(); let roundtrip = repr.as_str().parse::().unwrap(); assert_eq!(*x, roundtrip); } } #[test] #[cfg_attr(all(target_arch = "powerpc64", target_pointer_width = "64"), ignore)] fn test_into_repr_f32_nan() { let repr = f32::into_repr(f32::NAN).unwrap(); let roundtrip = repr.as_str().parse::().unwrap(); assert!(roundtrip.is_nan()); } #[quickcheck] #[cfg_attr(all(target_arch = "powerpc64", target_pointer_width = "64"), ignore)] fn proptest_into_repr_f32(val: f32) { let repr = f32::into_repr(val).unwrap(); let roundtrip = repr.as_str().parse::().unwrap(); // Note: The formatting of floats by `ryu` sometimes differs from that of `std`, so instead // of asserting equality with `std` we just make sure the value roundtrips if val.is_nan() != roundtrip.is_nan() { assert_eq!(val, roundtrip); } } } compact_str-0.8.0/src/tests.rs000064400000000000000000001607531046102023000144620ustar 00000000000000use alloc::borrow::Cow; use alloc::boxed::Box; use alloc::string::{ String, ToString, }; use alloc::vec::Vec; use core::str::FromStr; use core::{ num, slice, }; use proptest::collection::SizeRange; use proptest::prelude::*; use test_case::test_case; use test_strategy::proptest; use crate::{ format_compact, CompactString, ToCompactString, }; #[cfg(target_pointer_width = "64")] const MAX_SIZE: usize = 24; #[cfg(target_pointer_width = "32")] const MAX_SIZE: usize = 12; const SIXTEEN_MB: usize = 16 * 1024 * 1024; /// generates random unicode strings, upto 80 chars long pub fn rand_unicode() -> impl Strategy { proptest::collection::vec(proptest::char::any(), 0..80).prop_map(|v| v.into_iter().collect()) } /// generates a random collection of bytes, upto 80 bytes long pub fn rand_bytes() -> impl Strategy> { proptest::collection::vec(any::(), 0..80) } /// generates a random collection of `u16`s, upto 80 elements long pub fn 
rand_u16s() -> impl Strategy> { proptest::collection::vec(any::(), 0..80) } /// [`proptest::strategy::Strategy`] that generates [`String`]s with up to `len` bytes pub fn rand_unicode_with_range(range: impl Into) -> impl Strategy { proptest::collection::vec(proptest::char::any(), range).prop_map(|v| v.into_iter().collect()) } /// generates groups upto 40 strings long of random unicode strings, upto 80 chars long fn rand_unicode_collection() -> impl Strategy> { proptest::collection::vec(rand_unicode(), 0..40) } /// Asserts a [`CompactString`] is allocated properly fn assert_allocated_properly(compact: &CompactString) { if compact.len() <= MAX_SIZE { assert!(!compact.is_heap_allocated()) } else { assert!(compact.is_heap_allocated()) } } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_strings_roundtrip(#[strategy(rand_unicode())] word: String) { let compact = CompactString::new(&word); prop_assert_eq!(&word, &compact); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_strings_allocated_properly(#[strategy(rand_unicode())] word: String) { let compact = CompactString::new(word); assert_allocated_properly(&compact); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_char_iterator_roundtrips(#[strategy(rand_unicode())] word: String) { let compact: CompactString = word.clone().chars().collect(); prop_assert_eq!(&word, &compact) } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_string_iterator_roundtrips( #[strategy(rand_unicode_collection())] collection: Vec, ) { let compact: CompactString = collection.clone().into_iter().collect(); let word: String = collection.into_iter().collect(); prop_assert_eq!(&word, &compact); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_from_bytes_roundtrips(#[strategy(rand_unicode())] word: String) { let bytes = word.into_bytes(); let compact = CompactString::from_utf8(&bytes).unwrap(); let word = String::from_utf8(bytes).unwrap(); prop_assert_eq!(compact, word); } #[proptest] #[cfg_attr(miri, ignore)] fn 
proptest_from_bytes_only_valid_utf8(#[strategy(rand_bytes())] bytes: Vec) { let compact_result = CompactString::from_utf8(&bytes); let word_result = String::from_utf8(bytes); match (compact_result, word_result) { (Ok(c), Ok(s)) => prop_assert_eq!(c, s), (Err(c_err), Err(s_err)) => prop_assert_eq!(c_err, s_err.utf8_error()), _ => panic!("CompactString and core::str read UTF-8 differently?"), } } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_from_lossy_cow_roundtrips(#[strategy(rand_bytes())] bytes: Vec) { let cow = String::from_utf8_lossy(&bytes[..]); let compact = CompactString::from(cow.clone()); prop_assert_eq!(cow, compact); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_reserve_and_write_bytes(#[strategy(rand_unicode())] word: String) { let mut compact = CompactString::default(); prop_assert!(compact.is_empty()); // reserve enough space to write our bytes compact.reserve(word.len()); // SAFETY: We're writing a String which we know is UTF-8 let slice = unsafe { compact.as_mut_bytes() }; slice[..word.len()].copy_from_slice(word.as_bytes()); // SAFETY: We know this is the length of our string, since `compact` started with 0 bytes // and we just wrote `word.len()` bytes unsafe { compact.set_len(word.len()) } prop_assert_eq!(&word, &compact); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_reserve_and_write_bytes_allocated_properly(#[strategy(rand_unicode())] word: String) { let mut compact = CompactString::default(); prop_assert!(compact.is_empty()); // reserve enough space to write our bytes compact.reserve(word.len()); // SAFETY: We're writing a String which we know is UTF-8 let slice = unsafe { compact.as_mut_bytes() }; slice[..word.len()].copy_from_slice(word.as_bytes()); // SAFETY: We know this is the length of our string, since `compact` started with 0 bytes // and we just wrote `word.len()` bytes unsafe { compact.set_len(word.len()) } prop_assert_eq!(compact.len(), word.len()); // The string should be heap allocated if `word` was > MAX_SIZE // 
// NOTE: The reserve and write API's don't currently support the Packed representation prop_assert_eq!(compact.is_heap_allocated(), word.len() > MAX_SIZE); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_arbitrary_compact_string_converts_to_string(#[strategy(rand_unicode())] word: String) { let compact = CompactString::new(&word); let result = String::from(compact); prop_assert_eq!(result.len(), word.len()); prop_assert_eq!(result, word); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_extend_chars_allocated_properly( #[strategy(rand_unicode())] start: String, #[strategy(rand_unicode())] extend: String, ) { let mut compact = CompactString::new(&start); compact.extend(extend.chars()); let mut control = start.clone(); #[allow(clippy::string_extend_chars)] control.extend(extend.chars()); prop_assert_eq!(&compact, &control); assert_allocated_properly(&compact); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_truncate(#[strategy(rand_unicode())] mut control: String, val: u8) { let initial_len = control.len(); let mut compact = CompactString::new(&control); // turn the arbitrary number `val` into character indices let new_len = control .char_indices() .cycle() .nth(val as usize) .unwrap_or_default() .0; // then truncate both strings string control.truncate(new_len); compact.truncate(new_len); // assert they're equal prop_assert_eq!(&control, &compact); prop_assert_eq!(control.len(), compact.len()); // If we started as heap allocated, we should stay heap allocated. 
This prevents us from // needing to deallocate the buffer on the heap if initial_len > MAX_SIZE { prop_assert!(compact.is_heap_allocated()); } else { prop_assert!(!compact.is_heap_allocated()); } } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_from_utf16_roundtrips(#[strategy(rand_unicode())] control: String) { let utf16_buf: Vec = control.encode_utf16().collect(); let compact = CompactString::from_utf16(utf16_buf).unwrap(); assert_eq!(compact, control); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_from_utf16_random(#[strategy(rand_u16s())] buf: Vec) { let compact = CompactString::from_utf16(&buf); let std_str = String::from_utf16(&buf); match (compact, std_str) { (Ok(c), Ok(s)) => assert_eq!(c, s), (Err(_), Err(_)) => (), (c_res, s_res) => panic!( "CompactString and String decode UTF-16 differently? {:?} {:?}", c_res, s_res ), } } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_from_utf16_lossy_roundtrips(#[strategy(rand_unicode())] control: String) { let utf16_buf: Vec = control.encode_utf16().collect(); let compact = CompactString::from_utf16_lossy(utf16_buf); assert_eq!(compact, control); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_from_utf16_lossy_random(#[strategy(rand_u16s())] buf: Vec) { let control = String::from_utf16_lossy(&buf); let compact = CompactString::from_utf16_lossy(&buf); assert_eq!(compact, control); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_remove(#[strategy(rand_unicode_with_range(1..80))] mut control: String, val: u8) { let initial_len = control.len(); let mut compact = CompactString::new(&control); let idx = control .char_indices() .cycle() .nth(val as usize) .unwrap_or_default() .0; let control_char = control.remove(idx); let compact_char = compact.remove(idx); prop_assert_eq!(control_char, compact_char); prop_assert_eq!(control_char, compact_char); prop_assert_eq!(control.len(), compact.len()); // If we started as heap allocated, we should stay heap allocated. 
This prevents us from // needing to deallocate the buffer on the heap if initial_len > MAX_SIZE { prop_assert!(compact.is_heap_allocated()); } else { prop_assert!(!compact.is_heap_allocated()); } } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_from_utf8_unchecked(#[strategy(rand_bytes())] bytes: Vec) { let compact = unsafe { CompactString::from_utf8_unchecked(&bytes) }; let std_str = unsafe { String::from_utf8_unchecked(bytes.clone()) }; // we might not make valid strings, but we should be able to read the underlying bytes assert_eq!(compact.as_bytes(), std_str.as_bytes()); assert_eq!(compact.as_bytes(), bytes); // make sure the length is correct assert_eq!(compact.len(), bytes.len()); // check if we were valid UTF-8, if so, assert the data written into the CompactString is // correct let data_is_valid = core::str::from_utf8(&bytes); let compact_is_valid = core::str::from_utf8(compact.as_bytes()); let std_str_is_valid = core::str::from_utf8(std_str.as_bytes()); match (data_is_valid, compact_is_valid, std_str_is_valid) { (Ok(d), Ok(c), Ok(s)) => { // if we get &str's back, make sure they're all equal assert_eq!(d, c); assert_eq!(c, s); } (Err(d), Err(c), Err(s)) => { // if we get errors back, the errors should be the same assert_eq!(d, c); assert_eq!(c, s); } _ => panic!("data, CompactString, and String disagreed?"), } } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_to_ascii_uppercase(#[strategy(rand_unicode())] control: String) { let compact = CompactString::new(&control); let control = control.to_ascii_uppercase(); let compact = compact.to_ascii_uppercase(); prop_assert_eq!(control, compact); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_to_ascii_lowercase(#[strategy(rand_unicode())] control: String) { let compact = CompactString::new(&control); let control = control.to_ascii_lowercase(); let compact = compact.to_ascii_lowercase(); prop_assert_eq!(control, compact); } #[proptest] #[cfg_attr(miri, ignore)] fn 
proptest_to_uppercase(#[strategy(rand_unicode())] control: String) { let compact = CompactString::new(&control); let control = control.to_uppercase(); let compact = compact.to_uppercase(); prop_assert_eq!(control, compact); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_to_lowercase(#[strategy(rand_unicode())] control: String) { let compact = CompactString::new(&control); let control = control.to_lowercase(); let compact = compact.to_lowercase(); prop_assert_eq!(control, compact); } #[test] fn test_const_creation() { const EMPTY: CompactString = CompactString::const_new(""); const SHORT: CompactString = CompactString::const_new("rust"); const EMPTY_STATIC_STR: CompactString = CompactString::const_new(""); const SHORT_STATIC_STR: CompactString = CompactString::const_new("rust"); #[cfg(target_pointer_width = "64")] const PACKED: CompactString = CompactString::const_new("i am 24 characters long!"); #[cfg(target_pointer_width = "32")] const PACKED: CompactString = CompactString::const_new("i am 12 char"); const PACKED_STATIC_STR0: CompactString = CompactString::const_new("i am 24 characters long!"); const PACKED_STATIC_STR1: CompactString = CompactString::const_new("i am 12 char"); assert_eq!(EMPTY, CompactString::new("")); assert_eq!(SHORT, CompactString::new("rust")); assert_eq!(EMPTY_STATIC_STR, CompactString::new("")); assert_eq!(SHORT_STATIC_STR, CompactString::new("rust")); #[cfg(target_pointer_width = "64")] assert_eq!(PACKED, CompactString::new("i am 24 characters long!")); #[cfg(target_pointer_width = "32")] assert_eq!(PACKED, CompactString::new("i am 12 char")); assert_eq!( PACKED_STATIC_STR0, CompactString::new("i am 24 characters long!") ); assert_eq!(PACKED_STATIC_STR1, CompactString::new("i am 12 char")); } #[test] fn test_short_ascii() { // always inlined on all archs let strs = vec!["nyc", "statue", "liberty", "img_1234.png"]; for s in strs { let compact = CompactString::new(s); assert_eq!(compact, s); assert_eq!(s, compact); 
assert!(!compact.is_heap_allocated()); } } #[test] fn test_short_unicode() { let strs = vec![ ("๐Ÿฆ€", false), ("๐ŸŒงโ˜€๏ธ", false), // str is 12 bytes long, and leading character is non-ASCII ("ๅ’ฌ๐“…ˆ๊ˆ:_", false), ]; for (s, is_heap) in strs { let compact = CompactString::new(s); assert_eq!(compact, s); assert_eq!(s, compact); assert_eq!(compact.is_heap_allocated(), is_heap); } } #[test] fn test_medium_ascii() { let strs = vec![ "rustconf 2021", "new york city", "nyc pizza is good", "test the 24 char limit!!", ]; for s in strs { let compact = CompactString::new(s); assert_eq!(compact, s); assert_eq!(s, compact); #[cfg(target_pointer_width = "64")] let is_heap = false; #[cfg(target_pointer_width = "32")] let is_heap = true; assert_eq!(compact.is_heap_allocated(), is_heap); } } #[test] fn test_medium_unicode() { let strs = vec![ ("โ˜•๏ธ๐Ÿ‘€๐Ÿ˜๐ŸŽ‰", false), // str is 24 bytes long, and leading character is non-ASCII ("๐Ÿฆ€๐Ÿ˜€๐Ÿ˜ƒ๐Ÿ˜„๐Ÿ˜๐Ÿฆ€", false), ]; #[allow(unused_variables)] for (s, is_heap) in strs { let compact = CompactString::new(s); assert_eq!(compact, s); assert_eq!(s, compact); #[cfg(target_pointer_width = "32")] let is_heap = true; assert_eq!(compact.is_heap_allocated(), is_heap); } } #[test] fn test_from_str_trait() { let s = "hello_world"; // Until the never type `!` is stabilized, we have to unwrap here let c = CompactString::from_str(s).unwrap(); assert_eq!(s, c); } #[test] #[cfg_attr(target_pointer_width = "32", ignore)] fn test_from_char_iter() { let s = "\u{0} 0 \u{0}a๐€€๐€€ ๐€€a๐€€"; let compact: CompactString = s.chars().collect(); assert!(!compact.is_heap_allocated()); assert_eq!(s, compact); } #[test] #[cfg_attr(target_pointer_width = "32", ignore)] fn test_extend_packed_from_empty() { let s = " 0\u{80}A\u{0}๐€€ ๐€€ยกa๐€€0"; let mut compact = CompactString::new(s); assert!(!compact.is_heap_allocated()); // extend from an empty iterator compact.extend("".chars()); // we should still be heap allocated 
assert!(!compact.is_heap_allocated()); } #[test_case(CompactString::from(""); "inline")] #[test_case(CompactString::const_new(""); "static_str")] fn test_pop_empty(mut compact: CompactString) { let num_pops = 256; (0..num_pops).for_each(|_| { let ch = compact.pop(); assert!(ch.is_none()); }); assert!(compact.is_empty()); assert_eq!(compact, ""); } #[test] fn test_extend_from_empty_strs() { let strs = vec![ "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ]; let compact: CompactString = strs.clone().into_iter().collect(); assert_eq!(compact, ""); assert!(compact.is_empty()); assert!(!compact.is_heap_allocated()); } #[test] fn test_compact_str_is_send_and_sync() { fn is_send_and_sync() {} is_send_and_sync::(); } #[test_case(CompactString::default(); "inline")] #[test_case(CompactString::const_new(""); "static_str")] fn test_fmt_write(mut compact: CompactString) { use core::fmt::Write; write!(compact, "test").unwrap(); assert_eq!(compact, "test"); writeln!(compact, "{}", 1234).unwrap(); assert_eq!(compact, "test1234\n"); #[allow(clippy::write_literal)] write!(compact, "{:>8} {} {:<8}", "some", "more", "words").unwrap(); assert_eq!(compact, "test1234\n some more words "); } #[allow(clippy::unnecessary_to_owned, clippy::op_ref)] #[test] fn test_plus_operator() { // + &CompactString assert_eq!(CompactString::from("a") + &CompactString::from("b"), "ab"); // + &str assert_eq!(CompactString::from("a") + "b", "ab"); // + &String assert_eq!(CompactString::from("a") + &String::from("b"), "ab"); // + &Box let box_str = String::from("b").into_boxed_str(); assert_eq!(CompactString::from("a") + &box_str, "ab"); // + &Cow<'a, str> let cow = Cow::from("b"); assert_eq!(CompactString::from("a") + &cow, "ab"); // Implementing `Add for String` can break adding &String or other types to String, so we // explicitly don't do this. See https://github.com/rust-lang/rust/issues/77143 for more details. 
// Below we assert adding types to String still compiles // String + &CompactString assert_eq!(String::from("a") + &CompactString::from("b"), "ab"); // String + &String assert_eq!(String::from("a") + &("b".to_string()), "ab"); // String + &str assert_eq!(String::from("a") + "b", "ab"); } #[allow(clippy::unnecessary_to_owned, clippy::op_ref)] #[test] fn test_plus_operator_static_str() { // + &CompactString assert_eq!( CompactString::const_new("a") + &CompactString::const_new("b"), "ab" ); // + &str assert_eq!(CompactString::const_new("a") + "b", "ab"); // + &String assert_eq!(CompactString::const_new("a") + &String::from("b"), "ab"); // + &Box let box_str = String::from("b").into_boxed_str(); assert_eq!(CompactString::const_new("a") + &box_str, "ab"); // + &Cow<'a, str> let cow = Cow::from("b"); assert_eq!(CompactString::const_new("a") + &cow, "ab"); // Implementing `Add for String` can break adding &String or other types to String, so we // explicitly don't do this. See https://github.com/rust-lang/rust/issues/77143 for more details. // Below we assert adding types to String still compiles // String + &CompactString assert_eq!(String::from("a") + &CompactString::const_new("b"), "ab"); // String + &String assert_eq!(String::from("a") + &("b".to_string()), "ab"); // String + &str assert_eq!(String::from("a") + "b", "ab"); } #[test] fn test_plus_equals_operator() { let mut m = CompactString::from("a"); m += "b"; assert_eq!(m, "ab"); } #[test] fn test_plus_equals_operator_static_str() { let mut m = CompactString::const_new("a"); m += "b"; assert_eq!(m, "ab"); } // Allow these lints because we're explicitly testing impls for owned types and // reference types. #[allow(clippy::cmp_owned)] #[allow(clippy::op_ref)] #[test] fn test_eq_operator() { let x = CompactString::const_new("foo"); let y = x.clone(); macro_rules! 
test_impl { ($a:expr, $b:expr) => { let _ = $a == $b; let _ = &$a == $b; let _ = &$a == &$b; let _ = $b == $a; let _ = &$b == $a; let _ = &$b == &$a; }; } test_impl!("a", x); test_impl!(String::from("a"), x); test_impl!(Cow::Borrowed("a"), x); test_impl!(y, x); } #[test] fn test_u8_to_compact_string() { let vals = [u8::MIN, 1, 42, u8::MAX - 2, u8::MAX - 1, u8::MAX]; for x in &vals { let c = x.to_compact_string(); let s = x.to_string(); assert_eq!(c, s); assert!(!c.is_heap_allocated()); } } #[test] fn test_i8_to_compact_string() { let vals = [ i8::MIN, i8::MIN + 1, i8::MIN + 2, -1, 0, 1, 42, i8::MAX - 2, i8::MAX - 1, i8::MAX, ]; for x in &vals { let c = x.to_compact_string(); let s = x.to_string(); assert_eq!(c, s); assert!(!c.is_heap_allocated()); } } #[test] fn test_u16_to_compact_string() { let vals = [u16::MIN, 1, 42, 999, u16::MAX - 2, u16::MAX - 1, u16::MAX]; for x in &vals { let c = x.to_compact_string(); let s = x.to_string(); assert_eq!(c, s); assert!(!c.is_heap_allocated()); } } #[test] fn test_i16_to_compact_string() { let vals = [ i16::MIN, i16::MIN + 1, i16::MIN + 2, -42, -1, 0, 1, 42, 999, i16::MAX - 2, i16::MAX - 1, i16::MAX, ]; for x in &vals { let c = x.to_compact_string(); let s = x.to_string(); assert_eq!(c, s); assert!(!c.is_heap_allocated()); } } #[test] fn test_u32_to_compact_string() { let vals = [ u32::MIN, 1, 42, 999, 123456789, u32::MAX - 2, u32::MAX - 1, u32::MAX, ]; for x in &vals { let c = x.to_compact_string(); let s = x.to_string(); assert_eq!(c, s); assert!(!c.is_heap_allocated()); } } #[test] fn test_i32_to_compact_string() { let vals = [ i32::MIN, i32::MIN + 2, i32::MIN + 1, -12345678, -42, -1, 0, 1, 999, 123456789, i32::MAX - 2, i32::MAX - 1, i32::MAX, ]; for x in &vals { let c = x.to_compact_string(); let s = x.to_string(); assert_eq!(c, s); assert!(!c.is_heap_allocated()); } } #[test] fn test_u64_to_compact_string() { let vals = [ u64::MIN, 1, 999, 123456789, 98765432123456, u64::MAX - 2, u64::MAX - 1, u64::MAX, ]; for x in &vals 
{ let c = x.to_compact_string(); let s = x.to_string(); assert_eq!(c, s); // u64 can be up-to 20 characters long, which can't be inlined on 32-bit arches #[cfg(target_pointer_width = "64")] assert!(!c.is_heap_allocated()); } } #[test] fn test_i64_to_compact_string() { let vals = [ i64::MIN, i64::MIN + 1, i64::MIN + 2, -22222222, -42, 0, 1, 999, 123456789, i64::MAX - 2, i64::MAX - 1, i64::MAX, ]; for x in &vals { let c = x.to_compact_string(); let s = x.to_string(); assert_eq!(c, s); // i64 can be up-to 20 characters long, which can't be inlined on 32-bit arches #[cfg(target_pointer_width = "64")] assert!(!c.is_heap_allocated()); } } #[test] fn test_u128_to_compact_string() { let vals = [ u128::MIN, 1, 999, 123456789, u128::MAX - 2, u128::MAX - 1, u128::MAX, ]; for x in &vals { let c = x.to_compact_string(); let s = x.to_string(); assert_eq!(c, s); } } #[test] fn test_i128_to_compact_string() { let vals = [ i128::MIN, i128::MIN + 1, i128::MIN + 2, -22222222, -42, 0, 1, 999, 123456789, i128::MAX - 2, i128::MAX - 1, i128::MAX, ]; for x in &vals { let c = x.to_compact_string(); let s = x.to_string(); assert_eq!(c, s); } } #[test] fn test_bool_to_compact_string() { let c = true.to_compact_string(); let s = true.to_string(); assert_eq!("true", c); assert_eq!(c, s); assert!(!c.is_heap_allocated()); let c = false.to_compact_string(); let s = false.to_string(); assert_eq!("false", c); assert_eq!(c, s); assert!(!c.is_heap_allocated()); } macro_rules! 
assert_int_MAX_to_compact_string { ($int: ty) => { assert_eq!(&*<$int>::MAX.to_string(), &*<$int>::MAX.to_compact_string()); }; } #[test] fn test_to_compact_string() { // Test specialisation for bool, char and String assert_eq!(&*true.to_string(), "true".to_compact_string()); assert_eq!(&*false.to_string(), "false".to_compact_string()); assert_eq!("1", '1'.to_compact_string()); assert_eq!("2333", "2333".to_string().to_compact_string()); assert_eq!("2333", "2333".to_compact_string().to_compact_string()); // Test specialisation for int and nonzero_int using itoa assert_eq!("234", 234.to_compact_string()); assert_eq!( "234", num::NonZeroU64::new(234).unwrap().to_compact_string() ); assert_int_MAX_to_compact_string!(u8); assert_int_MAX_to_compact_string!(i8); assert_int_MAX_to_compact_string!(u16); assert_int_MAX_to_compact_string!(i16); assert_int_MAX_to_compact_string!(u32); assert_int_MAX_to_compact_string!(i32); assert_int_MAX_to_compact_string!(u64); assert_int_MAX_to_compact_string!(i64); assert_int_MAX_to_compact_string!(usize); assert_int_MAX_to_compact_string!(isize); // Test specialisation for f32 and f64 using ryu // TODO: Fix bug in powerpc64, which is a little endian system #[cfg(not(all(target_arch = "powerpc64", target_pointer_width = "64")))] { assert_eq!( (&*3.2_f32.to_string(), &*288888.290028_f64.to_string()), ( &*3.2_f32.to_compact_string(), &*288888.290028_f64.to_compact_string() ) ); assert_eq!("inf", f32::INFINITY.to_compact_string()); assert_eq!("-inf", f32::NEG_INFINITY.to_compact_string()); assert_eq!("inf", f64::INFINITY.to_compact_string()); assert_eq!("-inf", f64::NEG_INFINITY.to_compact_string()); assert_eq!("NaN", f32::NAN.to_compact_string()); assert_eq!("NaN", f64::NAN.to_compact_string()); } // Test generic Display implementation assert_eq!("234", "234".to_compact_string()); assert_eq!("12345", format_compact!("{}", "12345")); assert_eq!("112345", format_compact!("1{}", "12345")); assert_eq!("1123452", format_compact!("1{}{}", "12345", 
2)); assert_eq!("11234522", format_compact!("1{}{}{}", "12345", 2, '2')); assert_eq!( "112345221000", format_compact!("1{}{}{}{}", "12345", 2, '2', 1000) ); // Test string longer than repr::MAX_SIZE assert_eq!( "01234567890123456789999999", format_compact!("0{}67890123456789{}", "12345", 999999) ); } #[test] fn test_into_string_large_string_with_excess_capacity() { let mut string = String::with_capacity(128); string.push_str("abcdefghijklmnopqrstuvwxyz"); let str_addr = string.as_ptr(); let str_len = string.len(); let str_cap = string.capacity(); let compact = CompactString::from(string); let new_string = String::from(compact); let new_str_addr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); assert_eq!(str_addr, new_str_addr); assert_eq!(str_len, new_str_len); assert_eq!(str_cap, new_str_cap); } #[test] fn test_into_string_where_32_bit_capacity_is_on_heap() { let buf = vec![b'a'; SIXTEEN_MB - 1]; // SAFETY: `buf` is filled with ASCII `a`s. 
// This primarily speeds up miri, as we don't need to check every byte // in the input buffer let string = unsafe { String::from_utf8_unchecked(buf) }; let str_addr = string.as_ptr(); let str_len = string.len(); let str_cap = string.capacity(); let compact = CompactString::from(string); let new_string = String::from(compact); let new_str_addr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); assert_eq!(str_len, new_str_len); if cfg!(target_pointer_width = "64") { assert_eq!(str_addr, new_str_addr); assert_eq!(str_cap, new_str_cap); } else { assert_eq!(&new_string.as_bytes()[0..10], b"aaaaaaaaaa"); assert_eq!(str_len, new_str_cap); } } #[test] fn test_into_string_small_string_with_excess_capacity() { let mut string = String::with_capacity(128); string.push_str("abcdef"); let str_len = string.len(); let compact = CompactString::from(string); // we should inline this string, which would truncate capacity // // note: String truncates capacity on Clone, so truncating here seems reasonable assert!(!compact.is_heap_allocated()); assert_eq!(compact.len(), str_len); assert_eq!(compact.capacity(), MAX_SIZE); } #[test] fn test_from_string_buffer_small_string_with_excess_capacity() { let mut string = String::with_capacity(128); string.push_str("abcedfg"); let str_ptr = string.as_ptr(); let str_len = string.len(); let str_cap = string.capacity(); // using from_string_buffer should always re-use the underlying buffer let compact = CompactString::from_string_buffer(string); assert!(compact.is_heap_allocated()); let cpt_ptr = compact.as_ptr(); let cpt_len = compact.len(); let cpt_cap = compact.capacity(); assert_eq!(str_ptr, cpt_ptr); assert_eq!(str_len, cpt_len); assert_eq!(str_cap, cpt_cap); } #[test] fn test_into_string_small_string_with_no_excess_capacity() { let string = String::from("abcdef"); let str_len = string.len(); let compact = CompactString::from(string); // we should eagerly inline the string 
assert!(!compact.is_heap_allocated()); assert_eq!(compact.len(), str_len); assert_eq!(compact.capacity(), MAX_SIZE); } #[test] fn test_from_string_buffer_small_string_with_no_excess_capacity() { let string = String::from("abcdefg"); let str_ptr = string.as_ptr(); let str_len = string.len(); let str_cap = string.capacity(); // using from_string_buffer should always re-use the underlying buffer let compact = CompactString::from_string_buffer(string); assert!(compact.is_heap_allocated()); let cpt_ptr = compact.as_ptr(); let cpt_len = compact.len(); let cpt_cap = compact.capacity(); assert_eq!(str_ptr, cpt_ptr); assert_eq!(str_len, cpt_len); assert_eq!(str_cap, cpt_cap); } #[test] fn test_roundtrip_from_string_empty_string() { let string = String::new(); let str_ptr = string.as_ptr(); let str_len = string.len(); let str_cap = string.capacity(); let compact = CompactString::from(string); // we should always inline empty strings assert!(!compact.is_heap_allocated()); let new_string = String::from(compact); let new_str_ptr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); assert_eq!(str_ptr, new_str_ptr); assert_eq!(str_len, new_str_len); assert_eq!(str_cap, new_str_cap); } #[test] fn test_roundtrip_from_string_buffer_empty_string() { let string = String::new(); let str_ptr = string.as_ptr(); let str_len = string.len(); let str_cap = string.capacity(); let compact = CompactString::from_string_buffer(string); // we should always inline empty strings assert!(!compact.is_heap_allocated()); let new_string = String::from(compact); let new_str_ptr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); assert_eq!(str_ptr, new_str_ptr); assert_eq!(str_len, new_str_len); assert_eq!(str_cap, new_str_cap); } #[test] fn test_into_string_small_str() { let data = "abcdef"; let str_addr = data.as_ptr(); let str_len = data.len(); let compact = CompactString::from(data); let new_string = 
String::from(compact); let new_str_addr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); assert_ne!(str_addr, new_str_addr); assert_eq!(str_len, new_str_len); assert_eq!(str_len, new_str_cap); } #[test] fn test_into_string_small_static_str() { let data = "abcdef"; let str_addr = data.as_ptr(); let str_len = data.len(); let compact = CompactString::const_new(data); let new_string = String::from(compact); let new_str_addr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); assert_ne!(str_addr, new_str_addr); assert_eq!(str_len, new_str_len); assert_eq!(str_len, new_str_cap); } #[test] fn test_into_string_long_str() { let data = "this is a long string that will be on the heap"; let str_addr = data.as_ptr(); let str_len = data.len(); let compact = CompactString::from(data); let new_string = String::from(compact); let new_str_addr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); assert_ne!(str_addr, new_str_addr); assert_eq!(str_len, new_str_len); assert_eq!(str_len, new_str_cap); } #[test] fn test_into_string_long_static_str() { let data = "this is a long string that will be on the heap"; let str_addr = data.as_ptr(); let str_len = data.len(); let compact = CompactString::const_new(data); let new_string = String::from(compact); let new_str_addr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); assert_ne!(str_addr, new_str_addr); assert_eq!(str_len, new_str_len); assert_eq!(str_len, new_str_cap); } #[test] fn test_into_string_empty_str() { let data = ""; let str_len = data.len(); let compact = CompactString::from(data); let new_string = String::from(compact); let new_str_addr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); assert_eq!(String::new().as_ptr(), new_str_addr); assert_eq!(str_len, new_str_len); 
assert_eq!(str_len, new_str_cap); } #[test] fn test_into_string_empty_static_str() { let data = ""; let str_len = data.len(); let compact = CompactString::const_new(data); let new_string = String::from(compact); let new_str_addr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); assert_eq!(String::new().as_ptr(), new_str_addr); assert_eq!(str_len, new_str_len); assert_eq!(str_len, new_str_cap); } #[test] fn test_truncate_noops_if_new_len_greater_than_current() { let mut short = CompactString::from("short"); let short_cap = short.capacity(); short.truncate(100); assert_eq!(short.len(), 5); assert_eq!(short.capacity(), short_cap); let mut long = CompactString::from("i am a long string that will be allocated on the heap"); let long_cap = long.capacity(); long.truncate(500); assert_eq!(long.len(), 53); assert_eq!(long.capacity(), long_cap); } #[test] fn test_truncate_noops_if_new_len_greater_than_current_static_str() { let mut short = CompactString::const_new("short"); short.truncate(100); assert_eq!(short.len(), 5); assert_eq!(short.capacity(), MAX_SIZE); let mut long = CompactString::const_new("i am a long string that will be allocated on the heap"); long.truncate(500); assert_eq!(long.len(), 53); assert_eq!(long.capacity(), 53); } #[test] #[should_panic(expected = "new_len must lie on char boundary")] fn test_truncate_panics_on_non_char_boundary() { let mut emojis = CompactString::from("๐Ÿ˜€๐Ÿ˜€๐Ÿ˜€๐Ÿ˜€"); assert!('๐Ÿ˜€'.len_utf8() > 1); emojis.truncate(1); } #[test_case(CompactString::from; "inline")] #[test_case(CompactString::const_new; "static_str")] fn test_insert(to_compact: fn(&'static str) -> CompactString) { // insert into empty string let mut one_byte = to_compact(""); one_byte.insert(0, '.'); assert_eq!(one_byte, "."); let mut two_bytes = to_compact(""); two_bytes.insert(0, 'รœ'); assert_eq!(two_bytes, "รœ"); let mut three_bytes = to_compact(""); three_bytes.insert(0, 'โ‚ฌ'); assert_eq!(three_bytes, "โ‚ฌ"); 
let mut four_bytes = to_compact(""); four_bytes.insert(0, '๐Ÿ˜€'); assert_eq!(four_bytes, "๐Ÿ˜€"); // insert at the front of string let mut one_byte = to_compact("๐Ÿ˜€"); one_byte.insert(0, '.'); assert_eq!(one_byte, ".๐Ÿ˜€"); let mut two_bytes = to_compact("๐Ÿ˜€"); two_bytes.insert(0, 'รœ'); assert_eq!(two_bytes, "รœ๐Ÿ˜€"); let mut three_bytes = to_compact("๐Ÿ˜€"); three_bytes.insert(0, 'โ‚ฌ'); assert_eq!(three_bytes, "โ‚ฌ๐Ÿ˜€"); let mut four_bytes = to_compact("๐Ÿ˜€"); four_bytes.insert(0, '๐Ÿ˜€'); assert_eq!(four_bytes, "๐Ÿ˜€๐Ÿ˜€"); // insert at the end of string let mut one_byte = to_compact("๐Ÿ˜€"); one_byte.insert(4, '.'); assert_eq!(one_byte, "๐Ÿ˜€."); let mut two_bytes = to_compact("๐Ÿ˜€"); two_bytes.insert(4, 'รœ'); assert_eq!(two_bytes, "๐Ÿ˜€รœ"); let mut three_bytes = to_compact("๐Ÿ˜€"); three_bytes.insert(4, 'โ‚ฌ'); assert_eq!(three_bytes, "๐Ÿ˜€โ‚ฌ"); let mut four_bytes = to_compact("๐Ÿ˜€"); four_bytes.insert(4, '๐Ÿ˜€'); assert_eq!(four_bytes, "๐Ÿ˜€๐Ÿ˜€"); // insert in the middle of string let mut one_byte = to_compact("๐Ÿ˜€๐Ÿ˜€"); one_byte.insert(4, '.'); assert_eq!(one_byte, "๐Ÿ˜€.๐Ÿ˜€"); let mut two_bytes = to_compact("๐Ÿ˜€๐Ÿ˜€"); two_bytes.insert(4, 'รœ'); assert_eq!(two_bytes, "๐Ÿ˜€รœ๐Ÿ˜€"); let mut three_bytes = to_compact("๐Ÿ˜€๐Ÿ˜€"); three_bytes.insert(4, 'โ‚ฌ'); assert_eq!(three_bytes, "๐Ÿ˜€โ‚ฌ๐Ÿ˜€"); let mut four_bytes = to_compact("๐Ÿ˜€๐Ÿ˜€"); four_bytes.insert(4, '๐Ÿ˜€'); assert_eq!(four_bytes, "๐Ÿ˜€๐Ÿ˜€๐Ÿ˜€"); // edge case: new length is 24 bytes let mut s = to_compact("\u{ffff}\u{ffff}\u{ffff}\u{ffff}\u{ffff}\u{ffff}\u{ffff}"); s.insert(21, '\u{ffff}'); assert_eq!( s, "\u{ffff}\u{ffff}\u{ffff}\u{ffff}\u{ffff}\u{ffff}\u{ffff}\u{ffff}", ); } #[test] fn test_remove() { let mut control = String::from("๐Ÿฆ„๐Ÿฆ€hello๐ŸŽถworld๐Ÿ‡บ๐Ÿ‡ธ"); let mut compact = CompactString::from(&control); assert_eq!(control.remove(0), compact.remove(0)); assert_eq!(control, compact); assert_eq!(compact, "๐Ÿฆ€hello๐ŸŽถworld๐Ÿ‡บ๐Ÿ‡ธ"); let music_idx = control 
.char_indices() .find(|(_idx, c)| *c == '๐ŸŽถ') .map(|(idx, _c)| idx) .unwrap(); assert_eq!(control.remove(music_idx), compact.remove(music_idx)); assert_eq!(control, compact); assert_eq!(compact, "๐Ÿฆ€helloworld๐Ÿ‡บ๐Ÿ‡ธ"); } #[test] #[should_panic(expected = "cannot remove a char from the end of a string")] fn test_remove_empty_string() { let mut compact = CompactString::new(""); compact.remove(0); } #[test] #[should_panic(expected = "cannot remove a char from the end of a string")] fn test_remove_empty_string_static() { let mut compact = CompactString::const_new(""); compact.remove(0); } #[test] #[should_panic(expected = "cannot remove a char from the end of a string")] fn test_remove_str_len() { let mut compact = CompactString::new("hello world"); compact.remove(compact.len()); } #[test] fn test_with_capacity_16711422() { // Fuzzing with AFL on a 32-bit ARM arch found this bug! // // We have our own heap implementation called BoxString, which optionally stores the capacity // on the heap, which is really only relevant for 32-bit architectures. The discriminant it used // to determine if capacity was on the heap, was when the last `usize` number of bytes were all // equal to our internal HEAP_MASK, which at the time was `255`. At the time this worked and was // correct. // // When we released support to make the size of CompactString == Option, we // changed the HEAP_MASK to `254`, which unintentionally made our discriminant for determining // if our capacity was on the heap, all `254`s, yet our "max inline capacity value" was still // based on the discriminant being all `255`s. // // When creating a BoxString with capacity 16711422, we'd correctly decide we could store the // capacity inline, but this would create a capacity with an underlying value of // [254, 254, 254, HEAP_MASK]. 
Once the HEAP_MASK changed to 254, this capacity was now the same // as the discriminant to determine if the capacity was on the heap, so we'd incorrectly // identify the capacity as being on the heap, when it was really inline. assert_eq!(16711422_u32.to_le_bytes(), [254, 254, 254, 0]); let compact = CompactString::with_capacity(16711422); let std_str = String::with_capacity(16711422); assert!(compact.is_heap_allocated()); assert_eq!(compact.capacity(), std_str.capacity()); assert_eq!(compact, ""); assert_eq!(compact, std_str); } #[test] fn test_from_utf16() { let control = String::from("๐Ÿฆ„ hello world! ๐ŸŽฎ "); let utf16_buf: Vec = control.encode_utf16().collect(); let compact = CompactString::from_utf16(utf16_buf).unwrap(); assert_eq!(compact, control); cfg_if::cfg_if! { if #[cfg(target_pointer_width = "64")] { assert!(!compact.is_heap_allocated()); } else if #[cfg(target_pointer_width = "32")] { assert!(compact.is_heap_allocated()); } else { compile_error!("unsupported pointer width!"); } } } #[test] fn test_reserve_shrink_roundtrip() { const TEXT: &str = "Hello."; let mut s = CompactString::new(TEXT); assert!(!s.is_heap_allocated()); assert_eq!(s.capacity(), MAX_SIZE); assert_eq!(s, TEXT); s.reserve(128); assert!(s.is_heap_allocated()); assert!(s.capacity() >= 128 + TEXT.len()); assert_eq!(s, TEXT); s.shrink_to(64); assert!(s.is_heap_allocated()); assert!(s.capacity() >= 64); assert_eq!(s, TEXT); s.shrink_to_fit(); assert!(!s.is_heap_allocated()); assert_eq!(s.capacity(), MAX_SIZE); assert_eq!(s, TEXT); s.reserve(SIXTEEN_MB); assert!(s.is_heap_allocated()); assert!(s.capacity() >= SIXTEEN_MB + TEXT.len()); assert_eq!(s, TEXT); s.shrink_to(64); assert!(s.is_heap_allocated()); assert!(s.capacity() >= 64); assert_eq!(s, TEXT); s.reserve(SIXTEEN_MB); assert!(s.is_heap_allocated()); assert!(s.capacity() >= SIXTEEN_MB + TEXT.len()); assert_eq!(s, TEXT); s.shrink_to_fit(); assert!(!s.is_heap_allocated()); assert_eq!(s.capacity(), MAX_SIZE); assert_eq!(s, TEXT); } 
#[test] fn test_reserve_shrink_roundtrip_static() { // longer than 24 bytes, so the string does not get inlined const TEXT: &str = "Hello, world! How are you today?"; let mut s = CompactString::const_new(TEXT); assert!(!s.is_heap_allocated()); assert_eq!(s.capacity(), TEXT.len()); assert_eq!(s, TEXT); s.reserve(128); assert!(s.is_heap_allocated()); assert!(s.capacity() >= 128 + TEXT.len()); assert_eq!(s, TEXT); s.shrink_to(64); assert!(s.is_heap_allocated()); assert!(s.capacity() >= 64); assert_eq!(s, TEXT); s.shrink_to_fit(); assert!(s.is_heap_allocated()); assert_eq!(s.capacity(), s.len()); assert_eq!(s, TEXT); s.reserve(SIXTEEN_MB); assert!(s.is_heap_allocated()); assert!(s.capacity() >= SIXTEEN_MB + TEXT.len()); assert_eq!(s, TEXT); s.shrink_to(64); assert!(s.is_heap_allocated()); assert!(s.capacity() >= 64); assert_eq!(s, TEXT); s.reserve(SIXTEEN_MB); assert!(s.is_heap_allocated()); assert!(s.capacity() >= SIXTEEN_MB + TEXT.len()); assert_eq!(s, TEXT); s.shrink_to_fit(); assert!(s.is_heap_allocated()); assert_eq!(s.capacity(), s.len()); assert_eq!(s, TEXT); } #[test] fn test_reserve_shrink_roundtrip_static_inline() { // shorter than 12 bytes, so the string gets inlined const TEXT: &str = "Hello."; let mut s = CompactString::const_new(TEXT); assert!(!s.is_heap_allocated()); assert_eq!(s.capacity(), MAX_SIZE); assert_eq!(s, TEXT); s.reserve(128); assert!(s.is_heap_allocated()); assert!(s.capacity() >= 128 + TEXT.len()); assert_eq!(s, TEXT); s.shrink_to(64); assert!(s.is_heap_allocated()); assert!(s.capacity() >= 64); assert_eq!(s, TEXT); s.shrink_to_fit(); assert!(!s.is_heap_allocated()); assert_eq!(s.capacity(), MAX_SIZE); assert_eq!(s, TEXT); s.reserve(SIXTEEN_MB); assert!(s.is_heap_allocated()); assert!(s.capacity() >= SIXTEEN_MB + TEXT.len()); assert_eq!(s, TEXT); s.shrink_to(64); assert!(s.is_heap_allocated()); assert!(s.capacity() >= 64); assert_eq!(s, TEXT); s.reserve(SIXTEEN_MB); assert!(s.is_heap_allocated()); assert!(s.capacity() >= SIXTEEN_MB + 
TEXT.len()); assert_eq!(s, TEXT); s.shrink_to_fit(); assert!(!s.is_heap_allocated()); assert_eq!(s.capacity(), MAX_SIZE); assert_eq!(s, TEXT); } #[test] fn test_from_utf8_unchecked_sanity() { let text = "hello ๐ŸŒŽ, you are nice"; let compact = unsafe { CompactString::from_utf8_unchecked(text) }; assert_eq!(compact, text); } #[test] fn test_from_utf8_unchecked_long() { let bytes = [255; 2048]; let compact = unsafe { CompactString::from_utf8_unchecked(bytes) }; assert_eq!(compact.len(), 2048); assert_eq!(compact.as_bytes(), bytes); } #[test] fn test_from_utf8_unchecked_short() { let bytes = [255; 10]; let compact = unsafe { CompactString::from_utf8_unchecked(bytes) }; assert_eq!(compact.len(), 10); assert_eq!(compact.as_bytes(), bytes); } #[test] fn test_from_utf8_unchecked_empty() { let bytes = [255; 0]; let compact = unsafe { CompactString::from_utf8_unchecked(bytes) }; assert_eq!(compact.len(), 0); assert_eq!(compact.as_bytes(), bytes); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_from_utf8_lossy(#[strategy(rand_bytes())] bytes: Vec) { let compact = CompactString::from_utf8_lossy(&bytes); let control = String::from_utf8_lossy(&bytes); assert_eq!(compact, control); assert_eq!(compact.len(), control.len()); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_from_utf16(#[strategy(rand_u16s())] buf: Vec) { type FromUtf16Func = fn(&[u8]) -> Result; type FromU16Endian = fn(u16) -> u16; type FromU16EndianBytes = fn([u8; 2]) -> u16; const FUNCS: &[(FromUtf16Func, FromU16Endian, FromU16EndianBytes)] = &[ ( |v| CompactString::from_utf16le(v), u16::from_le, u16::from_le_bytes, ), ( |v| CompactString::from_utf16be(v), u16::from_be, u16::from_be_bytes, ), ]; for (new_compact_string, from_int, from_bytes) in FUNCS { let buf = &*buf; let bytes: &[u8] = unsafe { slice::from_raw_parts(buf.as_ptr().cast(), buf.len() * 2) }; let compact = new_compact_string(bytes); let control = String::from_utf16(&buf.iter().copied().map(from_int).collect::>()); assert_eq!(compact.is_ok(), 
control.is_ok()); if let (Ok(compact), Ok(control)) = (compact, control) { assert_eq!(compact.len(), control.len()); assert_eq!(compact, control); } if bytes.len() >= 2 { // Test if `CompactString::from_utf16x()` works with misaligned slices. let bytes: &[u8] = &bytes[1..bytes.len() - 1]; let buf: Vec = bytes .chunks_exact(2) .map(|v| from_bytes([v[0], v[1]])) .collect(); let compact = new_compact_string(bytes); let control = String::from_utf16(&buf); assert_eq!(compact.is_ok(), control.is_ok()); if let (Ok(compact), Ok(control)) = (compact, control) { assert_eq!(compact.len(), control.len()); assert_eq!(compact, control); } } } } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_repeat(n: u16, s: String) { let compact = CompactString::new(&s).repeat(n as usize); let control = s.repeat(n as usize); assert_eq!(compact, control); } #[test] fn test_from_utf16x() { let dancing_men = b"\x3d\xd8\x6f\xdc\x0d\x20\x42\x26\x0f\xfe"; assert_eq!(CompactString::from_utf16le(dancing_men).unwrap(), "๐Ÿ‘ฏโ€โ™‚๏ธ"); let dancing_men = b"0\x3d\xd8\x6f\xdc\x0d\x20\x42\x26\x0f\xfe"; assert!(CompactString::from_utf16le(dancing_men).is_err()); assert_eq!( CompactString::from_utf16le(&dancing_men[1..]).unwrap(), "๐Ÿ‘ฏโ€โ™‚๏ธ", ); let dancing_women = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xfe\x0f"; assert_eq!(CompactString::from_utf16be(dancing_women).unwrap(), "๐Ÿ‘ฏโ€โ™€๏ธ"); let dancing_women = b"0\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xfe\x0f"; assert!(CompactString::from_utf16be(dancing_women).is_err()); assert_eq!( CompactString::from_utf16be(&dancing_women[1..]).unwrap(), "๐Ÿ‘ฏโ€โ™€๏ธ", ); } #[test] fn test_from_utf16x_lossy() { let dancing_men = b"\x3d\xd8\x6f\xfc\x0d\x20\x42\x26\x0f\xfe"; assert_eq!( CompactString::from_utf16le_lossy(dancing_men), "๏ฟฝ\u{fc6f}\u{200d}โ™‚๏ธ", ); let dancing_men = b"0\x3d\xd8\x6f\xfc\x0d\x20\x42\x26\x0f\xfe"; assert_eq!( CompactString::from_utf16le_lossy(&dancing_men[1..]), "๏ฟฝ\u{fc6f}\u{200d}โ™‚๏ธ", ); let dancing_women = 
b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xde\x0f"; assert_eq!( CompactString::from_utf16be_lossy(dancing_women), "๐Ÿ‘ฏ\u{200d}โ™€๏ฟฝ", ); let dancing_women = b"0\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xde\x0f"; assert_eq!( CompactString::from_utf16be_lossy(&dancing_women[1..]), "๐Ÿ‘ฏ\u{200d}โ™€๏ฟฝ", ); } #[test] fn test_collect() { const VALUES: &[&str] = &["foo", "bar", "baz"]; assert_eq!( VALUES .iter() .copied() .map(Cow::Borrowed) .collect::(), "foobarbaz", ); assert_eq!( VALUES .iter() .copied() .map(|s| Cow::Owned(s.into())) .collect::(), "foobarbaz", ); assert_eq!( VALUES .iter() .copied() .map(Box::::from) .collect::(), "foobarbaz", ); assert_eq!( VALUES .iter() .copied() .map(CompactString::from) .collect::(), "foobarbaz", ); assert_eq!( VALUES .iter() .copied() .map(CompactString::from) .collect::>(), "foobarbaz", ); assert_eq!( VALUES .iter() .copied() .flat_map(|s| s.chars()) .collect::>(), "foobarbaz", ); } #[test] fn test_into_cow() { let og = "aaa"; let compact = CompactString::new(og); let cow: alloc::borrow::Cow<'_, str> = compact.into(); assert_eq!(og, cow); } #[test] fn test_into_arc() { let short = "short"; let long = "i am a long string that will be allocated on the heap"; let arc = alloc::sync::Arc::::from(CompactString::new(short)); assert_eq!(short, &*arc); let arc = alloc::sync::Arc::::from(CompactString::new(long)); assert_eq!(long, &*arc); } #[test] fn test_into_rc() { let short = "short"; let long = "i am a long string that will be allocated on the heap"; let rc = alloc::rc::Rc::::from(CompactString::new(short)); assert_eq!(short, &*rc); let rc = alloc::rc::Rc::::from(CompactString::new(long)); assert_eq!(long, &*rc); } #[test] fn test_into_error() { let short = "short"; let long = "i am a long string that will be allocated on the heap"; let short_error_ss = Box::::from(CompactString::new(short)); assert_eq!(short, format!("{short_error_ss}")); assert_eq!(format!("{short:?}"), format!("{short_error_ss:?}")); let long_error_ss = 
Box::::from(CompactString::new(long)); assert_eq!(long, format!("{long_error_ss}")); assert_eq!(format!("{long:?}"), format!("{long_error_ss:?}")); let short_error = Box::::from(CompactString::new(short)); assert_eq!(short, format!("{short_error}")); assert_eq!(format!("{short:?}"), format!("{short_error:?}")); let long_error = Box::::from(CompactString::new(long)); assert_eq!(long, format!("{long_error}")); assert_eq!(format!("{long:?}"), format!("{long_error:?}")); } #[test] fn test_into_box_str() { let short = "short"; let long = "i am a long string that will be allocated on the heap"; let s = Box::::from(CompactString::new(short)); assert_eq!(short, &*s); let l = Box::::from(CompactString::new(long)); assert_eq!(long, &*l); } #[test] fn test_into_os_string() { let short = "short"; let long = "i am a long string that will be allocated on the heap"; let s = std::ffi::OsString::from(CompactString::new(short)); assert_eq!(s.as_os_str().to_str().unwrap(), short); let l = std::ffi::OsString::from(CompactString::new(long)); assert_eq!(l.as_os_str().to_str().unwrap(), long); } #[test] fn test_into_path_buf() { let short = "short"; let long = "i am a long string that will be allocated on the heap"; let s = std::path::PathBuf::from(CompactString::new(short)); assert_eq!(s.as_os_str().to_str().unwrap(), short); let l = std::path::PathBuf::from(CompactString::new(long)); assert_eq!(l.as_os_str().to_str().unwrap(), long); } #[test] fn test_as_ref_path() { let short = "short"; let long = "i am a long string that will be allocated on the heap"; let s = CompactString::new(short); assert_eq!( AsRef::::as_ref(&s).to_str().unwrap(), short ); let l = CompactString::new(long); assert_eq!(AsRef::::as_ref(&l).to_str().unwrap(), long); } #[test] fn test_into_vec_u8() { let short = "short"; let long = "i am a long string that will be allocated on the heap"; let s = Vec::::from(CompactString::new(short)); assert_eq!(&s, short.as_bytes()); let l = Vec::::from(CompactString::new(long)); 
assert_eq!(&l, long.as_bytes()); } #[test] fn test_from_string_buffer_inlines_on_push() { let mut compact = CompactString::from_string_buffer("hello".to_string()); assert!(compact.is_heap_allocated()); compact.push_str(" world"); // when growing the CompactString we should inline it assert!(!compact.is_heap_allocated()); } #[test] fn test_from_string_buffer_inlines_on_clone() { let a = CompactString::from_string_buffer("hello".to_string()); assert!(a.is_heap_allocated()); let b = a.clone(); // when cloning the CompactString we should inline it assert!(!b.is_heap_allocated()); } // With debug assertions enabled the invocation will panic if you try to allocate more memory than // the system even has. #[cfg(not(debug_assertions))] #[cfg(target_pointer_width = "64")] #[test] #[should_panic = "Cannot allocate memory to hold CompactString"] fn test_alloc_excessively_long_string() { // 2**56 - 2 bytes, the maximum number `Capacity` can hold CompactString::with_capacity((1 << 56) - 2); } // This feature was enabled by which was first // released in Rust 1.65. 
#[rustversion::since(1.65)] #[test] fn multiple_niches_test() { #[allow(unused)] enum Value { String(CompactString), Bool(bool), Signed(isize), Unsigned(usize), Null, } assert_eq!( core::mem::size_of::(), core::mem::size_of::() ); } #[test] fn test_is_empty() { const ZEROS: &[&str] = &[ "\0", // 1 "\0\0\0\0\0\0\0\0\0\0\0\0", // 12 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", // 24 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", // 25 ]; assert!(CompactString::new("").is_empty()); assert!(CompactString::const_new("").is_empty()); for (len, s) in ZEROS.iter().copied().enumerate() { let mut a = CompactString::new(s); let mut b = CompactString::new(s); for _ in (1..=len).rev() { a.truncate(len); b.truncate(len); assert!(!a.is_empty()); assert!(!b.is_empty()); } } } compact_str-0.8.0/src/traits.rs000064400000000000000000000352451046102023000146230ustar 00000000000000use alloc::string::String; use core::fmt::{ self, Write, }; use core::num; use castaway::{ match_type, LifetimeFree, }; use super::repr::{ IntoRepr, Repr, }; use crate::{ CompactString, ToCompactStringError, UnwrapWithMsg, }; /// A trait for converting a value to a `CompactString`. /// /// This trait is automatically implemented for any type which implements the /// [`fmt::Display`] trait. As such, [`ToCompactString`] shouldn't be implemented directly: /// [`fmt::Display`] should be implemented instead, and you get the [`ToCompactString`] /// implementation for free. pub trait ToCompactString { /// Converts the given value to a [`CompactString`]. /// /// # Panics /// /// Panics if the system runs out of memory and it cannot hold the whole string, /// or if [`Display::fmt()`][core::fmt::Display::fmt] returns an error. 
/// /// # Examples /// /// Basic usage: /// /// ``` /// use compact_str::ToCompactString; /// # use compact_str::CompactString; /// /// let i = 5; /// let five = CompactString::new("5"); /// /// assert_eq!(i.to_compact_string(), five); /// ``` #[inline] #[track_caller] fn to_compact_string(&self) -> CompactString { self.try_to_compact_string().unwrap_with_msg() } /// Fallible version of [`ToCompactString::to_compact_string()`] /// /// This method won't panic if the system is out-of-memory, but return a /// [`ReserveError`][crate::ReserveError]. /// Otherwise it behaves the same as [`ToCompactString::to_compact_string()`]. fn try_to_compact_string(&self) -> Result; } /// # Safety /// /// * [`CompactString`] does not contain any lifetime /// * [`CompactString`] is 'static /// * [`CompactString`] is a container to `u8`, which is `LifetimeFree`. unsafe impl LifetimeFree for CompactString {} unsafe impl LifetimeFree for Repr {} /// # Panics /// /// In this implementation, the `to_compact_string` method panics if the `Display` implementation /// returns an error. This indicates an incorrect `Display` implementation since /// `std::fmt::Write for CompactString` never returns an error itself. 
///
/// # Note
///
/// We use the [`castaway`] crate to provide zero-cost specialization for several types, those are:
/// * `u8`, `u16`, `u32`, `u64`, `u128`, `usize`
/// * `i8`, `i16`, `i32`, `i64`, `i128`, `isize`
/// * `NonZeroU*`, `NonZeroI*`
/// * `bool`
/// * `char`
/// * `String`, `CompactString`
/// * `f32`, `f64`
///     * For floats we use [`ryu`] crate which sometimes provides different formatting than [`std`]
impl<T: fmt::Display> ToCompactString for T {
    #[inline]
    fn try_to_compact_string(&self) -> Result<CompactString, ToCompactStringError> {
        // Each listed type is converted straight into a `Repr`; every other type falls
        // through to the last arm.
        let repr = match_type!(self, {
            &u8 as s => s.into_repr()?,
            &i8 as s => s.into_repr()?,
            &u16 as s => s.into_repr()?,
            &i16 as s => s.into_repr()?,
            &u32 as s => s.into_repr()?,
            &i32 as s => s.into_repr()?,
            &u64 as s => s.into_repr()?,
            &i64 as s => s.into_repr()?,
            &u128 as s => s.into_repr()?,
            &i128 as s => s.into_repr()?,
            &usize as s => s.into_repr()?,
            &isize as s => s.into_repr()?,
            &f32 as s => s.into_repr()?,
            &f64 as s => s.into_repr()?,
            &bool as s => s.into_repr()?,
            &char as s => s.into_repr()?,
            &String as s => Repr::new(s)?,
            &CompactString as s => Repr::new(s)?,
            &num::NonZeroU8 as s => s.into_repr()?,
            &num::NonZeroI8 as s => s.into_repr()?,
            &num::NonZeroU16 as s => s.into_repr()?,
            &num::NonZeroI16 as s => s.into_repr()?,
            &num::NonZeroU32 as s => s.into_repr()?,
            &num::NonZeroI32 as s => s.into_repr()?,
            &num::NonZeroU64 as s => s.into_repr()?,
            &num::NonZeroI64 as s => s.into_repr()?,
            &num::NonZeroUsize as s => s.into_repr()?,
            &num::NonZeroIsize as s => s.into_repr()?,
            &num::NonZeroU128 as s => s.into_repr()?,
            &num::NonZeroI128 as s => s.into_repr()?,
            s => {
                // Generic path: format through `Display` into an initially empty string
                // and return it directly, since this arm produces a `CompactString`
                // rather than a `Repr`.
                let mut c = CompactString::const_new("");
                write!(c, "{}", s)?;
                return Ok(c);
            }
        });

        Ok(CompactString(repr))
    }
}

/// A trait that provides convenience methods for creating a [`CompactString`] from a collection of
/// items. It is implemented for all types that can be converted into an iterator, and that iterator
/// yields types that can be converted into a `str`.
///
/// i.e. `C: IntoIterator<Item = AsRef<str>>`.
/// /// # Concatenate and Join /// Two methods that this trait provides are `concat_compact(...)` and `join_compact(...)` /// ``` /// use compact_str::CompactStringExt; /// /// let words = vec!["โ˜€๏ธ", "๐ŸŒ•", "๐ŸŒ‘", "โ˜€๏ธ"]; /// /// // directly concatenate all the words together /// let concat = words.concat_compact(); /// assert_eq!(concat, "โ˜€๏ธ๐ŸŒ•๐ŸŒ‘โ˜€๏ธ"); /// /// // join the words, with a separator /// let join = words.join_compact(" โžก๏ธ "); /// assert_eq!(join, "โ˜€๏ธ โžก๏ธ ๐ŸŒ• โžก๏ธ ๐ŸŒ‘ โžก๏ธ โ˜€๏ธ"); /// ``` pub trait CompactStringExt { /// Concatenates all the items of a collection into a [`CompactString`] /// /// # Example /// ``` /// use compact_str::CompactStringExt; /// /// let items = ["hello", " ", "world", "!"]; /// let compact = items.concat_compact(); /// /// assert_eq!(compact, "hello world!"); /// ``` fn concat_compact(&self) -> CompactString; /// Joins all the items of a collection, placing a separator between them, forming a /// [`CompactString`] /// /// # Example /// ``` /// use compact_str::CompactStringExt; /// /// let fruits = vec!["apples", "oranges", "bananas"]; /// let compact = fruits.join_compact(", "); /// /// assert_eq!(compact, "apples, oranges, bananas"); /// ``` fn join_compact>(&self, separator: S) -> CompactString; } impl CompactStringExt for C where I: AsRef, for<'a> &'a C: IntoIterator, { fn concat_compact(&self) -> CompactString { self.into_iter() .fold(CompactString::const_new(""), |mut s, item| { s.push_str(item.as_ref()); s }) } fn join_compact>(&self, separator: S) -> CompactString { let mut compact_string = CompactString::const_new(""); let mut iter = self.into_iter().peekable(); let sep = separator.as_ref(); while let Some(item) = iter.next() { compact_string.push_str(item.as_ref()); if iter.peek().is_some() { compact_string.push_str(sep); } } compact_string } } #[cfg(test)] mod tests { use alloc::string::{ String, ToString, }; use alloc::vec::Vec; use core::num; use proptest::prelude::*; use 
test_strategy::proptest; use super::{ CompactStringExt, ToCompactString, }; use crate::CompactString; #[test] fn test_join() { let slice = ["hello", "world"]; let c = slice.join_compact(" "); assert_eq!(c, "hello world"); let vector = vec!["๐ŸŽ", "๐ŸŠ", "๐ŸŒ"]; let c = vector.join_compact(","); assert_eq!(c, "๐ŸŽ,๐ŸŠ,๐ŸŒ"); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_join(items: Vec, separator: String) { let c: CompactString = items.join_compact(&separator); let s: String = items.join(&separator); assert_eq!(c, s); } #[test] fn test_concat() { let items = vec!["hello", "world"]; let c = items.join_compact(" "); assert_eq!(c, "hello world"); let vector = vec!["๐ŸŽ", "๐ŸŠ", "๐ŸŒ"]; let c = vector.concat_compact(); assert_eq!(c, "๐ŸŽ๐ŸŠ๐ŸŒ"); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_concat(items: Vec) { let c: CompactString = items.concat_compact(); let s: String = items.concat(); assert_eq!(c, s); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_to_compact_string_u8(val: u8) { let compact = val.to_compact_string(); prop_assert_eq!(compact.as_str(), val.to_string()); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_to_compact_string_i8(val: i8) { let compact = val.to_compact_string(); prop_assert_eq!(compact.as_str(), val.to_string()); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_to_compact_string_u16(val: u16) { let compact = val.to_compact_string(); prop_assert_eq!(compact.as_str(), val.to_string()); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_to_compact_string_i16(val: i16) { let compact = val.to_compact_string(); prop_assert_eq!(compact.as_str(), val.to_string()); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_to_compact_string_u32(val: u32) { let compact = val.to_compact_string(); prop_assert_eq!(compact.as_str(), val.to_string()); } #[proptest] #[cfg_attr(miri, ignore)] fn proptest_to_compact_string_i32(val: i32) { let compact = val.to_compact_string(); prop_assert_eq!(compact.as_str(), val.to_string()); } 
#[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_u64(val: u64) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_i64(val: i64) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_usize(val: usize) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_isize(val: isize) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_u128(val: u128) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_i128(val: i128) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    // The `NonZero*` strategies below draw from ranges that start at 1, so the checked
    // constructors never see zero. For the signed variants the unsigned value is cast
    // bit-for-bit, which keeps any non-zero value non-zero.

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_u8(
        #[strategy((1..=u8::MAX).prop_map(|x| num::NonZeroU8::new(x).expect("range excludes zero")))]
        val: num::NonZeroU8,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_u16(
        #[strategy((1..=u16::MAX).prop_map(|x| num::NonZeroU16::new(x).expect("range excludes zero")))]
        val: num::NonZeroU16,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_u32(
        #[strategy((1..=u32::MAX).prop_map(|x| num::NonZeroU32::new(x).expect("range excludes zero")))]
        val: num::NonZeroU32,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_u64(
        #[strategy((1..=u64::MAX).prop_map(|x| num::NonZeroU64::new(x).expect("range excludes zero")))]
        val: num::NonZeroU64,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_u128(
        #[strategy((1..=u128::MAX).prop_map(|x| num::NonZeroU128::new(x).expect("range excludes zero")))]
        val: num::NonZeroU128,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_usize(
        #[strategy((1..=usize::MAX).prop_map(|x| num::NonZeroUsize::new(x).expect("range excludes zero")))]
        val: num::NonZeroUsize,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_i8(
        #[strategy((1..=u8::MAX).prop_map(|x| num::NonZeroI8::new(x as i8).expect("range excludes zero")))]
        val: num::NonZeroI8,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_i16(
        #[strategy((1..=u16::MAX).prop_map(|x| num::NonZeroI16::new(x as i16).expect("range excludes zero")))]
        val: num::NonZeroI16,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_i32(
        #[strategy((1..=u32::MAX).prop_map(|x| num::NonZeroI32::new(x as i32).expect("range excludes zero")))]
        val: num::NonZeroI32,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_i64(
        #[strategy((1..=u64::MAX).prop_map(|x| num::NonZeroI64::new(x as i64).expect("range excludes zero")))]
        val: num::NonZeroI64,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_i128(
        #[strategy((1..=u128::MAX).prop_map(|x| num::NonZeroI128::new(x as i128).expect("range excludes zero")))]
        val: num::NonZeroI128,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }

    #[proptest]
    #[cfg_attr(miri, ignore)]
    fn proptest_to_compact_string_non_zero_isize(
        #[strategy((1..=usize::MAX).prop_map(|x| num::NonZeroIsize::new(x as isize).expect("range excludes zero")))]
        val: num::NonZeroIsize,
    ) {
        let compact = val.to_compact_string();
        prop_assert_eq!(compact.as_str(), val.to_string());
    }
}
compact_str-0.8.0/src/unicode_data.rs000064400000000000000000000163401046102023000157270ustar 00000000000000
//! Adapted from an external source whose link is missing in this copy of the file.
//! NOTE(review): the layout looks like the skip-search tables of Rust's
//! `core::unicode::unicode_data` — confirm and restore the original link.

/// Reports whether `needle` is a member of the set encoded by the two tables.
///
/// Each entry of `short_offset_runs` is a packed header: the low 21 bits hold a prefix sum
/// (see [`decode_prefix_sum`]) and the high 11 bits hold a start index into `offsets`
/// (see [`decode_length`]). The header selects a window of `offsets`, which is then walked
/// while accumulating run lengths; the parity of the position where the walk stops is the
/// answer.
#[inline(always)]
fn skip_search<const SOR: usize, const OFFSETS: usize>(
    needle: u32,
    short_offset_runs: &[u32; SOR],
    offsets: &[u8; OFFSETS],
) -> bool {
    // Note that this *cannot* be past the end of the array, as the last
    // element is greater than char::MAX (the largest possible needle).
    //
    // So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
    // location cannot be past it, so Err(idx) != length either.
    //
    // This means that we can avoid bounds checking for the accesses below, too.
    //
    // Shifting left by 11 discards the index bits, so the search compares prefix sums only.
    let last_idx =
        match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
            Ok(idx) => idx + 1,
            Err(idx) => idx,
        };

    // Window of `offsets` owned by the selected header: it ends where the next header's
    // window starts, or at the end of the table for the last header.
    let mut offset_idx = decode_length(short_offset_runs[last_idx]);
    let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
        decode_length(*next) - offset_idx
    } else {
        offsets.len() - offset_idx
    };

    // Distance of the needle from the prefix sum of the previous header (0 for the first).
    let prev = last_idx
        .checked_sub(1)
        .map(|prev| decode_prefix_sum(short_offset_runs[prev]))
        .unwrap_or(0);

    let total = needle - prev;
    let mut prefix_sum = 0;
    for _ in 0..(length - 1) {
        let offset = offsets[offset_idx];
        prefix_sum += offset as u32;
        if prefix_sum > total {
            break;
        }
        offset_idx += 1;
    }

    // An odd stopping position means the needle is in the set.
    offset_idx % 2 == 1
}

/// Extracts the low 21 bits of a packed header.
#[inline(always)]
const fn decode_prefix_sum(short_offset_run_header: u32) -> u32 {
    short_offset_run_header & ((1 << 21) - 1)
}

/// Extracts the bits above the low 21 of a packed header, as an index into the offsets table.
#[inline(always)]
const fn decode_length(short_offset_run_header: u32) -> usize {
    (short_offset_run_header >> 21) as usize
}

/// Lookup tables and predicate for the `case_ignorable` set.
#[rustfmt::skip]
pub mod case_ignorable {
    static SHORT_OFFSET_RUNS: [u32; 35] = [
        688, 44045149, 572528402, 576724925, 807414908, 878718981, 903913493, 929080568, 933275148,
        937491230, 1138818560, 1147208189, 1210124160, 1222707713, 1235291428, 1260457643,
        1264654383, 1499535675, 1507925040, 1566646003, 1629566000, 1650551536, 1658941263,
        1671540720, 1688321181, 1700908800, 1709298023, 1717688832, 1738661888, 1763828398,
        1797383403, 1805773008, 1809970171, 1819148289, 1824457200,
    ];
    static OFFSETS: [u8; 875] = [
        39, 1, 6, 1, 11, 1, 35, 1, 1, 1, 71, 1, 4, 1, 1, 1, 4, 1, 2, 2, 0, 192, 4, 2, 4, 1, 9, 2,
        1, 1, 251, 7, 207, 1, 5, 1, 49, 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35,
        1, 10, 21, 16, 1, 101, 8, 1, 10, 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24,
        24, 43, 3, 44, 1, 7, 2, 6, 8, 41, 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1,
        58, 1, 4, 4, 8, 1, 20, 2, 26, 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2,
        57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1,
        61, 1, 12,
        1, 50, 1, 3, 1, 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1, 5, 2, 20,
        2, 28, 2, 57, 2, 4, 4, 8, 1, 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9, 98, 1, 2,
        9, 9, 1, 1, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, 102, 4, 1,
        6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3, 29, 2, 30,
        2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118, 3, 4, 2,
        9, 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 31, 49, 4,
        48, 1, 1, 5, 1, 1, 5, 1, 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3, 58, 8,
        2, 2, 64, 6, 82, 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1, 3, 11,
        3, 13, 3, 13, 3, 13, 2, 12, 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1, 16, 13,
        51, 33, 0, 2, 113, 3, 125, 1, 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6, 93, 3,
        0, 1, 0, 6, 0, 1, 98, 4, 1, 10, 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 103, 3, 3, 2, 8,
        1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4, 2, 2,
        17, 1, 21, 2, 66, 6, 2, 2, 2, 2, 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2, 1, 1,
        27, 1, 14, 2, 5, 2, 1, 1, 100, 5, 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3, 1, 12,
        16, 34, 1, 2, 1, 169, 1, 7, 1, 6, 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3, 0, 1,
        226, 1, 149, 5, 0, 6, 1, 42, 1, 9, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 0, 2, 80,
        3, 70, 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, 1, 4, 1, 10, 1, 50,
        3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, 1, 2, 1, 157, 1, 3,
        8, 21, 2, 57, 2, 3, 1, 37, 7, 3, 5, 195, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, 1, 4, 2, 1, 2,
        238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, 3, 2, 4, 1, 5,
        0, 9, 1, 2, 0, 2, 1, 1, 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3,
        46, 13, 1, 2, 0, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1,
        72, 2, 3, 1, 1, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, 1, 0, 17, 6, 15, 0, 5, 59, 7, 9, 4, 0, 1,
        63, 17, 64, 2, 1, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4,
        148, 3, 0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33,
        1, 160, 14, 0, 1, 61, 4, 0, 5, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
    ];

    /// Returns whether `c` is in the set encoded by this module's tables.
    #[inline(always)]
    pub fn lookup(c: char) -> bool {
        super::skip_search(
            c as u32,
            &SHORT_OFFSET_RUNS,
            &OFFSETS,
        )
    }
}

/// Lookup tables and predicate for the `cased` set.
#[rustfmt::skip]
pub mod cased {
    static SHORT_OFFSET_RUNS: [u32; 22] = [
        4256, 115348384, 136322176, 144711446, 163587254, 320875520, 325101120, 350268208,
        392231680, 404815649, 413205504, 421595008, 467733632, 484513952, 492924480, 497144832,
        501339814, 578936576, 627171376, 639756544, 643952944, 649261450,
    ];
    static OFFSETS: [u8; 315] = [
        65, 26, 6, 26, 47, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 1, 36, 7, 2, 30, 5,
        96, 1, 42, 4, 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9,
        41, 0, 38, 1, 1, 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 9, 7, 43, 2, 3, 64, 192, 64, 0, 2, 6,
        2, 38, 2, 6, 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4,
        13, 5, 3, 1, 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1,
        1, 4, 1, 6, 4, 1, 2, 4, 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1,
        5, 1, 0, 46, 18, 30, 132, 102, 3, 4, 1, 59, 5, 2, 1, 1, 1, 5, 24, 5, 1, 3, 0, 43, 1, 14, 6,
        80, 0, 7, 12, 5, 0, 26, 6, 26, 0, 80, 96, 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1,
        15, 1, 7, 1, 2, 0, 1, 2, 3, 1, 42, 1, 9, 0, 51, 13, 51, 0, 64, 0, 64, 0, 85, 1, 71, 1, 2,
        2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3,
        7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10,
        1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
    ];

    /// Returns whether `c` is in the set encoded by this module's tables.
    #[inline(always)]
    pub fn lookup(c: char) -> bool {
        super::skip_search(
            c as u32,
            &SHORT_OFFSET_RUNS,
            &OFFSETS,
        )
    }
}