vec-strings-0.4.8/.cargo_vcs_info.json0000644000000001360000000000100133110ustar { "git": { "sha1": "410b86675678b7d87368fc88237788a5a11cdb14" }, "path_in_vcs": "" }vec-strings-0.4.8/.github/workflows/rust.yml000064400000000000000000000036371046102023000172270ustar 00000000000000name: Rust env: CARGO_TERM_COLOR: always on: push: paths-ignore: - 'README.md' - 'LICENSE' - '.gitignore' pull_request: paths-ignore: - 'README.md' - 'LICENSE' - '.gitignore' jobs: check: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | ~/.cargo/bin/ ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ target/ key: ${{ github.event.repository.name }}-${{ runner.os }}-cargo-check-${{ hashFiles('**/Cargo.lock') }}-v2 - name: Run check run: cargo check --all --all-features check_format: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Check format run: cargo fmt --all -- --check Run_clippy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/cache@v2 with: path: | ~/.cargo/bin/ ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ target/ key: ${{ github.event.repository.name }}-${{ runner.os }}-cargo-clippy-${{ hashFiles('**/Cargo.lock') }}-v2 - name: Run clippy run: cargo clippy --all --all-features build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Install latest nightly uses: actions-rs/toolchain@v1 with: toolchain: nightly override: true components: miri - uses: actions/cache@v2 with: path: | ~/.cargo/bin/ ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ target/ key: ${{ github.event.repository.name }}-${{ runner.os }}-cargo-test-${{ hashFiles('**/Cargo.lock') }}-v2 - uses: taiki-e/install-action@nextest - name: Run tests run: ./run_tests.sh vec-strings-0.4.8/.gitignore000064400000000000000000000006311046102023000140710ustar 00000000000000# Generated by Cargo # will have compiled files and executables /target/ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html Cargo.lock # These are backup files generated by rustfmt **/*.rs.bk # Added by cargo # # already existing elements were commented out /target #Cargo.lock vec-strings-0.4.8/Cargo.toml0000644000000020410000000000100113040ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "vec-strings" version = "0.4.8" authors = ["Jiahao XU "] description = "Store any string efficiently in an immutable way." readme = "README.md" keywords = [ "string", "utilities", ] categories = ["data-structures"] license = "MIT" repository = "https://github.com/NobodyXu/vec-strings" [dependencies.serde] version = "1.0" optional = true [dependencies.thin-vec] version = "0.2.4" [dev-dependencies.once_cell] version = "1.8.0" [dev-dependencies.serde_json] version = "1.0" [dev-dependencies.serde_test] version = "1.0" vec-strings-0.4.8/Cargo.toml.orig000064400000000000000000000007401046102023000147710ustar 00000000000000[package] name = "vec-strings" version = "0.4.8" edition = "2018" authors = ["Jiahao XU "] license = "MIT" description = "Store any string efficiently in an immutable way." repository = "https://github.com/NobodyXu/vec-strings" keywords = ["string", "utilities"] categories = ["data-structures"] [dependencies] serde = { version = "1.0", optional = true } thin-vec = "0.2.4" [dev-dependencies] serde_json = "1.0" serde_test = "1.0" once_cell = "1.8.0" vec-strings-0.4.8/LICENSE000064400000000000000000000020521046102023000131050ustar 00000000000000MIT License Copyright (c) 2021 Jiahao XU Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. vec-strings-0.4.8/README.md000064400000000000000000000012201046102023000133530ustar 00000000000000# vec-strings [![Rust](https://github.com/NobodyXu/vec-strings/actions/workflows/rust.yml/badge.svg)](https://github.com/NobodyXu/vec-strings/actions/workflows/rust.yml) [![crate.io downloads](https://img.shields.io/crates/d/vec-strings)](https://crates.io/crates/vec-strings) [![crate.io version](https://img.shields.io/crates/v/vec-strings)](https://crates.io/crates/vec-strings) [![docs](https://docs.rs/vec-strings/badge.svg)](https://docs.rs/vec-strings) More compact `Vec>` and `(Box, Box)` This crate contains feature `serde`, which enables serialization/deserialization support. ## How to run tests ``` ./run_test.sh ``` vec-strings-0.4.8/run_tests.sh000075500000000000000000000007011046102023000144640ustar 00000000000000#!/bin/bash set -euxo pipefail cd "$(dirname "$(realpath "$0")")" export RUST_TEST_THREADS=1 rep=$(seq 1 3) for _ in $rep; do cargo nextest run --all-features --nocapture done export RUSTFLAGS='-Zsanitizer=address' export RUSTDOCFLAGS="$RUSTFLAGS" for _ in $rep; do cargo +nightly nextest run --all-features --nocapture done #export MIRIFLAGS="-Zmiri-disable-isolation" exec cargo +nightly miri nextest run --all-features --nocapture vec-strings-0.4.8/src/lib.rs000064400000000000000000000004771046102023000140140ustar 00000000000000//! This crate contains feature `serde`, which enables serialization/deserialization //! support. #[cfg(feature = "serde")] mod serde; mod small_array_box; mod strings; mod strings_no_index; mod two_strs; pub use small_array_box::SmallArrayBox; pub use strings::*; pub use strings_no_index::*; pub use two_strs::*; vec-strings-0.4.8/src/serde.rs000064400000000000000000000252751046102023000143530ustar 00000000000000use super::small_array_box::*; use super::{Strings, StringsIter, StringsNoIndex, StringsNoIndexIter, TwoStrs}; use std::convert::TryInto; use std::fmt; use std::iter::Iterator; use std::marker::PhantomData; use std::mem::MaybeUninit; use std::ops::Deref; use std::ops::DerefMut; use serde::de::{Deserialize, Deserializer, Error, SeqAccess, Visitor}; use serde::ser::{Serialize, SerializeTuple, Serializer}; macro_rules! impl_ser_de_for_strings { ($Strings:ident) => { impl Serialize for $Strings { fn serialize(&self, serializer: S) -> Result { serializer.collect_seq(self) } } impl<'de> Deserialize<'de> for $Strings { fn deserialize>(deserializer: D) -> Result { struct StringsVisitor; impl<'de> Visitor<'de> for StringsVisitor { type Value = $Strings; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { write!(formatter, "A u32 length and &[str]") } fn visit_seq(self, mut seq: V) -> Result where V: SeqAccess<'de>, { let len = seq.size_hint().unwrap_or(10); let mut values = $Strings::with_capacity(len.try_into().map_err(|_| { V::Error::invalid_length(len, &"Expect u32 length") })?); while let Some(value) = seq.next_element()? { values.push(value); } Ok(values) } } deserializer.deserialize_seq(StringsVisitor) } } }; } impl_ser_de_for_strings!(Strings); impl_ser_de_for_strings!(StringsNoIndex); macro_rules! impl_Serialize_for_iter { ($Iter:ident) => { /// The iterator is formatted as (&str, ...) impl Serialize for $Iter<'_> { fn serialize(&self, serializer: S) -> Result { let mut tuple_serializer = serializer.serialize_tuple(self.size_hint().0)?; for string in self.clone() { tuple_serializer.serialize_element(string)?; } tuple_serializer.end() } } }; } impl_Serialize_for_iter!(StringsIter); impl_Serialize_for_iter!(StringsNoIndexIter); /// Format: (&str, &str) impl Serialize for TwoStrs { fn serialize(&self, serializer: S) -> Result { self.get().serialize(serializer) } } /// Format: (&str, &str) impl<'de> Deserialize<'de> for TwoStrs { fn deserialize>(deserializer: D) -> Result { let (s1, s2) = <(&'de str, &'de str)>::deserialize(deserializer)?; Ok(Self::new(s1, s2)) } } impl Serialize for SmallArrayBox { fn serialize(&self, serializer: S) -> Result { self.deref().serialize(serializer) } } impl<'de, T: Deserialize<'de>, const INLINE_LEN: usize> Deserialize<'de> for SmallArrayBox { fn deserialize>(deserializer: D) -> Result { struct SmallArrayBoxVisitor(PhantomData); impl<'de, T: Deserialize<'de>, const INLINE_LEN: usize> Visitor<'de> for SmallArrayBoxVisitor { type Value = SmallArrayBox; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { write!(formatter, "Expected slice") } fn visit_seq(self, mut seq: V) -> Result where V: SeqAccess<'de>, { let size_hint = seq.size_hint(); if let Some(len) = size_hint { if len <= INLINE_LEN { let mut this = SmallArrayBox::uninit_inline_storage(); let inline_storage = unsafe { this.storage.inline_storage.deref_mut() }; while let Some(value) = seq.next_element()? { inline_storage[this.len] = MaybeUninit::new(value); this.len += 1; } return Ok(this); } } let mut values = Vec::with_capacity(size_hint.unwrap_or(10)); while let Some(value) = seq.next_element()? { values.push(value); } Ok(values.into()) } } deserializer.deserialize_seq(SmallArrayBoxVisitor(PhantomData)) } } #[cfg(test)] mod tests { const INLINE_LEN: usize = 8; use super::{Strings, StringsNoIndex, TwoStrs}; type SmallArrayBox = super::SmallArrayBox; use std::error::Error; use std::fmt::{self, Display}; use std::mem::MaybeUninit; use once_cell::sync::OnceCell; use serde_test::{assert_ser_tokens, assert_tokens, Token}; use serde::de::{self, value::SeqAccessDeserializer, Deserialize, DeserializeSeed, SeqAccess}; // Test using serde_test #[test] fn test_ser_de_empty_serde_strings() { assert_tokens( &Strings::new(), &[Token::Seq { len: Some(0) }, Token::SeqEnd], ); } #[test] fn test_ser_de_empty_serde_strings_no_index() { assert_tokens( &StringsNoIndex::new(), &[Token::Seq { len: Some(0) }, Token::SeqEnd], ); } macro_rules! assert_ser_de_serde { ($strings:expr) => { let strings = $strings; // Test Strings let mut tokens = vec![Token::Seq { len: Some(strings.len() as usize), }]; for string in strings { tokens.push(Token::BorrowedStr(string)); } tokens.push(Token::SeqEnd); assert_tokens(strings, &tokens); // Test StringsIter tokens[0] = Token::Tuple { len: strings.len() as usize, }; *tokens.last_mut().unwrap() = Token::TupleEnd; assert_ser_tokens(&strings.iter(), &tokens); }; } fn get_strings() -> &'static Strings { static STRINGS: OnceCell = OnceCell::new(); STRINGS.get_or_init(|| { let mut strings = Strings::new(); for i in 0..1024 { strings.push(&i.to_string()); } strings }) } #[test] fn test_ser_de_serde_strings() { assert_ser_de_serde!(get_strings()); } fn get_strings_no_index() -> &'static StringsNoIndex { static STRINGS: OnceCell = OnceCell::new(); STRINGS.get_or_init(|| { let mut strings = StringsNoIndex::new(); for i in 0..1024 { strings.push(&i.to_string()); } strings }) } #[test] fn test_ser_de_serde_strings_no_index() { assert_ser_de_serde!(get_strings_no_index()); } // Test using serde_json macro_rules! assert_ser_de_json { ($strings:expr, $strings_type:ident) => { let strings = $strings; let json = serde_json::to_string(strings).unwrap(); assert_eq!( serde_json::from_str::<'_, $strings_type>(&json).unwrap(), *strings ); }; } #[test] fn test_ser_de_serde_json_strings() { assert_ser_de_json!(get_strings(), Strings); } #[test] fn test_ser_de_serde_json_strings_no_index() { assert_ser_de_json!(get_strings_no_index(), StringsNoIndex); } #[test] fn test_ser_de_two_strs() { let s1 = "1234<<"; let s2 = "234a"; let two_strs = TwoStrs::new(s1, s2); assert_tokens( &two_strs, &[ Token::Tuple { len: 2 }, Token::BorrowedStr(s1), Token::BorrowedStr(s2), Token::TupleEnd, ], ); } #[test] fn test_ser_de_serde_json_two_strs() { let s1 = "1234<<"; let s2 = "234a"; let two_strs = TwoStrs::new(s1, s2); assert_ser_de_json!(&two_strs, TwoStrs); } #[test] fn test_ser_de_small_array_box_empty() { let tokens = [Token::Seq { len: Some(0) }, Token::SeqEnd]; let array = SmallArrayBox::new([]); assert_tokens(&array, &tokens); let array = SmallArrayBox::new_empty(); assert_tokens(&array, &tokens); } #[test] fn test_ser_de_small_array_box() { let vec: Vec = (0..100).collect(); let mut tokens = Vec::new(); for len in 0..vec.len() { let slice = &vec[..len]; let array = SmallArrayBox::new(slice.iter().copied()); tokens.reserve_exact(len + 2); tokens.push(Token::Seq { len: Some(len) }); for i in 0..(len as u8) { tokens.push(Token::U8(i)); } tokens.push(Token::SeqEnd); assert_tokens(&array, &tokens); tokens.clear(); } } #[test] fn test_small_array_box_de_error() { #[derive(Debug)] struct DummyError; impl Display for DummyError { fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result { Ok(()) } } impl de::Error for DummyError { fn custom(_msg: T) -> Self { Self } } impl Error for DummyError {} struct ErrSeqAccess(usize); impl<'de> SeqAccess<'de> for ErrSeqAccess { type Error = DummyError; fn next_element_seed(&mut self, _seed: T) -> Result, Self::Error> where T: DeserializeSeed<'de>, { if self.0 > 0 { self.0 -= 1; Ok(Some(unsafe { MaybeUninit::zeroed().assume_init() })) } else { Err(DummyError) } } fn size_hint(&self) -> Option { Some(self.0) } } for len in 0..INLINE_LEN { let deserializer = SeqAccessDeserializer::new(ErrSeqAccess(len)); assert!(SmallArrayBox::deserialize(deserializer).is_err()); } } } vec-strings-0.4.8/src/small_array_box.rs000064400000000000000000000202601046102023000164140ustar 00000000000000use std::mem::{ManuallyDrop, MaybeUninit}; use std::ptr::NonNull; use std::slice::{from_raw_parts, from_raw_parts_mut}; use std::iter::IntoIterator; use std::iter::{ExactSizeIterator, Iterator}; use std::fmt::{self, Debug}; use std::ops::{Deref, DerefMut}; use std::cmp::{Eq, PartialEq}; pub(crate) union SmallArrayBoxInner { ptr: NonNull, pub(crate) inline_storage: ManuallyDrop<[MaybeUninit; INLINE_LEN]>, } /// * `INLINE_LEN` - Number of elements that can be stored inline. pub struct SmallArrayBox { pub(crate) storage: SmallArrayBoxInner, pub(crate) len: usize, } unsafe impl Send for SmallArrayBox {} unsafe impl Sync for SmallArrayBox {} impl Default for SmallArrayBox { fn default() -> Self { Self::new_empty() } } impl From> for SmallArrayBox { fn from(boxed: Box<[T]>) -> Self { Self::from_box(boxed) } } impl From> for SmallArrayBox { fn from(vec: Vec) -> Self { if vec.len() <= INLINE_LEN { Self::new(vec) } else { vec.into_boxed_slice().into() } } } impl From<&[T]> for SmallArrayBox { fn from(slice: &[T]) -> Self { Self::new(slice.iter().cloned()) } } impl Clone for SmallArrayBox { fn clone(&self) -> Self { Self::new(self.iter().cloned()) } } impl SmallArrayBox { pub(crate) fn uninit_inline_storage() -> Self { Self { storage: SmallArrayBoxInner { // Safety: // // It is safe because the array contains `MaybeUninit`. inline_storage: ManuallyDrop::new(unsafe { MaybeUninit::uninit().assume_init() }), }, len: 0, } } pub const fn new_empty() -> Self { Self { storage: SmallArrayBoxInner { ptr: NonNull::dangling(), }, len: 0, } } pub fn new(iter: impl IntoIterator) -> Self where I: Iterator + ExactSizeIterator, { let iter = iter.into_iter(); let len = iter.len(); if len <= INLINE_LEN { let mut this = Self::uninit_inline_storage(); let inline_storage = unsafe { this.storage.inline_storage.deref_mut() }; iter.zip(inline_storage).for_each(|(src, dst)| { *dst = MaybeUninit::new(src); }); this.len = len; this } else { let vec: Vec = iter.collect(); let array_ptr = Box::into_raw(vec.into_boxed_slice()); let slice = unsafe { &mut *array_ptr }; let ptr = unsafe { NonNull::new_unchecked(slice.as_mut_ptr()) }; Self { storage: SmallArrayBoxInner { ptr }, len, } } } pub fn from_box(boxed: Box<[T]>) -> Self { let len = boxed.len(); if len <= INLINE_LEN { let vec: Vec = boxed.into(); Self::new(vec) } else { let array_ptr = Box::into_raw(boxed); let slice = unsafe { &mut *array_ptr }; let ptr = unsafe { NonNull::new_unchecked(slice.as_mut_ptr()) }; debug_assert_eq!(slice.len(), len); Self { storage: SmallArrayBoxInner { ptr }, len, } } } pub fn into_boxed_slice(self) -> Box<[T]> { let len = self.len; let mut this = ManuallyDrop::new(self); if len <= INLINE_LEN { let inline_storage = unsafe { &mut this.storage.inline_storage }; let mut vec = Vec::with_capacity(len); for elem in inline_storage[..len].iter_mut() { let ptr = elem.as_mut_ptr(); vec.push(unsafe { ptr.read() }); } debug_assert_eq!(vec.len(), len); vec.into_boxed_slice() } else { let ptr = unsafe { this.storage.ptr }.as_ptr(); let slice = unsafe { from_raw_parts_mut(ptr, len) }; unsafe { Box::from_raw(slice) } } } } impl Deref for SmallArrayBox { type Target = [T]; fn deref(&self) -> &Self::Target { let len = self.len; if len <= INLINE_LEN { let inline_storage = unsafe { self.storage.inline_storage.deref() }; unsafe { &*(&inline_storage[..len] as *const _ as *const [T]) } } else { let ptr = unsafe { self.storage.ptr }.as_ptr(); unsafe { from_raw_parts(ptr, len) } } } } impl DerefMut for SmallArrayBox { fn deref_mut(&mut self) -> &mut Self::Target { let len = self.len; if len <= INLINE_LEN { let inline_storage = unsafe { self.storage.inline_storage.deref_mut() }; unsafe { &mut *(&mut inline_storage[..len] as *mut _ as *mut [T]) } } else { let ptr = unsafe { self.storage.ptr }.as_ptr(); unsafe { from_raw_parts_mut(ptr, len) } } } } impl Drop for SmallArrayBox { fn drop(&mut self) { let len = self.len; if len <= INLINE_LEN { let inline_storage = unsafe { self.storage.inline_storage.deref_mut() }; inline_storage[..len].iter_mut().for_each(|elem| { let ptr = elem.as_mut_ptr(); unsafe { ptr.drop_in_place(); } }); } else { let ptr = unsafe { self.storage.ptr }.as_ptr(); let slice = unsafe { from_raw_parts_mut(ptr, len) }; drop(unsafe { Box::from_raw(slice) }); } } } impl Debug for SmallArrayBox { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:#?}", self.deref()) } } impl PartialEq for SmallArrayBox { fn eq(&self, other: &Self) -> bool { self.deref().eq(other.deref()) } } impl Eq for SmallArrayBox {} #[cfg(test)] mod tests { type SmallArrayBox = super::SmallArrayBox; use std::ops::{Deref, DerefMut}; use std::ptr; fn assert_ptr_eq(x: *const [u8], y: *const [u8]) { assert!(ptr::eq(x, y)); } #[test] fn test_new_empty() { let mut empty_array = SmallArrayBox::new_empty(); let empty: &[u8] = &[]; assert_eq!(empty_array.deref(), empty); assert_eq!(empty_array.deref_mut(), empty); assert_ptr_eq(empty_array.deref(), empty_array.deref_mut()); let boxed = empty_array.into_boxed_slice(); assert_eq!(&*boxed, empty); } #[test] fn test_new() { let vec: Vec = (0..100).collect(); for len in 0..vec.len() { let slice = &vec[..len]; let mut array = SmallArrayBox::new(slice.iter().copied()); assert_eq!(array.deref(), slice); assert_eq!(array.deref_mut(), slice); assert_ptr_eq(array.deref(), array.deref_mut()); let boxed = array.into_boxed_slice(); assert_eq!(&*boxed, slice); } } #[test] fn test_from_box() { let vec: Vec = (0..100).collect(); for len in 0..vec.len() { let slice = &vec[..len]; let vec: Vec = slice.to_vec(); let mut array = SmallArrayBox::from_box(vec.into_boxed_slice()); assert_eq!(array.deref(), slice); assert_eq!(array.deref_mut(), slice); assert_ptr_eq(array.deref(), array.deref_mut()); let boxed = array.into_boxed_slice(); assert_eq!(&*boxed, slice); } } } vec-strings-0.4.8/src/strings.rs000064400000000000000000000111461046102023000147320ustar 00000000000000use std::convert::TryInto; use std::hint::unreachable_unchecked; use std::iter::{ExactSizeIterator, IntoIterator, Iterator}; use std::slice; use std::str; use thin_vec::ThinVec; /// Store any string efficiently in an immutable way. /// /// Can store at most `u32::MAX` strings, the accumulated length /// of these strings can be at most `u32::MAX`. #[derive(Debug, Default, Eq, PartialEq, Clone, Hash)] pub struct Strings { strs: ThinVec, ends: ThinVec, } impl Strings { #[inline(always)] pub fn new() -> Self { Self::default() } /// * `len` - number of strings pub fn with_capacity(len: u32) -> Self { let mut strings = Self::default(); strings.reserve(len); strings } /// **Strings can contain at most `u32::MAX` strings** pub fn push(&mut self, s: &str) { self.strs.extend_from_slice(s.as_bytes()); self.ends.push( self.strs .len() .try_into() .expect("Strings cannot contain more than u32::MAX strings"), ); } /// Accumulate length of all strings. #[inline(always)] pub fn strs_len(&self) -> u32 { match self.strs.len().try_into() { Ok(len) => len, Err(_err) => unsafe { unreachable_unchecked() }, } } #[inline(always)] pub fn len(&self) -> u32 { match self.ends.len().try_into() { Ok(len) => len, Err(_err) => unsafe { unreachable_unchecked() }, } } #[inline(always)] pub fn is_empty(&self) -> bool { self.len() == 0 } #[inline(always)] pub fn reserve(&mut self, strs_cnt: u32) { self.ends.reserve(strs_cnt as usize); } #[inline(always)] pub fn reserve_strs(&mut self, cnt: usize) { self.strs.reserve(cnt); } pub fn shrink_to_fit(&mut self) { self.strs.shrink_to_fit(); self.ends.shrink_to_fit(); } #[inline(always)] pub fn iter(&self) -> StringsIter<'_> { StringsIter { strings: self, ends_iter: self.ends.iter(), start: 0, } } pub fn get(&self, index: u32) -> Option<&str> { let end = *self.ends.get(index as usize)?; let start = if index == 0 { 0 } else { self.ends[(index - 1) as usize] }; Some(self.get_str_impl(start, end)) } #[inline(always)] fn get_str_impl(&self, start: u32, end: u32) -> &str { unsafe { str::from_utf8_unchecked(&self.strs[(start as usize)..(end as usize)]) } } pub fn as_str(&self) -> &str { self.get_str_impl(0, self.strs_len()) } pub fn into_str(self) -> String { let mut vec = Vec::with_capacity(self.strs.len()); vec.extend_from_slice(&self.strs); unsafe { String::from_utf8_unchecked(vec) } } } impl<'a> IntoIterator for &'a Strings { type Item = &'a str; type IntoIter = StringsIter<'a>; #[inline(always)] fn into_iter(self) -> Self::IntoIter { self.iter() } } #[derive(Clone, Debug)] pub struct StringsIter<'a> { strings: &'a Strings, ends_iter: slice::Iter<'a, u32>, start: u32, } impl<'a> Iterator for StringsIter<'a> { type Item = &'a str; fn next(&mut self) -> Option { let start = self.start; let end = *self.ends_iter.next()?; self.start = end; Some(self.strings.get_str_impl(start, end)) } fn size_hint(&self) -> (usize, Option) { let len = self.ends_iter.len(); (len, Some(len)) } } impl ExactSizeIterator for StringsIter<'_> {} #[cfg(test)] mod tests { use super::Strings; use std::convert::TryInto; fn assert_strs_in(strs: &Strings, input_strs: &Vec) { for (string, input_str) in strs.iter().zip(input_strs) { assert_eq!(string, input_str); } } #[test] fn test() { let mut strs = Strings::new(); let input_strs: Vec = (0..256).map(|n| n.to_string()).collect(); assert!(strs.is_empty()); for (i, input_str) in input_strs.iter().enumerate() { strs.push(input_str); assert_eq!(strs.len() as usize, i + 1); assert_strs_in(&strs, &input_strs); } assert!(input_strs.iter().eq(strs.iter())); for (i, input_str) in input_strs.iter().enumerate() { assert_eq!(strs.get(i.try_into().unwrap()).unwrap(), input_str); } let input_str = input_strs.concat(); assert_eq!(strs.as_str(), input_str); assert_eq!(strs.into_str(), input_str); } } vec-strings-0.4.8/src/strings_no_index.rs000064400000000000000000000107061046102023000166160ustar 00000000000000use std::convert::TryInto; use std::iter::{ExactSizeIterator, IntoIterator, Iterator}; use std::str; use thin_vec::ThinVec; /// Store any string efficiently in an immutable way. /// /// Can store at most `u32::MAX` strings and only provides /// `StringsNoIndexIter` and does not provide arbitary indexing. #[derive(Debug, Default, Eq, PartialEq, Clone, Hash)] pub struct StringsNoIndex { strs: ThinVec, } impl StringsNoIndex { pub fn new() -> Self { Self::default() } /// * `len` - number of strings /// /// NOTE that this function does nothing and is defined just to be compatible /// with `Strings`. pub fn with_capacity(_len: u32) -> Self { Self::new() } fn set_len(&mut self, new_len: u32) { self.strs[..4].copy_from_slice(&new_len.to_ne_bytes()); } pub fn len(&self) -> u32 { if self.is_empty() { 0 } else { u32::from_ne_bytes(self.strs[..4].try_into().unwrap()) } } pub fn is_empty(&self) -> bool { self.strs.is_empty() } /// * `s` - must not contain null byte. pub fn push(&mut self, s: &str) { if self.is_empty() { let len: u32 = 1; self.strs.extend_from_slice(&len.to_ne_bytes()); } else { let len = self.len(); if len == u32::MAX { panic!( "StringsNoIndex cannot contain more than u32::MAX {} elements", u32::MAX ); } self.set_len(len + 1); } self.strs .extend(s.as_bytes().iter().copied().filter(|byte| *byte != b'\0')); self.strs.push(0); } /// Accumulate length of all strings. #[inline(always)] pub fn strs_len(&self) -> usize { self.strs.len() } #[inline(always)] pub fn reserve_strs(&mut self, cnt: usize) { self.strs.reserve(cnt); } pub fn shrink_to_fit(&mut self) { self.strs.shrink_to_fit(); } #[inline(always)] pub fn iter(&self) -> StringsNoIndexIter<'_> { let slice = if self.is_empty() { &[] } else { &self.strs[4..] }; StringsNoIndexIter::new(slice, self.len()) } } impl<'a> IntoIterator for &'a StringsNoIndex { type Item = &'a str; type IntoIter = StringsNoIndexIter<'a>; #[inline(always)] fn into_iter(self) -> Self::IntoIter { self.iter() } } #[derive(Clone, Debug)] pub struct StringsNoIndexIter<'a>(&'a [u8], u32); impl<'a> StringsNoIndexIter<'a> { fn new(strs: &'a [u8], len: u32) -> Self { Self(strs, len) } } impl<'a> Iterator for StringsNoIndexIter<'a> { type Item = &'a str; fn next(&mut self) -> Option { if self.0.is_empty() { return None; } self.1 -= 1; let pos = self.0.iter().position(|byte| *byte == 0).unwrap(); let slice = &self.0[..pos]; self.0 = &self.0[(pos + 1)..]; Some(unsafe { str::from_utf8_unchecked(slice) }) } fn size_hint(&self) -> (usize, Option) { let len = self.1 as usize; (len, Some(len)) } } impl ExactSizeIterator for StringsNoIndexIter<'_> {} #[cfg(test)] mod tests { use super::StringsNoIndex; fn assert_strs_in(strs: &StringsNoIndex, input_strs: &Vec) { for (string, input_str) in strs.iter().zip(input_strs) { assert_eq!(string, input_str); } } #[test] fn test() { let mut strs = StringsNoIndex::new(); let input_strs: Vec = (0..256).map(|n| n.to_string()).collect(); assert!(strs.is_empty()); for (i, input_str) in input_strs.iter().enumerate() { strs.push(input_str); assert_eq!(strs.len() as usize, i + 1); assert_strs_in(&strs, &input_strs); } assert!(!strs.is_empty()); assert!(input_strs.iter().eq(strs.iter())); } #[test] fn test_adding_empty_strs() { let mut strs = StringsNoIndex::new(); assert!(strs.is_empty()); for i in 0..10 { strs.push(""); assert_eq!(strs.len() as usize, i + 1); } assert!(!strs.is_empty()); strs.push("12345"); for (i, string) in strs.iter().enumerate() { if i < 10 { assert_eq!(string, ""); } else { assert_eq!(string, "12345"); } } } } vec-strings-0.4.8/src/two_strs.rs000064400000000000000000000041611046102023000151240ustar 00000000000000use core::fmt; use core::str; /// Box of two strings. /// Store two strings efficiently in an immutable way. #[derive(Debug, Eq, PartialEq, Clone, Hash)] pub struct TwoStrs(Box<[u8]>); impl From<(&str, &str)> for TwoStrs { fn from((s1, s2): (&str, &str)) -> Self { Self::new(s1, s2) } } impl TwoStrs { /// * `s1` - must not contain null byte. /// * `s2` - must not contain null byte. pub fn new(s1: &str, s2: &str) -> Self { let iter1 = s1.as_bytes().iter().copied().filter(|byte| *byte != b'\0'); let iter2 = s2.as_bytes().iter().copied().filter(|byte| *byte != b'\0'); let len1 = iter1.clone().count(); let len2 = iter2.clone().count(); let mut bytes = Vec::with_capacity(len1 + 1 + len2); if len1 == s1.len() { bytes.extend_from_slice(s1.as_bytes()); } else { bytes.extend(iter1); } bytes.push(0); if len2 == s2.len() { bytes.extend_from_slice(s2.as_bytes()); } else { bytes.extend(iter2); } Self(bytes.into_boxed_slice()) } pub fn get(&self) -> (&str, &str) { let pos = self.0.iter().position(|byte| *byte == 0).unwrap(); ( unsafe { str::from_utf8_unchecked(&self.0[..pos]) }, unsafe { str::from_utf8_unchecked(&self.0[pos + 1..]) }, ) } } impl fmt::Display for TwoStrs { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { let (s1, s2) = self.get(); write!(f, "({}, {})", s1, s2) } } #[cfg(test)] mod tests { use super::TwoStrs; fn assert(s1: &str, s2: &str) { let two_strs = TwoStrs::new(s1, s2); assert_eq!(two_strs.get(), (s1, s2)); } #[test] fn test() { assert("", ""); assert("12", ""); assert("", "12"); assert("12", "12"); assert("12", "2333"); assert("acdbd3", "2333"); } #[allow(clippy::octal_escapes)] #[test] fn test_null() { let two_strs = TwoStrs::new("1\023d\0", "\023e\0"); assert_eq!(two_strs.get(), ("123d", "23e")); } }