bytes-utils-0.1.4/.cargo_vcs_info.json0000644000000001360000000000100133220ustar { "git": { "sha1": "119e5f90ecb3c7786ca87b325ee10ee908e43247" }, "path_in_vcs": "" }bytes-utils-0.1.4/.github/codecov.yml000064400000000000000000000002151046102023000156150ustar 00000000000000comment: layout: "diff, flags, files" require_changes: true coverage: status: project: default: informational: true bytes-utils-0.1.4/.github/dependabot.yml000064400000000000000000000005201046102023000162770ustar 00000000000000# Dependabot dependency version checks / updates version: 2 updates: - package-ecosystem: "github-actions" # Workflow files stored in the # default location of `.github/workflows` directory: "/" schedule: interval: "daily" - package-ecosystem: "cargo" directory: "/" schedule: interval: "daily" bytes-utils-0.1.4/.github/workflows/audit.yaml000064400000000000000000000005341046102023000175030ustar 00000000000000name: Security audit on: pull_request: push: branches: - master schedule: - cron: '0 0 * * 0' jobs: security_audit: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions-rs/audit-check@35b7b53b1e25b55642157ac01b4adceb5b9ebef3 with: token: ${{ secrets.GITHUB_TOKEN }} bytes-utils-0.1.4/.github/workflows/coverage.yaml000064400000000000000000000020241046102023000201640ustar 00000000000000name: Test coverage on: push: branches: - main pull_request: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: full jobs: coverage: name: Coverage runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: nightly profile: minimal default: true - name: Restore cache uses: Swatinem/rust-cache@v2 - name: Run cargo-tarpaulin uses: actions-rs/tarpaulin@v0.1 with: args: '--all-features --run-types Doctests,Tests' timeout: 120 - name: Upload to codecov.io uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d with: token: ${{ secrets.CODECOV_TOKEN }} - name: Archive code 
coverage results uses: actions/upload-artifact@v3 with: name: code-coverage-report path: cobertura.xml retention-days: 30 bytes-utils-0.1.4/.github/workflows/test.yaml000064400000000000000000000064431046102023000173610ustar 00000000000000name: test on: push: pull_request: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: full jobs: test: name: Build & test strategy: fail-fast: false matrix: os: - ubuntu-latest - macos-latest - windows-latest rust: - stable - beta - nightly runs-on: ${{ matrix.os }} steps: - name: checkout uses: actions/checkout@v4 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: ${{ matrix.rust }} default: true profile: minimal - name: Restore cache uses: Swatinem/rust-cache@v2 - name: Build & test env: RUST_VERSION: ${{ matrix.rust }} OS: ${{ matrix.os }} RUSTFLAGS: -D warnings run: cargo test no_std_builds: name: Build no_std targets runs-on: ubuntu-latest strategy: matrix: target: - thumbv7m-none-eabi rust: - stable - beta - nightly steps: - name: checkout uses: actions/checkout@v4 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} default: true profile: minimal - name: Restore cache uses: Swatinem/rust-cache@v2 - name: Build run: cargo build --no-default-features --release --target ${{ matrix.target }} rustfmt: name: Check formatting runs-on: ubuntu-latest steps: - name: checkout uses: actions/checkout@v4 - name: Install Rust uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable default: true components: rustfmt - run: cargo fmt --all -- --check links: name: Check documentation links runs-on: ubuntu-latest steps: - name: checkout uses: actions/checkout@v4 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: stable default: true - name: Restore cache uses: Swatinem/rust-cache@v2 - name: Check links run: cargo rustdoc --all-features -- -D warnings clippy: name: Clippy lints runs-on: ubuntu-latest steps: - name: Checkout repository uses: 
actions/checkout@v4 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: stable profile: minimal default: true components: clippy - name: Restore cache uses: Swatinem/rust-cache@v2 - name: Run clippy linter run: cargo clippy --all --tests -- -D clippy::all -D warnings miri: name: Miri checks runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: nightly profile: minimal default: true components: "miri" - name: Restore cache uses: Swatinem/rust-cache@v2 - name: Run miri env: PROPTEST_CASES: "10" MIRIFLAGS: "-Zmiri-disable-isolation" run: cargo miri test --all-features bytes-utils-0.1.4/.gitignore000064400000000000000000000000301046102023000140730ustar 00000000000000/target Cargo.lock tags bytes-utils-0.1.4/CHANGELOG.md000064400000000000000000000007071046102023000137270ustar 00000000000000# 0.1.4 * Deps bumps. * Fix no-std build on either by bumping edition. # 0.1.3 * Serde support for the stringly wrappers. * `string::from_static`. # 0.1.2 * No-std support. # 0.1.1 * Implement the `chunk_vectored` in a way it returns more than one chunk if possible. * SegmentedSlice added, for avoiding allocation of the VecDeque in SegmentedBuf. # 0.1.0 * Initial code for the SegmentedBuf. * Initial code for the Str/StrMut string wrappers. bytes-utils-0.1.4/Cargo.toml0000644000000025170000000000100113250ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2021" name = "bytes-utils" version = "0.1.4" authors = ["Michal 'vorner' Vaner "] description = "Additional utilities for working with the bytes crate" documentation = "https://docs.rs/bytes-utils" readme = "README.md" keywords = [ "bytes", "zero-copy", "buffers", "no_std", ] categories = ["data-structures"] license = "Apache-2.0/MIT" repository = "https://github.com/vorner/bytes-utils" [dependencies.bytes] version = "1" default-features = false [dependencies.either] version = "1" default-features = false [dependencies.serde] version = "1" optional = true default-features = false [dev-dependencies.itertools] version = "^0.12" [dev-dependencies.proptest] version = "^1.0" [dev-dependencies.serde_test] version = "1.0.144" [features] default = ["std"] serde = [ "dep:serde", "bytes/serde", ] std = ["bytes/default"] bytes-utils-0.1.4/Cargo.toml.orig000064400000000000000000000015661046102023000150110ustar 00000000000000[package] name = "bytes-utils" version = "0.1.4" authors = ["Michal 'vorner' Vaner "] edition = "2021" description = "Additional utilities for working with the bytes crate" documentation = "https://docs.rs/bytes-utils" repository = "https://github.com/vorner/bytes-utils" readme = "README.md" keywords = ["bytes", "zero-copy", "buffers", "no_std"] categories = ["data-structures"] license = "Apache-2.0/MIT" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] bytes = { version = "1", default-features = false } either = { version = "1", default-features = false } serde = { version = "1", optional = true, default-features = false } [features] default = ["std"] std = ["bytes/default"] serde = ["dep:serde", "bytes/serde"] [dev-dependencies] itertools = "^0.12" proptest = "^1.0" serde_test = "1.0.144" bytes-utils-0.1.4/LICENSE-APACHE000064400000000000000000000251371046102023000140460ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND 
CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. bytes-utils-0.1.4/LICENSE-MIT000064400000000000000000000020471046102023000135510ustar 00000000000000Copyright (c) 2017 arc-swap developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. bytes-utils-0.1.4/README.md000064400000000000000000000024531046102023000133750ustar 00000000000000# Additional utils for the [bytes] crate [![Actions Status](https://github.com/vorner/bytes-utils/workflows/test/badge.svg)](https://github.com/vorner/bytes-utils/actions) [![codecov](https://codecov.io/gh/vorner/bytes-utils/branch/main/graph/badge.svg?token=GKITN8ZOE1)](https://codecov.io/gh/vorner/bytes-utils) [![docs](https://docs.rs/bytes-utils/badge.svg)](https://docs.rs/bytes-utils) A few utilities to make working with the types and traits in the [bytes] crate even more convenient and powerful. Currently contains: * `SegmentedBuf` that can concatenate multiple `Buf`s into a bigger one without copying. * `Str` and `StrMut`, string wrappers around `Bytes` and `BytesMut`. ## Features `no_std` builds are supported by disabling the `std` feature, which is enabled by default. # License Licensed under either of * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. ### Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. [bytes]: https://docs.rs/bytes bytes-utils-0.1.4/src/lib.rs000064400000000000000000000022541046102023000140200ustar 00000000000000#![doc(test(attr(deny(warnings))))] #![warn(missing_docs)] #![cfg_attr(all(not(test), not(feature = "std")), no_std)] //! # Extra utilities for the [bytes] crate. //! //! 
The [bytes] crate defines a few traits and types to help with high-performance manipulation of //! byte arrays. Nevertheless, it is more of an interface-level library (many other crates //! expose its types and traits in their own public interfaces) and therefore tries to be on the //! lean side. //! //! One often wishes for some more auxiliary functionality „around“ these types and that's what //! this crate aims to provide. //! //! ## The content //! //! * [SegmentedBuf] and [SegmentedSlice] for concatenating multiple buffers into a large one //! without copying the bytes. //! * [Str] and [StrMut] are wrappers around [Bytes][bytes::Bytes] and [BytesMut] //! respectively, providing a [String]-like interface. They allow splitting into owned //! sub-slices, similar to how the [Bytes] and [BytesMut] work. //! //! [Bytes]: bytes::Bytes //! [BytesMut]: bytes::BytesMut extern crate alloc; mod segmented; pub mod string; pub use segmented::{SegmentedBuf, SegmentedSlice}; pub use string::{Str, StrMut}; bytes-utils-0.1.4/src/segmented.rs000064400000000000000000000470071046102023000152320ustar 00000000000000#![forbid(unsafe_code)] use alloc::collections::VecDeque; use alloc::vec::Vec; use bytes::{Buf, BufMut, Bytes, BytesMut}; use core::cmp; use core::iter::FromIterator; #[cfg(feature = "std")] use std::io::IoSlice; #[cfg(feature = "std")] fn chunks_vectored<'s, B, I>(bufs: I, dst: &mut [IoSlice<'s>]) -> usize where I: Iterator, B: Buf + 's, { let mut filled = 0; for buf in bufs { if filled == dst.len() { break; } filled += buf.chunks_vectored(&mut dst[filled..]); } filled } /// A consumable view of a sequence of buffers. /// /// This allows viewing a sequence of buffers as one buffer, without copying the bytes over. Unlike /// the [SegmentedBuf], this doesn't allow for appending more buffers and doesn't drop the buffers /// as they are exhausted (though they all get exhausted, no leftovers are kept in them as the /// caller advances through it). 
On the other hand, it doesn't require an internal allocation in /// the form of VecDeque and can be based on any kind of slice. /// /// # Example /// /// ```rust /// # use bytes_utils::SegmentedSlice; /// # use bytes::Buf; /// # use std::io::Read; /// let mut buffers = [b"Hello" as &[_], b"", b" ", b"", b"World"]; /// let buf = SegmentedSlice::new(&mut buffers); /// /// assert_eq!(11, buf.remaining()); /// assert_eq!(b"Hello", buf.chunk()); /// /// let mut out = String::new(); /// buf.reader().read_to_string(&mut out).expect("Doesn't cause IO errors"); /// assert_eq!("Hello World", out); /// ``` /// /// # Optimizations /// /// The [copy_to_bytes][SegmentedSlice::copy_to_bytes] method tries to avoid copies by delegating /// into the underlying buffer if possible (if the whole request can be fulfilled using only a /// single buffer). If that one is optimized (for example, the [Bytes] returns a shared instance /// instead of making a copy), the copying is avoided. If the request is across a buffer boundary, /// a copy is made. /// /// The [chunks_vectored][SegmentedSlice::chunks_vectored] will properly output as many slices as /// possible, not just 1 as the default implementation does. #[derive(Debug, Default)] pub struct SegmentedSlice<'a, B> { remaining: usize, idx: usize, bufs: &'a mut [B], } impl<'a, B: Buf> SegmentedSlice<'a, B> { /// Creates a new buffer out of a slice of buffers. /// /// The buffers will then be taken in order to form one bigger buffer. /// /// Each of the buffers in turn will be exhausted using its [advance][Buf::advance] before /// proceeding to the next one. Note that the buffers are not dropped (unlike with /// [SegmentedBuf]). 
pub fn new(bufs: &'a mut [B]) -> Self { let remaining = bufs.iter().map(Buf::remaining).sum(); let mut me = Self { remaining, idx: 0, bufs, }; me.clean_empty(); me } fn clean_empty(&mut self) { while self.idx < self.bufs.len() && !self.bufs[self.idx].has_remaining() { self.idx += 1; } } } impl<'a, B: Buf> Buf for SegmentedSlice<'a, B> { fn remaining(&self) -> usize { self.remaining } fn chunk(&self) -> &[u8] { self.bufs.get(self.idx).map(Buf::chunk).unwrap_or_default() } fn advance(&mut self, mut cnt: usize) { self.remaining -= cnt; while cnt > 0 { let first = &mut self.bufs[self.idx]; let rem = first.remaining(); let segment = cmp::min(rem, cnt); first.advance(segment); cnt -= segment; self.clean_empty(); } } fn copy_to_bytes(&mut self, len: usize) -> Bytes { assert!(len <= self.remaining(), "`len` greater than remaining"); match self.bufs.get_mut(self.idx) { // Special optimized case. The whole request comes from the front buffer. That one may // be optimized to do something more efficient, like slice the Bytes (if B == Bytes) // instead of copying, so we take the opportunity if it offers itself. Some(front) if front.remaining() >= len => { self.remaining -= len; let res = front.copy_to_bytes(len); self.clean_empty(); res } // The general case, borrowed from the default implementation (there's no way to // delegate to it, is there?) _ => { let mut res = BytesMut::with_capacity(len); res.put(self.take(len)); res.freeze() } } } #[cfg(feature = "std")] fn chunks_vectored<'s>(&'s self, dst: &mut [IoSlice<'s>]) -> usize { let bufs = self.bufs.get(self.idx..).unwrap_or_default(); chunks_vectored(bufs.iter(), dst) } } /// A concatenation of multiple buffers into a large one, without copying the bytes over. /// /// Note that this doesn't provide a continuous slice view into them, it is split into the segments /// of the original smaller buffers. /// /// This variants drop the inner buffers as they are exhausted and new ones can be added. 
But it /// internally keeps a [VecDeque], therefore needs a heap allocation. If you don't need the /// extending behaviour, but want to avoid the allocation, the [SegmentedSlice] can be used instead. /// /// # Why /// /// This can be used, for example, if data of unknown length is coming over the network (for /// example, the bodies in [hyper] act a bit like this, it returns a stream of [Bytes] buffers). /// One might want to accumulate the whole body before acting on it, possibly by parsing it through /// [serde] or [prost]. Options would include: /// /// * Have a `Vec` and extend it with each chunk. This needlessly copy the bytes every time and /// reallocates if the vector grows too large. /// * Repeatedly use [chain][Buf::chain], but this changes the type of the whole buffer, therefore /// needs to be boxed. /// * Use [hyper::body::aggregate] to create a [Buf] implementation that concatenates all of them /// together, but lacks any kind of flexibility (like protecting against loading too much data /// into memory). /// /// This type allows for concatenating multiple buffers, either all at once, or by incrementally /// pushing more buffers to the end. /// /// # Heterogeneous buffers /// /// This expects all the buffers are of the same type. If different-typed buffers are needed, one /// needs to use dynamic dispatch, either something like `SegmentedBuf>` or /// `SegmentedBuf<&mut Buf>`. 
/// /// # Example /// /// ```rust /// # use std::io::Read; /// # use bytes::{Bytes, Buf}; /// # use bytes_utils::SegmentedBuf; /// let mut buf = SegmentedBuf::new(); /// buf.push(Bytes::from("Hello")); /// buf.push(Bytes::from(" ")); /// buf.push(Bytes::from("World")); /// /// assert_eq!(3, buf.segments()); /// assert_eq!(11, buf.remaining()); /// assert_eq!(b"Hello", buf.chunk()); /// /// let mut out = String::new(); /// buf.reader().read_to_string(&mut out).expect("Doesn't cause IO errors"); /// assert_eq!("Hello World", out); /// ``` /// /// # FIFO behaviour /// /// The buffers are dropped once their data are completely consumed. Additionally, it is possible /// to add more buffers to the end, even while some of the previous buffers were partially or fully /// consumed. That makes it usable as kind of a queue (that operates on the buffers, not individual /// bytes). /// /// ```rust /// # use bytes::{Bytes, Buf}; /// # use bytes_utils::SegmentedBuf; /// let mut buf = SegmentedBuf::new(); /// buf.push(Bytes::from("Hello")); /// assert_eq!(1, buf.segments()); /// /// let mut out = [0; 3]; /// buf.copy_to_slice(&mut out); /// assert_eq!(&out, b"Hel"); /// assert_eq!(2, buf.remaining()); /// assert_eq!(1, buf.segments()); /// /// buf.push(Bytes::from("World")); /// assert_eq!(7, buf.remaining()); /// assert_eq!(2, buf.segments()); /// /// buf.copy_to_slice(&mut out); /// assert_eq!(&out, b"loW"); /// assert_eq!(4, buf.remaining()); /// assert_eq!(1, buf.segments()); /// ``` /// /// # Optimizations /// /// The [copy_to_bytes][SegmentedBuf::copy_to_bytes] method tries to avoid copies by delegating /// into the underlying buffer if possible (if the whole request can be fulfilled using only a /// single buffer). If that one is optimized (for example, the [Bytes] returns a shared instance /// instead of making a copy), the copying is avoided. If the request is across a buffer boundary, /// a copy is made. 
/// /// The [chunks_vectored][SegmentedBuf::chunks_vectored] will properly output as many slices as /// possible, not just 1 as the default implementation does. /// /// [hyper]: https://docs.rs/hyper /// [serde]: https://docs.rs/serde /// [prost]: https://docs.rs/prost /// [hyper::body::aggregate]: https://docs.rs/hyper/0.14.2/hyper/body/fn.aggregate.html #[derive(Clone, Debug)] pub struct SegmentedBuf { bufs: VecDeque, // Pre-computed sum of the total remaning remaining: usize, } impl SegmentedBuf { /// Creates a new empty instance. /// /// The instance can be [pushed][SegmentedBuf::push] or [extended][Extend] later. /// /// Alternatively, one may create it directly from an iterator, a [Vec] or a [VecDeque] of /// buffers. pub fn new() -> Self { Self::default() } /// Returns the yet unconsumed sequence of buffers. pub fn into_inner(self) -> VecDeque { self.into() } /// Returns the number of segments (buffers) this contains. pub fn segments(&self) -> usize { self.bufs.len() } } impl SegmentedBuf { /// Extends the buffer by another segment. /// /// The newly added segment is added to the end of the buffer (the buffer works as a FIFO). 
pub fn push(&mut self, buf: B) { self.remaining += buf.remaining(); self.bufs.push_back(buf); self.clean_empty(); } fn update_remaining(&mut self) { self.remaining = self.bufs.iter().map(Buf::remaining).sum(); } fn clean_empty(&mut self) { loop { match self.bufs.front() { Some(b) if !b.has_remaining() => { self.bufs.pop_front(); } _ => break, } } } } impl Default for SegmentedBuf { fn default() -> Self { Self { bufs: VecDeque::new(), remaining: 0, } } } impl From> for SegmentedBuf { fn from(bufs: Vec) -> Self { Self::from(VecDeque::from(bufs)) } } impl From> for SegmentedBuf { fn from(bufs: VecDeque) -> Self { let mut me = Self { bufs, remaining: 0 }; me.clean_empty(); me.update_remaining(); me } } impl From> for VecDeque { fn from(me: SegmentedBuf) -> Self { me.bufs } } impl Extend for SegmentedBuf { fn extend>(&mut self, iter: T) { self.bufs.extend(iter); self.clean_empty(); self.update_remaining(); } } impl FromIterator for SegmentedBuf { fn from_iter>(iter: T) -> Self { let mut me = Self { bufs: VecDeque::from_iter(iter), remaining: 0, }; me.clean_empty(); me.update_remaining(); me } } impl Buf for SegmentedBuf { fn remaining(&self) -> usize { self.remaining } fn chunk(&self) -> &[u8] { self.bufs.front().map(Buf::chunk).unwrap_or_default() } fn advance(&mut self, mut cnt: usize) { assert!(cnt <= self.remaining, "Advance past the end of buffer"); self.remaining -= cnt; while cnt > 0 { let front = self .bufs .front_mut() .expect("Missing buffers to provide remaining"); let front_remaining = front.remaining(); if front_remaining >= cnt { front.advance(cnt); break; } else { // We advance past the whole front buffer cnt -= front_remaining; self.bufs.pop_front(); } } self.clean_empty(); } fn copy_to_bytes(&mut self, len: usize) -> Bytes { assert!(len <= self.remaining(), "`len` greater than remaining"); match self.bufs.front_mut() { // Special optimized case. The whole request comes from the front buffer. 
That one may // be optimized to do something more efficient, like slice the Bytes (if B == Bytes) // instead of copying, so we take the opportunity if it offers itself. Some(front) if front.remaining() >= len => { self.remaining -= len; let res = front.copy_to_bytes(len); self.clean_empty(); res } // The general case, borrowed from the default implementation (there's no way to // delegate to it, is there?) _ => { let mut res = BytesMut::with_capacity(len); res.put(self.take(len)); res.freeze() } } } #[cfg(feature = "std")] fn chunks_vectored<'a>(&'a self, dst: &mut [IoSlice<'a>]) -> usize { chunks_vectored(self.bufs.iter(), dst) } } #[cfg(test)] mod tests { use std::io::Read; use super::*; #[test] fn empty() { let mut b = SegmentedBuf::::new(); assert!(!b.has_remaining()); assert_eq!(0, b.remaining()); assert!(b.chunk().is_empty()); assert_eq!(0, b.segments()); b.copy_to_slice(&mut []); b.advance(0); assert_eq!(0, b.reader().read(&mut [0; 10]).unwrap()); } #[test] fn empty_slices() { let mut b = SegmentedSlice::<&[u8]>::default(); assert!(!b.has_remaining()); assert_eq!(0, b.remaining()); assert!(b.chunk().is_empty()); b.copy_to_slice(&mut []); b.advance(0); assert_eq!(0, b.reader().read(&mut [0; 10]).unwrap()); } fn segmented() -> SegmentedBuf { vec![ Bytes::from("Hello"), Bytes::from(" "), Bytes::new(), Bytes::from("World"), ] .into() } #[test] fn segments() { let mut b = segmented(); assert_eq!(11, b.remaining()); assert_eq!(b"Hello", b.chunk()); assert_eq!(4, b.segments()); b.advance(3); assert_eq!(8, b.remaining()); assert_eq!(b"lo", b.chunk()); assert_eq!(4, b.segments()); } #[test] fn to_bytes_all() { let mut b = segmented(); let bytes = b.copy_to_bytes(11); assert_eq!("Hello World", &bytes); } #[test] fn advance_within() { let mut b = segmented(); b.advance(2); assert_eq!(4, b.segments()); assert_eq!(9, b.remaining()); assert_eq!(b"llo", b.chunk()); } #[test] fn advance_border() { let mut b = segmented(); b.advance(5); assert_eq!(3, b.segments()); 
assert_eq!(6, b.remaining()); assert_eq!(b" ", b.chunk()); } #[test] fn advance_across() { let mut b = segmented(); b.advance(7); assert_eq!(1, b.segments()); assert_eq!(4, b.remaining()); assert_eq!(b"orld", b.chunk()); } #[test] fn empty_at_border() { let mut b = segmented(); b.advance(6); assert_eq!(1, b.segments()); assert_eq!(5, b.remaining()); assert_eq!(b"World", b.chunk()); } #[test] fn empty_bufs() { fn is_empty(b: &SegmentedBuf) { assert_eq!(0, b.segments()); assert_eq!(0, b.remaining()); assert_eq!(b"", b.chunk()); } is_empty(&vec![].into()); is_empty(&vec![Bytes::new(), Bytes::new()].into()); is_empty(&vec![Bytes::new(), Bytes::new()].into_iter().collect()); let mut b = SegmentedBuf::new(); is_empty(&b); b.push(Bytes::new()); is_empty(&b); b.extend(vec![Bytes::new(), Bytes::new()]); is_empty(&b); } #[test] fn sliced_hello() { let mut buffers = [b"Hello" as &[_], b"", b" ", b"", b"World"]; let buf = SegmentedSlice::new(&mut buffers); assert_eq!(11, buf.remaining()); assert_eq!(b"Hello", buf.chunk()); let mut out = String::new(); buf.reader() .read_to_string(&mut out) .expect("Doesn't cause IO errors"); assert_eq!("Hello World", out); } #[test] fn chunk_vectored() { let mut b = segmented(); assert_eq!(b.chunks_vectored(&mut []), 0); let mut slices = [IoSlice::new(&[]); 5]; assert_eq!(b.segments(), 4); assert_eq!(b.chunks_vectored(&mut slices), 3); assert_eq!(&*slices[0], b"Hello"); assert_eq!(&*slices[1], b" "); assert_eq!(&*slices[2], b"World"); b.advance(2); let mut slices = [IoSlice::new(&[]); 1]; assert_eq!(b.chunks_vectored(&mut slices), 1); assert_eq!(&*slices[0], b"llo"); } #[test] fn chunk_vectored_nested() { let mut bufs = [segmented(), segmented()]; let mut bufs = SegmentedSlice::new(&mut bufs); let mut slices = [IoSlice::new(&[]); 10]; assert_eq!(bufs.chunks_vectored(&mut slices), 6); assert_eq!(&*slices[0], b"Hello"); assert_eq!(&*slices[1], b" "); assert_eq!(&*slices[2], b"World"); assert_eq!(&*slices[3], b"Hello"); assert_eq!(&*slices[4], b" 
"); assert_eq!(&*slices[5], b"World"); bufs.advance(2); let mut slices = [IoSlice::new(&[]); 1]; assert_eq!(bufs.chunks_vectored(&mut slices), 1); assert_eq!(&*slices[0], b"llo"); } #[cfg(not(miri))] mod proptests { use super::*; use proptest::prelude::*; use std::ops::Deref; proptest! { #[test] fn random(bufs: Vec>, splits in proptest::collection::vec(0..10usize, 1..10)) { let concat: Vec = bufs.iter().flat_map(|b| b.iter()).copied().collect(); let mut segmented = bufs.iter() .map(|b| &b[..]) .collect::>(); assert_eq!(concat.len(), segmented.remaining()); assert!(segmented.segments() <= bufs.len()); assert!(concat.starts_with(segmented.chunk())); let mut bytes = segmented.clone().copy_to_bytes(segmented.remaining()); assert_eq!(&concat[..], &bytes[..]); let mut sliced = bufs.iter().map(Deref::deref).collect::>(); let mut sliced = SegmentedSlice::new(&mut sliced); let mut fifo = SegmentedBuf::new(); let mut buf_pos = bufs.iter(); for split in splits { if !bytes.has_remaining() { break; } let split = cmp::min(bytes.remaining(), split); while fifo.remaining() < split { fifo.push(&buf_pos.next().unwrap()[..]); } let c1 = bytes.copy_to_bytes(split); let c2 = segmented.copy_to_bytes(split); let c3 = sliced.copy_to_bytes(split); assert_eq!(c1, c2); assert_eq!(c1, c3); assert_eq!(bytes.remaining(), segmented.remaining()); assert_eq!(bytes.remaining(), sliced.remaining()); } } } } } bytes-utils-0.1.4/src/string/mod.rs000064400000000000000000000774301046102023000153470ustar 00000000000000//! [String]-like wrappers around [Bytes] and [BytesMut]. //! //! The [Bytes] and [BytesMut] provide a buffer of bytes with ability to create owned slices into //! the same shared memory allocation. This allows cheap manipulation of data. //! //! Strings are mostly just byte buffers with extra APIs to manipulate them. The standard [String] //! type is built as a wrapper around [Vec]. We build similar wrappers around the [Bytes] and //! 
[BytesMut], gaining the ability to create owned shared slices for textual data as well. //! //! Users are expected to use the [Str] and [StrMut] types. Note that these are type aliases around //! the [StrInner] type. The latter is meant to implement both in one go and contains all the //! documentation, but is not meant to be used directly. //! //! # Splitting //! //! The [prim@str] type from the standard library (which the types here dereference to) allows for //! slicing and splitting in many convenient ways. They, however, return borrowed string slices //! (`&str`), which might pose some problems. //! //! The [Str] and, to a certain extent, the [StrMut] types additionally allow cheap splitting and //! slicing that produce owned [Str] and [StrMut] respectively. They are slightly more expensive //! than the ones returning `&str`, but only by incrementing internal reference //! counts. They do not clone the actual string data, like `.to_owned()` on the standard library //! methods would. These methods are available in addition to the standard ones. //! //! There are three ways this can be done: //! //! * By dedicated methods, like [lines_bytes][StrInner::lines_bytes] (in general, the name of the //! standard method suffixed with `_bytes`). //! * By using the [BytesIter] iterator manually. //! * By using the standard-library methods, producing `&str` and translating it back to [Str] with //! [slice][StrInner::slice] or [StrInner::slice_ref]. //! //! # Examples //! //! ```rust //! # use bytes::Bytes; //! # use bytes_utils::{Str, StrMut}; //! let mut builder = StrMut::new(); //! builder += "Hello"; //! builder.push(' '); //! builder.push_str("World"); //! assert_eq!("Hello World", builder); //! //! let s1 = builder.split_built().freeze(); //! // This is a cheap copy, in the form of incrementing a reference count. //! let s2 = s1.clone(); //! assert_eq!("Hello World", s1); //! assert_eq!("Hello World", s2); //!
// Slicing is cheap as well, even though the returned things are Str and therefore owned too. //! assert_eq!("ello", s1.slice(1..5)); //! // We have taken the data out of the builder, but the rest of its capacity can be used for //! // further things. //! assert_eq!("", builder); //! //! // Creating from strings and similar works //! let a = Str::from("Hello"); //! assert_eq!("Hello", a); //! //! let e = Str::new(); //! assert_eq!("", e); //! //! // And from static str in O(1) //! let b = Str::from_static("World"); //! assert_eq!("World", b); //! //! // And from Bytes too. //! let b = Str::try_from(Bytes::from_static(b"World")).expect("Must be utf8"); //! assert_eq!("World", b); //! // Invalid utf8 is refused. //! Str::try_from(Bytes::from_static(&[0, 0, 255])).unwrap_err(); //! ``` use alloc::borrow::Cow; use alloc::boxed::Box; use alloc::string::String; use core::borrow::{Borrow, BorrowMut}; use core::cmp::Ordering; use core::convert::Infallible; use core::fmt::{Debug, Display, Formatter, Result as FmtResult, Write}; use core::hash::{Hash, Hasher}; use core::iter::{self, FromIterator}; use core::ops::{Add, AddAssign, Deref, DerefMut, Index, IndexMut}; use core::str::{self, FromStr}; #[cfg(feature = "std")] use std::error::Error; use bytes::{Bytes, BytesMut}; use either::Either; #[cfg(feature = "serde")] mod serde_impl; /// Error when creating [Str] or [StrMut] from invalid UTF8 data. #[derive(Copy, Clone, Debug)] pub struct Utf8Error { e: core::str::Utf8Error, inner: S, } impl Utf8Error { /// Returns the byte buffer back to the caller. pub fn into_inner(self) -> S { self.inner } /// The inner description of why the data is invalid UTF8. pub fn utf8_error(&self) -> str::Utf8Error { self.e } } impl Display for Utf8Error { fn fmt(&self, fmt: &mut Formatter) -> FmtResult { Display::fmt(&self.e, fmt) } } #[cfg(feature = "std")] impl Error for Utf8Error {} /// Direction of iteration. /// /// See [BytesIter]. 
#[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum Direction { /// Move forward (in the normal direction) in the string. Forward, /// Move backwards in the string. Backward, } /// Manual splitting iterator. /// /// The methods on [Str] and [StrMut] that iterate use this internally. But it can also be used /// manually to generate other iterators that split the original into parts. #[derive(Clone, Debug)] pub struct BytesIter { bytes: Option, extract: F, direction: Direction, } impl BytesIter where S: Storage, F: FnMut(&str) -> Option<(usize, usize)>, { /// A constructor of the iterator. /// /// The `direction` specifies in what order chunks should be yielded. /// /// The `ext` closure is always called with the rest of not yet split string. It shall return /// the byte indices of the chunk and separator border. In case of forward iteration, it is the /// end of them and the separator needs to end further to the string (or at the same position). /// In the backwards direction, it is in reverse ‒ they specify their starts and the separator /// is before the chunk. /// /// # Panics /// /// If the indices don't point at a character boundary, the iteration will panic. It'll also /// panic if the returned indices are reversed or if they are out of bounds. pub fn new(s: StrInner, direction: Direction, ext: F) -> Self { Self { bytes: Some(s.0), extract: ext, direction, } } } impl Iterator for BytesIter where S: Storage, F: FnMut(&str) -> Option<(usize, usize)>, { type Item = StrInner; fn next(&mut self) -> Option> { let storage = self.bytes.take()?; // Safety: we keep sure it is valid UTF8 on the API boundary. let whole_str = unsafe { str::from_utf8_unchecked(storage.as_ref()) }; fn split(storage: S, left: usize, right: usize) -> (S, S) { let whole_str = unsafe { str::from_utf8_unchecked(storage.as_ref()) }; // Sanity-check we are not slicing in the middle of utf8 code point. This would // panic if we do. It would also panic if we are out of range, which is also good. 
assert!(whole_str.is_char_boundary(left)); assert!(whole_str.is_char_boundary(right)); // Now that we are sure this is legal, we are going to slice the byte data for real. let (with_sep, end) = storage.split_at(right); let (start, _sep) = with_sep.split_at(left); (start, end) } match ((self.extract)(whole_str), self.direction) { (Some((chunk_end, sep_end)), Direction::Forward) => { assert!(chunk_end <= sep_end); let (start, end) = split(storage, chunk_end, sep_end); self.bytes = Some(end); Some(StrInner(start)) } (Some((chunk_start, sep_start)), Direction::Backward) => { assert!(sep_start <= chunk_start); let (start, end) = split(storage, sep_start, chunk_start); self.bytes = Some(start); Some(StrInner(end)) } (None, _) => { // No separator found -> return the whole rest (and keep None in ourselves) Some(StrInner(storage)) } } } } /// Find a separator position, for use with the [BytesIter]. fn sep_find bool>(s: &str, is_sep: F) -> Option<(usize, usize)> { let sep_start = s.find(&is_sep)?; let sep_end = s[sep_start..] .find(|c| !is_sep(c)) .map(|e| e + sep_start) .unwrap_or_else(|| s.len()); Some((sep_start, sep_end)) } /// Separator for an empty pattern. fn empty_sep(s: &str, limit: usize) -> Option<(usize, usize)> { let char_end = s .char_indices() .skip(1) .map(|(i, _)| i) .chain(iter::once(s.len()).take((!s.is_empty()) as usize)) .take(limit) .next()?; Some((char_end, char_end)) } fn rempty_sep(s: &str, limit: usize) -> Option<(usize, usize)> { let char_start = s.char_indices().rev().map(|(i, _)| i).take(limit).next()?; Some((char_start, char_start)) } /// The backing storage for [StrInner] /// /// This is currently a technical detail of the crate, users are not expected to implement this /// trait. Use [Str] or [StrMut] type aliases. /// /// # Safety /// /// The storage must act "sane". But what exactly it means is not yet analyzed and may change in /// future versions. Don't implement the trait (at least not yet). 
pub unsafe trait Storage: AsRef<[u8]> + Default + Sized { /// A type that can be used to build the storage incrementally. /// /// For mutable storages, it may be itself. For immutable one, there needs to be a mutable /// counterpart that can be converted to immutable later on. type Creator: Default + StorageMut; /// Converts the creator (mutable storage) to self. /// /// In case of mutable storages, this should be identity. fn from_creator(creator: Self::Creator) -> Self; /// Splits the storage at the given byte index and creates two non-overlapping instances. fn split_at(self, at: usize) -> (Self, Self); } unsafe impl Storage for Bytes { type Creator = BytesMut; fn from_creator(creator: Self::Creator) -> Self { creator.freeze() } fn split_at(mut self, at: usize) -> (Self, Self) { let right = self.split_off(at); (self, right) } } unsafe impl Storage for BytesMut { type Creator = BytesMut; fn from_creator(creator: Self::Creator) -> Self { creator } fn split_at(mut self, at: usize) -> (Self, Self) { let right = self.split_off(at); (self, right) } } /// Trait for extra functionality of a mutable storage. /// /// This is in addition to what an immutable storage must satisfy. /// /// # Safety /// /// The storage must act "sane". But what exactly it means is not yet analyzed and may change in /// future versions. Don't implement the trait (at least not yet). pub unsafe trait StorageMut: Storage + AsMut<[u8]> { /// An immutable counter-part storage. type Immutable: Storage; /// Adds some more bytes to the end of the storage. fn push_slice(&mut self, s: &[u8]); } unsafe impl StorageMut for BytesMut { type Immutable = Bytes; fn push_slice(&mut self, s: &[u8]) { self.extend_from_slice(s) } } /// Implementation of the [Str] and [StrMut] types. /// /// For technical reasons, both are implemented in one go as this type. For the same reason, most /// of the documentation can be found here. Users are expected to use the [Str] and [StrMut] /// instead. 
#[derive(Clone, Default)] pub struct StrInner(S); impl StrInner { /// Creates an empty instance. pub fn new() -> Self { Self::default() } /// Extracts the inner byte storage. pub fn into_inner(self) -> S { self.0 } /// Access to the inner storage. pub fn inner(&self) -> &S { &self.0 } /// Creates an instance from an existing byte storage. /// /// It may fail if the content is not valid UTF8. /// /// A [try_from][TryFrom::try_from] may be used instead. pub fn from_inner(s: S) -> Result> { match str::from_utf8(s.as_ref()) { Ok(_) => Ok(Self(s)), Err(e) => Err(Utf8Error { e, inner: s }), } } /// Same as [from_inner][StrInner::from_inner], but without the checks. /// /// # Safety /// /// The caller must ensure content is valid UTF8. pub const unsafe fn from_inner_unchecked(s: S) -> Self { Self(s) } /// Splits the string into two at the given index. /// /// # Panics /// /// If the index is not at char boundary. pub fn split_at_bytes(self, at: usize) -> (Self, Self) { assert!(self.deref().is_char_boundary(at)); let (l, r) = self.0.split_at(at); (Self(l), Self(r)) } /// Splits into whitespace separated "words". /// /// This acts like [split_whitespace][str::split_whitespace], but yields owned instances. It /// doesn't clone the content, it just increments some reference counts. pub fn split_whitespace_bytes(self) -> impl Iterator { BytesIter::new(self, Direction::Forward, |s| { sep_find(s, char::is_whitespace) }) .filter(|s| !s.is_empty()) } /// Splits into whitespace separated "words". /// /// This acts like [split_ascii_whitespace][str::split_ascii_whitespace], but yields owned /// instances. This doesn't clone the content, it just increments some reference counts. pub fn split_ascii_whitespace_bytes(self) -> impl Iterator { BytesIter::new(self, Direction::Forward, |s| { sep_find(s, |c| c.is_ascii() && (c as u8).is_ascii_whitespace()) }) .filter(|s| !s.is_empty()) } /// Splits into lines. /// /// This acts like [lines][str::lines], but yields owned instances. 
The content is not cloned, /// this just increments some reference counts. pub fn lines_bytes(self) -> impl Iterator { if self.is_empty() { Either::Left(iter::empty()) } else { let iter = BytesIter::new(self, Direction::Forward, |s| sep_find(s, |c| c == '\n')) .map(|s| match s.chars().next() { Some('\r') => s.split_at_bytes(1).1, _ => s, }); Either::Right(iter) } } /// Splits with the provided separator. /// /// This acts somewhat like [split][str::split], but yields owned instances. Also, it accepts /// only string patters (since the `Pattern` is not stable ☹). The content is not cloned, this /// just increments some reference counts. pub fn split_bytes<'s>(self, sep: &'s str) -> impl Iterator + 's where S: 's, { if sep.is_empty() { let bulk = BytesIter::new(self, Direction::Forward, |s| empty_sep(s, usize::MAX)); Either::Left(iter::once(Self::default()).chain(bulk)) } else { let sep_find = move |s: &str| s.find(sep).map(|pos| (pos, pos + sep.len())); Either::Right(BytesIter::new(self, Direction::Forward, sep_find)) } } /// Splits max. `n` times according to the given pattern. /// /// This acts somewhat like [splitn][str::splitn], but yields owned instances. Also, it accepts /// only string patters (since the `Pattern` is not stable ☹). The content is not cloned, this /// just increments some reference counts. pub fn splitn_bytes<'s>(self, mut n: usize, sep: &'s str) -> impl Iterator + 's where S: 's, { // TODO: This seems to work, but is ugly. Any idea how to simplify? 
if sep.is_empty() { if n <= 1 { Either::Left(Either::Left(iter::once(self).take(n))) } else { n -= 1; let bulk = BytesIter::new(self, Direction::Forward, move |s| { n -= 1; empty_sep(s, n) }); Either::Left(Either::Right(iter::once(Self::default()).chain(bulk))) } } else { let sep_find = move |s: &str| { n -= 1; if n == 0 { None } else { s.find(sep).map(|pos| (pos, pos + sep.len())) } }; Either::Right(BytesIter::new(self, Direction::Forward, sep_find).take(n)) } } /// A reverse version of [split_bytes][Self::split_bytes]. pub fn rsplit_bytes<'s>(self, sep: &'s str) -> impl Iterator + 's where S: 's, { if sep.is_empty() { let bulk = BytesIter::new(self, Direction::Backward, |s| rempty_sep(s, usize::MAX)); Either::Left(iter::once(Self::default()).chain(bulk)) } else { let sep_find = move |s: &str| s.rfind(sep).map(|pos| (pos + sep.len(), pos)); Either::Right(BytesIter::new(self, Direction::Backward, sep_find)) } } /// A reverse version of [splitn_bytes][Self::splitn_bytes]. pub fn rsplitn_bytes<'s>(self, mut n: usize, sep: &'s str) -> impl Iterator + 's where S: 's, { // TODO: This seems to work, but is ugly. Any idea how to simplify? if sep.is_empty() { if n <= 1 { Either::Left(Either::Left(iter::once(self).take(n))) } else { n -= 1; let bulk = BytesIter::new(self, Direction::Backward, move |s| { n -= 1; rempty_sep(s, n) }); Either::Left(Either::Right(iter::once(Self::default()).chain(bulk))) } } else { let sep_find = move |s: &str| { n -= 1; if n == 0 { None } else { s.rfind(sep).map(|pos| (pos + sep.len(), pos)) } }; Either::Right(BytesIter::new(self, Direction::Backward, sep_find).take(n)) } } } impl StrInner { /// Appends a string. pub fn push_str(&mut self, s: &str) { self.0.push_slice(s.as_bytes()); } /// Appends one character. pub fn push(&mut self, c: char) { self.push_str(c.encode_utf8(&mut [0; 4])); } /// Provides mutable access to the inner buffer. /// /// # Safety /// /// The caller must ensure that the content stays valid UTF8. 
pub unsafe fn inner_mut(&mut self) -> &mut S { &mut self.0 } /// Turns the mutable variant into an immutable one. /// /// The advantage is that it can then be shared (also by small parts). pub fn freeze(self) -> StrInner { StrInner(S::Immutable::from_creator(self.0)) } } impl Deref for StrInner { type Target = str; fn deref(&self) -> &str { unsafe { str::from_utf8_unchecked(self.0.as_ref()) } } } impl DerefMut for StrInner { fn deref_mut(&mut self) -> &mut str { unsafe { str::from_utf8_unchecked_mut(self.0.as_mut()) } } } impl AsRef for StrInner where S: Storage, str: AsRef, { fn as_ref(&self) -> &T { self.deref().as_ref() } } impl AsMut for StrInner { fn as_mut(&mut self) -> &mut str { self.deref_mut() } } impl Borrow for StrInner { fn borrow(&self) -> &str { self.deref() } } impl BorrowMut for StrInner { fn borrow_mut(&mut self) -> &mut str { self.deref_mut() } } impl Debug for StrInner { fn fmt(&self, fmt: &mut Formatter) -> FmtResult { Debug::fmt(self.deref(), fmt) } } impl Display for StrInner { fn fmt(&self, fmt: &mut Formatter) -> FmtResult { Display::fmt(self.deref(), fmt) } } impl Hash for StrInner { fn hash(&self, state: &mut H) { self.deref().hash(state) } } impl Index for StrInner where S: Storage, str: Index, { type Output = >::Output; fn index(&self, index: I) -> &Self::Output { self.deref().index(index) } } impl IndexMut for StrInner where S: StorageMut, str: IndexMut, { fn index_mut(&mut self, index: I) -> &mut Self::Output { self.deref_mut().index_mut(index) } } impl Add<&str> for StrInner { type Output = Self; fn add(mut self, rhs: &str) -> Self::Output { self.push_str(rhs); self } } impl AddAssign<&str> for StrInner { fn add_assign(&mut self, rhs: &str) { self.push_str(rhs); } } impl Extend for StrInner { fn extend>(&mut self, iter: T) { for c in iter { self.push(c); } } } impl<'a, S: StorageMut> Extend<&'a char> for StrInner { fn extend>(&mut self, iter: T) { for c in iter { self.push(*c); } } } macro_rules! 
impl_extend { ($ty:ty $(, $lifetimes:lifetime )* ) => { impl<$($lifetimes, )* S: StorageMut> Extend<$ty> for StrInner { fn extend>(&mut self, iter: T) { for i in iter { self.push_str(i.as_ref()); } } } impl<$($lifetimes, )* S> FromIterator<$ty> for StrInner where S: Storage, { fn from_iter>(iter: T) -> Self { let mut creator = StrInner(S::Creator::default()); creator.extend(iter); StrInner(S::from_creator(creator.0)) } } }; } impl_extend!(String); impl_extend!(Box); impl_extend!(&'a String, 'a); impl_extend!(&'a str, 'a); impl_extend!(Cow<'a, str>, 'a); macro_rules! impl_from { ($ty:ty $(, $lifetimes:lifetime )* ) => { impl<$($lifetimes, )* S> From<$ty> for StrInner where S: Storage, { fn from(s: $ty) -> Self { iter::once(s).collect() } } }; } impl_from!(&'a String, 'a); impl_from!(&'a str, 'a); impl_from!(Cow<'a, str>, 'a); impl From for Str { fn from(s: String) -> Self { let inner = Bytes::from(s.into_bytes()); // Safety: inner is constructed from a str unsafe { Str::from_inner_unchecked(inner) } } } impl From> for Str { fn from(s: Box) -> Self { let s: Box<[u8]> = s.into(); let inner = Bytes::from(s); // Safety: inner is constructed from a str unsafe { Str::from_inner_unchecked(inner) } } } macro_rules! 
impl_try_from { ($ty: ty) => { impl TryFrom<$ty> for StrInner<$ty> { type Error = Utf8Error<$ty>; fn try_from(s: $ty) -> Result> { Self::from_inner(s) } } impl From> for $ty { fn from(s: StrInner<$ty>) -> $ty { s.0 } } }; } impl_try_from!(Bytes); impl_try_from!(BytesMut); impl From for Str { fn from(s: StrMut) -> Self { s.freeze() } } impl FromStr for StrInner { type Err = Infallible; fn from_str(s: &str) -> Result { Ok(s.into()) } } impl PartialEq for StrInner { fn eq(&self, other: &Self) -> bool { self.deref() == other.deref() } } impl Eq for StrInner {} impl PartialOrd for StrInner { fn partial_cmp(&self, other: &Self) -> Option { Some(Ord::cmp(self, other)) } } impl Ord for StrInner { fn cmp(&self, other: &Self) -> Ordering { self.deref().cmp(other.deref()) } } macro_rules! impl_partrial_eq { ($ty: ty $(, $lifetimes:lifetime )* ) => { impl<$($lifetimes, )* S: Storage> PartialEq<$ty> for StrInner { fn eq(&self, other: &$ty) -> bool { self.deref() == other.deref() } } impl<$($lifetimes, )* S: Storage> PartialEq> for $ty { fn eq(&self, other: &StrInner) -> bool { self.deref() == other.deref() } } impl<$($lifetimes, )* S: Storage> PartialOrd<$ty> for StrInner { fn partial_cmp(&self, other: &$ty) -> Option { Some(self.deref().cmp(other.deref())) } } impl<$($lifetimes, )* S: Storage> PartialOrd> for $ty { fn partial_cmp(&self, other: &StrInner) -> Option { Some(self.deref().cmp(other.deref())) } } }; } impl_partrial_eq!(String); impl_partrial_eq!(Box); impl_partrial_eq!(&'a str, 'a); impl_partrial_eq!(&'a mut str, 'a); impl_partrial_eq!(Cow<'a, str>, 'a); impl Write for StrInner { fn write_str(&mut self, s: &str) -> FmtResult { self.push_str(s); Ok(()) } } /// The [format] macro, but returning [Str]. /// /// # Examples /// /// ``` /// use bytes_utils::{format_bytes, Str}; /// let s: Str = format_bytes!("Hello {}", "world"); /// assert_eq!("Hello world", s); /// ``` #[macro_export] macro_rules! 
format_bytes { ($($arg: tt)*) => { $crate::format_bytes_mut!($($arg)*).freeze() } } /// The [format] macro, but returning [StrMut]. /// /// # Examples /// /// ``` /// use bytes_utils::{format_bytes_mut, StrMut}; /// let s: StrMut = format_bytes_mut!("Hello {}", "world"); /// assert_eq!("Hello world", s); /// ``` #[macro_export] macro_rules! format_bytes_mut { ($($arg: tt)*) => {{ use std::fmt::Write; let mut buf = $crate::StrMut::default(); write!(buf, $($arg)*).unwrap(); buf }} } /// An immutable variant of [Bytes]-backed string. /// /// The methods and their documentation are on [StrInner], but users are mostly expected to use /// this and the [StrMut] aliases. pub type Str = StrInner; impl Str { /// Extracts a subslice of the string as an owned [Str]. /// /// # Panics /// /// If the byte indices in the range are not on char boundaries. pub fn slice(&self, range: R) -> Str where str: Index, { self.slice_ref(&self[range]) } /// Extracts owned representation of the slice passed. /// /// This method accepts a string sub-slice of `self`. It then extracts the slice but as the /// [Str] type. This makes it easier to use "ordinary" string parsing/manipulation and then go /// back to holding the [Bytes]-based representation. /// /// This is zero-copy, the common part will be shared by reference counting. /// /// # Panics /// /// If the provided slice is not a sub-slice of `self`. This is checked based on address of the /// slice, not on the content. /// /// # Example /// /// ```rust /// # use bytes_utils::Str; /// let owned = Str::from("Hello World"); /// let borrowed_mid: &str = &owned[2..5]; /// /// let mid: Str = owned.slice_ref(borrowed_mid); /// assert_eq!("Hello World", owned); /// assert_eq!("llo", mid); /// ``` pub fn slice_ref(&self, subslice: &str) -> Self { let sub = self.0.slice_ref(subslice.as_bytes()); Self(sub) } /// Create [`Str`] from static string in O(1). 
pub const fn from_static(s: &'static str) -> Self { let bytes = Bytes::from_static(s.as_bytes()); // Safety: bytes is constructed from str unsafe { Str::from_inner_unchecked(bytes) } } } /// A mutable variant of [BytesMut]-backed string. /// /// Unlike [Str], this one allows modifications (mostly additions), but also doesn't allow /// overlapping/shared chunks. /// /// This is internally backed by the [StrInner] type, so the documentation of the methods are on /// that. pub type StrMut = StrInner; impl StrMut { /// Splits and returns the part of already built string, but keeps the extra capacity. pub fn split_built(&mut self) -> StrMut { StrInner(self.0.split()) } } #[cfg(test)] mod tests { use itertools::Itertools; use std::panic; use super::*; #[test] fn split_w_byte_index() { let v = Str::from("😈 ").split_whitespace_bytes().collect_vec(); assert_eq!(1, v.len()); assert_eq!("😈", v[0]); } #[test] fn split_same() { let v = Str::from("a").split_bytes("a").collect_vec(); assert_eq!(2, v.len()); assert_eq!("", v[0]); assert_eq!("", v[1]); } #[test] fn split_empty_pat() { let v = Str::from("a").split_bytes("").collect_vec(); assert_eq!(3, v.len()); assert_eq!("", v[0]); assert_eq!("a", v[1]); assert_eq!("", v[2]); } #[test] fn slice_checks_char_boundaries() { let v = Str::from("😈"); assert_eq!(4, v.len()); panic::catch_unwind(|| v.slice(1..)).unwrap_err(); } #[test] fn split_at_bytes_mid() { let v = Str::from("hello"); let (l, r) = v.split_at_bytes(2); assert_eq!("he", l); assert_eq!("llo", r); } #[test] fn split_at_bytes_begin() { let v = Str::from("hello"); let (l, r) = v.split_at_bytes(0); assert_eq!("", l); assert_eq!("hello", r); } #[test] fn split_at_bytes_end() { let v = Str::from("hello"); let (l, r) = v.split_at_bytes(5); assert_eq!("hello", l); assert_eq!("", r); } #[test] fn split_at_bytes_panic() { let v = Str::from("😈"); assert_eq!(4, v.len()); panic::catch_unwind(|| v.split_at_bytes(2)).unwrap_err(); } #[cfg(not(miri))] mod proptests { use 
proptest::prelude::*; use super::*; proptest! { #[test] fn split_whitespace(s: String) { let bstring = Str::from(&s); let bw = bstring.split_whitespace_bytes(); let sw = s.split_whitespace(); for (b, s) in bw.zip_eq(sw) { prop_assert_eq!(b, s); } } #[test] fn split_ascii_whitespace(s: String) { let bstring = Str::from(&s); let bw = bstring.split_ascii_whitespace_bytes(); let sw = s.split_ascii_whitespace(); for (b, s) in bw.zip_eq(sw) { prop_assert_eq!(b, s); } } #[test] fn lines(s: String) { let bstring = Str::from(&s); let bl = bstring.lines_bytes(); let sl = s.lines(); for (b, s) in bl.zip_eq(sl) { prop_assert_eq!(b, s); } } #[test] fn split(s: String, pat: String) { let bstring = Str::from(&s); let bs = bstring.split_bytes(&pat); let ss = s.split(&pat); for (b, s) in bs.zip_eq(ss) { prop_assert_eq!(b, s); } } #[test] fn split_n(s: String, pat: String, n in 0..5usize) { let bstring = Str::from(&s); let bs = bstring.splitn_bytes(n, &pat); let ss = s.splitn(n, &pat); for (b, s) in bs.zip_eq(ss) { prop_assert_eq!(b, s); } } #[test] fn rsplit(s: String, pat: String) { let bstring = Str::from(&s); let bs = bstring.rsplit_bytes(&pat); let ss = s.rsplit(&pat); for (b, s) in bs.zip_eq(ss) { prop_assert_eq!(b, s); } } #[test] fn rsplit_n(s: String, pat: String, n in 0..5usize) { let bstring = Str::from(&s); let bs = bstring.rsplitn_bytes(n, &pat); let ss = s.rsplitn(n, &pat); for (b, s) in bs.zip_eq(ss) { prop_assert_eq!(b, s); } } } } } bytes-utils-0.1.4/src/string/serde_impl.rs000064400000000000000000000024441046102023000167040ustar 00000000000000use core::ops::Deref; use super::{Storage, StrInner}; use serde::de::{Deserialize, Deserializer, Error, Unexpected}; use serde::{Serialize, Serializer}; impl Serialize for StrInner { fn serialize(&self, serializer: Ser) -> Result where Ser: Serializer, { let s: &str = Deref::deref(self); s.serialize(serializer) } } impl<'de, S: Storage + Deserialize<'de>> Deserialize<'de> for StrInner { fn deserialize(deserializer: D) -> 
Result where D: Deserializer<'de>, { let inner = ::deserialize(deserializer)?; Self::from_inner(inner).map_err(|err| { D::Error::invalid_value( Unexpected::Bytes(err.inner.as_ref()), &format!("Expected utf-8 str: {}", err.e).as_str(), ) }) } } #[cfg(test)] mod tests { use crate::{Str, StrMut}; use serde_test::{assert_tokens, Token}; #[test] fn test_de_ser_str() { const S: &str = "Hello, world!"; assert_tokens(&Str::from_static(S), &[Token::BorrowedStr(S)]); } #[test] fn test_de_ser_str_mut() { const S: &str = "Hello, world!"; assert_tokens(&StrMut::from(S), &[Token::BorrowedStr(S)]); } }