smartstring-1.0.1/.cargo_vcs_info.json0000644000000001360000000000100134100ustar { "git": { "sha1": "e407ca23c747257a812d2d2e70bf336412718c3a" }, "path_in_vcs": "" }smartstring-1.0.1/.github/workflows/ci.yml000064400000000000000000000071220072674642500167450ustar 00000000000000name: Continuous Integration on: push: pull_request: schedule: - cron: "0 0 1,15 * *" jobs: check: name: Check runs-on: ubuntu-latest strategy: matrix: rust: - stable - nightly - 1.57.0 # lowest supported version flags: - --all-features - --no-default-features steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: ${{ matrix.rust }} override: true - uses: actions-rs/cargo@v1 with: command: check args: ${{ matrix.flags }} test: name: Tests runs-on: ubuntu-latest strategy: matrix: rust: - stable - nightly - 1.57.0 # lowest supported version steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: ${{ matrix.rust }} override: true - uses: actions-rs/cargo@v1 with: command: test args: --all-features nostd: name: no_std build runs-on: ubuntu-latest strategy: matrix: rust: - stable - nightly - 1.57.0 # lowest supported version steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: ${{ matrix.rust }} override: true - uses: actions-rs/cargo@v1 with: command: build args: --no-default-features fmt: name: Rustfmt runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true components: rustfmt - uses: actions-rs/cargo@v1 with: command: fmt args: --all -- --check clippy: name: Clippy runs-on: ubuntu-latest strategy: matrix: rust: - stable - nightly steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: ${{ matrix.rust }} override: true components: clippy - uses: actions-rs/clippy-check@v1 with: name: Clippy-${{ matrix.rust }} token: ${{ 
secrets.GITHUB_TOKEN }} args: --all-features miri: name: Miri runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly override: true components: miri - name: Run Miri run: | cargo miri setup env MIRIFLAGS=-Zmiri-disable-isolation cargo miri test -- --skip proptest smartstring-1.0.1/.github/workflows/fuzz.yml000064400000000000000000000026460072674642500173560ustar 00000000000000name: libFuzzer on: push: pull_request: schedule: - cron: "8 0 * * *" jobs: fuzz: name: libFuzzer runs-on: ubuntu-latest strategy: fail-fast: false matrix: target: - ordering_compact - smartstring_compact - smartstring_lazycompact steps: - uses: actions/checkout@v2 name: Checkout project - uses: actions/cache@v1 name: Cache corpus id: cache-corpus with: path: fuzz/corpus/${{ matrix.target }} key: fuzz-corpus-${{ matrix.target }}-${{ github.run_id }} restore-keys: | fuzz-corpus-${{ matrix.target }}- - uses: actions-rs/toolchain@v1 name: Install Rust with: profile: minimal toolchain: nightly override: true - uses: actions-rs/install@v0.1 name: Install cargo-fuzz with: crate: cargo-fuzz version: latest use-tool-cache: true - name: Fuzz for 2 minutes run: cargo fuzz run ${{ matrix.target }} -- -max_total_time=120 # seconds - uses: actions/upload-artifact@v1 name: Publish artifacts if: always() with: name: fuzz-artifacts path: fuzz/artifacts - uses: actions/upload-artifact@v2 name: Publish corpus if: always() with: name: fuzz-corpus path: fuzz/corpus smartstring-1.0.1/.gitignore000064400000000000000000000000230072674642500142130ustar 00000000000000/target Cargo.lock smartstring-1.0.1/.travis.yml000064400000000000000000000005760072674642500143510ustar 00000000000000language: rust rust: - 1.46.0 - stable - beta - nightly arch: - amd64 - arm64 - ppc64le - s390x cache: directories: - /home/travis/.rustup - /home/travis/.cargo - /home/travis/target install: - rustup update - mkdir -p .cargo && echo '[build]' > .cargo/config && 
echo 'target-dir = "/home/travis/target"' >> .cargo/config script: cargo test smartstring-1.0.1/CHANGELOG.md000064400000000000000000000145340072674642500140500ustar 00000000000000# Changelog All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [1.0.1] - 2022-03-24 ### FIXED - To avoid an issue where allocated heap memory may be deallocated with a different layout alignment than it was officially allocated with when converting between `std::string::String` and `SmartString`, even if otherwise correctly aligned, the respective `From` implementations now use `std::alloc::Allocator::grow()` to re-align the heap data as necessary. An unfortunate consequence of this is that because the `std::alloc::Allocator` API hasn't been stabilised yet, unless you're on nightly or some future stable rustc version after `allocator_api` has been stabilised, converting between `String` and `SmartString` will always reallocate and copy (making it always O(n) rather than O(1) when correctly aligned and O(n) otherwise). ([#28](https://github.com/bodil/smartstring/issues/28)) ## [1.0.0] - 2022-02-24 ### CHANGED - `smartstring` now implements its own boxed string type rather than deferring directly to `String`, so it no longer makes assumptions it shouldn't be making about the layout of the `String` struct. This also allows us to organise the boxed struct in a way that will let us rely only on our basic assumption that heap memory is word aligned on both big and little endian architectures. The most immediate consequence of this is that `smartstring` will now compile on 32-bit big endian architectures such as `mips`. We are now also explicitly allocating heap memory aligned for `u16` rather than `u8`, ensuring the assumption about pointer alignment becomes an invariant. 
In short: `smartstring` no longer relies on undefined behaviour, and should be safe to use anywhere. - The above means that the boxed `SmartString` is no longer pointer compatible with `String`, so if you were relying on that despite the documentation urging you not to, you'll really have to stop it now. Converting between `SmartString` and `String` using `From` and `Into` traits is still efficient and allocation free. - The minimum supported rustc version is now 1.57.0. - The `smartstring::validate()` function has been removed, as it's no longer needed. ## [0.2.10] - 2022-02-20 ### CHANGED - The minimum supported rustc version has been increased to 1.56.0, and the `rust-version` field has been added to the crate's `Cargo.toml` to indicate the MSRV. (The `rust-version` field itself was introduced in version 1.56, hence the bump.) - Dependencies have been bumped, most notably to `arbitrary` version 1. ## [0.2.9] - 2021-07-27 ### ADDED - You can (and should) now call `smartstring::validate()` from your own code or test suite to validate `SmartString`'s memory layout assumptions. ## [0.2.8] - 2021-07-26 ### CHANGED - The minimum supported rustc version has been increased to 1.46.0. ### ADDED - There are now `const fn new_const()` constructors for `SmartString<Compact>` and `SmartString<LazyCompact>`, added as a temporary measure because const functions can't yet take trait bounds on type arguments, so we can't simply make `SmartString::new()` const. Please note that when rustc catches up, the plan is to deprecate `new_const()` in favour of `new()`. (#21) ## [0.2.7] - 2021-07-01 ### FIXED - `no_std` builds have been fixed. (#18) ## [0.2.6] - 2020-12-19 ### ADDED - `SmartString` now implements `PartialEq<&str>`. ## [0.2.5] - 2020-09-24 ### ADDED - `From` implementations from `Cow<'_, str>` and `&mut str` were added. (#12) ## [0.2.4] - 2020-09-05 ### ADDED - `smartstring` is now `no_std` if you disable the `std` feature flag (which is enabled by default).
(#10) ### FIXED - `smartstring` will now refuse to compile on 32-bit big-endian architectures, where assuming that the high bit of a pointer is always empty is going to be a very bad idea. ## [0.2.3] - 2020-07-07 ### ADDED - `SmartString` now implements `Display`. (#6) - `SmartString` now implements `FromIterator`. - Support for [`serde`](https://serde.rs/) behind the `serde` feature flag. (#2) - Support for [`arbitrary`](https://crates.io/crates/arbitrary) behind the `arbitrary` feature flag. - Support for [`proptest`](https://crates.io/crates/proptest) behind the `proptest` feature flag. ### FIXED - `SmartString::push_str` would previously trigger two heap allocations while promoting an inline string to a boxed string, one of which was unnecessary. It now only makes the one strictly necessary allocation. (#5) - Fixed a bug where `SmartString::remove` would panic if you tried to remove the last index in an inline string. ## [0.2.2] - 2020-07-05 ### FIXED - Calling `shrink_to_fit()` on a string with `LazyCompact` layout will now inline it and deallocate the heap allocation if the string is short enough to be inlined. ## [0.2.1] - 2020-07-04 ### FIXED - The type alias `smartstring::alias::String` was incorrectly pointing at the `Compact` variant. It is now pointing at `LazyCompact`, as the documentation describes. ## [0.2.0] - 2020-07-04 ### REMOVED - The `Prefixed` variant has been removed, as it comes with significant code complexity for very dubious gains. ### CHANGED - The type alias `smartstring::alias::String` now refers to `LazyCompact` instead of `Compact`, the idea being that the obvious drop-in replacement for `String` shouldn't have any unexpected performance differences, which `Compact` can have because it aggressively re-inlines strings to keep them as local as possible. `LazyCompact` instead heap allocates once when the string is in excess of the inline capacity and keeps the allocation from then on, so there are no surprises. 
### ADDED - There's a new layout variant, `LazyCompact`, which works like `Compact` except it never re-inlines strings once they have been moved to the heap. - As the alias `String` has changed, there is now a new type alias `smartstring::alias::CompactString`, referring to strings with `Compact` layout. ### FIXED - Fixed a bug where `SmartString::drain()` would remove twice the drained content from the string. ## [0.1.0] - 2020-05-15 Initial release. smartstring-1.0.1/CODE_OF_CONDUCT.md000064400000000000000000000062320072674642500150320ustar 00000000000000# Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at admin@immutable.rs. 
All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org smartstring-1.0.1/Cargo.toml0000644000000034010000000000100114040ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2021" rust-version = "1.57" name = "smartstring" version = "1.0.1" authors = ["Bodil Stokke "] build = "./build.rs" exclude = ["release.toml", "proptest-regressions/**"] description = "Compact inlined strings" documentation = "http://docs.rs/smartstring" readme = "./README.md" keywords = ["cache-local", "cpu-cache", "small-string", "sso", "inline-string"] categories = ["data-structures"] license = "MPL-2.0+" repository = "https://github.com/bodil/smartstring" resolver = "2" [package.metadata.docs.rs] features = ["arbitrary", "proptest", "serde"] [[bench]] name = "smartstring" harness = false [dependencies.arbitrary] version = "1" optional = true [dependencies.proptest] version = "1" optional = true [dependencies.serde] version = "1" optional = true [dependencies.static_assertions] version = "1" [dev-dependencies.criterion] version = "0.3" [dev-dependencies.proptest] version = "1" [dev-dependencies.proptest-derive] version = "0.3" [dev-dependencies.rand] version = "0.8" [dev-dependencies.serde_test] version = "1" [build-dependencies.autocfg] version = "1" [build-dependencies.version_check] version = "0.9" [features] default = ["std"] std = [] test = ["std", "arbitrary", "arbitrary/derive"] [badges.travis-ci] branch = "master" repository = "bodil/smartstring" smartstring-1.0.1/Cargo.toml.orig000064400000000000000000000021520072674642500151170ustar 00000000000000[package] name = "smartstring" version = "1.0.1" authors = ["Bodil Stokke "] edition = "2021" license = "MPL-2.0+" description = "Compact inlined strings" repository = "https://github.com/bodil/smartstring" documentation = "http://docs.rs/smartstring" readme = "./README.md" categories = ["data-structures"] keywords = ["cache-local", "cpu-cache", "small-string", "sso", "inline-string"] exclude = ["release.toml", "proptest-regressions/**"] rust-version = "1.57" build = "./build.rs" [package.metadata.docs.rs] features = ["arbitrary", "proptest", "serde"] [badges] travis-ci = { repository = 
"bodil/smartstring", branch = "master" } [[bench]] name = "smartstring" harness = false [features] default = ["std"] std = [] test = ["std", "arbitrary", "arbitrary/derive"] [dependencies] static_assertions = "1" serde = { version = "1", optional = true } arbitrary = { version = "1", optional = true } proptest = { version = "1", optional = true } [dev-dependencies] proptest = "1" proptest-derive = "0.3" criterion = "0.3" rand = "0.8" serde_test = "1" [build-dependencies] version_check = "0.9" autocfg = "1" smartstring-1.0.1/LICENCE.md000064400000000000000000000362760072674642500136320ustar 00000000000000Mozilla Public License Version 2.0 ================================== ### 1. Definitions **1.1. “Contributor”** means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software. **1.2. “Contributor Version”** means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor's Contribution. **1.3. “Contribution”** means Covered Software of a particular Contributor. **1.4. “Covered Software”** means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof. **1.5. “Incompatible With Secondary Licenses”** means * **(a)** that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or * **(b)** that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License. **1.6. “Executable Form”** means any form of the work other than Source Code Form. **1.7. “Larger Work”** means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. **1.8. “License”** means this document. **1.9. 
“Licensable”** means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License. **1.10. “Modifications”** means any of the following: * **(a)** any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or * **(b)** any new file in Source Code Form that contains any Covered Software. **1.11. “Patent Claims” of a Contributor** means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version. **1.12. “Secondary License”** means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses. **1.13. “Source Code Form”** means the form of the work preferred for making modifications. **1.14. “You” (or “Your”)** means an individual or a legal entity exercising rights under this License. For legal entities, “You” includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, “control” means **(a)** the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or **(b)** ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. ### 2. License Grants and Conditions #### 2.1. 
Grants Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: * **(a)** under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and * **(b)** under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version. #### 2.2. Effective Date The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution. #### 2.3. Limitations on Grant Scope The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor: * **(a)** for any code that a Contributor has removed from Covered Software; or * **(b)** for infringements caused by: **(i)** Your and any other third party's modifications of Covered Software, or **(ii)** the combination of its Contributions with other software (except as part of its Contributor Version); or * **(c)** under Patent Claims infringed by Covered Software in the absence of its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4). #### 2.4. Subsequent Licenses No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3). #### 2.5. 
Representation Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License. #### 2.6. Fair Use This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents. #### 2.7. Conditions Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1. ### 3. Responsibilities #### 3.1. Distribution of Source Form All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients' rights in the Source Code Form. #### 3.2. Distribution of Executable Form If You distribute Covered Software in Executable Form then: * **(a)** such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and * **(b)** You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients' rights in the Source Code Form under this License. #### 3.3. Distribution of a Larger Work You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. 
If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s). #### 3.4. Notices You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies. #### 3.5. Application of Additional Terms You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction. ### 4. Inability to Comply Due to Statute or Regulation If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: **(a)** comply with the terms of this License to the maximum extent possible; and **(b)** describe the limitations and the code they affect. 
Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. ### 5. Termination **5.1.** The rights granted under this License will terminate automatically if You fail to comply with any of its terms. However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated **(a)** provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and **(b)** on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice. **5.2.** If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate. **5.3.** In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination. ### 6. 
Disclaimer of Warranty > Covered Software is provided under this License on an “as is” > basis, without warranty of any kind, either expressed, implied, or > statutory, including, without limitation, warranties that the > Covered Software is free of defects, merchantable, fit for a > particular purpose or non-infringing. The entire risk as to the > quality and performance of the Covered Software is with You. > Should any Covered Software prove defective in any respect, You > (not any Contributor) assume the cost of any necessary servicing, > repair, or correction. This disclaimer of warranty constitutes an > essential part of this License. No use of any Covered Software is > authorized under this License except under this disclaimer. ### 7. Limitation of Liability > Under no circumstances and under no legal theory, whether tort > (including negligence), contract, or otherwise, shall any > Contributor, or anyone who distributes Covered Software as > permitted above, be liable to You for any direct, indirect, > special, incidental, or consequential damages of any character > including, without limitation, damages for lost profits, loss of > goodwill, work stoppage, computer failure or malfunction, or any > and all other commercial damages or losses, even if such party > shall have been informed of the possibility of such damages. This > limitation of liability shall not apply to liability for death or > personal injury resulting from such party's negligence to the > extent applicable law prohibits such limitation. Some > jurisdictions do not allow the exclusion or limitation of > incidental or consequential damages, so this exclusion and > limitation may not apply to You. ### 8. Litigation Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. 
Nothing in this Section shall prevent a party's ability to bring cross-claims or counter-claims. ### 9. Miscellaneous This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor. ### 10. Versions of the License #### 10.1. New Versions Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number. #### 10.2. Effect of New Versions You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward. #### 10.3. Modified Versions If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License). #### 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached. ## Exhibit A - Source Code Form License Notice This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice. You may add additional accurate notices of copyright ownership. ## Exhibit B - “Incompatible With Secondary Licenses” Notice This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0. smartstring-1.0.1/README.md000064400000000000000000000031100072674642500135020ustar 00000000000000# smartstring Compact inlined strings. ## tl;dr String type that's source compatible with `std::string::String`, uses exactly the same amount of space, doesn't heap allocate for short strings (up to 23 bytes on 64-bit archs) by storing them in the space a `String` would have taken up on the stack, making strings go faster overall. ## Overview This crate provides a wrapper for Rust's standard `String` which uses the space a `String` occupies on the stack to store inline string data, automatically promoting it to a `String` when it grows beyond the inline capacity. This has the advantage of avoiding heap allocations for short strings as well as improving performance thanks to keeping the strings on the stack. This is all accomplished without the need for an external discriminant, so a `SmartString` is exactly the same size as a `String` on the stack, regardless of whether it's inlined or not. Converting a heap allocated `SmartString` into a `String` and vice versa is also a zero cost operation, as one will reuse the allocated memory of the other. ## Documentation - [API docs](https://docs.rs/smartstring) ## Licence Copyright 2020 Bodil Stokke This software is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at <http://mozilla.org/MPL/2.0/>. ## Code of Conduct Please note that this project is released with a [Contributor Code of Conduct][coc].
By participating in this project you agree to abide by its terms. [coc]: https://github.com/bodil/sized-chunks/blob/master/CODE_OF_CONDUCT.md smartstring-1.0.1/benches/smartstring.rs000064400000000000000000000067640072674642500165770ustar 00000000000000use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use rand::{distributions::Standard, Rng, SeedableRng}; use smartstring::{Compact, LazyCompact, SmartString, SmartStringMode}; use std::collections::BTreeSet; const SIZES: &[usize] = &[4096, 16384, 32768, 65536, 131072]; // const SIZES: &[usize] = &[4096, 65536]; // Makes a random string of only ASCII chars. fn make_key(chars: &mut impl Iterator, key_len: usize) -> String { let mut key = String::with_capacity(key_len); loop { let ch: char = ((chars.next().unwrap() % 96) + 32).into(); key.push(ch); if key.len() >= key_len { return key; } } } fn make_indices(count: usize, key_len: usize) -> Vec { let mut chars = rand::rngs::StdRng::seed_from_u64(31337).sample_iter::(Standard); let mut control = BTreeSet::new(); let mut indices = Vec::new(); loop { let key: String = make_key(&mut chars, key_len); if control.contains(&key) { continue; } control.insert(key.clone()); indices.push(key); if indices.len() >= count { break; } } indices } fn make_set(indices: &Vec) -> BTreeSet { let mut set = BTreeSet::new(); for key in indices { set.insert(key.clone()); } set } fn make_string_input(count: usize, key_len: usize) -> (Vec, BTreeSet) { let indices = make_indices(count, key_len); let set = make_set(&indices); (indices, set) } fn make_smart_input( indices: &Vec, set: &BTreeSet, ) -> (Vec>, BTreeSet>) where Mode: SmartStringMode, { ( indices.iter().cloned().map(From::from).collect(), set.iter().cloned().map(From::from).collect(), ) } fn lookup_random(key_size: usize, c: &mut Criterion) { let mut group = c.benchmark_group(format!("BTreeMap random lookup/key_len={}", key_size)); for size in SIZES { group.throughput(Throughput::Elements(*size 
as u64)); let (string_indices, string_set) = make_string_input(*size, key_size); let (smartc_indices, smartc_set) = make_smart_input::(&string_indices, &string_set); let (smartp_indices, smartp_set) = make_smart_input::(&string_indices, &string_set); group.bench_function(BenchmarkId::new("String", size), |b| { b.iter(|| { for k in &string_indices { black_box(string_set.contains(k)); } }) }); group.bench_function(BenchmarkId::new("SmartString", size), |b| { b.iter(|| { for k in &smartc_indices { black_box(smartc_set.contains(k)); } }) }); group.bench_function(BenchmarkId::new("SmartString", size), |b| { b.iter(|| { for k in &smartp_indices { black_box(smartp_set.contains(k)); } }) }); } group.finish(); } fn lookup_random_16b(c: &mut Criterion) { lookup_random(16, c) } fn lookup_random_256b(c: &mut Criterion) { lookup_random(256, c) } fn lookup_random_4096b(c: &mut Criterion) { lookup_random(4096, c) } criterion_group!( smartstring, lookup_random_16b, lookup_random_256b, lookup_random_4096b ); criterion_main!(smartstring); smartstring-1.0.1/build.rs000064400000000000000000000011360072674642500136760ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. use version_check as rustc; fn main() { let ac = autocfg::new(); let has_feature = Some(true) == rustc::supports_feature("allocator_api"); let has_api = ac.probe_trait("alloc::alloc::Allocator"); if has_feature || has_api { autocfg::emit("has_allocator"); } if has_feature { autocfg::emit("needs_allocator_feature"); } autocfg::rerun_path("build.rs"); } smartstring-1.0.1/src/arbitrary.rs000064400000000000000000000013700072674642500153650ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. 
use crate::{SmartString, SmartStringMode}; use alloc::string::String; use arbitrary::{Arbitrary, Result, Unstructured}; impl<'a, Mode: SmartStringMode> Arbitrary<'a> for SmartString where Mode: 'static, { fn arbitrary(u: &mut Unstructured<'_>) -> Result { String::arbitrary(u).map(Self::from) } fn arbitrary_take_rest(u: Unstructured<'_>) -> Result { String::arbitrary_take_rest(u).map(Self::from) } fn size_hint(depth: usize) -> (usize, Option) { String::size_hint(depth) } } smartstring-1.0.1/src/boxed.rs000064400000000000000000000147310072674642500144740ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. use alloc::{alloc::Layout, string::String}; use core::{ mem::align_of, ops::{Deref, DerefMut}, ptr::NonNull, }; use crate::{ops::GenericString, MAX_INLINE}; #[cfg(target_endian = "little")] #[repr(C)] pub(crate) struct BoxedString { ptr: NonNull, cap: usize, len: usize, } #[cfg(target_endian = "big")] #[repr(C)] pub(crate) struct BoxedString { len: usize, cap: usize, ptr: NonNull, } /// Checks if a pointer is aligned to an even address (good) /// or an odd address (either actually an InlineString or very, very bad). /// /// Returns `true` if aligned to an odd address, `false` if even. The sense of /// the boolean is "does this look like an InlineString? 
true/false" fn check_alignment(ptr: *const u8) -> bool { ptr.align_offset(2) > 0 } impl GenericString for BoxedString { fn set_size(&mut self, size: usize) { self.len = size; debug_assert!(self.len <= self.cap); } fn as_mut_capacity_slice(&mut self) -> &mut [u8] { #[allow(unsafe_code)] unsafe { core::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.capacity()) } } } impl BoxedString { const MINIMAL_CAPACITY: usize = MAX_INLINE * 2; pub(crate) fn check_alignment(this: &Self) -> bool { check_alignment(this.ptr.as_ptr()) } fn layout_for(cap: usize) -> Layout { // Always request memory that is specifically aligned to at least 2, so // the least significant bit is guaranteed to be 0. let layout = Layout::array::(cap) .and_then(|layout| layout.align_to(align_of::())) .unwrap(); assert!( layout.size() <= isize::MAX as usize, "allocation too large!" ); layout } fn alloc(cap: usize) -> NonNull { let layout = Self::layout_for(cap); #[allow(unsafe_code)] let ptr = match NonNull::new(unsafe { alloc::alloc::alloc(layout) }) { Some(ptr) => ptr, None => alloc::alloc::handle_alloc_error(layout), }; debug_assert!(ptr.as_ptr().align_offset(2) == 0); ptr } fn realloc(&mut self, cap: usize) { let layout = Self::layout_for(cap); let old_layout = Self::layout_for(self.cap); let old_ptr = self.ptr.as_ptr(); #[allow(unsafe_code)] let ptr = unsafe { alloc::alloc::realloc(old_ptr, old_layout, layout.size()) }; self.ptr = match NonNull::new(ptr) { Some(ptr) => ptr, None => alloc::alloc::handle_alloc_error(layout), }; self.cap = cap; debug_assert!(self.ptr.as_ptr().align_offset(2) == 0); } pub(crate) fn ensure_capacity(&mut self, target_cap: usize) { let mut cap = self.cap; while cap < target_cap { cap *= 2; } self.realloc(cap) } pub(crate) fn new(cap: usize) -> Self { let cap = cap.max(Self::MINIMAL_CAPACITY); Self { cap, len: 0, ptr: Self::alloc(cap), } } pub(crate) fn from_str(cap: usize, src: &str) -> Self { let mut out = Self::new(cap); out.len = src.len(); 
out.as_mut_capacity_slice()[..src.len()].copy_from_slice(src.as_bytes()); out } pub(crate) fn capacity(&self) -> usize { self.cap } pub(crate) fn shrink_to_fit(&mut self) { self.realloc(self.len); } } impl Drop for BoxedString { fn drop(&mut self) { #[allow(unsafe_code)] unsafe { alloc::alloc::dealloc(self.ptr.as_ptr(), Self::layout_for(self.cap)) } } } impl Clone for BoxedString { fn clone(&self) -> Self { Self::from_str(self.capacity(), self.deref()) } } impl Deref for BoxedString { type Target = str; fn deref(&self) -> &Self::Target { #[allow(unsafe_code)] unsafe { core::str::from_utf8_unchecked(core::slice::from_raw_parts(self.ptr.as_ptr(), self.len)) } } } impl DerefMut for BoxedString { fn deref_mut(&mut self) -> &mut Self::Target { #[allow(unsafe_code)] unsafe { core::str::from_utf8_unchecked_mut(core::slice::from_raw_parts_mut( self.ptr.as_ptr(), self.len, )) } } } impl From for BoxedString { #[allow(unsafe_code, unused_mut)] fn from(mut s: String) -> Self { if s.is_empty() { Self::new(s.capacity()) } else { #[cfg(has_allocator)] { // TODO: Use String::into_raw_parts when stabilised, meanwhile let's get unsafe let len = s.len(); let cap = s.capacity(); #[allow(unsafe_code)] let ptr = unsafe { NonNull::new_unchecked(s.as_mut_ptr()) }; let old_layout = Layout::array::(cap).unwrap(); use alloc::alloc::Allocator; let allocator = alloc::alloc::Global; if let Ok(aligned_ptr) = unsafe { allocator.grow(ptr, old_layout, Self::layout_for(cap)) } { core::mem::forget(s); Self { cap, len, ptr: aligned_ptr.cast(), } } else { Self::from_str(cap, &s) } } #[cfg(not(has_allocator))] Self::from_str(s.capacity(), &s) } } } impl From for String { #[allow(unsafe_code)] fn from(s: BoxedString) -> Self { #[cfg(has_allocator)] { let ptr = s.ptr; let cap = s.cap; let len = s.len; let new_layout = Layout::array::(cap).unwrap(); use alloc::alloc::Allocator; let allocator = alloc::alloc::Global; if let Ok(aligned_ptr) = unsafe { allocator.grow(ptr, BoxedString::layout_for(cap), 
new_layout) } { core::mem::forget(s); unsafe { String::from_raw_parts(aligned_ptr.as_ptr().cast(), len, cap) } } else { String::from(s.deref()) } } #[cfg(not(has_allocator))] String::from(s.deref()) } } smartstring-1.0.1/src/casts.rs000064400000000000000000000010370072674642500145030ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. use crate::{boxed::BoxedString, inline::InlineString}; pub(crate) enum StringCast<'a> { Boxed(&'a BoxedString), Inline(&'a InlineString), } pub(crate) enum StringCastMut<'a> { Boxed(&'a mut BoxedString), Inline(&'a mut InlineString), } pub(crate) enum StringCastInto { Boxed(BoxedString), Inline(InlineString), } smartstring-1.0.1/src/config.rs000064400000000000000000000075600072674642500146420ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. use crate::{boxed::BoxedString, inline::InlineString, SmartString}; use alloc::string::String; use core::mem::{align_of, size_of}; use static_assertions::{assert_eq_align, assert_eq_size, const_assert, const_assert_eq}; /// A compact string representation equal to [`String`] in size with guaranteed inlining. /// /// This representation relies on pointer alignment to be able to store a discriminant bit in its /// inline form that will never be present in its [`String`] form, thus /// giving us 24 bytes on 64-bit architectures, and 12 bytes on 32-bit, minus one bit, to encode our /// inline string. It uses the rest of the discriminant bit's byte to encode the string length, and /// the remaining bytes (23 or 11 depending on arch) to store the string data. 
When the available space is exceeded, /// it swaps itself out with a [`String`] containing its previous /// contents, relying on the discriminant bit in the [`String`]'s pointer to be unset, so we can /// store the [`String`] safely without taking up any extra space for a discriminant. /// /// This performs generally as well as [`String`] on all ops on boxed strings, and /// better than [`String`]s on inlined strings. #[derive(Debug)] pub struct Compact; /// A representation similar to [`Compact`] but which doesn't re-inline strings. /// /// This is a variant of [`Compact`] which doesn't aggressively inline strings. /// Where [`Compact`] automatically turns a heap allocated string back into an /// inlined string if it should become short enough, [`LazyCompact`] keeps /// it heap allocated once heap allocation has occurred. If your aim is to defer heap /// allocation as much as possible, rather than to ensure cache locality, this is the /// variant you want - it won't allocate until the inline capacity is exceeded, and it /// also won't deallocate once allocation has occurred, which risks reallocation if the /// string exceeds its inline capacity in the future. #[derive(Debug)] pub struct LazyCompact; /// Marker trait for [`SmartString`] representations. /// /// See [`LazyCompact`] and [`Compact`]. pub trait SmartStringMode { /// The inline string type for this layout. type InlineArray: AsRef<[u8]> + AsMut<[u8]> + Clone + Copy; /// A constant to decide whether to turn a wrapped string back into an inlined /// string whenever possible (`true`) or leave it as a wrapped string once wrapping /// has occurred (`false`). const DEALLOC: bool; } impl SmartStringMode for Compact { type InlineArray = [u8; size_of::() - 1]; const DEALLOC: bool = true; } impl SmartStringMode for LazyCompact { type InlineArray = [u8; size_of::() - 1]; const DEALLOC: bool = false; } /// The maximum capacity of an inline string, in bytes. 
pub const MAX_INLINE: usize = size_of::() - 1; // Assert that we're not using more space than we can encode in the header byte, // just in case we're on a 1024-bit architecture. const_assert!(MAX_INLINE < 128); // Assert that all the structs are of the expected size. assert_eq_size!(BoxedString, SmartString); assert_eq_size!(BoxedString, SmartString); assert_eq_size!(InlineString, SmartString); assert_eq_size!(InlineString, SmartString); assert_eq_align!(BoxedString, String); assert_eq_align!(InlineString, String); assert_eq_align!(SmartString, String); assert_eq_align!(SmartString, String); assert_eq_size!(String, SmartString); assert_eq_size!(String, SmartString); // Assert that `SmartString` is aligned correctly. const_assert_eq!(align_of::(), align_of::>()); const_assert_eq!(align_of::(), align_of::>()); smartstring-1.0.1/src/inline.rs000064400000000000000000000044770072674642500146570ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. 
use crate::{config::MAX_INLINE, marker_byte::Marker, ops::GenericString}; use core::{ ops::{Deref, DerefMut}, str::{from_utf8_unchecked, from_utf8_unchecked_mut}, }; #[cfg(target_endian = "little")] #[repr(C)] #[cfg_attr(target_pointer_width = "64", repr(align(8)))] #[cfg_attr(target_pointer_width = "32", repr(align(4)))] pub(crate) struct InlineString { pub(crate) marker: Marker, pub(crate) data: [u8; MAX_INLINE], } #[cfg(target_endian = "big")] #[repr(C)] #[cfg_attr(target_pointer_width = "64", repr(align(8)))] #[cfg_attr(target_pointer_width = "32", repr(align(4)))] pub(crate) struct InlineString { pub(crate) data: [u8; MAX_INLINE], pub(crate) marker: Marker, } impl Clone for InlineString { fn clone(&self) -> Self { unreachable!("InlineString should be copy!") } } impl Copy for InlineString {} impl Deref for InlineString { type Target = str; fn deref(&self) -> &Self::Target { #[allow(unsafe_code)] unsafe { from_utf8_unchecked(&self.data[..self.len()]) } } } impl DerefMut for InlineString { fn deref_mut(&mut self) -> &mut Self::Target { let len = self.len(); #[allow(unsafe_code)] unsafe { from_utf8_unchecked_mut(&mut self.data[..len]) } } } impl GenericString for InlineString { fn set_size(&mut self, size: usize) { self.marker.set_data(size as u8); } fn as_mut_capacity_slice(&mut self) -> &mut [u8] { self.data.as_mut() } } impl InlineString { pub(crate) const fn new() -> Self { Self { marker: Marker::empty(), data: [0; MAX_INLINE], } } pub(crate) fn len(&self) -> usize { let len = self.marker.data() as usize; debug_assert!(len <= MAX_INLINE); len } } impl From<&str> for InlineString { fn from(string: &str) -> Self { let len = string.len(); debug_assert!(len <= MAX_INLINE); let mut out = Self::new(); out.marker = Marker::new_inline(len as u8); out.data.as_mut()[..len].copy_from_slice(string.as_bytes()); out } } smartstring-1.0.1/src/iter.rs000064400000000000000000000043470072674642500143400ustar 00000000000000// This Source Code Form is subject to the terms of the 
Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. use crate::{ops::bounds_for, SmartString, SmartStringMode}; use core::{ fmt::{Debug, Error, Formatter}, iter::FusedIterator, ops::RangeBounds, str::Chars, }; /// A draining iterator for a [`SmartString`]. pub struct Drain<'a, Mode: SmartStringMode> { string: *mut SmartString, start: usize, end: usize, iter: Chars<'a>, } impl<'a, Mode: SmartStringMode> Drain<'a, Mode> { pub(crate) fn new(string: &'a mut SmartString, range: R) -> Self where R: RangeBounds, { let string_ptr: *mut _ = string; let len = string.len(); let (start, end) = bounds_for(&range, len); assert!(start <= end); assert!(end <= len); assert!(string.as_str().is_char_boundary(start)); assert!(string.as_str().is_char_boundary(end)); let iter = string.as_str()[start..end].chars(); Drain { string: string_ptr, start, end, iter, } } } impl<'a, Mode: SmartStringMode> Drop for Drain<'a, Mode> { fn drop(&mut self) { #[allow(unsafe_code)] let string = unsafe { &mut *self.string }; debug_assert!(string.as_str().is_char_boundary(self.start)); debug_assert!(string.as_str().is_char_boundary(self.end)); string.replace_range(self.start..self.end, ""); } } impl<'a, Mode: SmartStringMode> Iterator for Drain<'a, Mode> { type Item = char; #[inline] fn next(&mut self) -> Option { self.iter.next() } #[inline] fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } } impl<'a, Mode: SmartStringMode> DoubleEndedIterator for Drain<'a, Mode> { #[inline] fn next_back(&mut self) -> Option { self.iter.next_back() } } impl<'a, Mode: SmartStringMode> FusedIterator for Drain<'a, Mode> {} impl<'a, Mode: SmartStringMode> Debug for Drain<'a, Mode> { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { f.pad("Drain { ... 
}") } } smartstring-1.0.1/src/lib.rs000064400000000000000000000740520072674642500141430ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. //! # Smart String //! //! [`SmartString`] is a wrapper around [`String`] which offers //! automatic inlining of small strings. It comes in two flavours: //! [`LazyCompact`], which takes up exactly as much space as a [`String`] //! and is generally a little faster, and [`Compact`], which is the same as //! [`LazyCompact`] except it will aggressively re-inline any expanded //! [`String`]s which become short enough to do so. //! [`LazyCompact`] is the default, and what you should be using unless //! you care considerably more about heap memory usage than performance. //! //! ## What Is It For? //! //! The intended use for [`SmartString`] is as a key type for a //! B-tree (such as [`std::collections::BTreeMap`]) or any kind of //! array operation where cache locality is critical. //! //! In general, it's a nice data type for reducing your heap allocations and //! increasing the locality of string data. If you use [`SmartString`] //! as a drop-in replacement for [`String`], you're almost certain to see //! a slight performance boost, as well as slightly reduced memory usage. //! //! ## How To Use It? //! //! [`SmartString`] has the exact same API as [`String`], //! all the clever bits happen automatically behind the scenes, so you could just: //! //! ```rust //! use smartstring::alias::String; //! use std::fmt::Write; //! //! let mut string = String::new(); //! string.push_str("This is just a string!"); //! string.clear(); //! write!(string, "Hello Joe!"); //! assert_eq!("Hello Joe!", string); //! ``` //! //! ## Give Me The Details //! //! [`SmartString`] is the same size as [`String`] and //! 
relies on pointer alignment to be able to store a discriminant bit in its //! inline form that will never be present in its [`String`] form, thus //! giving us 24 bytes (on 64-bit architectures) minus one bit to encode our //! inline string. It uses 23 bytes to store the string data and the remaining //! 7 bits to encode the string's length. When the available space is exceeded, //! it swaps itself out with a boxed string type containing its previous //! contents. Likewise, if the string's length should drop below its inline //! capacity again, it deallocates the string and moves its contents inline. //! //! In [`Compact`] mode, it is aggressive about inlining strings, meaning that if you modify a heap allocated //! string such that it becomes short enough for inlining, it will be inlined immediately //! and the allocated [`String`] will be dropped. This may cause multiple //! unintended allocations if you repeatedly adjust your string's length across the //! inline capacity threshold, so if your string's construction can get //! complicated and you're relying on performance during construction, it might be better //! to construct it as a [`String`] and convert it once construction is done. //! //! [`LazyCompact`] looks the same as [`Compact`], except //! it never re-inlines a string that's already been heap allocated, instead //! keeping the allocation around in case it needs it. This makes for less //! cache local strings, but is the best choice if you're more worried about //! time spent on unnecessary allocations than cache locality. //! //! ## Performance //! //! It doesn't aim to be more performant than [`String`] in the general case, //! except that it doesn't trigger heap allocations for anything shorter than //! its inline capacity and so can be reasonably expected to exceed //! [`String`]'s performance perceptibly on shorter strings, as well as being more //! memory efficient in these cases. There will always be a slight overhead on all //! 
operations on boxed strings, compared to [`String`]. //! //! ## Feature Flags //! //! `smartstring` comes with optional support for the following crates through Cargo //! feature flags. You can enable them in your `Cargo.toml` file like this: //! //! ```no_compile //! [dependencies] //! smartstring = { version = "*", features = ["proptest", "serde"] } //! ``` //! //! | Feature | Description | //! | ------- | ----------- | //! | [`arbitrary`](https://crates.io/crates/arbitrary) | [`Arbitrary`][Arbitrary] implementation for [`SmartString`]. | //! | [`proptest`](https://crates.io/crates/proptest) | A strategy for generating [`SmartString`]s from a regular expression. | //! | [`serde`](https://crates.io/crates/serde) | [`Serialize`][Serialize] and [`Deserialize`][Deserialize] implementations for [`SmartString`]. | //! //! [Serialize]: https://docs.rs/serde/latest/serde/trait.Serialize.html //! [Deserialize]: https://docs.rs/serde/latest/serde/trait.Deserialize.html //! [Arbitrary]: https://docs.rs/arbitrary/latest/arbitrary/trait.Arbitrary.html // Ensure all unsafe blocks get flagged for manual validation. 
#![deny(unsafe_code)] #![forbid(rust_2018_idioms)] #![deny(nonstandard_style)] #![warn(unreachable_pub, missing_debug_implementations, missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(needs_allocator_feature, feature(allocator_api))] extern crate alloc; use alloc::{ boxed::Box, string::{String, ToString}, }; use core::{ borrow::{Borrow, BorrowMut}, cmp::Ordering, convert::Infallible, fmt::{Debug, Display, Error, Formatter, Write}, hash::{Hash, Hasher}, iter::FromIterator, marker::PhantomData, mem::{forget, MaybeUninit}, ops::{ Add, Deref, DerefMut, Index, IndexMut, Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, }, ptr::drop_in_place, str::FromStr, }; #[cfg(feature = "std")] use std::borrow::Cow; mod config; pub use config::{Compact, LazyCompact, SmartStringMode, MAX_INLINE}; mod marker_byte; use marker_byte::Discriminant; mod inline; use inline::InlineString; mod boxed; use boxed::BoxedString; mod casts; use casts::{StringCast, StringCastInto, StringCastMut}; mod iter; pub use iter::Drain; mod ops; use ops::{string_op_grow, string_op_shrink}; #[cfg(feature = "serde")] mod serde; #[cfg(feature = "arbitrary")] mod arbitrary; #[cfg(feature = "proptest")] pub mod proptest; /// Convenient type aliases. pub mod alias { use super::*; /// A convenience alias for a [`LazyCompact`] layout [`SmartString`]. /// /// Just pretend it's a [`String`][String]! pub type String = SmartString; /// A convenience alias for a [`Compact`] layout [`SmartString`]. pub type CompactString = SmartString; } /// A smart string. /// /// This wraps one of two string types: an inline string or a boxed string. /// Conversion between the two happens opportunistically and transparently. /// /// It takes a layout as its type argument: one of [`Compact`] or [`LazyCompact`]. /// /// It mimics the interface of [`String`] except where behaviour cannot /// be guaranteed to stay consistent between its boxed and inline states. 
This means /// you still have `capacity()` and `shrink_to_fit()`, relating to state that only /// really exists in the boxed variant, because the inline variant can still give /// sensible behaviour for these operations, but `with_capacity()`, `reserve()` etc are /// absent, because they would have no effect on inline strings and the requested /// state changes wouldn't carry over if the inline string is promoted to a boxed /// one - not without also storing that state in the inline representation, which /// would waste precious bytes for inline string data. pub struct SmartString { data: MaybeUninit, mode: PhantomData, } impl Drop for SmartString { fn drop(&mut self) { if let StringCastMut::Boxed(string) = self.cast_mut() { #[allow(unsafe_code)] unsafe { drop_in_place(string) }; } } } impl Clone for SmartString { /// Clone a [`SmartString`]. /// /// If the string is inlined, this is a [`Copy`] operation. Otherwise, /// a string with the same capacity as the source is allocated. fn clone(&self) -> Self { match self.cast() { StringCast::Boxed(string) => Self::from_boxed(string.clone()), StringCast::Inline(string) => Self::from_inline(*string), } } } impl Deref for SmartString { type Target = str; #[inline(always)] fn deref(&self) -> &Self::Target { match self.cast() { StringCast::Boxed(string) => string.deref(), StringCast::Inline(string) => string.deref(), } } } impl DerefMut for SmartString { #[inline(always)] fn deref_mut(&mut self) -> &mut Self::Target { match self.cast_mut() { StringCastMut::Boxed(string) => string.deref_mut(), StringCastMut::Inline(string) => string.deref_mut(), } } } impl SmartString { /// Construct an empty string. /// /// This is a `const fn` version of [`SmartString::new`]. /// It's a temporary measure while we wait for trait bounds on /// type arguments to `const fn`s to stabilise, and will be deprecated /// once this happens. 
pub const fn new_const() -> Self { Self { data: MaybeUninit::new(InlineString::new()), mode: PhantomData, } } } impl SmartString { /// Construct an empty string. /// /// This is a `const fn` version of [`SmartString::new`]. /// It's a temporary measure while we wait for trait bounds on /// type arguments to `const fn`s to stabilise, and will be deprecated /// once this happens. pub const fn new_const() -> Self { Self { data: MaybeUninit::new(InlineString::new()), mode: PhantomData, } } } impl SmartString { /// Construct an empty string. #[inline(always)] pub fn new() -> Self { Self::from_inline(InlineString::new()) } fn from_boxed(boxed: BoxedString) -> Self { let mut out = Self { data: MaybeUninit::uninit(), mode: PhantomData, }; let data_ptr: *mut BoxedString = out.data.as_mut_ptr().cast(); #[allow(unsafe_code)] unsafe { data_ptr.write(boxed) }; out } fn from_inline(inline: InlineString) -> Self { Self { data: MaybeUninit::new(inline), mode: PhantomData, } } fn discriminant(&self) -> Discriminant { // unsafe { self.data.assume_init() }.marker.discriminant() let str_ptr: *const BoxedString = self.data.as_ptr().cast() as *const _ as *const BoxedString; #[allow(unsafe_code)] Discriminant::from_bit(BoxedString::check_alignment(unsafe { &*str_ptr })) } fn cast(&self) -> StringCast<'_> { #[allow(unsafe_code)] match self.discriminant() { Discriminant::Inline => StringCast::Inline(unsafe { &*self.data.as_ptr() }), Discriminant::Boxed => StringCast::Boxed(unsafe { &*self.data.as_ptr().cast() }), } } fn cast_mut(&mut self) -> StringCastMut<'_> { #[allow(unsafe_code)] match self.discriminant() { Discriminant::Inline => StringCastMut::Inline(unsafe { &mut *self.data.as_mut_ptr() }), Discriminant::Boxed => { StringCastMut::Boxed(unsafe { &mut *self.data.as_mut_ptr().cast() }) } } } fn cast_into(mut self) -> StringCastInto { #[allow(unsafe_code)] match self.discriminant() { Discriminant::Inline => StringCastInto::Inline(unsafe { self.data.assume_init() }), Discriminant::Boxed 
=> StringCastInto::Boxed(unsafe { let boxed_ptr: *mut BoxedString = self.data.as_mut_ptr().cast(); let string = boxed_ptr.read(); forget(self); string }), } } fn promote_from(&mut self, string: BoxedString) { debug_assert!(self.discriminant() == Discriminant::Inline); let data: *mut BoxedString = self.data.as_mut_ptr().cast(); #[allow(unsafe_code)] unsafe { data.write(string) }; } /// Attempt to inline the string if it's currently heap allocated. /// /// Returns the resulting state: `true` if it's inlined, `false` if it's not. fn try_demote(&mut self) -> bool { if Mode::DEALLOC { self.really_try_demote() } else { false } } /// Attempt to inline the string regardless of whether `Mode::DEALLOC` is set. fn really_try_demote(&mut self) -> bool { if let StringCastMut::Boxed(string) = self.cast_mut() { if string.len() > MAX_INLINE { false } else { let s: &str = string.deref(); let inlined = s.into(); #[allow(unsafe_code)] unsafe { drop_in_place(string); self.data.as_mut_ptr().write(inlined); } true } } else { true } } /// Return the length in bytes of the string. /// /// Note that this may differ from the length in `char`s. pub fn len(&self) -> usize { match self.cast() { StringCast::Boxed(string) => string.len(), StringCast::Inline(string) => string.len(), } } /// Test whether the string is empty. pub fn is_empty(&self) -> bool { self.len() == 0 } /// Test whether the string is currently inlined. pub fn is_inline(&self) -> bool { self.discriminant() == Discriminant::Inline } /// Get a reference to the string as a string slice. pub fn as_str(&self) -> &str { self.deref() } /// Get a reference to the string as a mutable string slice. pub fn as_mut_str(&mut self) -> &mut str { self.deref_mut() } /// Return the currently allocated capacity of the string. /// /// Note that if this is a boxed string, it returns [`String::capacity()`][String::capacity], /// but an inline string always returns [`MAX_INLINE`]. 
/// /// Note also that if a boxed string is converted into an inline string, its capacity is /// deallocated, and if the inline string is promoted to a boxed string in the future, /// it will be reallocated with a default capacity. pub fn capacity(&self) -> usize { if let StringCast::Boxed(string) = self.cast() { string.capacity() } else { MAX_INLINE } } /// Push a character to the end of the string. pub fn push(&mut self, ch: char) { string_op_grow!(ops::Push, self, ch) } /// Copy a string slice onto the end of the string. pub fn push_str(&mut self, string: &str) { string_op_grow!(ops::PushStr, self, string) } /// Shrink the capacity of the string to fit its contents exactly. /// /// This has no effect on inline strings, which always have a fixed capacity. /// Thus, it's not safe to assume that [`capacity()`][SmartString::capacity] will /// equal [`len()`][SmartString::len] after calling this. /// /// Calling this on a [`LazyCompact`] string that is currently /// heap allocated but is short enough to be inlined will deallocate the /// heap allocation and convert it to an inline string. pub fn shrink_to_fit(&mut self) { if let StringCastMut::Boxed(string) = self.cast_mut() { if string.len() > MAX_INLINE { string.shrink_to_fit(); } } self.really_try_demote(); } /// Truncate the string to `new_len` bytes. /// /// If `new_len` is larger than the string's current length, this does nothing. /// If `new_len` isn't on a UTF-8 character boundary, this method panics. pub fn truncate(&mut self, new_len: usize) { string_op_shrink!(ops::Truncate, self, new_len) } /// Pop a `char` off the end of the string. pub fn pop(&mut self) -> Option { string_op_shrink!(ops::Pop, self) } /// Remove a `char` from the string at the given index. /// /// If the index doesn't fall on a UTF-8 character boundary, this method panics. pub fn remove(&mut self, index: usize) -> char { string_op_shrink!(ops::Remove, self, index) } /// Insert a `char` into the string at the given index. 
/// /// If the index doesn't fall on a UTF-8 character boundary, this method panics. pub fn insert(&mut self, index: usize, ch: char) { string_op_grow!(ops::Insert, self, index, ch) } /// Insert a string slice into the string at the given index. /// /// If the index doesn't fall on a UTF-8 character boundary, this method panics. pub fn insert_str(&mut self, index: usize, string: &str) { string_op_grow!(ops::InsertStr, self, index, string) } /// Split the string into two at the given index. /// /// Returns the content to the right of the index as a new string, and removes /// it from the original. /// /// If the index doesn't fall on a UTF-8 character boundary, this method panics. pub fn split_off(&mut self, index: usize) -> Self { string_op_shrink!(ops::SplitOff, self, index) } /// Clear the string. /// /// This causes any memory reserved by the string to be immediately deallocated. pub fn clear(&mut self) { *self = Self::new(); } /// Filter out `char`s not matching a predicate. pub fn retain(&mut self, f: F) where F: FnMut(char) -> bool, { string_op_shrink!(ops::Retain, self, f) } /// Construct a draining iterator over a given range. /// /// This removes the given range from the string, and returns an iterator over the /// removed `char`s. pub fn drain(&mut self, range: R) -> Drain<'_, Mode> where R: RangeBounds, { Drain::new(self, range) } /// Replaces a range with the contents of a string slice. 
pub fn replace_range(&mut self, range: R, replace_with: &str) where R: RangeBounds, { string_op_grow!(ops::ReplaceRange, self, &range, replace_with); self.try_demote(); } } impl Default for SmartString { fn default() -> Self { Self::new() } } impl AsRef for SmartString { fn as_ref(&self) -> &str { self.deref() } } impl AsMut for SmartString { fn as_mut(&mut self) -> &mut str { self.deref_mut() } } impl AsRef<[u8]> for SmartString { fn as_ref(&self) -> &[u8] { self.deref().as_bytes() } } impl Borrow for SmartString { fn borrow(&self) -> &str { self.deref() } } impl BorrowMut for SmartString { fn borrow_mut(&mut self) -> &mut str { self.deref_mut() } } impl Index> for SmartString { type Output = str; fn index(&self, index: Range) -> &Self::Output { &self.deref()[index] } } impl Index> for SmartString { type Output = str; fn index(&self, index: RangeTo) -> &Self::Output { &self.deref()[index] } } impl Index> for SmartString { type Output = str; fn index(&self, index: RangeFrom) -> &Self::Output { &self.deref()[index] } } impl Index for SmartString { type Output = str; fn index(&self, _index: RangeFull) -> &Self::Output { self.deref() } } impl Index> for SmartString { type Output = str; fn index(&self, index: RangeInclusive) -> &Self::Output { &self.deref()[index] } } impl Index> for SmartString { type Output = str; fn index(&self, index: RangeToInclusive) -> &Self::Output { &self.deref()[index] } } impl IndexMut> for SmartString { fn index_mut(&mut self, index: Range) -> &mut Self::Output { &mut self.deref_mut()[index] } } impl IndexMut> for SmartString { fn index_mut(&mut self, index: RangeTo) -> &mut Self::Output { &mut self.deref_mut()[index] } } impl IndexMut> for SmartString { fn index_mut(&mut self, index: RangeFrom) -> &mut Self::Output { &mut self.deref_mut()[index] } } impl IndexMut for SmartString { fn index_mut(&mut self, _index: RangeFull) -> &mut Self::Output { self.deref_mut() } } impl IndexMut> for SmartString { fn index_mut(&mut self, index: 
RangeInclusive) -> &mut Self::Output { &mut self.deref_mut()[index] } } impl IndexMut> for SmartString { fn index_mut(&mut self, index: RangeToInclusive) -> &mut Self::Output { &mut self.deref_mut()[index] } } impl From<&'_ str> for SmartString { fn from(string: &'_ str) -> Self { if string.len() > MAX_INLINE { Self::from_boxed(string.to_string().into()) } else { Self::from_inline(string.into()) } } } impl From<&'_ mut str> for SmartString { fn from(string: &'_ mut str) -> Self { if string.len() > MAX_INLINE { Self::from_boxed(string.to_string().into()) } else { Self::from_inline(string.deref().into()) } } } impl From<&'_ String> for SmartString { fn from(string: &'_ String) -> Self { if string.len() > MAX_INLINE { Self::from_boxed(string.clone().into()) } else { Self::from_inline(string.deref().into()) } } } impl From for SmartString { fn from(string: String) -> Self { if string.len() > MAX_INLINE { Self::from_boxed(string.into()) } else { Self::from_inline(string.deref().into()) } } } impl From> for SmartString { fn from(string: Box) -> Self { if string.len() > MAX_INLINE { String::from(string).into() } else { Self::from(&*string) } } } #[cfg(feature = "std")] impl From> for SmartString { fn from(string: Cow<'_, str>) -> Self { if string.len() > MAX_INLINE { String::from(string).into() } else { Self::from(&*string) } } } impl<'a, Mode: SmartStringMode> Extend<&'a str> for SmartString { fn extend>(&mut self, iter: I) { for item in iter { self.push_str(item); } } } impl<'a, Mode: SmartStringMode> Extend<&'a char> for SmartString { fn extend>(&mut self, iter: I) { for item in iter { self.push(*item); } } } impl Extend for SmartString { fn extend>(&mut self, iter: I) { for item in iter { self.push(item); } } } impl Extend> for SmartString { fn extend>>(&mut self, iter: I) { for item in iter { self.push_str(&item); } } } impl Extend for SmartString { fn extend>(&mut self, iter: I) { for item in iter { self.push_str(&item); } } } impl<'a, Mode: SmartStringMode + 'a> 
Extend<&'a SmartString> for SmartString { fn extend>>(&mut self, iter: I) { for item in iter { self.push_str(item); } } } impl<'a, Mode: SmartStringMode> Extend<&'a String> for SmartString { fn extend>(&mut self, iter: I) { for item in iter { self.push_str(item); } } } impl Add for SmartString { type Output = Self; fn add(mut self, rhs: Self) -> Self::Output { self.push_str(&rhs); self } } impl Add<&'_ Self> for SmartString { type Output = Self; fn add(mut self, rhs: &'_ Self) -> Self::Output { self.push_str(rhs); self } } impl Add<&'_ str> for SmartString { type Output = Self; fn add(mut self, rhs: &'_ str) -> Self::Output { self.push_str(rhs); self } } impl Add<&'_ String> for SmartString { type Output = Self; fn add(mut self, rhs: &'_ String) -> Self::Output { self.push_str(rhs); self } } impl Add for SmartString { type Output = Self; fn add(mut self, rhs: String) -> Self::Output { self.push_str(&rhs); self } } impl Add> for String { type Output = Self; fn add(mut self, rhs: SmartString) -> Self::Output { self.push_str(&rhs); self } } impl FromIterator for SmartString { fn from_iter>(iter: I) -> Self { let mut out = Self::new(); out.extend(iter.into_iter()); out } } impl FromIterator for SmartString { fn from_iter>(iter: I) -> Self { let mut out = Self::new(); out.extend(iter.into_iter()); out } } impl<'a, Mode: SmartStringMode + 'a> FromIterator<&'a Self> for SmartString { fn from_iter>(iter: I) -> Self { let mut out = Self::new(); out.extend(iter.into_iter()); out } } impl<'a, Mode: SmartStringMode> FromIterator<&'a str> for SmartString { fn from_iter>(iter: I) -> Self { let mut out = Self::new(); out.extend(iter.into_iter()); out } } impl<'a, Mode: SmartStringMode> FromIterator<&'a String> for SmartString { fn from_iter>(iter: I) -> Self { let mut out = Self::new(); out.extend(iter.into_iter()); out } } impl FromIterator for SmartString { fn from_iter>(iter: I) -> Self { let mut out = Self::new(); for ch in iter { out.push(ch); } out } } impl FromStr for 
SmartString { type Err = Infallible; fn from_str(s: &str) -> Result { Ok(Self::from(s)) } } impl From> for String { /// Unwrap a boxed [`String`][String], or copy an inline string into a new [`String`][String]. /// /// [String]: https://doc.rust-lang.org/std/string/struct.String.html fn from(s: SmartString) -> Self { match s.cast_into() { StringCastInto::Boxed(string) => string.into(), StringCastInto::Inline(string) => string.to_string(), } } } impl PartialEq for SmartString { fn eq(&self, other: &str) -> bool { self.as_str() == other } } impl PartialEq<&'_ str> for SmartString { fn eq(&self, other: &&str) -> bool { self.as_str() == *other } } impl PartialEq> for &'_ str { fn eq(&self, other: &SmartString) -> bool { other.eq(*self) } } impl PartialEq> for str { fn eq(&self, other: &SmartString) -> bool { other.eq(self) } } impl PartialEq for SmartString { fn eq(&self, other: &String) -> bool { self.eq(other.as_str()) } } impl PartialEq> for String { fn eq(&self, other: &SmartString) -> bool { other.eq(self.as_str()) } } impl PartialEq for SmartString { fn eq(&self, other: &Self) -> bool { self.as_str() == other.as_str() } } impl Eq for SmartString {} impl PartialOrd for SmartString { fn partial_cmp(&self, other: &str) -> Option { self.as_str().partial_cmp(other) } } impl PartialOrd for SmartString { fn partial_cmp(&self, other: &Self) -> Option { self.partial_cmp(other.as_str()) } } impl Ord for SmartString { fn cmp(&self, other: &Self) -> Ordering { self.as_str().cmp(other.as_str()) } } impl Hash for SmartString { fn hash(&self, state: &mut H) { self.as_str().hash(state) } } impl Debug for SmartString { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { Debug::fmt(self.as_str(), f) } } impl Display for SmartString { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { Display::fmt(self.as_str(), f) } } impl Write for SmartString { fn write_str(&mut self, string: &str) -> Result<(), Error> { self.push_str(string); Ok(()) } } #[cfg(any(test, feature 
= "test"))]
#[allow(missing_docs)]
pub mod test;
smartstring-1.0.1/src/marker_byte.rs000064400000000000000000000030510072674642500156700ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

/// The two states encoded in the lowest bit of a [`Marker`] byte.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub(crate) enum Discriminant {
    /// Encoded as bit value `0`.
    Boxed,
    /// Encoded as bit value `1`.
    Inline,
}

impl Discriminant {
    /// Decode a discriminant from a single bit: `true` is `Inline`, `false` is `Boxed`.
    #[inline(always)]
    pub(crate) const fn from_bit(bit: bool) -> Self {
        if bit {
            Self::Inline
        } else {
            Self::Boxed
        }
    }

    /// Encode the discriminant as the bit value stored in a [`Marker`].
    #[inline(always)]
    const fn bit(self) -> u8 {
        match self {
            Self::Boxed => 0,
            Self::Inline => 1,
        }
    }
}

/// A single byte packing a [`Discriminant`] (bit 0) together with a 7-bit
/// payload (bits 1..=7).
// NOTE(review): the payload is presumably the inline string's length — confirm
// against the inline string implementation.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Marker(u8);

impl Marker {
    /// Pack `data` into the upper seven bits and the discriminant into bit 0.
    ///
    /// `data` must be below `0x80`; a larger value would lose its top bit in
    /// the shift.
    #[inline(always)]
    const fn assemble(discriminant: Discriminant, data: u8) -> u8 {
        data << 1 | discriminant.bit()
    }

    /// A marker with the `Inline` discriminant and a payload of zero.
    #[inline(always)]
    pub(crate) const fn empty() -> Self {
        Self(Self::assemble(Discriminant::Inline, 0))
    }

    /// A marker with the `Inline` discriminant carrying `data`.
    ///
    /// Debug builds assert that `data` fits in seven bits.
    #[inline(always)]
    pub(crate) const fn new_inline(data: u8) -> Self {
        debug_assert!(data < 0x80);
        Self(Self::assemble(Discriminant::Inline, data))
    }

    /// Read the discriminant back out of bit 0.
    #[inline(always)]
    pub(crate) const fn discriminant(self) -> Discriminant {
        Discriminant::from_bit(self.0 & 0x01 != 0)
    }

    /// Read the 7-bit payload.
    #[inline(always)]
    pub(crate) const fn data(self) -> u8 {
        self.0 >> 1
    }

    /// Replace the payload while keeping the current discriminant.
    ///
    /// Debug builds assert that `byte` fits in seven bits.
    #[inline(always)]
    pub(crate) fn set_data(&mut self, byte: u8) {
        debug_assert!(byte < 0x80);
        self.0 = Self::assemble(self.discriminant(), byte);
    }
}
smartstring-1.0.1/src/ops.rs000064400000000000000000000201650072674642500141720ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

//! Generic string ops.
//!
//! `string_op_grow` is for ops which may grow but not shrink the target
//! string, and should have a `cap` method which will return the new
//! minimum required capacity.
//!
//! 
`string_op_shrink` is for ops which may shrinl but not grow the target //! string. They don't need a `cap` method, and will try to demote the //! string as appropriate after calling `op`. use core::{ marker::PhantomData, ops::{Bound, Deref, DerefMut, RangeBounds}, }; pub(crate) trait GenericString: Deref + DerefMut { fn set_size(&mut self, size: usize); fn as_mut_capacity_slice(&mut self) -> &mut [u8]; } macro_rules! string_op_grow { ($action:ty, $target:ident, $($arg:expr),*) => { match $target.cast_mut() { StringCastMut::Boxed(this) => { this.ensure_capacity(<$action>::cap(this, $($arg),*)); <$action>::op(this, $($arg),*) } StringCastMut::Inline(this) => { let new_size = <$action>::cap(this,$($arg),*); if new_size > MAX_INLINE { let mut new_str = BoxedString::from_str(new_size, this); let result = <$action>::op(&mut new_str, $($arg),*); $target.promote_from(new_str); result } else { <$action>::op(this, $($arg),*) } } } }; } pub(crate) use string_op_grow; macro_rules! string_op_shrink { ($action:ty, $target:ident, $($arg:expr),*) => {{ let result = match $target.cast_mut() { StringCastMut::Boxed(this) => { <$action>::op(this, $($arg),*) } StringCastMut::Inline(this) => { <$action>::op(this, $($arg),*) } }; $target.try_demote(); result }}; ($action:ty, $target:ident) => { string_op_shrink!($action, $target,) } } pub(crate) use string_op_shrink; use crate::{SmartString, SmartStringMode}; pub(crate) fn bounds_for(range: &R, max_len: usize) -> (usize, usize) where R: RangeBounds, { let start = match range.start_bound() { Bound::Included(&n) => n, Bound::Excluded(&n) => n.checked_add(1).unwrap(), Bound::Unbounded => 0, }; let end = match range.end_bound() { Bound::Included(&n) => n.checked_add(1).unwrap(), Bound::Excluded(&n) => n, Bound::Unbounded => max_len, }; (start, end) } fn insert_bytes(this: &mut S, index: usize, src: &[u8]) { let len = this.len(); let src_len = src.len(); let tail_index = index + src_len; if src_len > 0 { let buf = 
this.as_mut_capacity_slice(); buf.copy_within(index..len, tail_index); buf[index..tail_index].copy_from_slice(src); this.set_size(len + src_len); } } pub(crate) struct PushStr; impl PushStr { pub(crate) fn cap(this: &S, string: &str) -> usize { this.len() + string.len() } pub(crate) fn op(this: &mut S, string: &str) { let len = this.len(); let new_len = len + string.len(); this.as_mut_capacity_slice()[len..new_len].copy_from_slice(string.as_bytes()); this.set_size(new_len); } } pub(crate) struct Push; impl Push { pub(crate) fn cap(this: &S, ch: char) -> usize { this.len() + ch.len_utf8() } pub(crate) fn op(this: &mut S, ch: char) { let len = this.len(); let written = ch .encode_utf8(&mut this.as_mut_capacity_slice()[len..]) .len(); this.set_size(len + written); } } pub(crate) struct Truncate; impl Truncate { pub(crate) fn op(this: &mut S, new_len: usize) { if new_len < this.len() { assert!(this.deref().is_char_boundary(new_len)); this.set_size(new_len) } } } pub(crate) struct Pop; impl Pop { pub(crate) fn op(this: &mut S) -> Option { let ch = this.deref().chars().rev().next()?; this.set_size(this.len() - ch.len_utf8()); Some(ch) } } pub(crate) struct Remove; impl Remove { pub(crate) fn op(this: &mut S, index: usize) -> char { let ch = match this.deref()[index..].chars().next() { Some(ch) => ch, None => panic!("cannot remove a char from the end of a string"), }; let next = index + ch.len_utf8(); let len = this.len(); let tail_len = len - next; if tail_len > 0 { this.as_mut_capacity_slice().copy_within(next..len, index); } this.set_size(len - (next - index)); ch } } pub(crate) struct Insert; impl Insert { pub(crate) fn cap(this: &S, index: usize, ch: char) -> usize { assert!(this.deref().is_char_boundary(index)); this.len() + ch.len_utf8() } pub(crate) fn op(this: &mut S, index: usize, ch: char) { let mut buffer = [0; 4]; let buffer = ch.encode_utf8(&mut buffer).as_bytes(); insert_bytes(this, index, buffer); } } pub(crate) struct InsertStr; impl InsertStr { 
pub(crate) fn cap(this: &S, index: usize, string: &str) -> usize { assert!(this.deref().is_char_boundary(index)); this.len() + string.len() } pub(crate) fn op(this: &mut S, index: usize, string: &str) { insert_bytes(this, index, string.as_bytes()); } } pub(crate) struct SplitOff(PhantomData); impl SplitOff { pub(crate) fn op(this: &mut S, index: usize) -> SmartString { assert!(this.deref().is_char_boundary(index)); let result = this.deref()[index..].into(); this.set_size(index); result } } pub(crate) struct Retain; impl Retain { pub(crate) fn op(this: &mut S, mut f: F) where F: FnMut(char) -> bool, S: GenericString, { let len = this.len(); let mut del_bytes = 0; let mut index = 0; while index < len { let ch = this .deref_mut() .get(index..len) .unwrap() .chars() .next() .unwrap(); let ch_len = ch.len_utf8(); if !f(ch) { del_bytes += ch_len; } else if del_bytes > 0 { this.as_mut_capacity_slice() .copy_within(index..index + ch_len, index - del_bytes); } index += ch_len; } if del_bytes > 0 { this.set_size(len - del_bytes); } } } pub(crate) struct ReplaceRange; impl ReplaceRange { pub(crate) fn cap(this: &S, range: &R, replace_with: &str) -> usize where R: RangeBounds, S: GenericString, { let len = this.len(); let (start, end) = bounds_for(range, len); assert!(end >= start); assert!(end <= len); assert!(this.deref().is_char_boundary(start)); assert!(this.deref().is_char_boundary(end)); let replace_len = replace_with.len(); let end_size = len - end; start + replace_len + end_size } pub(crate) fn op(this: &mut S, range: &R, replace_with: &str) where R: RangeBounds, S: GenericString, { let len = this.len(); let (start, end) = bounds_for(range, len); let replace_len = replace_with.len(); let new_end = start + replace_len; let end_size = len - end; this.as_mut_capacity_slice().copy_within(end..len, new_end); if replace_len > 0 { this.as_mut_capacity_slice()[start..new_end].copy_from_slice(replace_with.as_bytes()); } this.set_size(start + replace_len + end_size); } } 
smartstring-1.0.1/src/proptest.rs000064400000000000000000000017120072674642500152460ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

//! `proptest` strategies (requires the `proptest` feature flag).

use crate::{SmartString, SmartStringMode};
use proptest::proptest;
use proptest::strategy::{BoxedStrategy, Strategy};
use proptest::string::Error;

/// Creates a strategy which generates [`SmartString`][SmartString]s matching the given regular expression.
///
/// This wraps [`proptest::string::string_regex`] and converts every generated
/// `String` into a `SmartString<Mode>`.
///
/// # Errors
///
/// Returns the error from `proptest` unchanged if `regex` is rejected.
///
/// [SmartString]: ../struct.SmartString.html
pub fn string_regex<Mode: SmartStringMode>(
    regex: &str,
) -> Result<BoxedStrategy<SmartString<Mode>>, Error>
where
    Mode: 'static,
{
    proptest::string::string_regex(regex).map(|g| g.prop_map(SmartString::from).boxed())
}

proptest! {
    #[test]
    fn strategy(string in string_regex(".+").unwrap()) {
        // The explicit mode pins the otherwise unconstrained `Mode` parameter
        // of `string_regex`.
        // NOTE(review): the mode chosen here is not recoverable from the
        // surrounding text; the assertion holds for any mode — confirm
        // against upstream.
        assert!(!SmartString::<crate::LazyCompact>::is_empty(&string));
    }
}
smartstring-1.0.1/src/serde.rs000064400000000000000000000036710072674642500144760ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

use crate::{SmartString, SmartStringMode}; use alloc::string::String; use core::{fmt, marker::PhantomData}; use serde::{ de::{Error, Visitor}, Deserialize, Deserializer, Serialize, Serializer, }; impl Serialize for SmartString { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_str(self) } } impl<'de, T: SmartStringMode> Deserialize<'de> for SmartString { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { deserializer .deserialize_string(SmartStringVisitor(PhantomData)) .map(SmartString::from) } } struct SmartStringVisitor(PhantomData<*const T>); impl<'de, T: SmartStringMode> Visitor<'de> for SmartStringVisitor { type Value = SmartString; fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { formatter.write_str("a string") } fn visit_string(self, v: String) -> Result where E: Error, { Ok(SmartString::from(v)) } fn visit_str(self, v: &str) -> Result where E: Error, { Ok(SmartString::from(v)) } } #[cfg(test)] mod test { use super::*; use crate::Compact; #[test] fn test_ser_de() { use serde_test::{assert_tokens, Token}; let strings = [ "", "small test", "longer than inline string for serde testing", ]; for &string in strings.iter() { let value = SmartString::::from(string); assert_tokens(&value, &[Token::String(string)]); } } } smartstring-1.0.1/src/test.rs000064400000000000000000000526320072674642500143540ustar 00000000000000// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. 
use crate::{config::MAX_INLINE, SmartString, SmartStringMode}; use std::{ cmp::Ordering, fmt::Debug, iter::FromIterator, ops::{Index, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive}, panic::{catch_unwind, set_hook, take_hook, AssertUnwindSafe}, }; #[cfg(not(test))] use arbitrary::Arbitrary; #[cfg(test)] use proptest::proptest; #[cfg(test)] use proptest_derive::Arbitrary; pub fn assert_panic(f: F) where F: FnOnce() -> A, { let old_hook = take_hook(); set_hook(Box::new(|_| {})); let result = catch_unwind(AssertUnwindSafe(f)); set_hook(old_hook); assert!( result.is_err(), "action that should have panicked didn't panic" ); } #[derive(Arbitrary, Debug, Clone)] pub enum Constructor { New, FromString(String), FromStringSlice(String), FromChars(Vec), } impl Constructor { pub fn construct(self) -> (String, SmartString) { match self { Self::New => (String::new(), SmartString::new()), Self::FromString(string) => (string.clone(), SmartString::from(string)), Self::FromStringSlice(string) => (string.clone(), SmartString::from(string.as_str())), Self::FromChars(chars) => ( String::from_iter(chars.clone()), SmartString::from_iter(chars), ), } } } #[derive(Arbitrary, Debug, Clone)] pub enum TestBounds { Range(usize, usize), From(usize), To(usize), Full, Inclusive(usize, usize), ToInclusive(usize), } impl TestBounds { fn should_panic(&self, control: &str) -> bool { let len = control.len(); match self { Self::Range(start, end) if start > end || start > &len || end > &len || !control.is_char_boundary(*start) || !control.is_char_boundary(*end) => { true } Self::From(start) if start > &len || !control.is_char_boundary(*start) => true, Self::To(end) if end > &len || !control.is_char_boundary(*end) => true, Self::Inclusive(start, end) if *end == usize::max_value() || *start > (end + 1) || start > &len || end > &len || !control.is_char_boundary(*start) || !control.is_char_boundary(*end + 1) => { true } Self::ToInclusive(end) if end > &len || 
!control.is_char_boundary(*end + 1) => true, _ => false, } } fn assert_range(&self, control: &A, subject: &B) where A: Index>, B: Index>, A: Index>, B: Index>, A: Index>, B: Index>, A: Index, B: Index, A: Index>, B: Index>, A: Index>, B: Index>, >>::Output: PartialEq<>>::Output> + Debug, >>::Output: Debug, >>::Output: PartialEq<>>::Output> + Debug, >>::Output: Debug, >>::Output: PartialEq<>>::Output> + Debug, >>::Output: Debug, >::Output: PartialEq<>::Output> + Debug, >::Output: Debug, >>::Output: PartialEq<>>::Output> + Debug, >>::Output: Debug, >>::Output: PartialEq<>>::Output> + Debug, >>::Output: Debug, { match self { Self::Range(start, end) => assert_eq!(control[*start..*end], subject[*start..*end]), Self::From(start) => assert_eq!(control[*start..], subject[*start..]), Self::To(end) => assert_eq!(control[..*end], subject[..*end]), Self::Full => assert_eq!(control[..], subject[..]), Self::Inclusive(start, end) => { assert_eq!(control[*start..=*end], subject[*start..=*end]) } Self::ToInclusive(end) => assert_eq!(control[..=*end], subject[..=*end]), } } } #[derive(Arbitrary, Debug, Clone)] pub enum Action { Slice(TestBounds), Push(char), PushStr(String), Truncate(usize), Pop, Remove(usize), Insert(usize, char), InsertStr(usize, String), SplitOff(usize), Clear, IntoString, Retain(String), Drain(TestBounds), ReplaceRange(TestBounds, String), } impl Action { pub fn perform( self, control: &mut String, subject: &mut SmartString, ) { match self { Self::Slice(range) => { if range.should_panic(control) { assert_panic(|| range.assert_range(control, subject)) } else { range.assert_range(control, subject); } } Self::Push(ch) => { control.push(ch); subject.push(ch); } Self::PushStr(ref string) => { control.push_str(string); subject.push_str(string); } Self::Truncate(index) => { if index <= control.len() && !control.is_char_boundary(index) { assert_panic(|| control.truncate(index)); assert_panic(|| subject.truncate(index)); } else { control.truncate(index); 
subject.truncate(index); } } Self::Pop => { assert_eq!(control.pop(), subject.pop()); } Self::Remove(index) => { if index >= control.len() || !control.is_char_boundary(index) { assert_panic(|| control.remove(index)); assert_panic(|| subject.remove(index)); } else { assert_eq!(control.remove(index), subject.remove(index)); } } Self::Insert(index, ch) => { if index > control.len() || !control.is_char_boundary(index) { assert_panic(|| control.insert(index, ch)); assert_panic(|| subject.insert(index, ch)); } else { control.insert(index, ch); subject.insert(index, ch); } } Self::InsertStr(index, ref string) => { if index > control.len() || !control.is_char_boundary(index) { assert_panic(|| control.insert_str(index, string)); assert_panic(|| subject.insert_str(index, string)); } else { control.insert_str(index, string); subject.insert_str(index, string); } } Self::SplitOff(index) => { if !control.is_char_boundary(index) { assert_panic(|| control.split_off(index)); assert_panic(|| subject.split_off(index)); } else { assert_eq!(control.split_off(index), subject.split_off(index)); } } Self::Clear => { control.clear(); subject.clear(); } Self::IntoString => { assert_eq!(control, &Into::::into(subject.clone())); } Self::Retain(filter) => { let f = |ch| filter.contains(ch); control.retain(f); subject.retain(f); } Self::Drain(range) => { // FIXME: ignoring inclusive bounds at usize::max_value(), pending https://github.com/rust-lang/rust/issues/72237 match range { TestBounds::Inclusive(_, end) if end == usize::max_value() => return, TestBounds::ToInclusive(end) if end == usize::max_value() => return, _ => {} } if range.should_panic(control) { assert_panic(|| match range { TestBounds::Range(start, end) => { (control.drain(start..end), subject.drain(start..end)) } TestBounds::From(start) => (control.drain(start..), subject.drain(start..)), TestBounds::To(end) => (control.drain(..end), subject.drain(..end)), TestBounds::Full => (control.drain(..), subject.drain(..)), 
TestBounds::Inclusive(start, end) => { (control.drain(start..=end), subject.drain(start..=end)) } TestBounds::ToInclusive(end) => { (control.drain(..=end), subject.drain(..=end)) } }) } else { let (control_iter, subject_iter) = match range { TestBounds::Range(start, end) => { (control.drain(start..end), subject.drain(start..end)) } TestBounds::From(start) => (control.drain(start..), subject.drain(start..)), TestBounds::To(end) => (control.drain(..end), subject.drain(..end)), TestBounds::Full => (control.drain(..), subject.drain(..)), TestBounds::Inclusive(start, end) => { (control.drain(start..=end), subject.drain(start..=end)) } TestBounds::ToInclusive(end) => { (control.drain(..=end), subject.drain(..=end)) } }; let control_result: String = control_iter.collect(); let subject_result: String = subject_iter.collect(); assert_eq!(control_result, subject_result); } } Self::ReplaceRange(range, string) => { // FIXME: ignoring inclusive bounds at usize::max_value(), pending https://github.com/rust-lang/rust/issues/72237 match range { TestBounds::Inclusive(_, end) if end == usize::max_value() => return, TestBounds::ToInclusive(end) if end == usize::max_value() => return, _ => {} } if range.should_panic(control) { assert_panic(|| match range { TestBounds::Range(start, end) => { control.replace_range(start..end, &string); subject.replace_range(start..end, &string); } TestBounds::From(start) => { control.replace_range(start.., &string); subject.replace_range(start.., &string); } TestBounds::To(end) => { control.replace_range(..end, &string); subject.replace_range(..end, &string); } TestBounds::Full => { control.replace_range(.., &string); subject.replace_range(.., &string); } TestBounds::Inclusive(start, end) => { control.replace_range(start..=end, &string); subject.replace_range(start..=end, &string); } TestBounds::ToInclusive(end) => { control.replace_range(..=end, &string); subject.replace_range(..=end, &string); } }) } else { match range { TestBounds::Range(start, end) 
=> { control.replace_range(start..end, &string); subject.replace_range(start..end, &string); } TestBounds::From(start) => { control.replace_range(start.., &string); subject.replace_range(start.., &string); } TestBounds::To(end) => { control.replace_range(..end, &string); subject.replace_range(..end, &string); } TestBounds::Full => { control.replace_range(.., &string); subject.replace_range(.., &string); } TestBounds::Inclusive(start, end) => { control.replace_range(start..=end, &string); subject.replace_range(start..=end, &string); } TestBounds::ToInclusive(end) => { control.replace_range(..=end, &string); subject.replace_range(..=end, &string); } } } } } } } fn assert_invariants(control: &str, subject: &SmartString) { assert_eq!(control.len(), subject.len()); assert_eq!(control, subject.as_str()); if Mode::DEALLOC { assert_eq!( subject.is_inline(), subject.len() <= MAX_INLINE, "len {} should be inline (MAX_INLINE = {}) but was boxed", subject.len(), MAX_INLINE ); } assert_eq!( control.partial_cmp("ordering test"), subject.partial_cmp("ordering test") ); let control_smart: SmartString = control.into(); assert_eq!(Ordering::Equal, subject.cmp(&control_smart)); } pub fn test_everything(constructor: Constructor, actions: Vec) { let (mut control, mut subject): (_, SmartString) = constructor.construct(); assert_invariants(&control, &subject); for action in actions { action.perform(&mut control, &mut subject); assert_invariants(&control, &subject); } } pub fn test_ordering(left: String, right: String) { let smart_left = SmartString::::from(&left); let smart_right = SmartString::::from(&right); assert_eq!(left.cmp(&right), smart_left.cmp(&smart_right)); } #[cfg(test)] mod tests { use super::{Action::*, Constructor::*, TestBounds::*, *}; use crate::{Compact, LazyCompact}; proptest! 
{ #[test] fn proptest_everything_compact(constructor: Constructor, actions: Vec) { test_everything::(constructor, actions); } #[test] fn proptest_everything_lazycompact(constructor: Constructor, actions: Vec) { test_everything::(constructor, actions); } #[test] fn proptest_ordering_compact(left: String, right: String) { test_ordering::(left,right) } #[test] fn proptest_ordering_lazycompact(left: String, right: String) { test_ordering::(left,right) } #[test] fn proptest_eq(left: String, right: String) { fn test_eq(left: &str, right: &str) { let smart_left = SmartString::::from(left); let smart_right = SmartString::::from(right); assert_eq!(smart_left, left); assert_eq!(smart_left, *left); assert_eq!(smart_left, left.to_string()); assert_eq!(smart_left == smart_right, left == right); assert_eq!(left, smart_left); assert_eq!(*left, smart_left); assert_eq!(left.to_string(), smart_left); } test_eq::(&left, &right); test_eq::(&left, &right); } } #[test] fn must_panic_on_insert_outside_char_boundary() { test_everything::( Constructor::FromString("a0 A୦a\u{2de0}0 🌀Aa".to_string()), vec![ Action::Push(' '), Action::Push('¡'), Action::Pop, Action::Pop, Action::Push('¡'), Action::Pop, Action::Push('𐀀'), Action::Push('\u{e000}'), Action::Pop, Action::Insert(14, 'A'), ], ); } #[test] fn must_panic_on_out_of_bounds_range() { test_everything::( Constructor::New, vec![Action::Slice(TestBounds::Range(0, usize::MAX - 1))], ); } #[test] fn must_not_promote_before_insert_succeeds() { test_everything::( Constructor::FromString("ኲΣ A𑒀a ®Σ a0🠀 aA®A".to_string()), vec![Action::Insert(21, ' ')], ); } #[test] fn must_panic_on_slice_outside_char_boundary() { test_everything::( Constructor::New, vec![Action::Push('Ь'), Action::Slice(TestBounds::ToInclusive(0))], ) } #[test] fn dont_panic_when_inserting_a_string_at_exactly_inline_capacity() { let string: String = (0..MAX_INLINE).map(|_| '\u{0}').collect(); test_everything::(Constructor::New, vec![Action::InsertStr(0, string)]) } #[test] 
#[should_panic] fn drain_bounds_integer_overflow_must_panic() { let mut string = SmartString::::from("מ"); string.drain(..=usize::max_value()); } #[test] fn shouldnt_panic_on_inclusive_range_end_one_less_than_start() { test_everything::( Constructor::FromString("\'\'\'\'\'[[[[[[[[[[[-[[[[[[[[[[[[[[[[[[[[[[".to_string()), vec![Action::Slice(TestBounds::Inclusive(1, 0))], ) } #[test] fn drain_over_inline_boundary() { test_everything::( FromString((0..24).map(|_| 'x').collect()), vec![Drain(Range(0, 1))], ) } #[test] fn drain_wrapped_shouldnt_drop_twice() { test_everything::( FromString((0..25).map(|_| 'x').collect()), vec![Drain(Range(0, 1))], ) } #[test] fn fail() { let value = "fo\u{0}\u{0}\u{0}\u{8}\u{0}\u{0}\u{0}\u{0}____bbbbb_____bbbbbbbbb"; let mut control = String::from(value); let mut string = SmartString::::from(value); control.drain(..=0); string.drain(..=0); let control_smart: SmartString = control.into(); assert_eq!(control_smart, string); assert_eq!(Ordering::Equal, string.cmp(&control_smart)); } #[test] fn dont_panic_on_removing_last_index_from_an_inline_string() { let mut s = SmartString::::from("\u{323}\u{323}\u{323}ω\u{323}\u{323}\u{323}㌣\u{e323}㤘"); s.remove(20); } #[test] fn check_alignment() { use crate::boxed::BoxedString; use crate::inline::InlineString; use crate::marker_byte::Discriminant; let inline = InlineString::new(); let inline_ptr: *const InlineString = &inline; let boxed_ptr: *const BoxedString = inline_ptr.cast(); #[allow(unsafe_code)] let discriminant = Discriminant::from_bit(BoxedString::check_alignment(unsafe { &*boxed_ptr })); assert_eq!(Discriminant::Inline, discriminant); let boxed = BoxedString::from_str(32, "welp"); let discriminant = Discriminant::from_bit(BoxedString::check_alignment(&boxed)); assert_eq!(Discriminant::Boxed, discriminant); let mut s = SmartString::::new(); assert_eq!(Discriminant::Inline, s.discriminant()); let big_str = "1234567890123456789012345678901234567890"; assert!(big_str.len() > MAX_INLINE); 
s.push_str(big_str); assert_eq!(Discriminant::Boxed, s.discriminant()); s.clear(); assert_eq!(Discriminant::Inline, s.discriminant()); } #[test] fn from_string() { let std_s = String::from("I am a teapot short and stout; here is my handle, here is my snout"); let smart_s: SmartString = std_s.clone().into(); assert_eq!(std_s, smart_s); let unsmart_s: String = smart_s.clone().into(); assert_eq!(smart_s, unsmart_s); assert_eq!(std_s, unsmart_s); // This test exists just to provoke a Miri problem when dropping a string created by SmartString::into::() (#28) } }