similar-2.2.1/.cargo_vcs_info.json0000644000000001360000000000100124760ustar { "git": { "sha1": "572ae1e547c0aac0b1bdf724e80be06a826de2a5" }, "path_in_vcs": "" }similar-2.2.1/.github/FUNDING.yml000064400000000000000000000000241046102023000144370ustar 00000000000000github: [mitsuhiko] similar-2.2.1/.github/workflows/clippy.yml000064400000000000000000000005141046102023000167060ustar 00000000000000name: Clippy on: [push] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - uses: actions-rs/toolchain@v1 with: toolchain: stable profile: minimal components: clippy, rustfmt override: true - name: Run clippy run: make lint similar-2.2.1/.github/workflows/rustfmt.yml000064400000000000000000000005261046102023000171150ustar 00000000000000name: Rustfmt on: [push] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - uses: actions-rs/toolchain@v1 with: toolchain: stable profile: minimal components: clippy, rustfmt override: true - name: Run rustfmt run: make format-check similar-2.2.1/.github/workflows/tests.yml000064400000000000000000000011621046102023000165500ustar 00000000000000name: Tests on: [push] jobs: build-latest: name: Test on Latest runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - uses: actions-rs/toolchain@v1 with: toolchain: stable profile: minimal override: true - name: Test run: make test build-stable: name: Build on 1.41.0 runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - uses: actions-rs/toolchain@v1 with: toolchain: 1.41.0 profile: minimal override: true - name: Test run: cargo check --all-features similar-2.2.1/.gitignore000064400000000000000000000000231046102023000132510ustar 00000000000000/target Cargo.lock similar-2.2.1/.vscode/settings.json000064400000000000000000000002321046102023000153570ustar 00000000000000{ "rust-analyzer.cargo.allFeatures": true, "rust-analyzer.assist.importMergeBehavior": "last", "rust-analyzer.checkOnSave.command": "clippy" }similar-2.2.1/CHANGELOG.md000064400000000000000000000065711046102023000131100ustar 00000000000000# Changelog All notable changes to similar are documented here. ## 2.2.1 * Fixed a panic in LCS diffs on matching input. (#43) ## 2.2.0 * Fixed a panic in text diff generation. (#37) ## 2.1.0 * Removed deprecated alternative slice diffing functions. * Added `serde` feature to allow serialization with serde. ## 2.0.0 * Change the `Change` type and associated methods to work on any `T: Clone` instead of `&T`. This makes the `iter_changes` method also work on slices of integers or other values. ## 1.3.0 * Performance improvements for the LCS algorithm. * Small performance improvements by adding an early opt-out for and inline highlighting. * Added `IdentifyDistinct` to convert sequences to ints. * Small performance improvements for larger text diffs by using `IdentifyDistinct` automatically above a threshold. * Added deadlines to all diffing algorithms to bail early. * Deprecated slice diffing methods in the individual algorithm modules. * Use a default timeout for the inline highlighting feature. * Added a compacting step to clean up diffs. This results in nicer looking diffs and fewer edits. This is happening automatically for captured diffs and is exposed through the `Capture` type. * Fix incorrect ranges in unified diff output. ## 1.2.2 * Added support for Rust 1.41.0 for better compatibility. ## 1.2.1 * Added support for Rust 1.43.0 for better compatibility. ## 1.2.0 * Make the unicode feature optional for inline diffing. * Added Hunt–McIlroy LCS algorithm (`lcs`). * Changed the implementation of Mayer's diff. This has slightly changed the behavior but resulted in significantly improved performance and more readable code. * Added `NoFinishHook` to aid composing of diff hooks. ## 1.1.0 * More generic lifetimes for `iter_changes` and `iter_inline_changes`. * Added `iter_all_changes` shortcut as this is commonly useful. * Added `iter_slices` to `DiffOp` to quickly get an iterator over the encoded slices rather than individual items like `iter_changes` does. * Added the `utils` module with various text diffing utilities. * Added `TextDiffRemapper` which helps with working with the original, pre `TextDiff` tokenization slices. ## 1.0.0 * Add `get_diff_ratio`. * Add support for byte diffing and change the text interface to abstract over `DiffableStr`. * Restructured crate layout greatly. Text diffing is now on the crate root, some functionality remains in the algorithms. * The `Change` type now also works for non text diffs. ## 0.5.0 * Add `DiffOp::apply_to_hook` to apply a captured op to a diff hook. * Added missing newline handling to the `Changes` type. * Made unified diff support more flexible through the introduction of the `UnifiedDiff` type. * Fixed grouped diff operation to return an empty result if the diff does not show any changes. * Added inline diff highlighting support. * Changed word splitting to split into words and whitespace. * Added support for unicode based word splitting (`TextDiff::from_unicode_words`). ## 0.4.0 * Change `get_close_matches` to use Python's quick ratio optimization and order lexicographically when tied. ## 0.3.0 * Added grapheme and character level diffing utilities. * `DiffOp::as_tag_tuple` is now taking the argument by reference. * Added `TextDiff::ratio`. * Added `get_close_matches`. ## 0.2.0 * Fixed a bug in the patience algorithm causing it not not work. ## 0.1.0 * Initial release. similar-2.2.1/Cargo.lock0000644000000173510000000000100104600ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "autocfg" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "bstr" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" dependencies = [ "lazy_static", "memchr", "regex-automata", ] [[package]] name = "console" version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31" dependencies = [ "encode_unicode", "libc", "once_cell", "regex", "terminal_size", "unicode-width", "winapi", ] [[package]] name = "dtoa" version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0" [[package]] name = "encode_unicode" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" [[package]] name = "indexmap" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" dependencies = [ "autocfg", "hashbrown", ] [[package]] name = "insta" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3cb858fc306825b542b1311d5fd536e4483680528f303a17a1d6803b0f6ce17" dependencies = [ "console", "lazy_static", "serde", "serde_json", "serde_yaml", "similar 2.1.0", "uuid", ] [[package]] name = "itoa" version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.107" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbe5e23404da5b4f555ef85ebed98fb4083e55a00c317800bc2a50ede9f3d219" [[package]] name = "linked-hash-map" version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" [[package]] name = "memchr" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" [[package]] name = "once_cell" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" [[package]] name = "proc-macro2" version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43" dependencies = [ "unicode-xid", ] [[package]] name = "quote" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" dependencies = [ "proc-macro2", ] [[package]] name = "regex" version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" dependencies = [ "regex-syntax", ] [[package]] name = "regex-automata" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" [[package]] name = "ryu" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" [[package]] name = "serde" version = "1.0.130" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.130" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e277c495ac6cd1a01a58d0a0c574568b4d1ddf14f59965c6a58b8d96400b54f3" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "serde_yaml" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8c608a35705a5d3cdc9fbe403147647ff34b921f8e833e49306df898f9b20af" dependencies = [ "dtoa", "indexmap", "serde", "yaml-rust", ] [[package]] name = "similar" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3" [[package]] name = "similar" version = "2.2.1" dependencies = [ "bstr", "console", "insta", "serde", "serde_json", "unicode-segmentation", ] [[package]] name = "syn" version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2afee18b8beb5a596ecb4a2dce128c719b4ba399d34126b9e4396e3f9860966" dependencies = [ "proc-macro2", "quote", "unicode-xid", ] [[package]] name = "terminal_size" version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" dependencies = [ "libc", "winapi", ] [[package]] name = "unicode-segmentation" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" [[package]] name = "unicode-width" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" [[package]] name = "unicode-xid" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" [[package]] name = "uuid" version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "yaml-rust" version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" dependencies = [ "linked-hash-map", ] similar-2.2.1/Cargo.toml0000644000000041730000000000100105010ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" rust-version = "1.41" name = "similar" version = "2.2.1" authors = [ "Armin Ronacher ", "Pierre-Étienne Meunier ", "Brandon Williams ", ] exclude = ["assets/*"] description = "A diff library for Rust" readme = "README.md" keywords = [ "diff", "difference", "patience", "compare", "changes", ] license = "Apache-2.0" repository = "https://github.com/mitsuhiko/similar" [package.metadata.docs.rs] all-features = true [profile.release] debug = true [[example]] name = "patience" required-features = [ "text", "inline", ] [[example]] name = "terminal" required-features = ["text"] [[example]] name = "terminal-inline" required-features = [ "text", "inline", "bytes", ] [[example]] name = "original-slices" required-features = ["text"] [[example]] name = "udiff" required-features = [ "text", "bytes", ] [[example]] name = "close-matches" required-features = ["text"] [[example]] name = "large" required-features = ["text"] [[example]] name = "serde" required-features = [ "text", "serde", ] [dependencies.bstr] version = "0.2.14" optional = true default-features = false [dependencies.serde] version = "1.0.130" features = ["derive"] optional = true [dependencies.unicode-segmentation] version = "1.7.1" optional = true [dev-dependencies.console] version = "0.15.0" [dev-dependencies.insta] version = "1.10.0" [dev-dependencies.serde_json] version = "1.0.68" [features] bytes = [ "bstr", "text", ] default = ["text"] inline = ["text"] text = [] unicode = [ "text", "unicode-segmentation", "bstr/unicode", ] similar-2.2.1/Cargo.toml.orig000064400000000000000000000031341046102023000141560ustar 00000000000000[package] name = "similar" version = "2.2.1" authors = ["Armin Ronacher ", "Pierre-Étienne Meunier ", "Brandon Williams "] edition = "2018" rust-version = "1.41" license = "Apache-2.0" description = "A diff library for Rust" repository = "https://github.com/mitsuhiko/similar" keywords = ["diff", "difference", "patience", "compare", "changes"] readme = "README.md" exclude = [ "assets/*" ] [package.metadata.docs.rs] all-features = true [features] default = ["text"] text = [] inline = ["text"] # this annoyingly currently also turns on bstr and not just bstr/unicode # unclear if this is fixable unicode = ["text", "unicode-segmentation", "bstr/unicode"] bytes = ["bstr", "text"] [dev-dependencies] insta = "1.10.0" console = "0.15.0" serde_json = "1.0.68" [dependencies] unicode-segmentation = { version = "1.7.1", optional = true } bstr = { version = "0.2.14", optional = true, default-features = false } serde = { version = "1.0.130", optional = true, features = ["derive"] } [[example]] name = "patience" required-features = ["text", "inline"] [[example]] name = "terminal" required-features = ["text"] [[example]] name = "terminal-inline" required-features = ["text", "inline", "bytes"] [[example]] name = "original-slices" required-features = ["text"] [[example]] name = "udiff" required-features = ["text", "bytes"] [[example]] name = "close-matches" required-features = ["text"] [[example]] name = "large" required-features = ["text"] [[example]] name = "serde" required-features = ["text", "serde"] [profile.release] debug = true similar-2.2.1/LICENSE000064400000000000000000000251371046102023000123030ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. similar-2.2.1/Makefile000064400000000000000000000007461046102023000127350ustar 00000000000000all: test build: @cargo build --all-features doc: @cargo doc --all-features test: @cargo test @cargo test --all-features @cargo test --no-default-features @cargo test --no-default-features --features bytes format: @rustup component add rustfmt 2> /dev/null @cargo fmt --all format-check: @rustup component add rustfmt 2> /dev/null @cargo fmt --all -- --check lint: @rustup component add clippy 2> /dev/null @cargo clippy .PHONY: all doc test format format-check lint similar-2.2.1/README.md000064400000000000000000000044711046102023000125530ustar 00000000000000# Similar: A Diffing Library [![Build Status](https://github.com/mitsuhiko/similar/workflows/Tests/badge.svg?branch=main)](https://github.com/mitsuhiko/similar/actions?query=workflow%3ATests) [![Crates.io](https://img.shields.io/crates/d/similar.svg)](https://crates.io/crates/similar) [![License](https://img.shields.io/github/license/mitsuhiko/similar)](https://github.com/mitsuhiko/similar/blob/main/LICENSE) [![rustc 1.41.0](https://img.shields.io/badge/rust-1.41%2B-orange.svg)](https://img.shields.io/badge/rust-1.41%2B-orange.svg) [![Documentation](https://docs.rs/similar/badge.svg)](https://docs.rs/similar) Similar is a dependency free crate for Rust that implements different diffing algorithms and high level interfaces for it. It is based on the [pijul](https://pijul.org/) implementation of the Patience algorithm and inherits some ideas from there. It also incorporates the Myer's diff algorithm which was largely written by Brandon Williams. This library was built for the [insta snapshot testing library](https://insta.rs). ```rust use similar::{ChangeTag, TextDiff}; fn main() { let diff = TextDiff::from_lines( "Hello World\nThis is the second line.\nThis is the third.", "Hallo Welt\nThis is the second line.\nThis is life.\nMoar and more", ); for change in diff.iter_all_changes() { let sign = match change.tag() { ChangeTag::Delete => "-", ChangeTag::Insert => "+", ChangeTag::Equal => " ", }; print!("{}{}", sign, change); } } ``` ## Screenshot ![terminal highlighting](https://raw.githubusercontent.com/mitsuhiko/similar/main/assets/terminal-inline.png) ## What's in the box? * Myer's diff * Patience diff * Hunt–McIlroy / Hunt–Szymanski LCS diff * Diffing on arbitrary comparable sequences * Line, word, character and grapheme level diffing * Text and Byte diffing * Unified diff generation ## Related Projects * [insta](https://insta.rs) snapshot testing library * [similar-asserts](https://github.com/mitsuhiko/similar-asserts) assertion library ## License and Links * [Documentation](https://docs.rs/similar/) * [Issue Tracker](https://github.com/mitsuhiko/similar/issues) * [Examples](https://github.com/mitsuhiko/similar/tree/main/examples) * License: [Apache-2.0](https://github.com/mitsuhiko/similar/blob/main/LICENSE) similar-2.2.1/clippy.toml000064400000000000000000000000201046102023000134530ustar 00000000000000msrv = "1.41.0" similar-2.2.1/examples/close-matches.rs000064400000000000000000000005061046102023000162020ustar 00000000000000use similar::get_close_matches; fn main() { let words = vec![ "apple", "appu", "appal", "apparitor", "beer", "beeb", "beeline", ]; println!("{:?}", get_close_matches("app", &words, 3, 0.7)); println!("{:?}", get_close_matches("bee", &words, 3, 0.7)); } similar-2.2.1/examples/large.rs000064400000000000000000000002741046102023000145470ustar 00000000000000use similar::TextDiff; fn main() { let x = "abc".repeat(2000); let y = "abd".repeat(2000); let diff = TextDiff::from_chars(&x, &y); println!("{}", diff.unified_diff()); } similar-2.2.1/examples/nonstring.rs000064400000000000000000000004701046102023000154740ustar 00000000000000use similar::{capture_diff_slices, Algorithm}; fn main() { let old = vec![1, 2, 3]; let new = vec![1, 2, 4]; let ops = capture_diff_slices(Algorithm::Myers, &old, &new); for op in ops { for change in op.iter_changes(&old, &new) { println!("{:?}", change); } } } similar-2.2.1/examples/original-slices.rs000064400000000000000000000004441046102023000165400ustar 00000000000000use similar::utils::diff_chars; use similar::Algorithm; fn main() { let old = "1234567890abcdef".to_string(); let new = "0123456789Oabzdef".to_string(); for (change_tag, value) in diff_chars(Algorithm::Myers, &old, &new) { println!("{}{:?}", change_tag, value); } } similar-2.2.1/examples/patience.rs000064400000000000000000000010551046102023000152430ustar 00000000000000use similar::{Algorithm, TextDiff}; const OLD: &str = r#" [ ( Major, 2, ), ( Minor, 20, ), ( Value, 0, ), ] "#; const NEW: &str = r#" [ ( Major, 2, ), ( Minor, 0, ), ( Value, 0, ), ( Value, 1, ), ] "#; fn main() { println!( "{}", TextDiff::configure() .algorithm(Algorithm::Patience) .diff_lines(OLD, NEW) .unified_diff() ); } similar-2.2.1/examples/serde.rs000064400000000000000000000006651046102023000145630ustar 00000000000000use similar::TextDiff; fn main() { let diff = TextDiff::from_lines( "Hello World\nThis is the second line.\nThis is the third.", "Hallo Welt\nThis is the second line.\nThis is life.\nMoar and more", ); let all_changes = diff .ops() .iter() .flat_map(|op| diff.iter_changes(op)) .collect::>(); println!("{}", serde_json::to_string_pretty(&all_changes).unwrap()); } similar-2.2.1/examples/terminal-inline.rs000064400000000000000000000035001046102023000165370ustar 00000000000000use std::fmt; use std::fs::read; use std::process::exit; use console::{style, Style}; use similar::{ChangeTag, TextDiff}; struct Line(Option); impl fmt::Display for Line { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.0 { None => write!(f, " "), Some(idx) => write!(f, "{:<4}", idx + 1), } } } fn main() { let args: Vec<_> = std::env::args_os().collect(); if args.len() != 3 { eprintln!("usage: terminal-inline [old] [new]"); exit(1); } let old = read(&args[1]).unwrap(); let new = read(&args[2]).unwrap(); let diff = TextDiff::from_lines(&old, &new); for (idx, group) in diff.grouped_ops(3).iter().enumerate() { if idx > 0 { println!("{:-^1$}", "-", 80); } for op in group { for change in diff.iter_inline_changes(op) { let (sign, s) = match change.tag() { ChangeTag::Delete => ("-", Style::new().red()), ChangeTag::Insert => ("+", Style::new().green()), ChangeTag::Equal => (" ", Style::new().dim()), }; print!( "{}{} |{}", style(Line(change.old_index())).dim(), style(Line(change.new_index())).dim(), s.apply_to(sign).bold(), ); for (emphasized, value) in change.iter_strings_lossy() { if emphasized { print!("{}", s.apply_to(value).underlined().on_black()); } else { print!("{}", s.apply_to(value)); } } if change.missing_newline() { println!(); } } } } } similar-2.2.1/examples/terminal.rs000064400000000000000000000012601046102023000152640ustar 00000000000000use console::Style; use similar::{ChangeTag, TextDiff}; fn main() { let diff = TextDiff::from_lines( "Hello World\nThis is the second line.\nThis is the third.", "Hallo Welt\nThis is the second line.\nThis is life.\nMoar and more", ); for op in diff.ops() { for change in diff.iter_changes(op) { let (sign, style) = match change.tag() { ChangeTag::Delete => ("-", Style::new().red()), ChangeTag::Insert => ("+", Style::new().green()), ChangeTag::Equal => (" ", Style::new()), }; print!("{}{}", style.apply_to(sign).bold(), style.apply_to(change)); } } } similar-2.2.1/examples/udiff.rs000064400000000000000000000010671046102023000145530ustar 00000000000000use std::fs::read; use std::io; use std::process::exit; use similar::TextDiff; fn main() { let args: Vec<_> = std::env::args_os().collect(); if args.len() != 3 { eprintln!("usage: udiff [old] [new]"); exit(1); } let old = read(&args[1]).unwrap(); let new = read(&args[2]).unwrap(); TextDiff::from_lines(&old, &new) .unified_diff() .header( &args[1].as_os_str().to_string_lossy(), &args[2].as_os_str().to_string_lossy(), ) .to_writer(io::stdout()) .unwrap(); } similar-2.2.1/src/algorithms/capture.rs000064400000000000000000000053441046102023000162450ustar 00000000000000use std::convert::Infallible; use crate::algorithms::DiffHook; use crate::{group_diff_ops, DiffOp}; /// A [`DiffHook`] that captures all diff operations. #[derive(Default, Clone)] pub struct Capture(Vec); impl Capture { /// Creates a new capture hook. pub fn new() -> Capture { Capture::default() } /// Converts the capture hook into a vector of ops. pub fn into_ops(self) -> Vec { self.0 } /// Isolate change clusters by eliminating ranges with no changes. /// /// This is equivalent to calling [`group_diff_ops`] on [`Capture::into_ops`]. pub fn into_grouped_ops(self, n: usize) -> Vec> { group_diff_ops(self.into_ops(), n) } /// Accesses the captured operations. pub fn ops(&self) -> &[DiffOp] { &self.0 } } impl DiffHook for Capture { type Error = Infallible; #[inline(always)] fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> { self.0.push(DiffOp::Equal { old_index, new_index, len, }); Ok(()) } #[inline(always)] fn delete( &mut self, old_index: usize, old_len: usize, new_index: usize, ) -> Result<(), Self::Error> { self.0.push(DiffOp::Delete { old_index, old_len, new_index, }); Ok(()) } #[inline(always)] fn insert( &mut self, old_index: usize, new_index: usize, new_len: usize, ) -> Result<(), Self::Error> { self.0.push(DiffOp::Insert { old_index, new_index, new_len, }); Ok(()) } #[inline(always)] fn replace( &mut self, old_index: usize, old_len: usize, new_index: usize, new_len: usize, ) -> Result<(), Self::Error> { self.0.push(DiffOp::Replace { old_index, old_len, new_index, new_len, }); Ok(()) } } #[test] fn test_capture_hook_grouping() { use crate::algorithms::{diff_slices, Algorithm, Replace}; let rng = (1..100).collect::>(); let mut rng_new = rng.clone(); rng_new[10] = 1000; rng_new[13] = 1000; rng_new[16] = 1000; rng_new[34] = 1000; let mut d = Replace::new(Capture::new()); diff_slices(Algorithm::Myers, &mut d, &rng, &rng_new).unwrap(); let ops = d.into_inner().into_grouped_ops(3); let tags = ops .iter() .map(|group| group.iter().map(|x| x.as_tag_tuple()).collect::>()) .collect::>(); insta::assert_debug_snapshot!(ops); insta::assert_debug_snapshot!(tags); } similar-2.2.1/src/algorithms/compact.rs000064400000000000000000000275131046102023000162320ustar 00000000000000//! Implements basic compacting. This is based on the compaction logic from //! diffy by Brandon Williams. use std::ops::Index; use crate::{DiffOp, DiffTag}; use super::utils::{common_prefix_len, common_suffix_len}; use super::DiffHook; /// Performs semantic cleanup operations on a diff. /// /// This merges similar ops together but also tries to move hunks up and /// down the diff with the desire to connect as many hunks as possible. /// It still needs to be combined with [`Replace`](crate::algorithms::Replace) /// to get actual replace diff ops out. #[derive(Debug)] pub struct Compact<'old, 'new, Old: ?Sized, New: ?Sized, D> { d: D, ops: Vec, old: &'old Old, new: &'new New, } impl<'old, 'new, Old, New, D> Compact<'old, 'new, Old, New, D> where D: DiffHook, Old: Index + ?Sized + 'old, New: Index + ?Sized + 'new, New::Output: PartialEq, { /// Creates a new compact hook wrapping another hook. pub fn new(d: D, old: &'old Old, new: &'new New) -> Self { Compact { d, ops: Vec::new(), old, new, } } /// Extracts the inner hook. pub fn into_inner(self) -> D { self.d } } impl<'old, 'new, Old: ?Sized, New: ?Sized, D: DiffHook> AsRef for Compact<'old, 'new, Old, New, D> { fn as_ref(&self) -> &D { &self.d } } impl<'old, 'new, Old: ?Sized, New: ?Sized, D: DiffHook> AsMut for Compact<'old, 'new, Old, New, D> { fn as_mut(&mut self) -> &mut D { &mut self.d } } impl<'old, 'new, Old, New, D> DiffHook for Compact<'old, 'new, Old, New, D> where D: DiffHook, Old: Index + ?Sized + 'old, New: Index + ?Sized + 'new, New::Output: PartialEq, { type Error = D::Error; #[inline(always)] fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> { self.ops.push(DiffOp::Equal { old_index, new_index, len, }); Ok(()) } #[inline(always)] fn delete( &mut self, old_index: usize, old_len: usize, new_index: usize, ) -> Result<(), Self::Error> { self.ops.push(DiffOp::Delete { old_index, old_len, new_index, }); Ok(()) } #[inline(always)] fn insert( &mut self, old_index: usize, new_index: usize, new_len: usize, ) -> Result<(), Self::Error> { self.ops.push(DiffOp::Insert { old_index, new_index, new_len, }); Ok(()) } fn finish(&mut self) -> Result<(), Self::Error> { cleanup_diff_ops(self.old, self.new, &mut self.ops); for op in &self.ops { op.apply_to_hook(&mut self.d)?; } self.d.finish() } } // Walks through all edits and shifts them up and then down, trying to see if // they run into similar edits which can be merged. pub fn cleanup_diff_ops(old: &Old, new: &New, ops: &mut Vec) where Old: Index + ?Sized, New: Index + ?Sized, New::Output: PartialEq, { // First attempt to compact all Deletions let mut pointer = 0; while let Some(&op) = ops.get(pointer) { if let DiffTag::Delete = op.tag() { pointer = shift_diff_ops_up(ops, old, new, pointer); pointer = shift_diff_ops_down(ops, old, new, pointer); } pointer += 1; } // Then attempt to compact all Insertions let mut pointer = 0; while let Some(&op) = ops.get(pointer) { if let DiffTag::Insert = op.tag() { pointer = shift_diff_ops_up(ops, old, new, pointer); pointer = shift_diff_ops_down(ops, old, new, pointer); } pointer += 1; } } fn shift_diff_ops_up( ops: &mut Vec, old: &Old, new: &New, mut pointer: usize, ) -> usize where Old: Index + ?Sized, New: Index + ?Sized, New::Output: PartialEq, { while let Some(&prev_op) = pointer.checked_sub(1).and_then(|idx| ops.get(idx)) { let this_op = ops[pointer]; match (this_op.tag(), prev_op.tag()) { // Shift Inserts Upwards (DiffTag::Insert, DiffTag::Equal) => { let suffix_len = common_suffix_len(old, prev_op.old_range(), new, this_op.new_range()); if suffix_len > 0 { if let Some(DiffTag::Equal) = ops.get(pointer + 1).map(|x| x.tag()) { ops[pointer + 1].grow_left(suffix_len); } else { ops.insert( pointer + 1, DiffOp::Equal { old_index: prev_op.old_range().end - suffix_len, new_index: this_op.new_range().end - suffix_len, len: suffix_len, }, ); } ops[pointer].shift_left(suffix_len); ops[pointer - 1].shrink_left(suffix_len); if ops[pointer - 1].is_empty() { ops.remove(pointer - 1); pointer -= 1; } } else if ops[pointer - 1].is_empty() { ops.remove(pointer - 1); pointer -= 1; } else { // We can't shift upwards anymore break; } } // Shift Deletions Upwards (DiffTag::Delete, DiffTag::Equal) => { // check common suffix for the amount we can shift let suffix_len = common_suffix_len(old, prev_op.old_range(), new, this_op.new_range()); if suffix_len != 0 { if let Some(DiffTag::Equal) = ops.get(pointer + 1).map(|x| x.tag()) { ops[pointer + 1].grow_left(suffix_len); } else { let old_range = prev_op.old_range(); ops.insert( pointer + 1, DiffOp::Equal { old_index: old_range.end - suffix_len, new_index: this_op.new_range().end - suffix_len, len: old_range.len() - suffix_len, }, ); } ops[pointer].shift_left(suffix_len); ops[pointer - 1].shrink_left(suffix_len); if ops[pointer - 1].is_empty() { ops.remove(pointer - 1); pointer -= 1; } } else if ops[pointer - 1].is_empty() { ops.remove(pointer - 1); pointer -= 1; } else { // We can't shift upwards anymore break; } } // Swap the Delete and Insert (DiffTag::Insert, DiffTag::Delete) | (DiffTag::Delete, DiffTag::Insert) => { ops.swap(pointer - 1, pointer); pointer -= 1; } // Merge the two ranges (DiffTag::Insert, DiffTag::Insert) => { ops[pointer - 1].grow_right(this_op.new_range().len()); ops.remove(pointer); pointer -= 1; } (DiffTag::Delete, DiffTag::Delete) => { ops[pointer - 1].grow_right(this_op.old_range().len()); ops.remove(pointer); pointer -= 1; } _ => unreachable!("unexpected tag"), } } pointer } fn shift_diff_ops_down( ops: &mut Vec, old: &Old, new: &New, mut pointer: usize, ) -> usize where Old: Index + ?Sized, New: Index + ?Sized, New::Output: PartialEq, { while let Some(&next_op) = pointer.checked_add(1).and_then(|idx| ops.get(idx)) { let this_op = ops[pointer]; match (this_op.tag(), next_op.tag()) { // Shift Inserts Downwards (DiffTag::Insert, DiffTag::Equal) => { let prefix_len = common_prefix_len(old, next_op.old_range(), new, this_op.new_range()); if prefix_len > 0 { if let Some(DiffTag::Equal) = pointer .checked_sub(1) .and_then(|x| ops.get(x)) .map(|x| x.tag()) { ops[pointer - 1].grow_right(prefix_len); } else { ops.insert( pointer, DiffOp::Equal { old_index: next_op.old_range().start, new_index: this_op.new_range().start, len: prefix_len, }, ); pointer += 1; } ops[pointer].shift_right(prefix_len); ops[pointer + 1].shrink_right(prefix_len); if ops[pointer + 1].is_empty() { ops.remove(pointer + 1); } } else if ops[pointer + 1].is_empty() { ops.remove(pointer + 1); } else { // We can't shift upwards anymore break; } } // Shift Deletions Downwards (DiffTag::Delete, DiffTag::Equal) => { // check common suffix for the amount we can shift let prefix_len = common_prefix_len(old, next_op.old_range(), new, this_op.new_range()); if prefix_len > 0 { if let Some(DiffTag::Equal) = pointer .checked_sub(1) .and_then(|x| ops.get(x)) .map(|x| x.tag()) { ops[pointer - 1].grow_right(prefix_len); } else { ops.insert( pointer, DiffOp::Equal { old_index: next_op.old_range().start, new_index: this_op.new_range().start, len: prefix_len, }, ); pointer += 1; } ops[pointer].shift_right(prefix_len); ops[pointer + 1].shrink_right(prefix_len); if ops[pointer + 1].is_empty() { ops.remove(pointer + 1); } } else if ops[pointer + 1].is_empty() { ops.remove(pointer + 1); } else { // We can't shift downwards anymore break; } } // Swap the Delete and Insert (DiffTag::Insert, DiffTag::Delete) | (DiffTag::Delete, DiffTag::Insert) => { ops.swap(pointer, pointer + 1); pointer += 1; } // Merge the two ranges (DiffTag::Insert, DiffTag::Insert) => { ops[pointer].grow_right(next_op.new_range().len()); ops.remove(pointer + 1); } (DiffTag::Delete, DiffTag::Delete) => { ops[pointer].grow_right(next_op.old_range().len()); ops.remove(pointer + 1); } _ => unreachable!("unexpected tag"), } } pointer } similar-2.2.1/src/algorithms/hook.rs000064400000000000000000000113111046102023000155310ustar 00000000000000/// A trait for reacting to an edit script from the "old" version to /// the "new" version. pub trait DiffHook: Sized { /// The error produced from the hook methods. type Error; /// Called when lines with indices `old_index` (in the old version) and /// `new_index` (in the new version) start an section equal in both /// versions, of length `len`. fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> { let _ = old_index; let _ = new_index; let _ = len; Ok(()) } /// Called when a section of length `old_len`, starting at `old_index`, /// needs to be deleted from the old version. fn delete( &mut self, old_index: usize, old_len: usize, new_index: usize, ) -> Result<(), Self::Error> { let _ = old_index; let _ = old_len; let _ = new_index; Ok(()) } /// Called when a section of the new version, of length `new_len` /// and starting at `new_index`, needs to be inserted at position `old_index'. fn insert( &mut self, old_index: usize, new_index: usize, new_len: usize, ) -> Result<(), Self::Error> { let _ = old_index; let _ = new_index; let _ = new_len; Ok(()) } /// Called when a section of the old version, starting at index /// `old_index` and of length `old_len`, needs to be replaced with a /// section of length `new_len`, starting at `new_index`, of the new /// version. /// /// The default implementations invokes `delete` and `insert`. /// /// You can use the [`Replace`](crate::algorithms::Replace) hook to /// automatically generate these. #[inline(always)] fn replace( &mut self, old_index: usize, old_len: usize, new_index: usize, new_len: usize, ) -> Result<(), Self::Error> { self.delete(old_index, old_len, new_index)?; self.insert(old_index, new_index, new_len) } /// Always called at the end of the algorithm. #[inline(always)] fn finish(&mut self) -> Result<(), Self::Error> { Ok(()) } } impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D { type Error = D::Error; #[inline(always)] fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> { (*self).equal(old_index, new_index, len) } #[inline(always)] fn delete( &mut self, old_index: usize, old_len: usize, new_index: usize, ) -> Result<(), Self::Error> { (*self).delete(old_index, old_len, new_index) } #[inline(always)] fn insert( &mut self, old_index: usize, new_index: usize, new_len: usize, ) -> Result<(), Self::Error> { (*self).insert(old_index, new_index, new_len) } #[inline(always)] fn replace( &mut self, old: usize, old_len: usize, new: usize, new_len: usize, ) -> Result<(), Self::Error> { (*self).replace(old, old_len, new, new_len) } #[inline(always)] fn finish(&mut self) -> Result<(), Self::Error> { (*self).finish() } } /// Wrapper [`DiffHook`] that prevents calls to [`DiffHook::finish`]. /// /// This hook is useful in situations where diff hooks are composed but you /// want to prevent that the finish hook method is called. pub struct NoFinishHook(D); impl NoFinishHook { /// Wraps another hook. pub fn new(d: D) -> NoFinishHook { NoFinishHook(d) } /// Extracts the inner hook. pub fn into_inner(self) -> D { self.0 } } impl DiffHook for NoFinishHook { type Error = D::Error; #[inline(always)] fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> { self.0.equal(old_index, new_index, len) } #[inline(always)] fn delete( &mut self, old_index: usize, old_len: usize, new_index: usize, ) -> Result<(), Self::Error> { self.0.delete(old_index, old_len, new_index) } #[inline(always)] fn insert( &mut self, old_index: usize, new_index: usize, new_len: usize, ) -> Result<(), Self::Error> { self.0.insert(old_index, new_index, new_len) } #[inline(always)] fn replace( &mut self, old_index: usize, old_len: usize, new_index: usize, new_len: usize, ) -> Result<(), Self::Error> { self.0.replace(old_index, old_len, new_index, new_len) } fn finish(&mut self) -> Result<(), Self::Error> { Ok(()) } } similar-2.2.1/src/algorithms/lcs.rs000064400000000000000000000161521046102023000153620ustar 00000000000000//! Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. //! //! * time: `O((NM)D log (M)D)` //! * space `O(MN)` use std::collections::BTreeMap; use std::ops::{Index, Range}; use std::time::Instant; use crate::algorithms::utils::{common_prefix_len, common_suffix_len, is_empty_range}; use crate::algorithms::DiffHook; /// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. /// /// Diff `old`, between indices `old_range` and `new` between indices `new_range`. /// /// This diff is done with an optional deadline that defines the maximal /// execution time permitted before it bails and falls back to an very bad /// approximation. Deadlines with LCS do not make a lot of sense and should /// not be used. pub fn diff( d: &mut D, old: &Old, old_range: Range, new: &New, new_range: Range, ) -> Result<(), D::Error> where Old: Index + ?Sized, New: Index + ?Sized, D: DiffHook, New::Output: PartialEq, { diff_deadline(d, old, old_range, new, new_range, None) } /// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. /// /// Diff `old`, between indices `old_range` and `new` between indices `new_range`. /// /// This diff is done with an optional deadline that defines the maximal /// execution time permitted before it bails and falls back to an approximation. pub fn diff_deadline( d: &mut D, old: &Old, old_range: Range, new: &New, new_range: Range, deadline: Option, ) -> Result<(), D::Error> where Old: Index + ?Sized, New: Index + ?Sized, D: DiffHook, New::Output: PartialEq, { if is_empty_range(&new_range) { d.delete(old_range.start, old_range.len(), new_range.start)?; return Ok(()); } else if is_empty_range(&old_range) { d.insert(old_range.start, new_range.start, new_range.len())?; return Ok(()); } let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone()); let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone()); // If the sequences are not different then we're done if common_prefix_len == old_range.len() && (old_range.len() == new_range.len()) { d.equal(0, 0, old_range.len())?; return Ok(()); } let maybe_table = make_table( old, common_prefix_len..(old_range.len() - common_suffix_len), new, common_prefix_len..(new_range.len() - common_suffix_len), deadline, ); let mut old_idx = 0; let mut new_idx = 0; let new_len = new_range.len() - common_prefix_len - common_suffix_len; let old_len = old_range.len() - common_prefix_len - common_suffix_len; if common_prefix_len > 0 { d.equal(old_range.start, new_range.start, common_prefix_len)?; } if let Some(table) = maybe_table { while new_idx < new_len && old_idx < old_len { let old_orig_idx = old_range.start + common_prefix_len + old_idx; let new_orig_idx = new_range.start + common_prefix_len + new_idx; if new[new_orig_idx] == old[old_orig_idx] { d.equal(old_orig_idx, new_orig_idx, 1)?; old_idx += 1; new_idx += 1; } else if table.get(&(new_idx, old_idx + 1)).map_or(0, |&x| x) >= table.get(&(new_idx + 1, old_idx)).map_or(0, |&x| x) { d.delete(old_orig_idx, 1, new_orig_idx)?; old_idx += 1; } else { d.insert(old_orig_idx, new_orig_idx, 1)?; new_idx += 1; } } } else { let old_orig_idx = old_range.start + common_prefix_len + old_idx; let new_orig_idx = new_range.start + common_prefix_len + new_idx; d.delete(old_orig_idx, old_len, new_orig_idx)?; d.insert(old_orig_idx, new_orig_idx, new_len)?; } if old_idx < old_len { d.delete( old_range.start + common_prefix_len + old_idx, old_len - old_idx, new_range.start + common_prefix_len + new_idx, )?; old_idx += old_len - old_idx; } if new_idx < new_len { d.insert( old_range.start + common_prefix_len + old_idx, new_range.start + common_prefix_len + new_idx, new_len - new_idx, )?; } if common_suffix_len > 0 { d.equal( old_range.start + old_len + common_prefix_len, new_range.start + new_len + common_prefix_len, common_suffix_len, )?; } d.finish() } fn make_table( old: &Old, old_range: Range, new: &New, new_range: Range, deadline: Option, ) -> Option> where Old: Index + ?Sized, New: Index + ?Sized, New::Output: PartialEq, { let old_len = old_range.len(); let new_len = new_range.len(); let mut table = BTreeMap::new(); for i in (0..new_len).rev() { // are we running for too long? give up on the table if let Some(deadline) = deadline { if Instant::now() > deadline { return None; } } for j in (0..old_len).rev() { let val = if new[i] == old[j] { table.get(&(i + 1, j + 1)).map_or(0, |&x| x) + 1 } else { table .get(&(i + 1, j)) .map_or(0, |&x| x) .max(table.get(&(i, j + 1)).map_or(0, |&x| x)) }; if val > 0 { table.insert((i, j), val); } } } Some(table) } #[test] fn test_table() { let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap(); let expected = { let mut m = BTreeMap::new(); m.insert((1, 0), 1); m.insert((0, 0), 1); m.insert((2, 0), 1); m }; assert_eq!(table, expected); } #[test] fn test_diff() { let a: &[usize] = &[0, 1, 2, 3, 4]; let b: &[usize] = &[0, 1, 2, 9, 4]; let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); insta::assert_debug_snapshot!(d.into_inner().ops()); } #[test] fn test_contiguous() { let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7]; let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); insta::assert_debug_snapshot!(d.into_inner().ops()); } #[test] fn test_pat() { let a: &[usize] = &[0, 1, 3, 4, 5]; let b: &[usize] = &[0, 1, 4, 5, 8, 9]; let mut d = crate::algorithms::Capture::new(); diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); insta::assert_debug_snapshot!(d.ops()); } #[test] fn test_same() { let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; let b: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; let mut d = crate::algorithms::Capture::new(); diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); insta::assert_debug_snapshot!(d.ops()); } similar-2.2.1/src/algorithms/mod.rs000064400000000000000000000102701046102023000153530ustar 00000000000000//! Various diff (longest common subsequence) algorithms. //! //! The implementations of the algorithms in this module are relatively low //! level and expose the most generic bounds possible for the algorithm. To //! use them you would typically use the higher level API if possible but //! direct access to these algorithms can be useful in some cases. //! //! All these algorithms provide a `diff` function which takes two indexable //! objects (for instance slices) and a [`DiffHook`]. As the //! diff is generated the diff hook is invoked. Note that the diff hook does //! not get access to the actual values but only the indexes. This is why the //! diff hook is not used outside of the raw algorithm implementations as for //! most situations access to the values is useful of required. //! //! The algorithms module really is the most low-level module in similar and //! generally not the place to start. //! //! # Example //! //! This is a simple example that shows how you can calculate the difference //! between two sequences and capture the ops into a vector. //! //! ```rust //! use similar::algorithms::{Algorithm, Replace, Capture, diff_slices}; //! //! let a = vec![1, 2, 3, 4, 5]; //! let b = vec![1, 2, 3, 4, 7]; //! let mut d = Replace::new(Capture::new()); //! diff_slices(Algorithm::Myers, &mut d, &a, &b).unwrap(); //! let ops = d.into_inner().into_ops(); //! ``` //! //! The above example is equivalent to using //! [`capture_diff_slices`](crate::capture_diff_slices). mod capture; mod compact; mod hook; mod replace; pub(crate) mod utils; use std::hash::Hash; use std::ops::{Index, Range}; use std::time::Instant; pub use capture::Capture; pub use compact::Compact; pub use hook::{DiffHook, NoFinishHook}; pub use replace::Replace; pub use utils::IdentifyDistinct; #[doc(no_inline)] pub use crate::Algorithm; pub mod lcs; pub mod myers; pub mod patience; /// Creates a diff between old and new with the given algorithm. /// /// Diffs `old`, between indices `old_range` and `new` between indices `new_range`. pub fn diff( alg: Algorithm, d: &mut D, old: &Old, old_range: Range, new: &New, new_range: Range, ) -> Result<(), D::Error> where Old: Index + ?Sized, New: Index + ?Sized, D: DiffHook, Old::Output: Hash + Eq + Ord, New::Output: PartialEq + Hash + Eq + Ord, { diff_deadline(alg, d, old, old_range, new, new_range, None) } /// Creates a diff between old and new with the given algorithm with deadline. /// /// Diffs `old`, between indices `old_range` and `new` between indices `new_range`. /// /// This diff is done with an optional deadline that defines the maximal /// execution time permitted before it bails and falls back to an approximation. /// Note that not all algorithms behave well if they reach the deadline (LCS /// for instance produces a very simplistic diff when the deadline is reached /// in all cases). pub fn diff_deadline( alg: Algorithm, d: &mut D, old: &Old, old_range: Range, new: &New, new_range: Range, deadline: Option, ) -> Result<(), D::Error> where Old: Index + ?Sized, New: Index + ?Sized, D: DiffHook, Old::Output: Hash + Eq + Ord, New::Output: PartialEq + Hash + Eq + Ord, { match alg { Algorithm::Myers => myers::diff_deadline(d, old, old_range, new, new_range, deadline), Algorithm::Patience => patience::diff_deadline(d, old, old_range, new, new_range, deadline), Algorithm::Lcs => lcs::diff_deadline(d, old, old_range, new, new_range, deadline), } } /// Shortcut for diffing slices with a specific algorithm. pub fn diff_slices(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error> where D: DiffHook, T: Eq + Hash + Ord, { diff(alg, d, old, 0..old.len(), new, 0..new.len()) } /// Shortcut for diffing slices with a specific algorithm. pub fn diff_slices_deadline( alg: Algorithm, d: &mut D, old: &[T], new: &[T], deadline: Option, ) -> Result<(), D::Error> where D: DiffHook, T: Eq + Hash + Ord, { diff_deadline(alg, d, old, 0..old.len(), new, 0..new.len(), deadline) } similar-2.2.1/src/algorithms/myers.rs000064400000000000000000000312051046102023000157340ustar 00000000000000//! Myers' diff algorithm. //! //! * time: `O((N+M)D)` //! * space `O(N+M)` //! //! See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf) //! describing it. //! //! The implementation of this algorithm is based on the implementation by //! Brandon Williams. //! //! # Heuristics //! //! At present this implementation of Myers' does not implement any more advanced //! heuristics that would solve some pathological cases. For instance passing two //! large and completely distinct sequences to the algorithm will make it spin //! without making reasonable progress. Currently the only protection in the //! library against this is to pass a deadline to the diffing algorithm. //! //! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15). use std::ops::{Index, IndexMut, Range}; use std::time::Instant; use crate::algorithms::utils::{common_prefix_len, common_suffix_len, is_empty_range}; use crate::algorithms::DiffHook; /// Myers' diff algorithm. /// /// Diff `old`, between indices `old_range` and `new` between indices `new_range`. pub fn diff( d: &mut D, old: &Old, old_range: Range, new: &New, new_range: Range, ) -> Result<(), D::Error> where Old: Index + ?Sized, New: Index + ?Sized, D: DiffHook, New::Output: PartialEq, { diff_deadline(d, old, old_range, new, new_range, None) } /// Myers' diff algorithm with deadline. /// /// Diff `old`, between indices `old_range` and `new` between indices `new_range`. /// /// This diff is done with an optional deadline that defines the maximal /// execution time permitted before it bails and falls back to an approximation. pub fn diff_deadline( d: &mut D, old: &Old, old_range: Range, new: &New, new_range: Range, deadline: Option, ) -> Result<(), D::Error> where Old: Index + ?Sized, New: Index + ?Sized, D: DiffHook, New::Output: PartialEq, { let max_d = max_d(old_range.len(), new_range.len()); let mut vb = V::new(max_d); let mut vf = V::new(max_d); conquer( d, old, old_range, new, new_range, &mut vf, &mut vb, deadline, )?; d.finish() } // A D-path is a path which starts at (0,0) that has exactly D non-diagonal // edges. All D-paths consist of a (D - 1)-path followed by a non-diagonal edge // and then a possibly empty sequence of diagonal edges called a snake. /// `V` contains the endpoints of the furthest reaching `D-paths`. For each /// recorded endpoint `(x,y)` in diagonal `k`, we only need to retain `x` because /// `y` can be computed from `x - k`. In other words, `V` is an array of integers /// where `V[k]` contains the row index of the endpoint of the furthest reaching /// path in diagonal `k`. /// /// We can't use a traditional Vec to represent `V` since we use `k` as an index /// and it can take on negative values. So instead `V` is represented as a /// light-weight wrapper around a Vec plus an `offset` which is the maximum value /// `k` can take on in order to map negative `k`'s back to a value >= 0. #[derive(Debug)] struct V { offset: isize, v: Vec, // Look into initializing this to -1 and storing isize } impl V { fn new(max_d: usize) -> Self { Self { offset: max_d as isize, v: vec![0; 2 * max_d], } } fn len(&self) -> usize { self.v.len() } } impl Index for V { type Output = usize; fn index(&self, index: isize) -> &Self::Output { &self.v[(index + self.offset) as usize] } } impl IndexMut for V { fn index_mut(&mut self, index: isize) -> &mut Self::Output { &mut self.v[(index + self.offset) as usize] } } fn max_d(len1: usize, len2: usize) -> usize { // XXX look into reducing the need to have the additional '+ 1' (len1 + len2 + 1) / 2 + 1 } #[inline(always)] fn split_at(range: Range, at: usize) -> (Range, Range) { (range.start..at, at..range.end) } /// A `Snake` is a sequence of diagonal edges in the edit graph. Normally /// a snake has a start end end point (and it is possible for a snake to have /// a length of zero, meaning the start and end points are the same) however /// we do not need the end point which is why it's not implemented here. /// /// The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes /// some of which may be empty. The divide step requires finding the ceil(D/2) + /// 1 or middle snake of an optimal D-path. The idea for doing so is to /// simultaneously run the basic algorithm in both the forward and reverse /// directions until furthest reaching forward and reverse paths starting at /// opposing corners 'overlap'. fn find_middle_snake( old: &Old, old_range: Range, new: &New, new_range: Range, vf: &mut V, vb: &mut V, deadline: Option, ) -> Option<(usize, usize)> where Old: Index + ?Sized, New: Index + ?Sized, New::Output: PartialEq, { let n = old_range.len(); let m = new_range.len(); // By Lemma 1 in the paper, the optimal edit script length is odd or even as // `delta` is odd or even. let delta = n as isize - m as isize; let odd = delta & 1 == 1; // The initial point at (0, -1) vf[1] = 0; // The initial point at (N, M+1) vb[1] = 0; // We only need to explore ceil(D/2) + 1 let d_max = max_d(n, m); assert!(vf.len() >= d_max); assert!(vb.len() >= d_max); for d in 0..d_max as isize { // are we running for too long? if let Some(deadline) = deadline { if Instant::now() > deadline { break; } } // Forward path for k in (-d..=d).rev().step_by(2) { let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) { vf[k + 1] } else { vf[k - 1] + 1 }; let y = (x as isize - k) as usize; // The coordinate of the start of a snake let (x0, y0) = (x, y); // While these sequences are identical, keep moving through the // graph with no cost if x < old_range.len() && y < new_range.len() { let advance = common_prefix_len( old, old_range.start + x..old_range.end, new, new_range.start + y..new_range.end, ); x += advance; } // This is the new best x value vf[k] = x; // Only check for connections from the forward search when N - M is // odd and when there is a reciprocal k line coming from the other // direction. if odd && (k - delta).abs() <= (d - 1) { // TODO optimize this so we don't have to compare against n if vf[k] + vb[-(k - delta)] >= n { // Return the snake return Some((x0 + old_range.start, y0 + new_range.start)); } } } // Backward path for k in (-d..=d).rev().step_by(2) { let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) { vb[k + 1] } else { vb[k - 1] + 1 }; let mut y = (x as isize - k) as usize; // The coordinate of the start of a snake if x < n && y < m { let advance = common_suffix_len( old, old_range.start..old_range.start + n - x, new, new_range.start..new_range.start + m - y, ); x += advance; y += advance; } // This is the new best x value vb[k] = x; if !odd && (k - delta).abs() <= d { // TODO optimize this so we don't have to compare against n if vb[k] + vf[-(k - delta)] >= n { // Return the snake return Some((n - x + old_range.start, m - y + new_range.start)); } } } // TODO: Maybe there's an opportunity to optimize and bail early? } // deadline reached None } #[allow(clippy::too_many_arguments)] fn conquer( d: &mut D, old: &Old, mut old_range: Range, new: &New, mut new_range: Range, vf: &mut V, vb: &mut V, deadline: Option, ) -> Result<(), D::Error> where Old: Index + ?Sized, New: Index + ?Sized, D: DiffHook, New::Output: PartialEq, { // Check for common prefix let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone()); if common_prefix_len > 0 { d.equal(old_range.start, new_range.start, common_prefix_len)?; } old_range.start += common_prefix_len; new_range.start += common_prefix_len; // Check for common suffix let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone()); let common_suffix = ( old_range.end - common_suffix_len, new_range.end - common_suffix_len, ); old_range.end -= common_suffix_len; new_range.end -= common_suffix_len; if is_empty_range(&old_range) && is_empty_range(&new_range) { // Do nothing } else if is_empty_range(&new_range) { d.delete(old_range.start, old_range.len(), new_range.start)?; } else if is_empty_range(&old_range) { d.insert(old_range.start, new_range.start, new_range.len())?; } else if let Some((x_start, y_start)) = find_middle_snake( old, old_range.clone(), new, new_range.clone(), vf, vb, deadline, ) { let (old_a, old_b) = split_at(old_range, x_start); let (new_a, new_b) = split_at(new_range, y_start); conquer(d, old, old_a, new, new_a, vf, vb, deadline)?; conquer(d, old, old_b, new, new_b, vf, vb, deadline)?; } else { d.delete( old_range.start, old_range.end - old_range.start, new_range.start, )?; d.insert( old_range.start, new_range.start, new_range.end - new_range.start, )?; } if common_suffix_len > 0 { d.equal(common_suffix.0, common_suffix.1, common_suffix_len)?; } Ok(()) } #[test] fn test_find_middle_snake() { let a = &b"ABCABBA"[..]; let b = &b"CBABAC"[..]; let max_d = max_d(a.len(), b.len()); let mut vf = V::new(max_d); let mut vb = V::new(max_d); let (x_start, y_start) = find_middle_snake(a, 0..a.len(), b, 0..b.len(), &mut vf, &mut vb, None).unwrap(); assert_eq!(x_start, 4); assert_eq!(y_start, 1); } #[test] fn test_diff() { let a: &[usize] = &[0, 1, 2, 3, 4]; let b: &[usize] = &[0, 1, 2, 9, 4]; let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); insta::assert_debug_snapshot!(d.into_inner().ops()); } #[test] fn test_contiguous() { let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7]; let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); insta::assert_debug_snapshot!(d.into_inner().ops()); } #[test] fn test_pat() { let a: &[usize] = &[0, 1, 3, 4, 5]; let b: &[usize] = &[0, 1, 4, 5, 8, 9]; let mut d = crate::algorithms::Capture::new(); diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); insta::assert_debug_snapshot!(d.ops()); } #[test] fn test_deadline_reached() { use std::ops::Index; use std::time::Duration; let a = (0..100).collect::>(); let mut b = (0..100).collect::>(); b[10] = 99; b[50] = 99; b[25] = 99; struct SlowIndex<'a>(&'a [usize]); impl<'a> Index for SlowIndex<'a> { type Output = usize; fn index(&self, index: usize) -> &Self::Output { std::thread::sleep(Duration::from_millis(1)); &self.0[index] } } let slow_a = SlowIndex(&a); let slow_b = SlowIndex(&b); // don't give it enough time to do anything interesting let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); diff_deadline( &mut d, &slow_a, 0..a.len(), &slow_b, 0..b.len(), Some(Instant::now() + Duration::from_millis(50)), ) .unwrap(); insta::assert_debug_snapshot!(d.into_inner().ops()); } similar-2.2.1/src/algorithms/patience.rs000064400000000000000000000117351046102023000163730ustar 00000000000000//! Patience diff algorithm. //! //! * time: `O(N log N + M log M + (N+M)D)` //! * space: `O(N+M)` //! //! Tends to give more human-readable outputs. See [Bram Cohen's blog //! post](https://bramcohen.livejournal.com/73318.html) describing it. //! //! This is based on the patience implementation of [pijul](https://pijul.org/) //! by Pierre-Étienne Meunier. use std::hash::Hash; use std::ops::{Index, Range}; use std::time::Instant; use crate::algorithms::{myers, DiffHook, NoFinishHook, Replace}; use super::utils::{unique, UniqueItem}; /// Patience diff algorithm. /// /// Diff `old`, between indices `old_range` and `new` between indices `new_range`. pub fn diff( d: &mut D, old: &Old, old_range: Range, new: &New, new_range: Range, ) -> Result<(), D::Error> where Old: Index + ?Sized, New: Index + ?Sized, Old::Output: Hash + Eq, New::Output: PartialEq + Hash + Eq, D: DiffHook, { diff_deadline(d, old, old_range, new, new_range, None) } /// Patience diff algorithm with deadline. /// /// Diff `old`, between indices `old_range` and `new` between indices `new_range`. /// /// This diff is done with an optional deadline that defines the maximal /// execution time permitted before it bails and falls back to an approximation. pub fn diff_deadline( d: &mut D, old: &Old, old_range: Range, new: &New, new_range: Range, deadline: Option, ) -> Result<(), D::Error> where Old: Index + ?Sized, New: Index + ?Sized, Old::Output: Hash + Eq, New::Output: PartialEq + Hash + Eq, D: DiffHook, { let old_indexes = unique(old, old_range.clone()); let new_indexes = unique(new, new_range.clone()); let mut d = Replace::new(Patience { d, old, old_current: old_range.start, old_end: old_range.end, old_indexes: &old_indexes, new, new_current: new_range.start, new_end: new_range.end, new_indexes: &new_indexes, deadline, }); myers::diff_deadline( &mut d, &old_indexes, 0..old_indexes.len(), &new_indexes, 0..new_indexes.len(), deadline, )?; Ok(()) } struct Patience<'old, 'new, 'd, Old: ?Sized, New: ?Sized, D> { d: &'d mut D, old: &'old Old, old_current: usize, old_end: usize, old_indexes: &'old [UniqueItem<'old, Old>], new: &'new New, new_current: usize, new_end: usize, new_indexes: &'new [UniqueItem<'new, New>], deadline: Option, } impl<'old, 'new, 'd, Old, New, D> DiffHook for Patience<'old, 'new, 'd, Old, New, D> where D: DiffHook + 'd, Old: Index + ?Sized + 'old, New: Index + ?Sized + 'new, New::Output: PartialEq, { type Error = D::Error; fn equal(&mut self, old: usize, new: usize, len: usize) -> Result<(), D::Error> { for (old, new) in (old..old + len).zip(new..new + len) { let a0 = self.old_current; let b0 = self.new_current; while self.old_current < self.old_indexes[old].original_index() && self.new_current < self.new_indexes[new].original_index() && self.new[self.new_current] == self.old[self.old_current] { self.old_current += 1; self.new_current += 1; } if self.old_current > a0 { self.d.equal(a0, b0, self.old_current - a0)?; } let mut no_finish_d = NoFinishHook::new(&mut self.d); myers::diff_deadline( &mut no_finish_d, self.old, self.old_current..self.old_indexes[old].original_index(), self.new, self.new_current..self.new_indexes[new].original_index(), self.deadline, )?; self.old_current = self.old_indexes[old].original_index(); self.new_current = self.new_indexes[new].original_index(); } Ok(()) } fn finish(&mut self) -> Result<(), D::Error> { myers::diff_deadline( self.d, self.old, self.old_current..self.old_end, self.new, self.new_current..self.new_end, self.deadline, ) } } #[test] fn test_patience() { let a: &[usize] = &[11, 1, 2, 2, 3, 4, 4, 4, 5, 47, 19]; let b: &[usize] = &[10, 1, 2, 2, 8, 9, 4, 4, 7, 47, 18]; let mut d = Replace::new(crate::algorithms::Capture::new()); diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); insta::assert_debug_snapshot!(d.into_inner().ops()); } #[test] fn test_patience_out_of_bounds_bug() { // this used to be a bug let a: &[usize] = &[1, 2, 3, 4]; let b: &[usize] = &[1, 2, 3]; let mut d = Replace::new(crate::algorithms::Capture::new()); diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); insta::assert_debug_snapshot!(d.into_inner().ops()); } similar-2.2.1/src/algorithms/replace.rs000064400000000000000000000136331046102023000162150ustar 00000000000000use crate::algorithms::DiffHook; /// A [`DiffHook`] that combines deletions and insertions to give blocks /// of maximal length, and replacements when appropriate. /// /// It will replace [`DiffHook::insert`] and [`DiffHook::delete`] events when /// possible with [`DiffHook::replace`] events. Note that even though the /// text processing in the crate does not use replace events and always resolves /// then back to delete and insert, it's useful to always use the replacer to /// ensure a consistent order of inserts and deletes. This is why for instance /// the text diffing automatically uses this hook internally. pub struct Replace { d: D, del: Option<(usize, usize, usize)>, ins: Option<(usize, usize, usize)>, eq: Option<(usize, usize, usize)>, } impl Replace { /// Creates a new replace hook wrapping another hook. pub fn new(d: D) -> Self { Replace { d, del: None, ins: None, eq: None, } } /// Extracts the inner hook. pub fn into_inner(self) -> D { self.d } fn flush_eq(&mut self) -> Result<(), D::Error> { if let Some((eq_old_index, eq_new_index, eq_len)) = self.eq.take() { self.d.equal(eq_old_index, eq_new_index, eq_len)? } Ok(()) } fn flush_del_ins(&mut self) -> Result<(), D::Error> { if let Some((del_old_index, del_old_len, del_new_index)) = self.del.take() { if let Some((_, ins_new_index, ins_new_len)) = self.ins.take() { self.d .replace(del_old_index, del_old_len, ins_new_index, ins_new_len)?; } else { self.d.delete(del_old_index, del_old_len, del_new_index)?; } } else if let Some((ins_old_index, ins_new_index, ins_new_len)) = self.ins.take() { self.d.insert(ins_old_index, ins_new_index, ins_new_len)?; } Ok(()) } } impl AsRef for Replace { fn as_ref(&self) -> &D { &self.d } } impl AsMut for Replace { fn as_mut(&mut self) -> &mut D { &mut self.d } } impl DiffHook for Replace { type Error = D::Error; fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), D::Error> { self.flush_del_ins()?; self.eq = if let Some((eq_old_index, eq_new_index, eq_len)) = self.eq.take() { Some((eq_old_index, eq_new_index, eq_len + len)) } else { Some((old_index, new_index, len)) }; Ok(()) } fn delete( &mut self, old_index: usize, old_len: usize, new_index: usize, ) -> Result<(), D::Error> { self.flush_eq()?; if let Some((del_old_index, del_old_len, del_new_index)) = self.del.take() { debug_assert_eq!(old_index, del_old_index + del_old_len); self.del = Some((del_old_index, del_old_len + old_len, del_new_index)); } else { self.del = Some((old_index, old_len, new_index)); } Ok(()) } fn insert( &mut self, old_index: usize, new_index: usize, new_len: usize, ) -> Result<(), D::Error> { self.flush_eq()?; self.ins = if let Some((ins_old_index, ins_new_index, ins_new_len)) = self.ins.take() { debug_assert_eq!(ins_new_index + ins_new_len, new_index); Some((ins_old_index, ins_new_index, new_len + ins_new_len)) } else { Some((old_index, new_index, new_len)) }; Ok(()) } fn replace( &mut self, old_index: usize, old_len: usize, new_index: usize, new_len: usize, ) -> Result<(), D::Error> { self.flush_eq()?; self.d.replace(old_index, old_len, new_index, new_len) } fn finish(&mut self) -> Result<(), D::Error> { self.flush_eq()?; self.flush_del_ins()?; self.d.finish() } } #[test] fn test_mayers_replace() { use crate::algorithms::{diff_slices, Algorithm}; let a: &[&str] = &[ ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n", "a\n", "b\n", "c\n", "================================\n", "d\n", "e\n", "f\n", "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", ]; let b: &[&str] = &[ ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n", "x\n", "b\n", "c\n", "================================\n", "y\n", "e\n", "f\n", "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", ]; let mut d = Replace::new(crate::algorithms::Capture::new()); diff_slices(Algorithm::Myers, &mut d, a, b).unwrap(); insta::assert_debug_snapshot!(&d.into_inner().ops(), @r###" [ Equal { old_index: 0, new_index: 0, len: 1, }, Replace { old_index: 1, old_len: 1, new_index: 1, new_len: 1, }, Equal { old_index: 2, new_index: 2, len: 3, }, Replace { old_index: 5, old_len: 1, new_index: 5, new_len: 1, }, Equal { old_index: 6, new_index: 6, len: 3, }, ] "###); } #[test] fn test_replace() { use crate::algorithms::{diff_slices, Algorithm}; let a: &[usize] = &[0, 1, 2, 3, 4]; let b: &[usize] = &[0, 1, 2, 7, 8, 9]; let mut d = Replace::new(crate::algorithms::Capture::new()); diff_slices(Algorithm::Myers, &mut d, a, b).unwrap(); insta::assert_debug_snapshot!(d.into_inner().ops(), @r###" [ Equal { old_index: 0, new_index: 0, len: 3, }, Replace { old_index: 3, old_len: 2, new_index: 3, new_len: 3, }, ] "###); } similar-2.2.1/src/algorithms/snapshots/similar__algorithms__capture__capture_hook_grouping-2.snap000064400000000000000000000015771046102023000320320ustar 00000000000000--- source: src/algorithms/capture.rs expression: tags --- [ [ ( Equal, 7..10, 7..10, ), ( Replace, 10..11, 10..11, ), ( Equal, 11..13, 11..13, ), ( Replace, 13..14, 13..14, ), ( Equal, 14..16, 14..16, ), ( Replace, 16..17, 16..17, ), ( Equal, 17..20, 17..20, ), ], [ ( Equal, 31..34, 31..34, ), ( Replace, 34..35, 34..35, ), ( Equal, 35..38, 35..38, ), ], ] similar-2.2.1/src/algorithms/snapshots/similar__algorithms__capture__capture_hook_grouping.snap000064400000000000000000000023001046102023000316540ustar 00000000000000--- source: src/algorithms/capture.rs expression: ops --- [ [ Equal { old_index: 7, new_index: 7, len: 3, }, Replace { old_index: 10, old_len: 1, new_index: 10, new_len: 1, }, Equal { old_index: 11, new_index: 11, len: 2, }, Replace { old_index: 13, old_len: 1, new_index: 13, new_len: 1, }, Equal { old_index: 14, new_index: 14, len: 2, }, Replace { old_index: 16, old_len: 1, new_index: 16, new_len: 1, }, Equal { old_index: 17, new_index: 17, len: 3, }, ], [ Equal { old_index: 31, new_index: 31, len: 3, }, Replace { old_index: 34, old_len: 1, new_index: 34, new_len: 1, }, Equal { old_index: 35, new_index: 35, len: 3, }, ], ] similar-2.2.1/src/algorithms/snapshots/similar__algorithms__lcs__contiguous.snap000064400000000000000000000006731046102023000266070ustar 00000000000000--- source: src/algorithms/lcs.rs expression: d.into_inner().ops() --- [ Equal { old_index: 0, new_index: 0, len: 3, }, Replace { old_index: 3, old_len: 2, new_index: 3, new_len: 2, }, Equal { old_index: 5, new_index: 5, len: 2, }, Replace { old_index: 7, old_len: 1, new_index: 7, new_len: 1, }, ] similar-2.2.1/src/algorithms/snapshots/similar__algorithms__lcs__diff.snap000064400000000000000000000005221046102023000253110ustar 00000000000000--- source: src/algorithms/lcs.rs expression: d.into_inner().ops() --- [ Equal { old_index: 0, new_index: 0, len: 3, }, Replace { old_index: 3, old_len: 1, new_index: 3, new_len: 1, }, Equal { old_index: 4, new_index: 4, len: 1, }, ] similar-2.2.1/src/algorithms/snapshots/similar__algorithms__lcs__pat.snap000064400000000000000000000007231046102023000251700ustar 00000000000000--- source: src/algorithms/lcs.rs expression: d.ops() --- [ Equal { old_index: 0, new_index: 0, len: 2, }, Delete { old_index: 2, old_len: 1, new_index: 2, }, Equal { old_index: 3, new_index: 2, len: 1, }, Equal { old_index: 4, new_index: 3, len: 1, }, Insert { old_index: 5, new_index: 4, new_len: 2, }, ] similar-2.2.1/src/algorithms/snapshots/similar__algorithms__lcs__same.snap000064400000000000000000000002411046102023000253240ustar 00000000000000--- source: src/algorithms/lcs.rs assertion_line: 235 expression: d.ops() --- [ Equal { old_index: 0, new_index: 0, len: 8, }, ] similar-2.2.1/src/algorithms/snapshots/similar__algorithms__myers__contiguous.snap000064400000000000000000000006751046102023000271670ustar 00000000000000--- source: src/algorithms/myers.rs expression: d.into_inner().ops() --- [ Equal { old_index: 0, new_index: 0, len: 3, }, Replace { old_index: 3, old_len: 1, new_index: 3, new_len: 2, }, Equal { old_index: 4, new_index: 5, len: 2, }, Replace { old_index: 6, old_len: 2, new_index: 7, new_len: 1, }, ] similar-2.2.1/src/algorithms/snapshots/similar__algorithms__myers__deadline_reached.snap000064400000000000000000000005341046102023000302020ustar 00000000000000--- source: src/algorithms/myers.rs expression: d.into_inner().ops() --- [ Equal { old_index: 0, new_index: 0, len: 10, }, Replace { old_index: 10, old_len: 41, new_index: 10, new_len: 41, }, Equal { old_index: 51, new_index: 51, len: 49, }, ] similar-2.2.1/src/algorithms/snapshots/similar__algorithms__myers__diff.snap000064400000000000000000000005241046102023000256710ustar 00000000000000--- source: src/algorithms/myers.rs expression: d.into_inner().ops() --- [ Equal { old_index: 0, new_index: 0, len: 3, }, Replace { old_index: 3, old_len: 1, new_index: 3, new_len: 1, }, Equal { old_index: 4, new_index: 4, len: 1, }, ] similar-2.2.1/src/algorithms/snapshots/similar__algorithms__myers__pat.snap000064400000000000000000000007321046102023000255460ustar 00000000000000--- source: src/algorithms/myers.rs expression: d.ops() --- [ Equal { old_index: 0, new_index: 0, len: 2, }, Delete { old_index: 2, old_len: 1, new_index: 2, }, Equal { old_index: 3, new_index: 2, len: 2, }, Insert { old_index: 5, new_index: 4, new_len: 1, }, Insert { old_index: 5, new_index: 5, new_len: 1, }, ] similar-2.2.1/src/algorithms/snapshots/similar__algorithms__patience__patience.snap000064400000000000000000000013431046102023000272020ustar 00000000000000--- source: src/algorithms/patience.rs expression: d.into_inner().ops() --- [ Replace { old_index: 0, old_len: 1, new_index: 0, new_len: 1, }, Equal { old_index: 1, new_index: 1, len: 3, }, Replace { old_index: 4, old_len: 1, new_index: 4, new_len: 2, }, Equal { old_index: 5, new_index: 6, len: 2, }, Replace { old_index: 7, old_len: 2, new_index: 8, new_len: 1, }, Equal { old_index: 9, new_index: 9, len: 1, }, Replace { old_index: 10, old_len: 1, new_index: 10, new_len: 1, }, ] ././@LongLink00006440000000000000000000000146000000000000007774Lustar similar-2.2.1/src/algorithms/snapshots/similar__algorithms__patience__patience_out_of_bounds_bug.snapsimilar-2.2.1/src/algorithms/snapshots/similar__algorithms__patience__patience_out_of_bounds_bug.sna000064400000000000000000000003631046102023000326050ustar 00000000000000--- source: src/algorithms/patience.rs expression: d.into_inner().ops() --- [ Equal { old_index: 0, new_index: 0, len: 3, }, Delete { old_index: 3, old_len: 1, new_index: 3, }, ] similar-2.2.1/src/algorithms/utils.rs000064400000000000000000000241601046102023000157370ustar 00000000000000use std::collections::hash_map::Entry; use std::collections::HashMap; use std::fmt::Debug; use std::hash::{Hash, Hasher}; use std::ops::{Add, Index, Range}; /// Utility function to check if a range is empty that works on older rust versions #[inline(always)] #[allow(clippy::neg_cmp_op_on_partial_ord)] pub fn is_empty_range>(range: &Range) -> bool { !(range.start < range.end) } /// Represents an item in the vector returned by [`unique`]. /// /// It compares like the underlying item does it was created from but /// carries the index it was originally created from. pub struct UniqueItem<'a, Idx: ?Sized> { lookup: &'a Idx, index: usize, } impl<'a, Idx: ?Sized> UniqueItem<'a, Idx> where Idx: Index, { /// Returns the value. #[inline(always)] pub fn value(&self) -> &Idx::Output { &self.lookup[self.index] } /// Returns the original index. #[inline(always)] pub fn original_index(&self) -> usize { self.index } } impl<'a, Idx: Index + 'a> Debug for UniqueItem<'a, Idx> where Idx::Output: Debug, { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { f.debug_struct("UniqueItem") .field("value", &self.value()) .field("original_index", &self.original_index()) .finish() } } impl<'a, 'b, A, B> PartialEq> for UniqueItem<'b, B> where A: Index + 'b + ?Sized, B: Index + 'b + ?Sized, B::Output: PartialEq, { #[inline(always)] fn eq(&self, other: &UniqueItem<'a, A>) -> bool { self.value() == other.value() } } /// Returns only unique items in the sequence as vector. /// /// Each item is wrapped in a [`UniqueItem`] so that both the value and the /// index can be extracted. pub fn unique(lookup: &Idx, range: Range) -> Vec> where Idx: Index + ?Sized, Idx::Output: Hash + Eq, { let mut by_item = HashMap::new(); for index in range { match by_item.entry(&lookup[index]) { Entry::Vacant(entry) => { entry.insert(Some(index)); } Entry::Occupied(mut entry) => { let entry = entry.get_mut(); if entry.is_some() { *entry = None } } } } let mut rv = by_item .into_iter() .filter_map(|(_, x)| x) .map(|index| UniqueItem { lookup, index }) .collect::>(); rv.sort_by_key(|a| a.original_index()); rv } /// Given two lookups and ranges calculates the length of the common prefix. pub fn common_prefix_len( old: &Old, old_range: Range, new: &New, new_range: Range, ) -> usize where Old: Index + ?Sized, New: Index + ?Sized, New::Output: PartialEq, { if is_empty_range(&old_range) || is_empty_range(&new_range) { return 0; } new_range .zip(old_range) .take_while( #[inline(always)] |x| new[x.0] == old[x.1], ) .count() } /// Given two lookups and ranges calculates the length of common suffix. pub fn common_suffix_len( old: &Old, old_range: Range, new: &New, new_range: Range, ) -> usize where Old: Index + ?Sized, New: Index + ?Sized, New::Output: PartialEq, { if is_empty_range(&old_range) || is_empty_range(&new_range) { return 0; } new_range .rev() .zip(old_range.rev()) .take_while( #[inline(always)] |x| new[x.0] == old[x.1], ) .count() } struct OffsetLookup { offset: usize, vec: Vec, } impl Index for OffsetLookup { type Output = Int; #[inline(always)] fn index(&self, index: usize) -> &Self::Output { &self.vec[index - self.offset] } } /// A utility struct to convert distinct items to unique integers. /// /// This can be helpful on larger inputs to speed up the comparisons /// performed by doing a first pass where the data set gets reduced /// to (small) integers. /// /// The idea is that instead of passing two sequences to a diffling algorithm /// you first pass it via [`IdentifyDistinct`]: /// /// ```rust /// use similar::capture_diff; /// use similar::algorithms::{Algorithm, IdentifyDistinct}; /// /// let old = &["foo", "bar", "baz"][..]; /// let new = &["foo", "blah", "baz"][..]; /// let h = IdentifyDistinct::::new(old, 0..old.len(), new, 0..new.len()); /// let ops = capture_diff( /// Algorithm::Myers, /// h.old_lookup(), /// h.old_range(), /// h.new_lookup(), /// h.new_range(), /// ); /// ``` /// /// The indexes are the same as with the passed source ranges. pub struct IdentifyDistinct { old: OffsetLookup, new: OffsetLookup, } impl IdentifyDistinct where Int: Add + From + Default + Copy, { /// Creates an int hasher for two sequences. pub fn new( old: &Old, old_range: Range, new: &New, new_range: Range, ) -> Self where Old: Index + ?Sized, Old::Output: Eq + Hash, New: Index + ?Sized, New::Output: Eq + Hash + PartialEq, { enum Key<'old, 'new, Old: ?Sized, New: ?Sized> { Old(&'old Old), New(&'new New), } impl<'old, 'new, Old, New> Hash for Key<'old, 'new, Old, New> where Old: Hash + ?Sized, New: Hash + ?Sized, { fn hash(&self, state: &mut H) { match *self { Key::Old(val) => val.hash(state), Key::New(val) => val.hash(state), } } } impl<'old, 'new, Old, New> PartialEq for Key<'old, 'new, Old, New> where Old: Eq + ?Sized, New: Eq + PartialEq + ?Sized, { #[inline(always)] fn eq(&self, other: &Self) -> bool { match (self, other) { (Key::Old(a), Key::Old(b)) => a == b, (Key::New(a), Key::New(b)) => a == b, (Key::Old(a), Key::New(b)) | (Key::New(b), Key::Old(a)) => b == a, } } } impl<'old, 'new, Old, New> Eq for Key<'old, 'new, Old, New> where Old: Eq + ?Sized, New: Eq + PartialEq + ?Sized, { } let mut map = HashMap::new(); let mut old_seq = Vec::new(); let mut new_seq = Vec::new(); let mut next_id = Int::default(); let step = Int::from(1); let old_start = old_range.start; let new_start = new_range.start; for idx in old_range { let item = Key::Old(&old[idx]); let id = match map.entry(item) { Entry::Occupied(o) => *o.get(), Entry::Vacant(v) => { let id = next_id; next_id = next_id + step; *v.insert(id) } }; old_seq.push(id); } for idx in new_range { let item = Key::New(&new[idx]); let id = match map.entry(item) { Entry::Occupied(o) => *o.get(), Entry::Vacant(v) => { let id = next_id; next_id = next_id + step; *v.insert(id) } }; new_seq.push(id); } IdentifyDistinct { old: OffsetLookup { offset: old_start, vec: old_seq, }, new: OffsetLookup { offset: new_start, vec: new_seq, }, } } /// Returns a lookup for the old side. pub fn old_lookup(&self) -> &impl Index { &self.old } /// Returns a lookup for the new side. pub fn new_lookup(&self) -> &impl Index { &self.new } /// Convenience method to get back the old range. pub fn old_range(&self) -> Range { self.old.offset..self.old.offset + self.old.vec.len() } /// Convenience method to get back the new range. pub fn new_range(&self) -> Range { self.new.offset..self.new.offset + self.new.vec.len() } } #[test] fn test_unique() { let u = unique(&vec!['a', 'b', 'c', 'd', 'd', 'b'], 0..6) .into_iter() .map(|x| (*x.value(), x.original_index())) .collect::>(); assert_eq!(u, vec![('a', 0), ('c', 2)]); } #[test] fn test_int_hasher() { let ih = IdentifyDistinct::::new( &["", "foo", "bar", "baz"][..], 1..4, &["", "foo", "blah", "baz"][..], 1..4, ); assert_eq!(ih.old_lookup()[1], 0); assert_eq!(ih.old_lookup()[2], 1); assert_eq!(ih.old_lookup()[3], 2); assert_eq!(ih.new_lookup()[1], 0); assert_eq!(ih.new_lookup()[2], 3); assert_eq!(ih.new_lookup()[3], 2); assert_eq!(ih.old_range(), 1..4); assert_eq!(ih.new_range(), 1..4); } #[test] fn test_common_prefix_len() { assert_eq!( common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0), 0 ); assert_eq!( common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10), 7 ); assert_eq!( common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9), 0 ); assert_eq!( common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10), 4 ); } #[test] fn test_common_suffix_len() { assert_eq!( common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0), 0 ); assert_eq!( common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8), 4 ); assert_eq!( common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4), 0 ); assert_eq!( common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5), 2 ); } similar-2.2.1/src/common.rs000064400000000000000000000121701046102023000137140ustar 00000000000000use std::hash::Hash; use std::ops::{Index, Range}; use std::time::Instant; use crate::algorithms::{diff_deadline, Capture, Compact, Replace}; use crate::{Algorithm, DiffOp}; /// Creates a diff between old and new with the given algorithm capturing the ops. /// /// This is like [`diff`](crate::algorithms::diff) but instead of using an /// arbitrary hook this will always use [`Compact`] + [`Replace`] + [`Capture`] /// and return the captured [`DiffOp`]s. pub fn capture_diff( alg: Algorithm, old: &Old, old_range: Range, new: &New, new_range: Range, ) -> Vec where Old: Index + ?Sized, New: Index + ?Sized, Old::Output: Hash + Eq + Ord, New::Output: PartialEq + Hash + Eq + Ord, { capture_diff_deadline(alg, old, old_range, new, new_range, None) } /// Creates a diff between old and new with the given algorithm capturing the ops. /// /// Works like [`capture_diff`] but with an optional deadline. pub fn capture_diff_deadline( alg: Algorithm, old: &Old, old_range: Range, new: &New, new_range: Range, deadline: Option, ) -> Vec where Old: Index + ?Sized, New: Index + ?Sized, Old::Output: Hash + Eq + Ord, New::Output: PartialEq + Hash + Eq + Ord, { let mut d = Compact::new(Replace::new(Capture::new()), old, new); diff_deadline(alg, &mut d, old, old_range, new, new_range, deadline).unwrap(); d.into_inner().into_inner().into_ops() } /// Creates a diff between old and new with the given algorithm capturing the ops. pub fn capture_diff_slices(alg: Algorithm, old: &[T], new: &[T]) -> Vec where T: Eq + Hash + Ord, { capture_diff_slices_deadline(alg, old, new, None) } /// Creates a diff between old and new with the given algorithm capturing the ops. /// /// Works like [`capture_diff_slices`] but with an optional deadline. pub fn capture_diff_slices_deadline( alg: Algorithm, old: &[T], new: &[T], deadline: Option, ) -> Vec where T: Eq + Hash + Ord, { capture_diff_deadline(alg, old, 0..old.len(), new, 0..new.len(), deadline) } /// Return a measure of similarity in the range `0..=1`. /// /// A ratio of `1.0` means the two sequences are a complete match, a /// ratio of `0.0` would indicate completely distinct sequences. The input /// is the sequence of diff operations and the length of the old and new /// sequence. pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 { let matches = ops .iter() .map(|op| { if let DiffOp::Equal { len, .. } = *op { len } else { 0 } }) .sum::(); let len = old_len + new_len; if len == 0 { 1.0 } else { 2.0 * matches as f32 / len as f32 } } /// Isolate change clusters by eliminating ranges with no changes. /// /// This will leave holes behind in long periods of equal ranges so that /// you can build things like unified diffs. pub fn group_diff_ops(mut ops: Vec, n: usize) -> Vec> { if ops.is_empty() { return vec![]; } let mut pending_group = Vec::new(); let mut rv = Vec::new(); if let Some(DiffOp::Equal { old_index, new_index, len, }) = ops.first_mut() { let offset = (*len).saturating_sub(n); *old_index += offset; *new_index += offset; *len -= offset; } if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() { *len -= (*len).saturating_sub(n); } for op in ops.into_iter() { if let DiffOp::Equal { old_index, new_index, len, } = op { // End the current group and start a new one whenever // there is a large range with no changes. if len > n * 2 { pending_group.push(DiffOp::Equal { old_index, new_index, len: n, }); rv.push(pending_group); let offset = len.saturating_sub(n); pending_group = vec![DiffOp::Equal { old_index: old_index + offset, new_index: new_index + offset, len: len - offset, }]; continue; } } pending_group.push(op); } match &pending_group[..] { &[] | &[DiffOp::Equal { .. }] => {} _ => rv.push(pending_group), } rv } #[test] fn test_non_string_iter_change() { use crate::ChangeTag; let old = vec![1, 2, 3]; let new = vec![1, 2, 4]; let ops = capture_diff_slices(Algorithm::Myers, &old, &new); let changes: Vec<_> = ops .iter() .flat_map(|x| x.iter_changes(&old, &new)) .map(|x| (x.tag(), x.value())) .collect(); assert_eq!( changes, vec![ (ChangeTag::Equal, 1), (ChangeTag::Equal, 2), (ChangeTag::Delete, 3), (ChangeTag::Insert, 4), ] ); } similar-2.2.1/src/iter.rs000064400000000000000000000140611046102023000133700ustar 00000000000000//! The various iterators this crate provides. //! //! These iterators are not a very stable interface and you really should //! avoid considering them to be concrete types. A lot of the iterators in //! this crate use `impl Iterator` for this reason but restrictions in the //! language don't allow this to be used in all places on the versions of //! rust this crate wants to compile for. use std::marker::PhantomData; use std::ops::{Index, Range}; use crate::{Change, ChangeTag, DiffOp, DiffTag}; /// Iterator for [`DiffOp::iter_changes`]. pub struct ChangesIter<'lookup, Old: ?Sized, New: ?Sized, T> { old: &'lookup Old, new: &'lookup New, old_range: Range, new_range: Range, old_index: usize, new_index: usize, old_i: usize, new_i: usize, tag: DiffTag, _marker: PhantomData, } impl<'lookup, Old, New, T> ChangesIter<'lookup, Old, New, T> where Old: Index + ?Sized, New: Index + ?Sized, { pub(crate) fn new(old: &'lookup Old, new: &'lookup New, op: DiffOp) -> Self { let (tag, old_range, new_range) = op.as_tag_tuple(); let old_index = old_range.start; let new_index = new_range.start; let old_i = old_range.start; let new_i = new_range.start; ChangesIter { old, new, old_range, new_range, old_index, new_index, old_i, new_i, tag, _marker: PhantomData, } } } impl<'lookup, Old, New, T> Iterator for ChangesIter<'lookup, Old, New, T> where Old: Index + ?Sized, New: Index + ?Sized, T: Clone, { type Item = Change; fn next(&mut self) -> Option { match self.tag { DiffTag::Equal => { if self.old_i < self.old_range.end { let value = self.old[self.old_i].clone(); self.old_i += 1; self.old_index += 1; self.new_index += 1; Some(Change { tag: ChangeTag::Equal, old_index: Some(self.old_index - 1), new_index: Some(self.new_index - 1), value, }) } else { None } } DiffTag::Delete => { if self.old_i < self.old_range.end { let value = self.old[self.old_i].clone(); self.old_i += 1; self.old_index += 1; Some(Change { tag: ChangeTag::Delete, old_index: Some(self.old_index - 1), new_index: None, value, }) } else { None } } DiffTag::Insert => { if self.new_i < self.new_range.end { let value = self.new[self.new_i].clone(); self.new_i += 1; self.new_index += 1; Some(Change { tag: ChangeTag::Insert, old_index: None, new_index: Some(self.new_index - 1), value, }) } else { None } } DiffTag::Replace => { if self.old_i < self.old_range.end { let value = self.old[self.old_i].clone(); self.old_i += 1; self.old_index += 1; Some(Change { tag: ChangeTag::Delete, old_index: Some(self.old_index - 1), new_index: None, value, }) } else if self.new_i < self.new_range.end { let value = self.new[self.new_i].clone(); self.new_i += 1; self.new_index += 1; Some(Change { tag: ChangeTag::Insert, old_index: None, new_index: Some(self.new_index - 1), value, }) } else { None } } } } } #[cfg(feature = "text")] mod text { use super::*; /// Iterator for [`TextDiff::iter_all_changes`](crate::TextDiff::iter_all_changes). pub struct AllChangesIter<'slf, 'data, T: ?Sized> { old: &'slf [&'data T], new: &'slf [&'data T], ops: &'slf [DiffOp], current_iter: Option>, } impl<'slf, 'data, T> AllChangesIter<'slf, 'data, T> where T: 'data + ?Sized + PartialEq, { pub(crate) fn new( old: &'slf [&'data T], new: &'slf [&'data T], ops: &'slf [DiffOp], ) -> Self { AllChangesIter { old, new, ops, current_iter: None, } } } impl<'slf, 'data, T> Iterator for AllChangesIter<'slf, 'data, T> where T: PartialEq + 'data + ?Sized, 'data: 'slf, { type Item = Change<&'data T>; fn next(&mut self) -> Option { loop { if let Some(ref mut iter) = self.current_iter { if let Some(rv) = iter.next() { return Some(rv); } self.current_iter.take(); } if let Some((&first, rest)) = self.ops.split_first() { self.current_iter = Some(ChangesIter::new(self.old, self.new, first)); self.ops = rest; } else { return None; } } } } } #[cfg(feature = "text")] pub use self::text::*; similar-2.2.1/src/lib.rs000064400000000000000000000162351046102023000132000ustar 00000000000000//! This crate implements diffing utilities. It attempts to provide an abstraction //! interface over different types of diffing algorithms. The design of the //! library is inspired by pijul's diff library by Pierre-Étienne Meunier and //! also inherits the patience diff algorithm from there. //! //! The API of the crate is split into high and low level functionality. Most //! of what you probably want to use is available top level. Additionally the //! following sub modules exist: //! //! * [`algorithms`]: This implements the different types of diffing algorithms. //! It provides both low level access to the algorithms with the minimal //! trait bounds necessary, as well as a generic interface. //! * [`udiff`]: Unified diff functionality. //! * [`utils`]: utilities for common diff related operations. This module //! provides additional diffing functions for working with text diffs. //! //! # Sequence Diffing //! //! If you want to diff sequences generally indexable things you can use the //! [`capture_diff`] and [`capture_diff_slices`] functions. They will directly //! diff an indexable object or slice and return a vector of [`DiffOp`] objects. //! //! ```rust //! use similar::{Algorithm, capture_diff_slices}; //! //! let a = vec![1, 2, 3, 4, 5]; //! let b = vec![1, 2, 3, 4, 7]; //! let ops = capture_diff_slices(Algorithm::Myers, &a, &b); //! ``` //! //! # Text Diffing //! //! Similar provides helpful utilities for text (and more specifically line) diff //! operations. The main type you want to work with is [`TextDiff`] which //! uses the underlying diff algorithms to expose a convenient API to work with //! texts: //! //! ```rust //! # #[cfg(feature = "text")] { //! use similar::{ChangeTag, TextDiff}; //! //! let diff = TextDiff::from_lines( //! "Hello World\nThis is the second line.\nThis is the third.", //! "Hallo Welt\nThis is the second line.\nThis is life.\nMoar and more", //! ); //! //! for change in diff.iter_all_changes() { //! let sign = match change.tag() { //! ChangeTag::Delete => "-", //! ChangeTag::Insert => "+", //! ChangeTag::Equal => " ", //! }; //! print!("{}{}", sign, change); //! } //! # } //! ``` //! //! ## Trailing Newlines //! //! When working with line diffs (and unified diffs in general) there are two //! "philosophies" to look at lines. One is to diff lines without their newline //! character, the other is to diff with the newline character. Typically the //! latter is done because text files do not _have_ to end in a newline character. //! As a result there is a difference between `foo\n` and `foo` as far as diffs //! are concerned. //! //! In similar this is handled on the [`Change`] or [`InlineChange`] level. If //! a diff was created via [`TextDiff::from_lines`] the text diffing system is //! instructed to check if there are missing newlines encountered //! ([`TextDiff::newline_terminated`] returns true). //! //! In any case the [`Change`] object has a convenience method called //! [`Change::missing_newline`] which returns `true` if the change is missing //! a trailing newline. Armed with that information the caller knows to handle //! this by either rendering a virtual newline at that position or to indicate //! it in different ways. For instance the unified diff code will render the //! special `\ No newline at end of file` marker. //! //! ## Bytes vs Unicode //! //! Similar module concerns itself with a loser definition of "text" than you would //! normally see in Rust. While by default it can only operate on [`str`] types //! by enabling the `bytes` feature it gains support for byte slices with some //! caveats. //! //! A lot of text diff functionality assumes that what is being diffed constitutes //! text, but in the real world it can often be challenging to ensure that this is //! all valid utf-8. Because of this the crate is built so that most functionality //! also still works with bytes for as long as they are roughly ASCII compatible. //! //! This means you will be successful in creating a unified diff from latin1 //! encoded bytes but if you try to do the same with EBCDIC encoded bytes you //! will only get garbage. //! //! # Ops vs Changes //! //! Because very commonly two compared sequences will largely match this module //! splits it's functionality into two layers: //! //! Changes are encoded as [diff operations](crate::DiffOp). These are //! ranges of the differences by index in the source sequence. Because this //! can be cumbersome to work with a separate method [`DiffOp::iter_changes`] //! (and [`TextDiff::iter_changes`] when working with text diffs) is provided //! which expands all the changes on an item by item level encoded in an operation. //! //! As the [`TextDiff::grouped_ops`] method can isolate clusters of changes //! this even works for very long files if paired with this method. //! //! # Deadlines and Performance //! //! For large and very distinct inputs the algorithms as implemented can take //! a very, very long time to execute. Too long to make sense in practice. //! To work around this issue all diffing algorithms also provide a version //! that accepts a deadline which is the point in time as defined by an //! [`Instant`](std::time::Instant) after which the algorithm should give up. //! What giving up means depends on the algorithm. For instance due to the //! recursive, divide and conquer nature of Myer's diff you will still get a //! pretty decent diff in many cases when a deadline is reached. Whereas on the //! other hand the LCS diff is unlikely to give any decent results in such a //! situation. //! //! The [`TextDiff`] type also lets you configure a deadline and/or timeout //! when performing a text diff. //! //! # Feature Flags //! //! The crate by default does not have any dependencies however for some use //! cases it's useful to pull in extra functionality. Likewise you can turn //! off some functionality. //! //! * `text`: this feature is enabled by default and enables the text based //! diffing types such as [`TextDiff`]. //! If the crate is used without default features it's removed. //! * `unicode`: when this feature is enabled the text diffing functionality //! gains the ability to diff on a grapheme instead of character level. This //! is particularly useful when working with text containing emojis. This //! pulls in some relatively complex dependencies for working with the unicode //! database. //! * `bytes`: this feature adds support for working with byte slices in text //! APIs in addition to unicode strings. This pulls in the //! [`bstr`] dependency. //! * `inline`: this feature gives access to additional functionality of the //! text diffing to provide inline information about which values changed //! in a line diff. This currently also enables the `unicode` feature. //! * `serde`: this feature enables serialization to some types in this //! crate. For enums without payload deserialization is then also supported. #![warn(missing_docs)] pub mod algorithms; pub mod iter; #[cfg(feature = "text")] pub mod udiff; #[cfg(feature = "text")] pub mod utils; mod common; #[cfg(feature = "text")] mod text; mod types; pub use self::common::*; #[cfg(feature = "text")] pub use self::text::*; pub use self::types::*; similar-2.2.1/src/snapshots/similar__udiff__unified_diff.snap000064400000000000000000000003221046102023000225650ustar 00000000000000--- source: src/udiff.rs expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()" --- --- a.txt +++ b.txt @@ -16,7 +16,7 @@ p q r -s +S t u v @@ -38,7 +38,7 @@ L M N -O +o P Q R similar-2.2.1/src/snapshots/similar__udiff__unified_diff_newline_hint-2.snap000064400000000000000000000003451046102023000254740ustar 00000000000000--- source: src/udiff.rs expression: "&diff.unified_diff().missing_newline_hint(false).header(\"a.txt\",\n \"b.txt\").to_string()" --- --- a.txt +++ b.txt @@ -1 +1 @@ -a +b similar-2.2.1/src/snapshots/similar__udiff__unified_diff_newline_hint.snap000064400000000000000000000002541046102023000253340ustar 00000000000000--- source: src/udiff.rs expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()" --- --- a.txt +++ b.txt @@ -1 +1 @@ -a +b \ No newline at end of file similar-2.2.1/src/text/abstraction.rs000064400000000000000000000277551046102023000157400ustar 00000000000000use std::borrow::Cow; use std::hash::Hash; use std::ops::Range; /// Reference to a [`DiffableStr`]. /// /// This type exists because while the library only really provides ways to /// work with `&str` and `&[u8]` there are types that deref into those string /// slices such as `String` and `Vec`. /// /// This trait is used in the library whenever it's nice to be able to pass /// strings of different types in. /// /// Requires the `text` feature. pub trait DiffableStrRef { /// The type of the resolved [`DiffableStr`]. type Output: DiffableStr + ?Sized; /// Resolves the reference. fn as_diffable_str(&self) -> &Self::Output; } impl DiffableStrRef for T { type Output = T; fn as_diffable_str(&self) -> &T { self } } impl DiffableStrRef for String { type Output = str; fn as_diffable_str(&self) -> &str { self.as_str() } } impl<'a, T: DiffableStr + ?Sized> DiffableStrRef for Cow<'a, T> { type Output = T; fn as_diffable_str(&self) -> &T { self } } /// All supported diffable strings. /// /// The text module can work with different types of strings depending /// on how the crate is compiled. Out of the box `&str` is always supported /// but with the `bytes` feature one can also work with `[u8]` slices for /// as long as they are ASCII compatible. /// /// Requires the `text` feature. pub trait DiffableStr: Hash + PartialEq + PartialOrd + Ord + Eq + ToOwned { /// Splits the value into newlines with newlines attached. fn tokenize_lines(&self) -> Vec<&Self>; /// Splits the value into newlines with newlines separated. fn tokenize_lines_and_newlines(&self) -> Vec<&Self>; /// Tokenizes into words. fn tokenize_words(&self) -> Vec<&Self>; /// Tokenizes the input into characters. fn tokenize_chars(&self) -> Vec<&Self>; /// Tokenizes into unicode words. #[cfg(feature = "unicode")] fn tokenize_unicode_words(&self) -> Vec<&Self>; /// Tokenizes into unicode graphemes. #[cfg(feature = "unicode")] fn tokenize_graphemes(&self) -> Vec<&Self>; /// Decodes the string (potentially) lossy. fn as_str(&self) -> Option<&str>; /// Decodes the string (potentially) lossy. fn to_string_lossy(&self) -> Cow<'_, str>; /// Checks if the string ends in a newline. fn ends_with_newline(&self) -> bool; /// The length of the string. fn len(&self) -> usize; /// Slices the string. fn slice(&self, rng: Range) -> &Self; /// Returns the string as slice of raw bytes. fn as_bytes(&self) -> &[u8]; /// Checks if the string is empty. fn is_empty(&self) -> bool { self.len() == 0 } } impl DiffableStr for str { fn tokenize_lines(&self) -> Vec<&Self> { let mut iter = self.char_indices().peekable(); let mut last_pos = 0; let mut lines = vec![]; while let Some((idx, c)) = iter.next() { if c == '\r' { if iter.peek().map_or(false, |x| x.1 == '\n') { lines.push(&self[last_pos..=idx + 1]); iter.next(); last_pos = idx + 2; } else { lines.push(&self[last_pos..=idx]); last_pos = idx + 1; } } else if c == '\n' { lines.push(&self[last_pos..=idx]); last_pos = idx + 1; } } if last_pos < self.len() { lines.push(&self[last_pos..]); } lines } fn tokenize_lines_and_newlines(&self) -> Vec<&Self> { let mut rv = vec![]; let mut iter = self.char_indices().peekable(); while let Some((idx, c)) = iter.next() { let is_newline = c == '\r' || c == '\n'; let start = idx; let mut end = idx + c.len_utf8(); while let Some(&(_, next_char)) = iter.peek() { if (next_char == '\r' || next_char == '\n') != is_newline { break; } iter.next(); end += next_char.len_utf8(); } rv.push(&self[start..end]); } rv } fn tokenize_words(&self) -> Vec<&Self> { let mut iter = self.char_indices().peekable(); let mut rv = vec![]; while let Some((idx, c)) = iter.next() { let is_whitespace = c.is_whitespace(); let start = idx; let mut end = idx + c.len_utf8(); while let Some(&(_, next_char)) = iter.peek() { if next_char.is_whitespace() != is_whitespace { break; } iter.next(); end += next_char.len_utf8(); } rv.push(&self[start..end]); } rv } fn tokenize_chars(&self) -> Vec<&Self> { self.char_indices() .map(move |(i, c)| &self[i..i + c.len_utf8()]) .collect() } #[cfg(feature = "unicode")] fn tokenize_unicode_words(&self) -> Vec<&Self> { unicode_segmentation::UnicodeSegmentation::split_word_bounds(self).collect() } #[cfg(feature = "unicode")] fn tokenize_graphemes(&self) -> Vec<&Self> { unicode_segmentation::UnicodeSegmentation::graphemes(self, true).collect() } fn as_str(&self) -> Option<&str> { Some(self) } fn to_string_lossy(&self) -> Cow<'_, str> { Cow::Borrowed(self) } fn ends_with_newline(&self) -> bool { self.ends_with(&['\r', '\n'][..]) } fn len(&self) -> usize { str::len(self) } fn slice(&self, rng: Range) -> &Self { &self[rng] } fn as_bytes(&self) -> &[u8] { str::as_bytes(self) } } #[cfg(feature = "bytes")] mod bytes_support { use super::*; use bstr::ByteSlice; impl DiffableStrRef for Vec { type Output = [u8]; fn as_diffable_str(&self) -> &[u8] { self.as_slice() } } /// Allows viewing ASCII compatible byte slices as strings. /// /// Requires the `bytes` feature. impl DiffableStr for [u8] { fn tokenize_lines(&self) -> Vec<&Self> { let mut iter = self.char_indices().peekable(); let mut last_pos = 0; let mut lines = vec![]; while let Some((_, end, c)) = iter.next() { if c == '\r' { if iter.peek().map_or(false, |x| x.2 == '\n') { lines.push(&self[last_pos..end + 1]); iter.next(); last_pos = end + 1; } else { lines.push(&self[last_pos..end]); last_pos = end; } } else if c == '\n' { lines.push(&self[last_pos..end]); last_pos = end; } } if last_pos < self.len() { lines.push(&self[last_pos..]); } lines } fn tokenize_lines_and_newlines(&self) -> Vec<&Self> { let mut rv = vec![]; let mut iter = self.char_indices().peekable(); while let Some((start, mut end, c)) = iter.next() { let is_newline = c == '\r' || c == '\n'; while let Some(&(_, new_end, next_char)) = iter.peek() { if (next_char == '\r' || next_char == '\n') != is_newline { break; } iter.next(); end = new_end; } rv.push(&self[start..end]); } rv } fn tokenize_words(&self) -> Vec<&Self> { let mut iter = self.char_indices().peekable(); let mut rv = vec![]; while let Some((start, mut end, c)) = iter.next() { let is_whitespace = c.is_whitespace(); while let Some(&(_, new_end, next_char)) = iter.peek() { if next_char.is_whitespace() != is_whitespace { break; } iter.next(); end = new_end; } rv.push(&self[start..end]); } rv } #[cfg(feature = "unicode")] fn tokenize_unicode_words(&self) -> Vec<&Self> { self.words_with_breaks().map(|x| x.as_bytes()).collect() } #[cfg(feature = "unicode")] fn tokenize_graphemes(&self) -> Vec<&Self> { self.graphemes().map(|x| x.as_bytes()).collect() } fn tokenize_chars(&self) -> Vec<&Self> { self.char_indices() .map(move |(start, end, _)| &self[start..end]) .collect() } fn as_str(&self) -> Option<&str> { std::str::from_utf8(self).ok() } fn to_string_lossy(&self) -> Cow<'_, str> { String::from_utf8_lossy(self) } fn ends_with_newline(&self) -> bool { if let Some(b'\r') | Some(b'\n') = self.last_byte() { true } else { false } } fn len(&self) -> usize { <[u8]>::len(self) } fn slice(&self, rng: Range) -> &Self { &self[rng] } fn as_bytes(&self) -> &[u8] { self } } } #[test] fn test_split_lines() { assert_eq!( DiffableStr::tokenize_lines("first\nsecond\rthird\r\nfourth\nlast"), vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"] ); assert_eq!(DiffableStr::tokenize_lines("\n\n"), vec!["\n", "\n"]); assert_eq!(DiffableStr::tokenize_lines("\n"), vec!["\n"]); assert!(DiffableStr::tokenize_lines("").is_empty()); } #[test] fn test_split_words() { assert_eq!( DiffableStr::tokenize_words("foo bar baz\n\n aha"), ["foo", " ", "bar", " ", "baz", "\n\n ", "aha"] ); } #[test] fn test_split_chars() { assert_eq!( DiffableStr::tokenize_chars("abcfö❄️"), vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"] ); } #[test] #[cfg(feature = "unicode")] fn test_split_graphemes() { assert_eq!( DiffableStr::tokenize_graphemes("abcfö❄️"), vec!["a", "b", "c", "f", "ö", "❄️"] ); } #[test] #[cfg(feature = "bytes")] fn test_split_lines_bytes() { assert_eq!( DiffableStr::tokenize_lines("first\nsecond\rthird\r\nfourth\nlast".as_bytes()), vec![ "first\n".as_bytes(), "second\r".as_bytes(), "third\r\n".as_bytes(), "fourth\n".as_bytes(), "last".as_bytes() ] ); assert_eq!( DiffableStr::tokenize_lines("\n\n".as_bytes()), vec!["\n".as_bytes(), "\n".as_bytes()] ); assert_eq!( DiffableStr::tokenize_lines("\n".as_bytes()), vec!["\n".as_bytes()] ); assert!(DiffableStr::tokenize_lines("".as_bytes()).is_empty()); } #[test] #[cfg(feature = "bytes")] fn test_split_words_bytes() { assert_eq!( DiffableStr::tokenize_words("foo bar baz\n\n aha".as_bytes()), [ &b"foo"[..], &b" "[..], &b"bar"[..], &b" "[..], &b"baz"[..], &b"\n\n "[..], &b"aha"[..] ] ); } #[test] #[cfg(feature = "bytes")] fn test_split_chars_bytes() { assert_eq!( DiffableStr::tokenize_chars("abcfö❄️".as_bytes()), vec![ &b"a"[..], &b"b"[..], &b"c"[..], &b"f"[..], "ö".as_bytes(), "❄".as_bytes(), "\u{fe0f}".as_bytes() ] ); } #[test] #[cfg(all(feature = "bytes", feature = "unicode"))] fn test_split_graphemes_bytes() { assert_eq!( DiffableStr::tokenize_graphemes("abcfö❄️".as_bytes()), vec![ &b"a"[..], &b"b"[..], &b"c"[..], &b"f"[..], "ö".as_bytes(), "❄️".as_bytes() ] ); } similar-2.2.1/src/text/inline.rs000064400000000000000000000251461046102023000146750ustar 00000000000000#![cfg(feature = "inline")] use std::borrow::Cow; use std::fmt; use crate::text::{DiffableStr, TextDiff}; use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag}; use crate::{capture_diff_deadline, get_diff_ratio}; use std::ops::Index; use std::time::{Duration, Instant}; use super::utils::upper_seq_ratio; struct MultiLookup<'bufs, 's, T: DiffableStr + ?Sized> { strings: &'bufs [&'s T], seqs: Vec<(&'s T, usize, usize)>, } impl<'bufs, 's, T: DiffableStr + ?Sized> MultiLookup<'bufs, 's, T> { fn new(strings: &'bufs [&'s T]) -> MultiLookup<'bufs, 's, T> { let mut seqs = Vec::new(); for (string_idx, string) in strings.iter().enumerate() { let mut offset = 0; let iter = { #[cfg(feature = "unicode")] { string.tokenize_unicode_words() } #[cfg(not(feature = "unicode"))] { string.tokenize_words() } }; for word in iter { seqs.push((word, string_idx, offset)); offset += word.len(); } } MultiLookup { strings, seqs } } pub fn len(&self) -> usize { self.seqs.len() } fn get_original_slices(&self, idx: usize, len: usize) -> Vec<(usize, &'s T)> { let mut last = None; let mut rv = Vec::new(); for offset in 0..len { let (s, str_idx, char_idx) = self.seqs[idx + offset]; last = match last { None => Some((str_idx, char_idx, s.len())), Some((last_str_idx, start_char_idx, last_len)) => { if last_str_idx == str_idx { Some((str_idx, start_char_idx, last_len + s.len())) } else { rv.push(( last_str_idx, self.strings[last_str_idx] .slice(start_char_idx..start_char_idx + last_len), )); Some((str_idx, char_idx, s.len())) } } }; } if let Some((str_idx, start_char_idx, len)) = last { rv.push(( str_idx, self.strings[str_idx].slice(start_char_idx..start_char_idx + len), )); } rv } } impl<'bufs, 's, T: DiffableStr + ?Sized> Index for MultiLookup<'bufs, 's, T> { type Output = T; fn index(&self, index: usize) -> &Self::Output { self.seqs[index].0 } } fn push_values<'s, T: DiffableStr + ?Sized>( v: &mut Vec>, idx: usize, emphasized: bool, s: &'s T, ) { v.resize_with(v.len().max(idx + 1), Vec::new); // newlines cause all kinds of wacky stuff if they end up highlighted. // because of this we want to unemphasize all newlines we encounter. if emphasized { for seg in s.tokenize_lines_and_newlines() { v[idx].push((!seg.ends_with_newline(), seg)); } } else { v[idx].push((false, s)); } } /// Represents the expanded textual change with inline highlights. /// /// This is like [`Change`] but with inline highlight info. #[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Serialize))] pub struct InlineChange<'s, T: DiffableStr + ?Sized> { tag: ChangeTag, old_index: Option, new_index: Option, values: Vec<(bool, &'s T)>, } impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> { /// Returns the change tag. pub fn tag(&self) -> ChangeTag { self.tag } /// Returns the old index if available. pub fn old_index(&self) -> Option { self.old_index } /// Returns the new index if available. pub fn new_index(&self) -> Option { self.new_index } /// Returns the changed values. /// /// Each item is a tuple in the form `(emphasized, value)` where `emphasized` /// is true if it should be highlighted as an inline diff. /// /// Depending on the type of the underlying [`DiffableStr`] this value is /// more or less useful. If you always want to have a utf-8 string it's /// better to use the [`InlineChange::iter_strings_lossy`] method. pub fn values(&self) -> &[(bool, &'s T)] { &self.values } /// Iterates over all (potentially lossy) utf-8 decoded values. /// /// Each item is a tuple in the form `(emphasized, value)` where `emphasized` /// is true if it should be highlighted as an inline diff. pub fn iter_strings_lossy(&self) -> impl Iterator)> { self.values() .iter() .map(|(emphasized, raw_value)| (*emphasized, raw_value.to_string_lossy())) } /// Returns `true` if this change does not end in a newline and must be /// followed up by one if line based diffs are used. pub fn missing_newline(&self) -> bool { !self.values.last().map_or(true, |x| x.1.ends_with_newline()) } } impl<'s, T: DiffableStr + ?Sized> From> for InlineChange<'s, T> { fn from(change: Change<&'s T>) -> InlineChange<'s, T> { InlineChange { tag: change.tag(), old_index: change.old_index(), new_index: change.new_index(), values: vec![(false, change.value())], } } } impl<'s, T: DiffableStr + ?Sized> fmt::Display for InlineChange<'s, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for (emphasized, value) in self.iter_strings_lossy() { let marker = match (emphasized, self.tag) { (false, _) | (true, ChangeTag::Equal) => "", (true, ChangeTag::Delete) => "-", (true, ChangeTag::Insert) => "+", }; write!(f, "{}{}{}", marker, value, marker)?; } if self.missing_newline() { writeln!(f)?; } Ok(()) } } const MIN_RATIO: f32 = 0.5; const TIMEOUT_MS: u64 = 500; pub(crate) fn iter_inline_changes<'x, 'diff, 'old, 'new, 'bufs, T>( diff: &'diff TextDiff<'old, 'new, 'bufs, T>, op: &DiffOp, ) -> impl Iterator> + 'diff where T: DiffableStr + ?Sized, 'x: 'diff, 'old: 'x, 'new: 'x, { let (tag, old_range, new_range) = op.as_tag_tuple(); if let DiffTag::Equal | DiffTag::Insert | DiffTag::Delete = tag { return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box>; } let mut old_index = old_range.start; let mut new_index = new_range.start; let old_slices = &diff.old_slices()[old_range]; let new_slices = &diff.new_slices()[new_range]; if upper_seq_ratio(old_slices, new_slices) < MIN_RATIO { return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box>; } let old_lookup = MultiLookup::new(old_slices); let new_lookup = MultiLookup::new(new_slices); let ops = capture_diff_deadline( Algorithm::Patience, &old_lookup, 0..old_lookup.len(), &new_lookup, 0..new_lookup.len(), Some(Instant::now() + Duration::from_millis(TIMEOUT_MS)), ); if get_diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < MIN_RATIO { return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box>; } let mut old_values = Vec::>::new(); let mut new_values = Vec::>::new(); for op in ops { match op { DiffOp::Equal { old_index, len, new_index, } => { for (idx, slice) in old_lookup.get_original_slices(old_index, len) { push_values(&mut old_values, idx, false, slice); } for (idx, slice) in new_lookup.get_original_slices(new_index, len) { push_values(&mut new_values, idx, false, slice); } } DiffOp::Delete { old_index, old_len, .. } => { for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) { push_values(&mut old_values, idx, true, slice); } } DiffOp::Insert { new_index, new_len, .. } => { for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) { push_values(&mut new_values, idx, true, slice); } } DiffOp::Replace { old_index, old_len, new_index, new_len, } => { for (idx, slice) in old_lookup.get_original_slices(old_index, old_len) { push_values(&mut old_values, idx, true, slice); } for (idx, slice) in new_lookup.get_original_slices(new_index, new_len) { push_values(&mut new_values, idx, true, slice); } } } } let mut rv = Vec::new(); for values in old_values { rv.push(InlineChange { tag: ChangeTag::Delete, old_index: Some(old_index), new_index: None, values, }); old_index += 1; } for values in new_values { rv.push(InlineChange { tag: ChangeTag::Insert, old_index: None, new_index: Some(new_index), values, }); new_index += 1; } Box::new(rv.into_iter()) as Box> } #[test] fn test_line_ops_inline() { let diff = TextDiff::from_lines( "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", ); assert_eq!(diff.newline_terminated(), true); let changes = diff .ops() .iter() .flat_map(|op| diff.iter_inline_changes(op)) .collect::>(); insta::assert_debug_snapshot!(&changes); } #[test] #[cfg(feature = "serde")] fn test_serde() { let diff = TextDiff::from_lines( "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", ); assert_eq!(diff.newline_terminated(), true); let changes = diff .ops() .iter() .flat_map(|op| diff.iter_inline_changes(op)) .collect::>(); let json = serde_json::to_string_pretty(&changes).unwrap(); insta::assert_snapshot!(&json); } similar-2.2.1/src/text/mod.rs000064400000000000000000000604571046102023000142020ustar 00000000000000//! Text diffing utilities. use std::borrow::Cow; use std::cmp::Reverse; use std::collections::BinaryHeap; use std::time::{Duration, Instant}; mod abstraction; #[cfg(feature = "inline")] mod inline; mod utils; pub use self::abstraction::{DiffableStr, DiffableStrRef}; #[cfg(feature = "inline")] pub use self::inline::InlineChange; use self::utils::{upper_seq_ratio, QuickSeqRatio}; use crate::algorithms::IdentifyDistinct; use crate::iter::{AllChangesIter, ChangesIter}; use crate::udiff::UnifiedDiff; use crate::{capture_diff_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp}; #[derive(Debug, Clone, Copy)] enum Deadline { Absolute(Instant), Relative(Duration), } impl Deadline { fn into_instant(self) -> Instant { match self { Deadline::Absolute(instant) => instant, Deadline::Relative(duration) => Instant::now() + duration, } } } /// A builder type config for more complex uses of [`TextDiff`]. /// /// Requires the `text` feature. #[derive(Clone, Debug, Default)] pub struct TextDiffConfig { algorithm: Algorithm, newline_terminated: Option, deadline: Option, } impl TextDiffConfig { /// Changes the algorithm. /// /// The default algorithm is [`Algorithm::Myers`]. pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self { self.algorithm = alg; self } /// Sets a deadline for the diff operation. /// /// By default a diff will take as long as it takes. For certain diff /// algorithms like Myer's and Patience a maximum running time can be /// defined after which the algorithm gives up and approximates. pub fn deadline(&mut self, deadline: Instant) -> &mut Self { self.deadline = Some(Deadline::Absolute(deadline)); self } /// Sets a timeout for thediff operation. /// /// This is like [`deadline`](Self::deadline) but accepts a duration. pub fn timeout(&mut self, timeout: Duration) -> &mut Self { self.deadline = Some(Deadline::Relative(timeout)); self } /// Changes the newline termination flag. /// /// The default is automatic based on input. This flag controls the /// behavior of [`TextDiff::iter_changes`] and unified diff generation /// with regards to newlines. When the flag is set to `false` (which /// is the default) then newlines are added. Otherwise the newlines /// from the source sequences are reused. pub fn newline_terminated(&mut self, yes: bool) -> &mut Self { self.newline_terminated = Some(yes); self } /// Creates a diff of lines. /// /// This splits the text `old` and `new` into lines preserving newlines /// in the input. Line diffs are very common and because of that enjoy /// special handling in similar. When a line diff is created with this /// method the `newline_terminated` flag is flipped to `true` and will /// influence the behavior of unified diff generation. /// /// ```rust /// use similar::{TextDiff, ChangeTag}; /// /// let diff = TextDiff::configure().diff_lines("a\nb\nc", "a\nb\nC"); /// let changes: Vec<_> = diff /// .iter_all_changes() /// .map(|x| (x.tag(), x.value())) /// .collect(); /// /// assert_eq!(changes, vec![ /// (ChangeTag::Equal, "a\n"), /// (ChangeTag::Equal, "b\n"), /// (ChangeTag::Delete, "c"), /// (ChangeTag::Insert, "C"), /// ]); /// ``` pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( &self, old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output> { self.diff( Cow::Owned(old.as_diffable_str().tokenize_lines()), Cow::Owned(new.as_diffable_str().tokenize_lines()), true, ) } /// Creates a diff of words. /// /// This splits the text into words and whitespace. /// /// Note on word diffs: because the text differ will tokenize the strings /// into small segments it can be inconvenient to work with the results /// depending on the use case. You might also want to combine word level /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) /// which lets you remap the diffs back to the original input strings. /// /// ```rust /// use similar::{TextDiff, ChangeTag}; /// /// let diff = TextDiff::configure().diff_words("foo bar baz", "foo BAR baz"); /// let changes: Vec<_> = diff /// .iter_all_changes() /// .map(|x| (x.tag(), x.value())) /// .collect(); /// /// assert_eq!(changes, vec![ /// (ChangeTag::Equal, "foo"), /// (ChangeTag::Equal, " "), /// (ChangeTag::Delete, "bar"), /// (ChangeTag::Insert, "BAR"), /// (ChangeTag::Equal, " "), /// (ChangeTag::Equal, "baz"), /// ]); /// ``` pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( &self, old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output> { self.diff( Cow::Owned(old.as_diffable_str().tokenize_words()), Cow::Owned(new.as_diffable_str().tokenize_words()), false, ) } /// Creates a diff of characters. /// /// Note on character diffs: because the text differ will tokenize the strings /// into small segments it can be inconvenient to work with the results /// depending on the use case. You might also want to combine word level /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) /// which lets you remap the diffs back to the original input strings. /// /// ```rust /// use similar::{TextDiff, ChangeTag}; /// /// let diff = TextDiff::configure().diff_chars("abcdef", "abcDDf"); /// let changes: Vec<_> = diff /// .iter_all_changes() /// .map(|x| (x.tag(), x.value())) /// .collect(); /// /// assert_eq!(changes, vec![ /// (ChangeTag::Equal, "a"), /// (ChangeTag::Equal, "b"), /// (ChangeTag::Equal, "c"), /// (ChangeTag::Delete, "d"), /// (ChangeTag::Delete, "e"), /// (ChangeTag::Insert, "D"), /// (ChangeTag::Insert, "D"), /// (ChangeTag::Equal, "f"), /// ]); /// ``` pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( &self, old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output> { self.diff( Cow::Owned(old.as_diffable_str().tokenize_chars()), Cow::Owned(new.as_diffable_str().tokenize_chars()), false, ) } /// Creates a diff of unicode words. /// /// This splits the text into words according to unicode rules. This is /// generally recommended over [`TextDiffConfig::diff_words`] but /// requires a dependency. /// /// This requires the `unicode` feature. /// /// Note on word diffs: because the text differ will tokenize the strings /// into small segments it can be inconvenient to work with the results /// depending on the use case. You might also want to combine word level /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) /// which lets you remap the diffs back to the original input strings. /// /// ```rust /// use similar::{TextDiff, ChangeTag}; /// /// let diff = TextDiff::configure().diff_unicode_words("ah(be)ce", "ah(ah)ce"); /// let changes: Vec<_> = diff /// .iter_all_changes() /// .map(|x| (x.tag(), x.value())) /// .collect(); /// /// assert_eq!(changes, vec![ /// (ChangeTag::Equal, "ah"), /// (ChangeTag::Equal, "("), /// (ChangeTag::Delete, "be"), /// (ChangeTag::Insert, "ah"), /// (ChangeTag::Equal, ")"), /// (ChangeTag::Equal, "ce"), /// ]); /// ``` #[cfg(feature = "unicode")] pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( &self, old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output> { self.diff( Cow::Owned(old.as_diffable_str().tokenize_unicode_words()), Cow::Owned(new.as_diffable_str().tokenize_unicode_words()), false, ) } /// Creates a diff of graphemes. /// /// This requires the `unicode` feature. /// /// Note on grapheme diffs: because the text differ will tokenize the strings /// into small segments it can be inconvenient to work with the results /// depending on the use case. You might also want to combine word level /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper) /// which lets you remap the diffs back to the original input strings. /// /// ```rust /// use similar::{TextDiff, ChangeTag}; /// /// let diff = TextDiff::configure().diff_graphemes("💩🇦🇹🦠", "💩🇦🇱❄️"); /// let changes: Vec<_> = diff /// .iter_all_changes() /// .map(|x| (x.tag(), x.value())) /// .collect(); /// /// assert_eq!(changes, vec![ /// (ChangeTag::Equal, "💩"), /// (ChangeTag::Delete, "🇦🇹"), /// (ChangeTag::Delete, "🦠"), /// (ChangeTag::Insert, "🇦🇱"), /// (ChangeTag::Insert, "❄️"), /// ]); /// ``` #[cfg(feature = "unicode")] pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( &self, old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output> { self.diff( Cow::Owned(old.as_diffable_str().tokenize_graphemes()), Cow::Owned(new.as_diffable_str().tokenize_graphemes()), false, ) } /// Creates a diff of arbitrary slices. /// /// ```rust /// use similar::{TextDiff, ChangeTag}; /// /// let old = &["foo", "bar", "baz"]; /// let new = &["foo", "BAR", "baz"]; /// let diff = TextDiff::configure().diff_slices(old, new); /// let changes: Vec<_> = diff /// .iter_all_changes() /// .map(|x| (x.tag(), x.value())) /// .collect(); /// /// assert_eq!(changes, vec![ /// (ChangeTag::Equal, "foo"), /// (ChangeTag::Delete, "bar"), /// (ChangeTag::Insert, "BAR"), /// (ChangeTag::Equal, "baz"), /// ]); /// ``` pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>( &self, old: &'bufs [&'old T], new: &'bufs [&'new T], ) -> TextDiff<'old, 'new, 'bufs, T> { self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false) } fn diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>( &self, old: Cow<'bufs, [&'old T]>, new: Cow<'bufs, [&'new T]>, newline_terminated: bool, ) -> TextDiff<'old, 'new, 'bufs, T> { let deadline = self.deadline.map(|x| x.into_instant()); let ops = if old.len() > 100 || new.len() > 100 { let ih = IdentifyDistinct::::new(&old[..], 0..old.len(), &new[..], 0..new.len()); capture_diff_deadline( self.algorithm, ih.old_lookup(), ih.old_range(), ih.new_lookup(), ih.new_range(), deadline, ) } else { capture_diff_deadline( self.algorithm, &old[..], 0..old.len(), &new[..], 0..new.len(), deadline, ) }; TextDiff { old, new, ops, newline_terminated: self.newline_terminated.unwrap_or(newline_terminated), algorithm: self.algorithm, } } } /// Captures diff op codes for textual diffs. /// /// The exact diff behavior is depending on the underlying [`DiffableStr`]. /// For instance diffs on bytes and strings are slightly different. You can /// create a text diff from constructors such as [`TextDiff::from_lines`] or /// the [`TextDiffConfig`] created by [`TextDiff::configure`]. /// /// Requires the `text` feature. pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> { old: Cow<'bufs, [&'old T]>, new: Cow<'bufs, [&'new T]>, ops: Vec, newline_terminated: bool, algorithm: Algorithm, } impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> { /// Configures a text differ before diffing. pub fn configure() -> TextDiffConfig { TextDiffConfig::default() } /// Creates a diff of lines. /// /// For more information see [`TextDiffConfig::diff_lines`]. pub fn from_lines( old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output> { TextDiff::configure().diff_lines(old, new) } /// Creates a diff of words. /// /// For more information see [`TextDiffConfig::diff_words`]. pub fn from_words( old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output> { TextDiff::configure().diff_words(old, new) } /// Creates a diff of chars. /// /// For more information see [`TextDiffConfig::diff_chars`]. pub fn from_chars( old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output> { TextDiff::configure().diff_chars(old, new) } /// Creates a diff of unicode words. /// /// For more information see [`TextDiffConfig::diff_unicode_words`]. /// /// This requires the `unicode` feature. #[cfg(feature = "unicode")] pub fn from_unicode_words( old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output> { TextDiff::configure().diff_unicode_words(old, new) } /// Creates a diff of graphemes. /// /// For more information see [`TextDiffConfig::diff_graphemes`]. /// /// This requires the `unicode` feature. #[cfg(feature = "unicode")] pub fn from_graphemes( old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output> { TextDiff::configure().diff_graphemes(old, new) } } impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> { /// Creates a diff of arbitrary slices. /// /// For more information see [`TextDiffConfig::diff_slices`]. pub fn from_slices( old: &'bufs [&'old T], new: &'bufs [&'new T], ) -> TextDiff<'old, 'new, 'bufs, T> { TextDiff::configure().diff_slices(old, new) } /// The name of the algorithm that created the diff. pub fn algorithm(&self) -> Algorithm { self.algorithm } /// Returns `true` if items in the slice are newline terminated. /// /// This flag is used by the unified diff writer to determine if extra /// newlines have to be added. pub fn newline_terminated(&self) -> bool { self.newline_terminated } /// Returns all old slices. pub fn old_slices(&self) -> &[&'old T] { &self.old } /// Returns all new slices. pub fn new_slices(&self) -> &[&'new T] { &self.new } /// Return a measure of the sequences' similarity in the range `0..=1`. /// /// A ratio of `1.0` means the two sequences are a complete match, a /// ratio of `0.0` would indicate completely distinct sequences. /// /// ```rust /// # use similar::TextDiff; /// let diff = TextDiff::from_chars("abcd", "bcde"); /// assert_eq!(diff.ratio(), 0.75); /// ``` pub fn ratio(&self) -> f32 { get_diff_ratio(self.ops(), self.old.len(), self.new.len()) } /// Iterates over the changes the op expands to. /// /// This method is a convenient way to automatically resolve the different /// ways in which a change could be encoded (insert/delete vs replace), look /// up the value from the appropriate slice and also handle correct index /// handling. pub fn iter_changes<'x, 'slf>( &'slf self, op: &DiffOp, ) -> ChangesIter<'slf, [&'x T], [&'x T], &'x T> where 'x: 'slf, 'old: 'x, 'new: 'x, { op.iter_changes(self.old_slices(), self.new_slices()) } /// Returns the captured diff ops. pub fn ops(&self) -> &[DiffOp] { &self.ops } /// Isolate change clusters by eliminating ranges with no changes. /// /// This is equivalent to calling [`group_diff_ops`] on [`TextDiff::ops`]. pub fn grouped_ops(&self, n: usize) -> Vec> { group_diff_ops(self.ops().to_vec(), n) } /// Flattens out the diff into all changes. /// /// This is a shortcut for combining [`TextDiff::ops`] with /// [`TextDiff::iter_changes`]. pub fn iter_all_changes<'x, 'slf>(&'slf self) -> AllChangesIter<'slf, 'x, T> where 'x: 'slf + 'old + 'new, 'old: 'x, 'new: 'x, { AllChangesIter::new(&self.old[..], &self.new[..], self.ops()) } /// Utility to return a unified diff formatter. pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> { UnifiedDiff::from_text_diff(self) } /// Iterates over the changes the op expands to with inline emphasis. /// /// This is very similar to [`TextDiff::iter_changes`] but it performs a second /// level diff on adjacent line replacements. The exact behavior of /// this function with regards to how it detects those inline changes /// is currently not defined and will likely change over time. /// /// As of similar 1.2.0 the behavior of this function changes depending on /// if the `unicode` feature is enabled or not. It will prefer unicode word /// splitting over word splitting depending on the feature flag. /// /// Requires the `inline` feature. #[cfg(feature = "inline")] pub fn iter_inline_changes<'slf>( &'slf self, op: &DiffOp, ) -> impl Iterator> + '_ where 'slf: 'old + 'new, { inline::iter_inline_changes(self, op) } } /// Use the text differ to find `n` close matches. /// /// `cutoff` defines the threshold which needs to be reached for a word /// to be considered similar. See [`TextDiff::ratio`] for more information. /// /// ``` /// # use similar::get_close_matches; /// let matches = get_close_matches( /// "appel", /// &["ape", "apple", "peach", "puppy"][..], /// 3, /// 0.6 /// ); /// assert_eq!(matches, vec!["apple", "ape"]); /// ``` /// /// Requires the `text` feature. pub fn get_close_matches<'a, T: DiffableStr + ?Sized>( word: &T, possibilities: &[&'a T], n: usize, cutoff: f32, ) -> Vec<&'a T> { let mut matches = BinaryHeap::new(); let seq1 = word.tokenize_chars(); let quick_ratio = QuickSeqRatio::new(&seq1); for &possibility in possibilities { let seq2 = possibility.tokenize_chars(); if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff { continue; } let diff = TextDiff::from_slices(&seq1, &seq2); let ratio = diff.ratio(); if ratio >= cutoff { // we're putting the word itself in reverse in so that matches with // the same ratio are ordered lexicographically. matches.push(((ratio * std::u32::MAX as f32) as u32, Reverse(possibility))); } } let mut rv = vec![]; for _ in 0..n { if let Some((_, elt)) = matches.pop() { rv.push(elt.0); } else { break; } } rv } #[test] fn test_captured_ops() { let diff = TextDiff::from_lines( "Hello World\nsome stuff here\nsome more stuff here\n", "Hello World\nsome amazing stuff here\nsome more stuff here\n", ); insta::assert_debug_snapshot!(&diff.ops()); } #[test] fn test_captured_word_ops() { let diff = TextDiff::from_words( "Hello World\nsome stuff here\nsome more stuff here\n", "Hello World\nsome amazing stuff here\nsome more stuff here\n", ); let changes = diff .ops() .iter() .flat_map(|op| diff.iter_changes(op)) .collect::>(); insta::assert_debug_snapshot!(&changes); } #[test] fn test_unified_diff() { let diff = TextDiff::from_lines( "Hello World\nsome stuff here\nsome more stuff here\n", "Hello World\nsome amazing stuff here\nsome more stuff here\n", ); assert_eq!(diff.newline_terminated(), true); insta::assert_snapshot!(&diff .unified_diff() .context_radius(3) .header("old", "new") .to_string()); } #[test] fn test_line_ops() { let a = "Hello World\nsome stuff here\nsome more stuff here\n"; let b = "Hello World\nsome amazing stuff here\nsome more stuff here\n"; let diff = TextDiff::from_lines(a, b); assert_eq!(diff.newline_terminated(), true); let changes = diff .ops() .iter() .flat_map(|op| diff.iter_changes(op)) .collect::>(); insta::assert_debug_snapshot!(&changes); #[cfg(feature = "bytes")] { let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes()); let byte_changes = byte_diff .ops() .iter() .flat_map(|op| byte_diff.iter_changes(op)) .collect::>(); for (change, byte_change) in changes.iter().zip(byte_changes.iter()) { assert_eq!(change.to_string_lossy(), byte_change.to_string_lossy()); } } } #[test] fn test_virtual_newlines() { let diff = TextDiff::from_lines("a\nb", "a\nc\n"); assert_eq!(diff.newline_terminated(), true); let changes = diff .ops() .iter() .flat_map(|op| diff.iter_changes(op)) .collect::>(); insta::assert_debug_snapshot!(&changes); } #[test] fn test_char_diff() { let diff = TextDiff::from_chars("Hello World", "Hallo Welt"); insta::assert_debug_snapshot!(diff.ops()); #[cfg(feature = "bytes")] { let byte_diff = TextDiff::from_chars("Hello World".as_bytes(), "Hallo Welt".as_bytes()); assert_eq!(diff.ops(), byte_diff.ops()); } } #[test] fn test_ratio() { let diff = TextDiff::from_chars("abcd", "bcde"); assert_eq!(diff.ratio(), 0.75); let diff = TextDiff::from_chars("", ""); assert_eq!(diff.ratio(), 1.0); } #[test] fn test_get_close_matches() { let matches = get_close_matches("appel", &["ape", "apple", "peach", "puppy"][..], 3, 0.6); assert_eq!(matches, vec!["apple", "ape"]); let matches = get_close_matches( "hulo", &[ "hi", "hulu", "hali", "hoho", "amaz", "zulo", "blah", "hopp", "uulo", "aulo", ][..], 5, 0.7, ); assert_eq!(matches, vec!["aulo", "hulu", "uulo", "zulo"]); } #[test] fn test_lifetimes_on_iter() { use crate::Change; fn diff_lines<'x, T>(old: &'x T, new: &'x T) -> Vec> where T: DiffableStrRef + ?Sized, { TextDiff::from_lines(old, new).iter_all_changes().collect() } let a = "1\n2\n3\n".to_string(); let b = "1\n99\n3\n".to_string(); let changes = diff_lines(&a, &b); insta::assert_debug_snapshot!(&changes); } #[test] #[cfg(feature = "serde")] fn test_serde() { let diff = TextDiff::from_lines( "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", ); let changes = diff .ops() .iter() .flat_map(|op| diff.iter_changes(op)) .collect::>(); let json = serde_json::to_string_pretty(&changes).unwrap(); insta::assert_snapshot!(&json); } #[test] #[cfg(feature = "serde")] fn test_serde_ops() { let diff = TextDiff::from_lines( "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", ); let changes = diff.ops(); let json = serde_json::to_string_pretty(&changes).unwrap(); insta::assert_snapshot!(&json); } #[test] fn test_regression_issue_37() { let config = TextDiffConfig::default(); let diff = config.diff_lines("\u{18}\n\n", "\n\n\r"); let mut output = diff.unified_diff(); assert_eq!( output.context_radius(0).to_string(), "@@ -1 +1,0 @@\n-\u{18}\n@@ -2,0 +2,2 @@\n+\n+\r" ); } similar-2.2.1/src/text/snapshots/similar__text__captured_ops.snap000064400000000000000000000005051046102023000235200ustar 00000000000000--- source: src/text/mod.rs expression: "&diff.ops()" --- [ Equal { old_index: 0, new_index: 0, len: 1, }, Replace { old_index: 1, old_len: 1, new_index: 1, new_len: 1, }, Equal { old_index: 2, new_index: 2, len: 1, }, ] similar-2.2.1/src/text/snapshots/similar__text__captured_word_ops.snap000064400000000000000000000063531046102023000245620ustar 00000000000000--- source: src/text/mod.rs expression: "&changes" --- [ Change { tag: Equal, old_index: Some( 0, ), new_index: Some( 0, ), value: "Hello", }, Change { tag: Equal, old_index: Some( 1, ), new_index: Some( 1, ), value: " ", }, Change { tag: Equal, old_index: Some( 2, ), new_index: Some( 2, ), value: "World", }, Change { tag: Equal, old_index: Some( 3, ), new_index: Some( 3, ), value: "\n", }, Change { tag: Equal, old_index: Some( 4, ), new_index: Some( 4, ), value: "some", }, Change { tag: Equal, old_index: Some( 5, ), new_index: Some( 5, ), value: " ", }, Change { tag: Insert, old_index: None, new_index: Some( 6, ), value: "amazing", }, Change { tag: Insert, old_index: None, new_index: Some( 7, ), value: " ", }, Change { tag: Equal, old_index: Some( 6, ), new_index: Some( 8, ), value: "stuff", }, Change { tag: Equal, old_index: Some( 7, ), new_index: Some( 9, ), value: " ", }, Change { tag: Equal, old_index: Some( 8, ), new_index: Some( 10, ), value: "here", }, Change { tag: Equal, old_index: Some( 9, ), new_index: Some( 11, ), value: "\n", }, Change { tag: Equal, old_index: Some( 10, ), new_index: Some( 12, ), value: "some", }, Change { tag: Equal, old_index: Some( 11, ), new_index: Some( 13, ), value: " ", }, Change { tag: Equal, old_index: Some( 12, ), new_index: Some( 14, ), value: "more", }, Change { tag: Equal, old_index: Some( 13, ), new_index: Some( 15, ), value: " ", }, Change { tag: Equal, old_index: Some( 14, ), new_index: Some( 16, ), value: "stuff", }, Change { tag: Equal, old_index: Some( 15, ), new_index: Some( 17, ), value: " ", }, Change { tag: Equal, old_index: Some( 16, ), new_index: Some( 18, ), value: "here", }, Change { tag: Equal, old_index: Some( 17, ), new_index: Some( 19, ), value: "\n", }, ] similar-2.2.1/src/text/snapshots/similar__text__char_diff.snap000064400000000000000000000011441046102023000227350ustar 00000000000000--- source: src/text/mod.rs expression: diff.ops() --- [ Equal { old_index: 0, new_index: 0, len: 1, }, Replace { old_index: 1, old_len: 1, new_index: 1, new_len: 1, }, Equal { old_index: 2, new_index: 2, len: 5, }, Replace { old_index: 7, old_len: 2, new_index: 7, new_len: 1, }, Equal { old_index: 9, new_index: 8, len: 1, }, Replace { old_index: 10, old_len: 1, new_index: 9, new_len: 1, }, ] similar-2.2.1/src/text/snapshots/similar__text__inline__line_ops_inline.snap000064400000000000000000000042731046102023000257010ustar 00000000000000--- source: src/text/inline.rs expression: "&changes" --- [ InlineChange { tag: Insert, old_index: None, new_index: Some( 0, ), values: [ ( false, "Stuff\n", ), ], }, InlineChange { tag: Equal, old_index: Some( 0, ), new_index: Some( 1, ), values: [ ( false, "Hello World\n", ), ], }, InlineChange { tag: Delete, old_index: Some( 1, ), new_index: None, values: [ ( false, "some ", ), ( false, "stuff here\n", ), ], }, InlineChange { tag: Insert, old_index: None, new_index: Some( 2, ), values: [ ( false, "some ", ), ( true, "amazing ", ), ( false, "stuff here\n", ), ], }, InlineChange { tag: Equal, old_index: Some( 2, ), new_index: Some( 3, ), values: [ ( false, "some more stuff here\n", ), ], }, InlineChange { tag: Delete, old_index: Some( 3, ), new_index: None, values: [ ( false, "\n", ), ], }, InlineChange { tag: Delete, old_index: Some( 4, ), new_index: None, values: [ ( false, "Aha stuff here\n", ), ], }, InlineChange { tag: Delete, old_index: Some( 5, ), new_index: None, values: [ ( false, "and more stuff", ), ], }, ] similar-2.2.1/src/text/snapshots/similar__text__inline__serde.snap000064400000000000000000000025571046102023000236400ustar 00000000000000--- source: src/text/inline.rs expression: "&json" --- [ { "tag": "insert", "old_index": null, "new_index": 0, "values": [ [ false, "Stuff\n" ] ] }, { "tag": "equal", "old_index": 0, "new_index": 1, "values": [ [ false, "Hello World\n" ] ] }, { "tag": "delete", "old_index": 1, "new_index": null, "values": [ [ false, "some " ], [ false, "stuff here\n" ] ] }, { "tag": "insert", "old_index": null, "new_index": 2, "values": [ [ false, "some " ], [ true, "amazing " ], [ false, "stuff here\n" ] ] }, { "tag": "equal", "old_index": 2, "new_index": 3, "values": [ [ false, "some more stuff here\n" ] ] }, { "tag": "delete", "old_index": 3, "new_index": null, "values": [ [ false, "\n" ] ] }, { "tag": "delete", "old_index": 4, "new_index": null, "values": [ [ false, "Aha stuff here\n" ] ] }, { "tag": "delete", "old_index": 5, "new_index": null, "values": [ [ false, "and more stuff" ] ] } ] similar-2.2.1/src/text/snapshots/similar__text__lifetimes_on_iter.snap000064400000000000000000000012321046102023000245260ustar 00000000000000--- source: src/text/mod.rs expression: "&changes" --- [ Change { tag: Equal, old_index: Some( 0, ), new_index: Some( 0, ), value: "1\n", }, Change { tag: Delete, old_index: Some( 1, ), new_index: None, value: "2\n", }, Change { tag: Insert, old_index: None, new_index: Some( 1, ), value: "99\n", }, Change { tag: Equal, old_index: Some( 2, ), new_index: Some( 2, ), value: "3\n", }, ] similar-2.2.1/src/text/snapshots/similar__text__line_ops.snap000064400000000000000000000013321046102023000226370ustar 00000000000000--- source: src/text/mod.rs expression: "&changes" --- [ Change { tag: Equal, old_index: Some( 0, ), new_index: Some( 0, ), value: "Hello World\n", }, Change { tag: Delete, old_index: Some( 1, ), new_index: None, value: "some stuff here\n", }, Change { tag: Insert, old_index: None, new_index: Some( 1, ), value: "some amazing stuff here\n", }, Change { tag: Equal, old_index: Some( 2, ), new_index: Some( 2, ), value: "some more stuff here\n", }, ] similar-2.2.1/src/text/snapshots/similar__text__serde.snap000064400000000000000000000015541046102023000221370ustar 00000000000000--- source: src/text/mod.rs expression: "&json" --- [ { "tag": "insert", "old_index": null, "new_index": 0, "value": "Stuff\n" }, { "tag": "equal", "old_index": 0, "new_index": 1, "value": "Hello World\n" }, { "tag": "delete", "old_index": 1, "new_index": null, "value": "some stuff here\n" }, { "tag": "insert", "old_index": null, "new_index": 2, "value": "some amazing stuff here\n" }, { "tag": "equal", "old_index": 2, "new_index": 3, "value": "some more stuff here\n" }, { "tag": "delete", "old_index": 3, "new_index": null, "value": "\n" }, { "tag": "delete", "old_index": 4, "new_index": null, "value": "Aha stuff here\n" }, { "tag": "delete", "old_index": 5, "new_index": null, "value": "and more stuff" } ] similar-2.2.1/src/text/snapshots/similar__text__serde_ops.snap000064400000000000000000000007571046102023000230240ustar 00000000000000--- source: src/text/mod.rs expression: "&json" --- [ { "op": "insert", "old_index": 0, "new_index": 0, "new_len": 1 }, { "op": "equal", "old_index": 0, "new_index": 1, "len": 1 }, { "op": "replace", "old_index": 1, "old_len": 1, "new_index": 2, "new_len": 1 }, { "op": "equal", "old_index": 2, "new_index": 3, "len": 1 }, { "op": "delete", "old_index": 3, "old_len": 3, "new_index": 4 } ] similar-2.2.1/src/text/snapshots/similar__text__unified_diff.snap000064400000000000000000000003501046102023000234410ustar 00000000000000--- source: src/text/mod.rs expression: "&diff.unified_diff().context_radius(3).header(\"old\", \"new\").to_string()" --- --- old +++ new @@ -1,3 +1,3 @@ Hello World -some stuff here +some amazing stuff here some more stuff here similar-2.2.1/src/text/snapshots/similar__text__virtual_newlines.snap000064400000000000000000000007631046102023000244300ustar 00000000000000--- source: src/text/mod.rs expression: "&changes" --- [ Change { tag: Equal, old_index: Some( 0, ), new_index: Some( 0, ), value: "a\n", }, Change { tag: Delete, old_index: Some( 1, ), new_index: None, value: "b", }, Change { tag: Insert, old_index: None, new_index: Some( 1, ), value: "c\n", }, ] similar-2.2.1/src/text/utils.rs000064400000000000000000000031021046102023000145430ustar 00000000000000use std::collections::HashMap; use std::hash::Hash; use super::DiffableStrRef; // quick and dirty way to get an upper sequence ratio. pub fn upper_seq_ratio(seq1: &[T], seq2: &[T]) -> f32 { let n = seq1.len() + seq2.len(); if n == 0 { 1.0 } else { 2.0 * seq1.len().min(seq2.len()) as f32 / n as f32 } } /// Internal utility to calculate an upper bound for a ratio for /// [`get_close_matches`]. This is based on Python's difflib approach /// of considering the two sets to be multisets. /// /// It counts the number of matches without regard to order, which is an /// obvious upper bound. pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>); impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> { pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> { let mut counts = HashMap::new(); for &word in seq { *counts.entry(word).or_insert(0) += 1; } QuickSeqRatio(counts) } pub fn calc(&self, seq: &[&T]) -> f32 { let n = self.0.len() + seq.len(); if n == 0 { return 1.0; } let mut available = HashMap::new(); let mut matches = 0; for &word in seq { let x = if let Some(count) = available.get(&word) { *count } else { self.0.get(&word).copied().unwrap_or(0) }; available.insert(word, x - 1); if x > 0 { matches += 1; } } 2.0 * matches as f32 / n as f32 } } similar-2.2.1/src/types.rs000064400000000000000000000363101046102023000135720ustar 00000000000000use std::fmt; use std::ops::{Index, Range}; use crate::algorithms::utils::is_empty_range; use crate::algorithms::DiffHook; use crate::iter::ChangesIter; /// An enum representing a diffing algorithm. #[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] #[cfg_attr( feature = "serde", derive(serde::Serialize, serde::Deserialize), serde(rename_all = "snake_case") )] pub enum Algorithm { /// Picks the myers algorithm from [`crate::algorithms::myers`] Myers, /// Picks the patience algorithm from [`crate::algorithms::patience`] Patience, /// Picks the LCS algorithm from [`crate::algorithms::lcs`] Lcs, } impl Default for Algorithm { /// Returns the default algorithm ([`Algorithm::Myers`]). fn default() -> Algorithm { Algorithm::Myers } } /// The tag of a change. #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] #[cfg_attr( feature = "serde", derive(serde::Serialize, serde::Deserialize), serde(rename_all = "snake_case") )] pub enum ChangeTag { /// The change indicates equality (not a change) Equal, /// The change indicates deleted text. Delete, /// The change indicates inserted text. Insert, } impl fmt::Display for ChangeTag { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, "{}", match &self { ChangeTag::Equal => ' ', ChangeTag::Delete => '-', ChangeTag::Insert => '+', } ) } } /// Represents the expanded [`DiffOp`] change. /// /// This type is returned from [`DiffOp::iter_changes`] and /// [`TextDiff::iter_changes`](crate::text::TextDiff::iter_changes). /// /// It exists so that it's more convenient to work with textual differences as /// the underlying [`DiffOp`] encodes a group of changes. /// /// This type has additional methods that are only available for types /// implementing [`DiffableStr`](crate::text::DiffableStr). #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Serialize))] pub struct Change { pub(crate) tag: ChangeTag, pub(crate) old_index: Option, pub(crate) new_index: Option, pub(crate) value: T, } /// These methods are available for all change types. impl Change { /// Returns the change tag. pub fn tag(&self) -> ChangeTag { self.tag } /// Returns the old index if available. pub fn old_index(&self) -> Option { self.old_index } /// Returns the new index if available. pub fn new_index(&self) -> Option { self.new_index } /// Returns the underlying changed value. /// /// Depending on the type of the underlying [`crate::text::DiffableStr`] /// this value is more or less useful. If you always want to have a utf-8 /// string it's best to use the [`Change::as_str`] and /// [`Change::to_string_lossy`] methods. pub fn value(&self) -> T { self.value.clone() } } /// Utility enum to capture a diff operation. /// /// This is used by [`Capture`](crate::algorithms::Capture). #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] #[cfg_attr( feature = "serde", derive(serde::Serialize, serde::Deserialize), serde(rename_all = "snake_case", tag = "op") )] pub enum DiffOp { /// A segment is equal (see [`DiffHook::equal`]) Equal { /// The starting index in the old sequence. old_index: usize, /// The starting index in the new sequence. new_index: usize, /// The length of the segment. len: usize, }, /// A segment was deleted (see [`DiffHook::delete`]) Delete { /// The starting index in the old sequence. old_index: usize, /// The length of the old segment. old_len: usize, /// The starting index in the new sequence. new_index: usize, }, /// A segment was inserted (see [`DiffHook::insert`]) Insert { /// The starting index in the old sequence. old_index: usize, /// The starting index in the new sequence. new_index: usize, /// The length of the new segment. new_len: usize, }, /// A segment was replaced (see [`DiffHook::replace`]) Replace { /// The starting index in the old sequence. old_index: usize, /// The length of the old segment. old_len: usize, /// The starting index in the new sequence. new_index: usize, /// The length of the new segment. new_len: usize, }, } /// The tag of a diff operation. #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] #[cfg_attr( feature = "serde", derive(serde::Serialize, serde::Deserialize), serde(rename_all = "snake_case") )] pub enum DiffTag { /// The diff op encodes an equal segment. Equal, /// The diff op encodes a deleted segment. Delete, /// The diff op encodes an inserted segment. Insert, /// The diff op encodes a replaced segment. Replace, } impl DiffOp { /// Returns the tag of the operation. pub fn tag(self) -> DiffTag { self.as_tag_tuple().0 } /// Returns the old range. pub fn old_range(&self) -> Range { self.as_tag_tuple().1 } /// Returns the new range. pub fn new_range(&self) -> Range { self.as_tag_tuple().2 } /// Transform the op into a tuple of diff tag and ranges. /// /// This is useful when operating on slices. The returned format is /// `(tag, i1..i2, j1..j2)`: /// /// * `Replace`: `a[i1..i2]` should be replaced by `b[j1..j2]` /// * `Delete`: `a[i1..i2]` should be deleted (`j1 == j2` in this case). /// * `Insert`: `b[j1..j2]` should be inserted at `a[i1..i2]` (`i1 == i2` in this case). /// * `Equal`: `a[i1..i2]` is equal to `b[j1..j2]`. pub fn as_tag_tuple(&self) -> (DiffTag, Range, Range) { match *self { DiffOp::Equal { old_index, new_index, len, } => ( DiffTag::Equal, old_index..old_index + len, new_index..new_index + len, ), DiffOp::Delete { old_index, new_index, old_len, } => ( DiffTag::Delete, old_index..old_index + old_len, new_index..new_index, ), DiffOp::Insert { old_index, new_index, new_len, } => ( DiffTag::Insert, old_index..old_index, new_index..new_index + new_len, ), DiffOp::Replace { old_index, old_len, new_index, new_len, } => ( DiffTag::Replace, old_index..old_index + old_len, new_index..new_index + new_len, ), } } /// Apply this operation to a diff hook. pub fn apply_to_hook(&self, d: &mut D) -> Result<(), D::Error> { match *self { DiffOp::Equal { old_index, new_index, len, } => d.equal(old_index, new_index, len), DiffOp::Delete { old_index, old_len, new_index, } => d.delete(old_index, old_len, new_index), DiffOp::Insert { old_index, new_index, new_len, } => d.insert(old_index, new_index, new_len), DiffOp::Replace { old_index, old_len, new_index, new_len, } => d.replace(old_index, old_len, new_index, new_len), } } /// Iterates over all changes encoded in the diff op against old and new /// sequences. /// /// `old` and `new` are two indexable objects like the types you pass to /// the diffing algorithm functions. /// /// ```rust /// use similar::{ChangeTag, Algorithm}; /// use similar::capture_diff_slices; /// let old = vec!["foo", "bar", "baz"]; /// let new = vec!["foo", "bar", "blah"]; /// let ops = capture_diff_slices(Algorithm::Myers, &old, &new); /// let changes: Vec<_> = ops /// .iter() /// .flat_map(|x| x.iter_changes(&old, &new)) /// .map(|x| (x.tag(), x.value())) /// .collect(); /// assert_eq!(changes, vec![ /// (ChangeTag::Equal, "foo"), /// (ChangeTag::Equal, "bar"), /// (ChangeTag::Delete, "baz"), /// (ChangeTag::Insert, "blah"), /// ]); /// ``` pub fn iter_changes<'lookup, Old, New, T>( &self, old: &'lookup Old, new: &'lookup New, ) -> ChangesIter<'lookup, Old, New, T> where Old: Index + ?Sized, New: Index + ?Sized, { ChangesIter::new(old, new, *self) } /// Given a diffop yields the changes it encodes against the given slices. /// /// This is similar to [`DiffOp::iter_changes`] but instead of yielding the /// individual changes it yields consequitive changed slices. /// /// This will only ever yield a single tuple or two tuples in case a /// [`DiffOp::Replace`] operation is passed. /// /// ```rust /// use similar::{ChangeTag, Algorithm}; /// use similar::capture_diff_slices; /// let old = vec!["foo", "bar", "baz"]; /// let new = vec!["foo", "bar", "blah"]; /// let ops = capture_diff_slices(Algorithm::Myers, &old, &new); /// let changes: Vec<_> = ops.iter().flat_map(|x| x.iter_slices(&old, &new)).collect(); /// assert_eq!(changes, vec![ /// (ChangeTag::Equal, &["foo", "bar"][..]), /// (ChangeTag::Delete, &["baz"][..]), /// (ChangeTag::Insert, &["blah"][..]), /// ]); /// ``` /// /// Due to lifetime restrictions it's currently impossible for the /// returned slices to outlive the lookup. pub fn iter_slices<'lookup, Old, New, T>( &self, old: &'lookup Old, new: &'lookup New, ) -> impl Iterator where T: 'lookup + ?Sized, Old: Index, Output = T> + ?Sized, New: Index, Output = T> + ?Sized, { match *self { DiffOp::Equal { old_index, len, .. } => { Some((ChangeTag::Equal, &old[old_index..old_index + len])) .into_iter() .chain(None.into_iter()) } DiffOp::Insert { new_index, new_len, .. } => Some((ChangeTag::Insert, &new[new_index..new_index + new_len])) .into_iter() .chain(None.into_iter()), DiffOp::Delete { old_index, old_len, .. } => Some((ChangeTag::Delete, &old[old_index..old_index + old_len])) .into_iter() .chain(None.into_iter()), DiffOp::Replace { old_index, old_len, new_index, new_len, } => Some((ChangeTag::Delete, &old[old_index..old_index + old_len])) .into_iter() .chain(Some((ChangeTag::Insert, &new[new_index..new_index + new_len])).into_iter()), } } pub(crate) fn is_empty(&self) -> bool { let (_, old, new) = self.as_tag_tuple(); is_empty_range(&old) && is_empty_range(&new) } pub(crate) fn shift_left(&mut self, adjust: usize) { self.adjust((adjust, true), (0, false)); } pub(crate) fn shift_right(&mut self, adjust: usize) { self.adjust((adjust, false), (0, false)); } pub(crate) fn grow_left(&mut self, adjust: usize) { self.adjust((adjust, true), (adjust, false)); } pub(crate) fn grow_right(&mut self, adjust: usize) { self.adjust((0, false), (adjust, false)); } pub(crate) fn shrink_left(&mut self, adjust: usize) { self.adjust((0, false), (adjust, true)); } pub(crate) fn shrink_right(&mut self, adjust: usize) { self.adjust((adjust, false), (adjust, true)); } fn adjust(&mut self, adjust_offset: (usize, bool), adjust_len: (usize, bool)) { #[inline(always)] fn modify(val: &mut usize, adj: (usize, bool)) { if adj.1 { *val -= adj.0; } else { *val += adj.0; } } match self { DiffOp::Equal { old_index, new_index, len, } => { modify(old_index, adjust_offset); modify(new_index, adjust_offset); modify(len, adjust_len); } DiffOp::Delete { old_index, old_len, new_index, } => { modify(old_index, adjust_offset); modify(old_len, adjust_len); modify(new_index, adjust_offset); } DiffOp::Insert { old_index, new_index, new_len, } => { modify(old_index, adjust_offset); modify(new_index, adjust_offset); modify(new_len, adjust_len); } DiffOp::Replace { old_index, old_len, new_index, new_len, } => { modify(old_index, adjust_offset); modify(old_len, adjust_len); modify(new_index, adjust_offset); modify(new_len, adjust_len); } } } } #[cfg(feature = "text")] mod text_additions { use super::*; use crate::text::DiffableStr; use std::borrow::Cow; /// The text interface can produce changes over [`DiffableStr`] implementing /// values. As those are generic interfaces for different types of strings /// utility methods to make working with standard rust strings more enjoyable. impl<'s, T: DiffableStr + ?Sized> Change<&'s T> { /// Returns the value as string if it is utf-8. pub fn as_str(&self) -> Option<&'s str> { T::as_str(self.value) } /// Returns the value (lossy) decoded as utf-8 string. pub fn to_string_lossy(&self) -> Cow<'s, str> { T::to_string_lossy(self.value) } /// Returns `true` if this change does not end in a newline and must be /// followed up by one if line based diffs are used. /// /// The [`std::fmt::Display`] implementation of [`Change`] will automatically /// insert a newline after the value if this is true. pub fn missing_newline(&self) -> bool { !T::ends_with_newline(self.value) } } impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<&'s T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "{}{}", self.to_string_lossy(), if self.missing_newline() { "\n" } else { "" } ) } } } similar-2.2.1/src/udiff.rs000064400000000000000000000264271046102023000135330ustar 00000000000000//! This module provides unified diff functionality. //! //! It is available for as long as the `text` feature is enabled which //! is enabled by default: //! //! ```rust //! use similar::TextDiff; //! # let old_text = ""; //! # let new_text = ""; //! let text_diff = TextDiff::from_lines(old_text, new_text); //! print!("{}", text_diff //! .unified_diff() //! .context_radius(10) //! .header("old_file", "new_file")); //! ``` //! //! # Unicode vs Bytes //! //! The [`UnifiedDiff`] type supports both unicode and byte diffs for all //! types compatible with [`DiffableStr`]. You can pick between the two //! versions by using the [`Display`](std::fmt::Display) implementation or //! [`UnifiedDiff`] or [`UnifiedDiff::to_writer`]. //! //! The former uses [`DiffableStr::to_string_lossy`], the latter uses //! [`DiffableStr::as_bytes`] for each line. #[cfg(feature = "text")] use std::{fmt, io}; use crate::iter::AllChangesIter; use crate::text::{DiffableStr, TextDiff}; use crate::types::{Algorithm, DiffOp}; struct MissingNewlineHint(bool); impl fmt::Display for MissingNewlineHint { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.0 { write!(f, "\n\\ No newline at end of file")?; } Ok(()) } } #[derive(Copy, Clone, Debug)] struct UnifiedDiffHunkRange(usize, usize); impl UnifiedDiffHunkRange { fn start(&self) -> usize { self.0 } fn end(&self) -> usize { self.1 } } impl fmt::Display for UnifiedDiffHunkRange { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut beginning = self.start() + 1; let len = self.end().saturating_sub(self.start()); if len == 1 { write!(f, "{}", beginning) } else { if len == 0 { // empty ranges begin at line just before the range beginning -= 1; } write!(f, "{},{}", beginning, len) } } } /// Unified diff hunk header formatter. pub struct UnifiedHunkHeader { old_range: UnifiedDiffHunkRange, new_range: UnifiedDiffHunkRange, } impl UnifiedHunkHeader { /// Creates a hunk header from a (non empty) slice of diff ops. pub fn new(ops: &[DiffOp]) -> UnifiedHunkHeader { let first = ops[0]; let last = ops[ops.len() - 1]; let old_start = first.old_range().start; let new_start = first.new_range().start; let old_end = last.old_range().end; let new_end = last.new_range().end; UnifiedHunkHeader { old_range: UnifiedDiffHunkRange(old_start, old_end), new_range: UnifiedDiffHunkRange(new_start, new_end), } } } impl fmt::Display for UnifiedHunkHeader { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "@@ -{} +{} @@", &self.old_range, &self.new_range) } } /// Unified diff formatter. /// /// ```rust /// use similar::TextDiff; /// # let old_text = ""; /// # let new_text = ""; /// let text_diff = TextDiff::from_lines(old_text, new_text); /// print!("{}", text_diff /// .unified_diff() /// .context_radius(10) /// .header("old_file", "new_file")); /// ``` /// /// ## Unicode vs Bytes /// /// The [`UnifiedDiff`] type supports both unicode and byte diffs for all /// types compatible with [`DiffableStr`]. You can pick between the two /// versions by using [`UnifiedDiff.to_string`] or [`UnifiedDiff.to_writer`]. /// The former uses [`DiffableStr::to_string_lossy`], the latter uses /// [`DiffableStr::as_bytes`] for each line. pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> { diff: &'diff TextDiff<'old, 'new, 'bufs, T>, context_radius: usize, missing_newline_hint: bool, header: Option<(String, String)>, } impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> { /// Creates a formatter from a text diff object. pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs, T>) -> Self { UnifiedDiff { diff, context_radius: 3, missing_newline_hint: true, header: None, } } /// Changes the context radius. /// /// The context radius is the number of lines between changes that should /// be emitted. This defaults to `3`. pub fn context_radius(&mut self, n: usize) -> &mut Self { self.context_radius = n; self } /// Sets a header to the diff. /// /// `a` and `b` are the file names that are added to the top of the unified /// file format. The names are accepted verbatim which lets you encode /// a timestamp into it when separated by a tab (`\t`). For more information /// see [the unified diff format specification](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/diff.html#tag_20_34_10_07) pub fn header(&mut self, a: &str, b: &str) -> &mut Self { self.header = Some((a.to_string(), b.to_string())); self } /// Controls the missing newline hint. /// /// By default a special `\ No newline at end of file` marker is added to /// the output when a file is not terminated with a final newline. This can /// be disabled with this flag. pub fn missing_newline_hint(&mut self, yes: bool) -> &mut Self { self.missing_newline_hint = yes; self } /// Iterates over all hunks as configured. pub fn iter_hunks(&self) -> impl Iterator> { let diff = self.diff; let missing_newline_hint = self.missing_newline_hint; self.diff .grouped_ops(self.context_radius) .into_iter() .filter(|ops| !ops.is_empty()) .map(move |ops| UnifiedDiffHunk::new(ops, diff, missing_newline_hint)) } /// Write the unified diff as bytes to the output stream. pub fn to_writer(&self, mut w: W) -> Result<(), io::Error> where 'diff: 'old + 'new + 'bufs, { let mut header = self.header.as_ref(); for hunk in self.iter_hunks() { if let Some((old_file, new_file)) = header.take() { writeln!(w, "--- {}", old_file)?; writeln!(w, "+++ {}", new_file)?; } write!(w, "{}", hunk)?; } Ok(()) } fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self { if let Some((a, b)) = header { self.header(a, b); } self } } /// Unified diff hunk formatter. /// /// The `Display` this renders out a single unified diff's hunk. pub struct UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> { diff: &'diff TextDiff<'old, 'new, 'bufs, T>, ops: Vec, missing_newline_hint: bool, } impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T> { /// Creates a new hunk for some operations. pub fn new( ops: Vec, diff: &'diff TextDiff<'old, 'new, 'bufs, T>, missing_newline_hint: bool, ) -> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T> { UnifiedDiffHunk { diff, ops, missing_newline_hint, } } /// Returns the header for the hunk. pub fn header(&self) -> UnifiedHunkHeader { UnifiedHunkHeader::new(&self.ops) } /// Returns all operations in the hunk. pub fn ops(&self) -> &[DiffOp] { &self.ops } /// Returns the value of the `missing_newline_hint` flag. pub fn missing_newline_hint(&self) -> bool { self.missing_newline_hint } /// Iterates over all changes in a hunk. pub fn iter_changes<'x, 'slf>(&'slf self) -> AllChangesIter<'slf, 'x, T> where 'x: 'slf + 'old + 'new, 'old: 'x, 'new: 'x, { AllChangesIter::new(self.diff.old_slices(), self.diff.new_slices(), self.ops()) } /// Write the hunk as bytes to the output stream. pub fn to_writer(&self, mut w: W) -> Result<(), io::Error> where 'diff: 'old + 'new + 'bufs, { for (idx, change) in self.iter_changes().enumerate() { if idx == 0 { writeln!(w, "{}", self.header())?; } write!(w, "{}", change.tag())?; w.write_all(change.value().as_bytes())?; if !self.diff.newline_terminated() { writeln!(w)?; } if self.diff.newline_terminated() && change.missing_newline() { writeln!(w, "{}", MissingNewlineHint(self.missing_newline_hint))?; } } Ok(()) } } impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display for UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T> where 'diff: 'old + 'new + 'bufs, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for (idx, change) in self.iter_changes().enumerate() { if idx == 0 { writeln!(f, "{}", self.header())?; } write!(f, "{}{}", change.tag(), change.to_string_lossy())?; if !self.diff.newline_terminated() { writeln!(f)?; } if self.diff.newline_terminated() && change.missing_newline() { writeln!(f, "{}", MissingNewlineHint(self.missing_newline_hint))?; } } Ok(()) } } impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display for UnifiedDiff<'diff, 'old, 'new, 'bufs, T> where 'diff: 'old + 'new + 'bufs, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut header = self.header.as_ref(); for hunk in self.iter_hunks() { if let Some((old_file, new_file)) = header.take() { writeln!(f, "--- {}", old_file)?; writeln!(f, "+++ {}", new_file)?; } write!(f, "{}", hunk)?; } Ok(()) } } /// Quick way to get a unified diff as string. /// /// `n` configures [`UnifiedDiff::context_radius`] and /// `header` configures [`UnifiedDiff::header`] when not `None`. pub fn unified_diff<'old, 'new>( alg: Algorithm, old: &'old str, new: &'new str, n: usize, header: Option<(&str, &str)>, ) -> String { TextDiff::configure() .algorithm(alg) .diff_lines(old, new) .unified_diff() .context_radius(n) .header_opt(header) .to_string() } #[test] fn test_unified_diff() { let diff = TextDiff::from_lines( "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\nz\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\nK\nL\nM\nN\nO\nP\nQ\nR\nS\nT\nU\nV\nW\nX\nY\nZ", "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\nS\nt\nu\nv\nw\nx\ny\nz\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\nK\nL\nM\nN\no\nP\nQ\nR\nS\nT\nU\nV\nW\nX\nY\nZ", ); insta::assert_snapshot!(&diff.unified_diff().header("a.txt", "b.txt").to_string()); } #[test] fn test_empty_unified_diff() { let diff = TextDiff::from_lines("abc", "abc"); assert_eq!(diff.unified_diff().header("a.txt", "b.txt").to_string(), ""); } #[test] fn test_unified_diff_newline_hint() { let diff = TextDiff::from_lines("a\n", "b"); insta::assert_snapshot!(&diff.unified_diff().header("a.txt", "b.txt").to_string()); insta::assert_snapshot!(&diff .unified_diff() .missing_newline_hint(false) .header("a.txt", "b.txt") .to_string()); } similar-2.2.1/src/utils.rs000064400000000000000000000331741046102023000135730ustar 00000000000000//! Utilities for common diff related operations. //! //! This module provides specialized utilities and simplified diff operations //! for common operations. It's useful when you want to work with text diffs //! and you're interested in getting vectors of these changes directly. //! //! # Slice Remapping //! //! When working with [`TextDiff`] it's common that one takes advantage of the //! built-in tokenization of the differ. This for instance lets you do //! grapheme level diffs. This is implemented by the differ generating rather //! small slices of strings and running a diff algorithm over them. //! //! The downside of this is that all the [`DiffOp`] objects produced by the //! diffing algorithm encode operations on these rather small slices. For //! a lot of use cases this is not what one wants which can make this very //! inconvenient. This module provides a [`TextDiffRemapper`] which lets you //! map from the ranges that the [`TextDiff`] returns to the original input //! strings. For more information see [`TextDiffRemapper`]. //! //! # Simple Diff Functions //! //! This module provides a range of common test diff functions that will //! produce vectors of `(change_tag, value)` tuples. They will automatically //! optimize towards returning the most useful slice that one would expect for //! the type of diff performed. use std::hash::Hash; use std::ops::{Index, Range}; use crate::{ capture_diff_slices, Algorithm, ChangeTag, DiffOp, DiffableStr, DiffableStrRef, TextDiff, }; struct SliceRemapper<'x, T: ?Sized> { source: &'x T, indexes: Vec>, } impl<'x, T: DiffableStr + ?Sized> SliceRemapper<'x, T> { fn new(source: &'x T, slices: &[&'x T]) -> SliceRemapper<'x, T> { let indexes = slices .iter() .scan(0, |state, item| { let start = *state; let end = start + item.len(); *state = end; Some(start..end) }) .collect(); SliceRemapper { source, indexes } } fn slice(&self, range: Range) -> Option<&'x T> { let start = self.indexes.get(range.start)?.start; let end = self.indexes.get(range.end - 1)?.end; Some(self.source.slice(start..end)) } } impl<'x, T: DiffableStr + ?Sized> Index> for SliceRemapper<'x, T> { type Output = T; fn index(&self, range: Range) -> &Self::Output { self.slice(range).expect("out of bounds") } } /// A remapper that can remap diff ops to the original slices. /// /// The idea here is that when a [`TextDiff`](crate::TextDiff) is created from /// two strings and the internal tokenization is used, this remapper can take /// a range in the tokenized sequences and remap it to the original string. /// This is particularly useful when you want to do things like character or /// grapheme level diffs but you want to not have to iterate over small sequences /// but large consequitive ones from the source. /// /// ```rust /// use similar::{ChangeTag, TextDiff}; /// use similar::utils::TextDiffRemapper; /// /// let old = "yo! foo bar baz"; /// let new = "yo! foo bor baz"; /// let diff = TextDiff::from_words(old, new); /// let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); /// let changes: Vec<_> = diff.ops() /// .iter() /// .flat_map(move |x| remapper.iter_slices(x)) /// .collect(); /// /// assert_eq!(changes, vec![ /// (ChangeTag::Equal, "yo! foo "), /// (ChangeTag::Delete, "bar"), /// (ChangeTag::Insert, "bor"), /// (ChangeTag::Equal, " baz") /// ]); pub struct TextDiffRemapper<'x, T: ?Sized> { old: SliceRemapper<'x, T>, new: SliceRemapper<'x, T>, } impl<'x, T: DiffableStr + ?Sized> TextDiffRemapper<'x, T> { /// Creates a new remapper from strings and slices. pub fn new( old_slices: &[&'x T], new_slices: &[&'x T], old: &'x T, new: &'x T, ) -> TextDiffRemapper<'x, T> { TextDiffRemapper { old: SliceRemapper::new(old, old_slices), new: SliceRemapper::new(new, new_slices), } } /// Creates a new remapper from a text diff and the original strings. pub fn from_text_diff<'old, 'new, 'bufs>( diff: &TextDiff<'old, 'new, 'bufs, T>, old: &'x T, new: &'x T, ) -> TextDiffRemapper<'x, T> where 'old: 'x, 'new: 'x, { TextDiffRemapper { old: SliceRemapper::new(old, diff.old_slices()), new: SliceRemapper::new(new, diff.new_slices()), } } /// Slices into the old string. pub fn slice_old(&self, range: Range) -> Option<&'x T> { self.old.slice(range) } /// Slices into the new string. pub fn slice_new(&self, range: Range) -> Option<&'x T> { self.new.slice(range) } /// Given a diffop yields the changes it encodes against the original strings. /// /// This is the same as the [`DiffOp::iter_slices`] method. /// /// ## Panics /// /// This method can panic if the input strings passed to the constructor /// are incompatible with the input strings passed to the diffing algorithm. pub fn iter_slices(&self, op: &DiffOp) -> impl Iterator { // note: this is equivalent to the code in `DiffOp::iter_slices`. It is // a copy/paste because the slicing currently cannot be well abstracted // because of lifetime issues caused by the `Index` trait. match *op { DiffOp::Equal { old_index, len, .. } => { Some((ChangeTag::Equal, self.old.slice(old_index..old_index + len))) .into_iter() .chain(None.into_iter()) } DiffOp::Insert { new_index, new_len, .. } => Some(( ChangeTag::Insert, self.new.slice(new_index..new_index + new_len), )) .into_iter() .chain(None.into_iter()), DiffOp::Delete { old_index, old_len, .. } => Some(( ChangeTag::Delete, self.old.slice(old_index..old_index + old_len), )) .into_iter() .chain(None.into_iter()), DiffOp::Replace { old_index, old_len, new_index, new_len, } => Some(( ChangeTag::Delete, self.old.slice(old_index..old_index + old_len), )) .into_iter() .chain( Some(( ChangeTag::Insert, self.new.slice(new_index..new_index + new_len), )) .into_iter(), ), } .map(|(tag, opt_val)| (tag, opt_val.expect("slice out of bounds"))) } } /// Shortcut for diffing two slices. /// /// This function produces the diff of two slices and returns a vector /// with the changes. /// /// ```rust /// use similar::{Algorithm, ChangeTag}; /// use similar::utils::diff_slices; /// /// let old = "foo\nbar\nbaz".lines().collect::>(); /// let new = "foo\nbar\nBAZ".lines().collect::>(); /// assert_eq!(diff_slices(Algorithm::Myers, &old, &new), vec![ /// (ChangeTag::Equal, &["foo", "bar"][..]), /// (ChangeTag::Delete, &["baz"][..]), /// (ChangeTag::Insert, &["BAZ"][..]), /// ]); /// ``` pub fn diff_slices<'x, T: PartialEq + Hash + Ord>( alg: Algorithm, old: &'x [T], new: &'x [T], ) -> Vec<(ChangeTag, &'x [T])> { capture_diff_slices(alg, old, new) .iter() .flat_map(|op| op.iter_slices(old, new)) .collect() } /// Shortcut for making a character level diff. /// /// This function produces the diff of two strings and returns a vector /// with the changes. It returns connected slices into the original string /// rather than character level slices. /// /// ```rust /// use similar::{Algorithm, ChangeTag}; /// use similar::utils::diff_chars; /// /// assert_eq!(diff_chars(Algorithm::Myers, "foobarbaz", "fooBARbaz"), vec![ /// (ChangeTag::Equal, "foo"), /// (ChangeTag::Delete, "bar"), /// (ChangeTag::Insert, "BAR"), /// (ChangeTag::Equal, "baz"), /// ]); /// ``` pub fn diff_chars<'x, T: DiffableStrRef + ?Sized>( alg: Algorithm, old: &'x T, new: &'x T, ) -> Vec<(ChangeTag, &'x T::Output)> { let old = old.as_diffable_str(); let new = new.as_diffable_str(); let diff = TextDiff::configure().algorithm(alg).diff_chars(old, new); let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); diff.ops() .iter() .flat_map(move |x| remapper.iter_slices(x)) .collect() } /// Shortcut for making a word level diff. /// /// This function produces the diff of two strings and returns a vector /// with the changes. It returns connected slices into the original string /// rather than word level slices. /// /// ```rust /// use similar::{Algorithm, ChangeTag}; /// use similar::utils::diff_words; /// /// assert_eq!(diff_words(Algorithm::Myers, "foo bar baz", "foo bor baz"), vec![ /// (ChangeTag::Equal, "foo "), /// (ChangeTag::Delete, "bar"), /// (ChangeTag::Insert, "bor"), /// (ChangeTag::Equal, " baz"), /// ]); /// ``` pub fn diff_words<'x, T: DiffableStrRef + ?Sized>( alg: Algorithm, old: &'x T, new: &'x T, ) -> Vec<(ChangeTag, &'x T::Output)> { let old = old.as_diffable_str(); let new = new.as_diffable_str(); let diff = TextDiff::configure().algorithm(alg).diff_words(old, new); let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); diff.ops() .iter() .flat_map(move |x| remapper.iter_slices(x)) .collect() } /// Shortcut for making a unicode word level diff. /// /// This function produces the diff of two strings and returns a vector /// with the changes. It returns connected slices into the original string /// rather than word level slices. /// /// ```rust /// use similar::{Algorithm, ChangeTag}; /// use similar::utils::diff_unicode_words; /// /// let old = "The quick (\"brown\") fox can't jump 32.3 feet, right?"; /// let new = "The quick (\"brown\") fox can't jump 9.84 meters, right?"; /// assert_eq!(diff_unicode_words(Algorithm::Myers, old, new), vec![ /// (ChangeTag::Equal, "The quick (\"brown\") fox can\'t jump "), /// (ChangeTag::Delete, "32.3"), /// (ChangeTag::Insert, "9.84"), /// (ChangeTag::Equal, " "), /// (ChangeTag::Delete, "feet"), /// (ChangeTag::Insert, "meters"), /// (ChangeTag::Equal, ", right?") /// ]); /// ``` /// /// This requires the `unicode` feature. #[cfg(feature = "unicode")] pub fn diff_unicode_words<'x, T: DiffableStrRef + ?Sized>( alg: Algorithm, old: &'x T, new: &'x T, ) -> Vec<(ChangeTag, &'x T::Output)> { let old = old.as_diffable_str(); let new = new.as_diffable_str(); let diff = TextDiff::configure() .algorithm(alg) .diff_unicode_words(old, new); let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); diff.ops() .iter() .flat_map(move |x| remapper.iter_slices(x)) .collect() } /// Shortcut for making a grapheme level diff. /// /// This function produces the diff of two strings and returns a vector /// with the changes. It returns connected slices into the original string /// rather than grapheme level slices. /// /// ```rust /// use similar::{Algorithm, ChangeTag}; /// use similar::utils::diff_graphemes; /// /// let old = "The flag of Austria is 🇦🇹"; /// let new = "The flag of Albania is 🇦🇱"; /// assert_eq!(diff_graphemes(Algorithm::Myers, old, new), vec![ /// (ChangeTag::Equal, "The flag of A"), /// (ChangeTag::Delete, "ustr"), /// (ChangeTag::Insert, "lban"), /// (ChangeTag::Equal, "ia is "), /// (ChangeTag::Delete, "🇦🇹"), /// (ChangeTag::Insert, "🇦🇱"), /// ]); /// ``` /// /// This requires the `unicode` feature. #[cfg(feature = "unicode")] pub fn diff_graphemes<'x, T: DiffableStrRef + ?Sized>( alg: Algorithm, old: &'x T, new: &'x T, ) -> Vec<(ChangeTag, &'x T::Output)> { let old = old.as_diffable_str(); let new = new.as_diffable_str(); let diff = TextDiff::configure() .algorithm(alg) .diff_graphemes(old, new); let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); diff.ops() .iter() .flat_map(move |x| remapper.iter_slices(x)) .collect() } /// Shortcut for making a line diff. /// /// This function produces the diff of two slices and returns a vector /// with the changes. Unlike [`diff_chars`] or [`diff_slices`] it returns a /// change tag for each line. /// /// ```rust /// use similar::{Algorithm, ChangeTag}; /// use similar::utils::diff_lines; /// /// assert_eq!(diff_lines(Algorithm::Myers, "foo\nbar\nbaz\nblah", "foo\nbar\nbaz\nblurgh"), vec![ /// (ChangeTag::Equal, "foo\n"), /// (ChangeTag::Equal, "bar\n"), /// (ChangeTag::Equal, "baz\n"), /// (ChangeTag::Delete, "blah"), /// (ChangeTag::Insert, "blurgh"), /// ]); /// ``` pub fn diff_lines<'x, T: DiffableStrRef + ?Sized>( alg: Algorithm, old: &'x T, new: &'x T, ) -> Vec<(ChangeTag, &'x T::Output)> { TextDiff::configure() .algorithm(alg) .diff_lines(old, new) .iter_all_changes() .map(|change| (change.tag(), change.value())) .collect() } #[test] fn test_remapper() { let a = "foo bar baz"; let words = a.tokenize_words(); dbg!(&words); let remap = SliceRemapper::new(a, &words); assert_eq!(remap.slice(0..3), Some("foo bar")); assert_eq!(remap.slice(1..3), Some(" bar")); assert_eq!(remap.slice(0..1), Some("foo")); assert_eq!(remap.slice(0..5), Some("foo bar baz")); assert_eq!(remap.slice(0..6), None); }