dissimilar-1.0.2/.cargo_vcs_info.json0000644000000001121367221054400132700ustar00{ "git": { "sha1": "f1194cbcc21cbcfb1fe358cc99ff85737a5fe809" } } dissimilar-1.0.2/.github/workflows/ci.yml010064400017500001750000000015051365432733000166150ustar0000000000000000name: CI on: push: pull_request: schedule: [cron: "40 1 * * *"] jobs: test: name: Rust ${{matrix.rust}} runs-on: ubuntu-latest strategy: fail-fast: false matrix: rust: [nightly, beta, stable] steps: - uses: actions/checkout@v2 - uses: dtolnay/rust-toolchain@master with: toolchain: ${{matrix.rust}} - run: cargo test - run: cargo test --benches release if: matrix.rust == 'nightly' msrv: name: Rust 1.31.0 runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: dtolnay/rust-toolchain@1.31.0 - run: cargo check clippy: name: Clippy runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: dtolnay/rust-toolchain@clippy - run: cargo clippy -- -Dclippy::all dissimilar-1.0.2/.gitignore010064400017500001750000000000231363422642000140600ustar0000000000000000/target Cargo.lock dissimilar-1.0.2/Cargo.toml0000644000000017561367221054400113050ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "dissimilar" version = "1.0.2" authors = ["David Tolnay "] description = "Diff library with semantic cleanup, based on Google's diff-match-patch" documentation = "https://docs.rs/dissimilar" readme = "README.md" keywords = ["diff"] categories = ["algorithms", "text-processing"] license = "MIT OR Apache-2.0" repository = "https://github.com/dtolnay/dissimilar" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] dissimilar-1.0.2/Cargo.toml.orig010064400017500001750000000007421367221053300147670ustar0000000000000000[package] name = "dissimilar" version = "1.0.2" authors = ["David Tolnay "] edition = "2018" license = "MIT OR Apache-2.0" description = "Diff library with semantic cleanup, based on Google's diff-match-patch" repository = "https://github.com/dtolnay/dissimilar" documentation = "https://docs.rs/dissimilar" keywords = ["diff"] categories = ["algorithms", "text-processing"] readme = "README.md" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] dissimilar-1.0.2/LICENSE-APACHE010064400017500001750000000251371363422642000140310ustar0000000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. dissimilar-1.0.2/LICENSE-MIT010064400017500001750000000017771363422642000135450ustar0000000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. dissimilar-1.0.2/README.md010064400017500001750000000062541367133260700133710ustar0000000000000000Dissimilar: diff library with semantic cleanup ============================================== [github](https://github.com/dtolnay/dissimilar) [crates.io](https://crates.io/crates/dissimilar) [docs.rs](https://docs.rs/dissimilar) [build status](https://github.com/dtolnay/dissimilar/actions?query=branch%3Amaster) This library is a port of the Diff component of [Diff Match Patch] to Rust. The diff implementation is based on [Myers' diff algorithm] but includes some [semantic cleanups] to increase human readability by factoring out commonalities which are likely to be coincidental. Diff Match Patch was originally built in 2006 to power Google Docs. [Diff Match Patch]: https://github.com/google/diff-match-patch [Myers' diff algorithm]: https://neil.fraser.name/writing/diff/myers.pdf [semantic cleanups]: https://neil.fraser.name/writing/diff/ ```toml [dependencies] dissimilar = "1.0" ``` *Compiler support: requires rustc 1.31+* ## Interface Here is the entire API of the Rust implementation. It operates on borrowed strings and the return value of the diff algorithm is a vector of chunks pointing into slices of those input strings. ```rust pub enum Chunk<'a> { Equal(&'a str), Delete(&'a str), Insert(&'a str), } pub fn diff(text1: &str, text2: &str) -> Vec; ```
#### License Licensed under either of Apache License, Version 2.0 or MIT license at your option.
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in this crate by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. dissimilar-1.0.2/benches/bench.rs010064400017500001750000000005311363422642000151300ustar0000000000000000#![feature(test)] extern crate test; use dissimilar::diff; use std::{fs, io}; use test::Bencher; #[bench] fn bench(b: &mut Bencher) -> io::Result<()> { let document1 = fs::read_to_string("benches/document1.txt")?; let document2 = fs::read_to_string("benches/document2.txt")?; b.iter(|| diff(&document1, &document2)); Ok(()) } dissimilar-1.0.2/benches/document1.txt010064400017500001750000000312631363422642000161510ustar0000000000000000This is a '''list of newspapers published by [[Journal Register Company]]'''. The company owns daily and weekly newspapers, other print media properties and newspaper-affiliated local Websites in the [[U.S.]] states of [[Connecticut]], [[Michigan]], [[New York]], [[Ohio]] and [[Pennsylvania]], organized in six geographic "clusters":[http://www.journalregister.com/newspapers.html Journal Register Company: Our Newspapers], accessed February 10, 2008. == Capital-Saratoga == Three dailies, associated weeklies and [[pennysaver]]s in greater [[Albany, New York]]; also [http://www.capitalcentral.com capitalcentral.com] and [http://www.jobsinnewyork.com JobsInNewYork.com]. * ''The Oneida Daily Dispatch'' {{WS|oneidadispatch.com}} of [[Oneida, New York]] * ''[[The Record (Troy)|The Record]]'' {{WS|troyrecord.com}} of [[Troy, New York]] * ''[[The Saratogian]]'' {{WS|saratogian.com}} of [[Saratoga Springs, New York]] * Weeklies: ** ''Community News'' {{WS|cnweekly.com}} weekly of [[Clifton Park, New York]] ** ''Rome Observer'' of [[Rome, New York]] ** ''Life & Times of Utica'' of [[Utica, New York]] == Connecticut == Five dailies, associated weeklies and [[pennysaver]]s in the state of [[Connecticut]]; also [http://www.ctcentral.com CTcentral.com], [http://www.ctcarsandtrucks.com CTCarsAndTrucks.com] and [http://www.jobsinct.com JobsInCT.com]. * ''The Middletown Press'' {{WS|middletownpress.com}} of [[Middletown, Connecticut|Middletown]] * ''[[New Haven Register]]'' {{WS|newhavenregister.com}} of [[New Haven, Connecticut|New Haven]] * ''The Register Citizen'' {{WS|registercitizen.com}} of [[Torrington, Connecticut|Torrington]] * [[New Haven Register#Competitors|Elm City Newspapers]] {{WS|ctcentral.com}} ** ''The Advertiser'' of [[East Haven, Connecticut|East Haven]] ** ''Hamden Chronicle'' of [[Hamden, Connecticut|Hamden]] ** ''Milford Weekly'' of [[Milford, Connecticut|Milford]] ** ''The Orange Bulletin'' of [[Orange, Connecticut|Orange]] ** ''The Post'' of [[North Haven, Connecticut|North Haven]] ** ''Shelton Weekly'' of [[Shelton, Connecticut|Shelton]] ** ''The Stratford Bard'' of [[Stratford, Connecticut|Stratford]] ** ''Wallingford Voice'' of [[Wallingford, Connecticut|Wallingford]] ** ''West Haven News'' of [[West Haven, Connecticut|West Haven]] * Housatonic Publications ** ''The New Milford Times'' {{WS|newmilfordtimes.com}} of [[New Milford, Connecticut|New Milford]] ** ''The Brookfield Journal'' of [[Brookfield, Connecticut|Brookfield]] ** ''The Kent Good Times Dispatch'' of [[Kent, Connecticut|Kent]] ** ''The Bethel Beacon'' of [[Bethel, Connecticut|Bethel]] ** ''The Litchfield Enquirer'' of [[Litchfield, Connecticut|Litchfield]] ** ''Litchfield County Times'' of [[Litchfield, Connecticut|Litchfield]] * Imprint Newspapers {{WS|imprintnewspapers.com}} ** ''West Hartford News'' of [[West Hartford, Connecticut|West Hartford]] ** ''Windsor Journal'' of [[Windsor, Connecticut|Windsor]] ** ''Windsor Locks Journal'' of [[Windsor Locks, Connecticut|Windsor Locks]] ** ''Avon Post'' of [[Avon, Connecticut|Avon]] ** ''Farmington Post'' of [[Farmington, Connecticut|Farmington]] ** ''Simsbury Post'' of [[Simsbury, Connecticut|Simsbury]] ** ''Tri-Town Post'' of [[Burlington, Connecticut|Burlington]], [[Canton, Connecticut|Canton]] and [[Harwinton, Connecticut|Harwinton]] * Minuteman Publications ** ''[[Fairfield Minuteman]]'' of [[Fairfield, Connecticut|Fairfield]] ** ''The Westport Minuteman'' {{WS|westportminuteman.com}} of [[Westport, Connecticut|Westport]] * Shoreline Newspapers weeklies: ** ''Branford Review'' of [[Branford, Connecticut|Branford]] ** ''Clinton Recorder'' of [[Clinton, Connecticut|Clinton]] ** ''The Dolphin'' of [[Naval Submarine Base New London]] in [[New London, Connecticut|New London]] ** ''Main Street News'' {{WS|ctmainstreetnews.com}} of [[Essex, Connecticut|Essex]] ** ''Pictorial Gazette'' of [[Old Saybrook, Connecticut|Old Saybrook]] ** ''Regional Express'' of [[Colchester, Connecticut|Colchester]] ** ''Regional Standard'' of [[Colchester, Connecticut|Colchester]] ** ''Shoreline Times'' {{WS|shorelinetimes.com}} of [[Guilford, Connecticut|Guilford]] ** ''Shore View East'' of [[Madison, Connecticut|Madison]] ** ''Shore View West'' of [[Guilford, Connecticut|Guilford]] * Other weeklies: ** ''Registro'' {{WS|registroct.com}} of [[New Haven, Connecticut|New Haven]] ** ''Thomaston Express'' {{WS|thomastownexpress.com}} of [[Thomaston, Connecticut|Thomaston]] ** ''Foothills Traders'' {{WS|foothillstrader.com}} of Torrington, Bristol, Canton == Michigan == Four dailies, associated weeklies and [[pennysaver]]s in the state of [[Michigan]]; also [http://www.micentralhomes.com MIcentralhomes.com] and [http://www.micentralautos.com MIcentralautos.com] * ''[[Oakland Press]]'' {{WS|theoaklandpress.com}} of [[Oakland, Michigan|Oakland]] * ''Daily Tribune'' {{WS|dailytribune.com}} of [[Royal Oak, Michigan|Royal Oak]] * ''Macomb Daily'' {{WS|macombdaily.com}} of [[Mt. Clemens, Michigan|Mt. Clemens]] * ''[[Morning Sun]]'' {{WS|themorningsun.com}} of [[Mount Pleasant, Michigan|Mount Pleasant]] * Heritage Newspapers {{WS|heritage.com}} ** ''Belleville View'' ** ''Ile Camera'' ** ''Monroe Guardian'' ** ''Ypsilanti Courier'' ** ''News-Herald'' ** ''Press & Guide'' ** ''Chelsea Standard & Dexter Leader'' ** ''Manchester Enterprise'' ** ''Milan News-Leader'' ** ''Saline Reporter'' * Independent Newspapers {{WS|sourcenewspapers.com}} ** ''Advisor'' ** ''Source'' * Morning Star {{WS|morningstarpublishing.com}} ** ''Alma Reminder'' ** ''Alpena Star'' ** ''Antrim County News'' ** ''Carson City Reminder'' ** ''The Leader & Kalkaskian'' ** ''Ogemaw/Oscoda County Star'' ** ''Petoskey/Charlevoix Star'' ** ''Presque Isle Star'' ** ''Preview Community Weekly'' ** ''Roscommon County Star'' ** ''St. Johns Reminder'' ** ''Straits Area Star'' ** ''The (Edmore) Advertiser'' * Voice Newspapers {{WS|voicenews.com}} ** ''Armada Times'' ** ''Bay Voice'' ** ''Blue Water Voice'' ** ''Downriver Voice'' ** ''Macomb Township Voice'' ** ''North Macomb Voice'' ** ''Weekend Voice'' ** ''Suburban Lifestyles'' {{WS|suburbanlifestyles.com}} == Mid-Hudson == One daily, associated magazines in the [[Hudson River Valley]] of [[New York]]; also [http://www.midhudsoncentral.com MidHudsonCentral.com] and [http://www.jobsinnewyork.com JobsInNewYork.com]. * ''[[Daily Freeman]]'' {{WS|dailyfreeman.com}} of [[Kingston, New York]] == Ohio == Two dailies, associated magazines and three shared Websites, all in the state of [[Ohio]]: [http://www.allaroundcleveland.com AllAroundCleveland.com], [http://www.allaroundclevelandcars.com AllAroundClevelandCars.com] and [http://www.allaroundclevelandjobs.com AllAroundClevelandJobs.com]. * ''[[The News-Herald (Ohio)|The News-Herald]]'' {{WS|news-herald.com}} of [[Willoughby, Ohio|Willoughby]] * ''[[The Morning Journal]]'' {{WS|morningjournal.com}} of [[Lorain, Ohio|Lorain]] == Philadelphia area == Seven dailies and associated weeklies and magazines in [[Pennsylvania]] and [[New Jersey]], and associated Websites: [http://www.allaroundphilly.com AllAroundPhilly.com], [http://www.jobsinnj.com JobsInNJ.com], [http://www.jobsinpa.com JobsInPA.com], and [http://www.phillycarsearch.com PhillyCarSearch.com]. * ''The Daily Local'' {{WS|dailylocal.com}} of [[West Chester, Pennsylvania|West Chester]] * ''[[Delaware County Daily and Sunday Times]] {{WS|delcotimes.com}} of Primos * ''[[The Mercury (Pennsylvania)|The Mercury]]'' {{WS|pottstownmercury.com}} of [[Pottstown, Pennsylvania|Pottstown]] * ''The Phoenix'' {{WS|phoenixvillenews.com}} of [[Phoenixville, Pennsylvania|Phoenixville]] * ''[[The Reporter (Lansdale)|The Reporter]]'' {{WS|thereporteronline.com}} of [[Lansdale, Pennsylvania|Lansdale]] * ''The Times Herald'' {{WS|timesherald.com}} of [[Norristown, Pennsylvania|Norristown]] * ''[[The Trentonian]]'' {{WS|trentonian.com}} of [[Trenton, New Jersey]] * Weeklies ** ''El Latino Expreso'' of [[Trenton, New Jersey]] ** ''La Voz'' of [[Norristown, Pennsylvania]] ** ''The Village News'' of [[Downingtown, Pennsylvania]] ** ''The Times Record'' of [[Kennett Square, Pennsylvania]] ** ''The Tri-County Record'' {{WS|tricountyrecord.com}} of [[Morgantown, Pennsylvania]] ** ''News of Delaware County'' {{WS|newsofdelawarecounty.com}}of [[Havertown, Pennsylvania]] ** ''Main Line Times'' {{WS|mainlinetimes.com}}of [[Ardmore, Pennsylvania]] ** ''Penny Pincher'' of [[Pottstown, Pennsylvania]] ** ''Town Talk'' {{WS|towntalknews.com}} of [[Ridley, Pennsylvania]] * Chesapeake Publishing {{WS|pa8newsgroup.com}} ** ''Solanco Sun Ledger'' of [[Quarryville, Pennsylvania]] ** ''Columbia Ledger'' of [[Columbia, Pennsylvania]] ** ''Coatesville Ledger'' of [[Downingtown, Pennsylvania]] ** ''Parkesburg Post Ledger'' of [[Quarryville, Pennsylvania]] ** ''Downingtown Ledger'' of [[Downingtown, Pennsylvania]] ** ''The Kennett Paper'' of [[Kennett Square, Pennsylvania]] ** ''Avon Grove Sun'' of [[West Grove, Pennsylvania]] ** ''Oxford Tribune'' of [[Oxford, Pennsylvania]] ** ''Elizabethtown Chronicle'' of [[Elizabethtown, Pennsylvania]] ** ''Donegal Ledger'' of [[Donegal, Pennsylvania]] ** ''Chadds Ford Post'' of [[Chadds Ford, Pennsylvania]] ** ''The Central Record'' of [[Medford, New Jersey]] ** ''Maple Shade Progress'' of [[Maple Shade, New Jersey]] * Intercounty Newspapers {{WS|buckslocalnews.com}} ** ''The Review'' of Roxborough, Pennsylvania ** ''The Recorder'' of [[Conshohocken, Pennsylvania]] ** ''The Leader'' of [[Mount Airy, Pennsylvania|Mount Airy]] and West Oak Lake, Pennsylvania ** ''The Pennington Post'' of [[Pennington, New Jersey]] ** ''The Bristol Pilot'' of [[Bristol, Pennsylvania]] ** ''Yardley News'' of [[Yardley, Pennsylvania]] ** ''New Hope Gazette'' of [[New Hope, Pennsylvania]] ** ''Doylestown Patriot'' of [[Doylestown, Pennsylvania]] ** ''Newtown Advance'' of [[Newtown, Pennsylvania]] ** ''The Plain Dealer'' of [[Williamstown, New Jersey]] ** ''News Report'' of [[Sewell, New Jersey]] ** ''Record Breeze'' of [[Berlin, New Jersey]] ** ''Newsweekly'' of [[Moorestown, New Jersey]] ** ''Haddon Herald'' of [[Haddonfield, New Jersey]] ** ''New Egypt Press'' of [[New Egypt, New Jersey]] ** ''Community News'' of [[Pemberton, New Jersey]] ** ''Plymouth Meeting Journal'' of [[Plymouth Meeting, Pennsylvania]] ** ''Lafayette Hill Journal'' of [[Lafayette Hill, Pennsylvania]] * Montgomery Newspapers {{WS|montgomerynews.com}} ** ''Ambler Gazette'' of [[Ambler, Pennsylvania]] ** ''Central Bucks Life'' of [[Bucks County, Pennsylvania]] ** ''The Colonial'' of [[Plymouth Meeting, Pennsylvania]] ** ''Glenside News'' of [[Glenside, Pennsylvania]] ** ''The Globe'' of [[Lower Moreland Township, Pennsylvania]] ** ''Main Line Life'' of [[Ardmore, Pennsylvania]] ** ''Montgomery Life'' of [[Fort Washington, Pennsylvania]] ** ''North Penn Life'' of [[Lansdale, Pennsylvania]] ** ''Perkasie News Herald'' of [[Perkasie, Pennsylvania]] ** ''Public Spirit'' of [[Hatboro, Pennsylvania]] ** ''Souderton Independent'' of [[Souderton, Pennsylvania]] ** ''Springfield Sun'' of [[Springfield, Pennsylvania]] ** ''Spring-Ford Reporter'' of [[Royersford, Pennsylvania]] ** ''Times Chronicle'' of [[Jenkintown, Pennsylvania]] ** ''Valley Item'' of [[Perkiomenville, Pennsylvania]] ** ''Willow Grove Guide'' of [[Willow Grove, Pennsylvania]] * News Gleaner Publications (closed December 2008) {{WS|newsgleaner.com}} ** ''Life Newspapers'' of [[Philadelphia, Pennsylvania]] * Suburban Publications ** ''The Suburban & Wayne Times'' {{WS|waynesuburban.com}} of [[Wayne, Pennsylvania]] ** ''The Suburban Advertiser'' of [[Exton, Pennsylvania]] ** ''The King of Prussia Courier'' of [[King of Prussia, Pennsylvania]] * Press Newspapers {{WS|countypressonline.com}} ** ''County Press'' of [[Newtown Square, Pennsylvania]] ** ''Garnet Valley Press'' of [[Glen Mills, Pennsylvania]] ** ''Haverford Press'' of [[Newtown Square, Pennsylvania]] (closed January 2009) ** ''Hometown Press'' of [[Glen Mills, Pennsylvania]] (closed January 2009) ** ''Media Press'' of [[Newtown Square, Pennsylvania]] (closed January 2009) ** ''Springfield Press'' of [[Springfield, Pennsylvania]] * Berks-Mont Newspapers {{WS|berksmontnews.com}} ** ''The Boyertown Area Times'' of [[Boyertown, Pennsylvania]] ** ''The Kutztown Area Patriot'' of [[Kutztown, Pennsylvania]] ** ''The Hamburg Area Item'' of [[Hamburg, Pennsylvania]] ** ''The Southern Berks News'' of [[Exeter Township, Berks County, Pennsylvania]] ** ''The Free Press'' of [[Quakertown, Pennsylvania]] ** ''The Saucon News'' of [[Quakertown, Pennsylvania]] ** ''Westside Weekly'' of [[Reading, Pennsylvania]] * Magazines ** ''Bucks Co. Town & Country Living'' ** ''Chester Co. Town & Country Living'' ** ''Montomgery Co. Town & Country Living'' ** ''Garden State Town & Country Living'' ** ''Montgomery Homes'' ** ''Philadelphia Golfer'' ** ''Parents Express'' ** ''Art Matters'' {{JRC}} ==References== [[Category:Journal Register publications|*]] dissimilar-1.0.2/benches/document2.txt010064400017500001750000000272161363422642000161550ustar0000000000000000This is a '''list of newspapers published by [[Journal Register Company]]'''. The company owns daily and weekly newspapers, other print media properties and newspaper-affiliated local Websites in the [[U.S.]] states of [[Connecticut]], [[Michigan]], [[New York]], [[Ohio]], [[Pennsylvania]] and [[New Jersey]], organized in six geographic "clusters":[http://www.journalregister.com/publications.html Journal Register Company: Our Publications], accessed April 21, 2010. == Capital-Saratoga == Three dailies, associated weeklies and [[pennysaver]]s in greater [[Albany, New York]]; also [http://www.capitalcentral.com capitalcentral.com] and [http://www.jobsinnewyork.com JobsInNewYork.com]. * ''The Oneida Daily Dispatch'' {{WS|oneidadispatch.com}} of [[Oneida, New York]] * ''[[The Record (Troy)|The Record]]'' {{WS|troyrecord.com}} of [[Troy, New York]] * ''[[The Saratogian]]'' {{WS|saratogian.com}} of [[Saratoga Springs, New York]] * Weeklies: ** ''Community News'' {{WS|cnweekly.com}} weekly of [[Clifton Park, New York]] ** ''Rome Observer'' {{WS|romeobserver.com}} of [[Rome, New York]] ** ''WG Life '' {{WS|saratogian.com/wglife/}} of [[Wilton, New York]] ** ''Ballston Spa Life '' {{WS|saratogian.com/bspalife}} of [[Ballston Spa, New York]] ** ''Greenbush Life'' {{WS|troyrecord.com/greenbush}} of [[Troy, New York]] ** ''Latham Life'' {{WS|troyrecord.com/latham}} of [[Latham, New York]] ** ''River Life'' {{WS|troyrecord.com/river}} of [[Troy, New York]] == Connecticut == Three dailies, associated weeklies and [[pennysaver]]s in the state of [[Connecticut]]; also [http://www.ctcentral.com CTcentral.com], [http://www.ctcarsandtrucks.com CTCarsAndTrucks.com] and [http://www.jobsinct.com JobsInCT.com]. * ''The Middletown Press'' {{WS|middletownpress.com}} of [[Middletown, Connecticut|Middletown]] * ''[[New Haven Register]]'' {{WS|newhavenregister.com}} of [[New Haven, Connecticut|New Haven]] * ''The Register Citizen'' {{WS|registercitizen.com}} of [[Torrington, Connecticut|Torrington]] * Housatonic Publications ** ''The Housatonic Times'' {{WS|housatonictimes.com}} of [[New Milford, Connecticut|New Milford]] ** ''Litchfield County Times'' {{WS|countytimes.com}} of [[Litchfield, Connecticut|Litchfield]] * Minuteman Publications ** ''[[Fairfield Minuteman]]'' {{WS|fairfieldminuteman.com}}of [[Fairfield, Connecticut|Fairfield]] ** ''The Westport Minuteman'' {{WS|westportminuteman.com}} of [[Westport, Connecticut|Westport]] * Shoreline Newspapers ** ''The Dolphin'' {{WS|dolphin-news.com}} of [[Naval Submarine Base New London]] in [[New London, Connecticut|New London]] ** ''Shoreline Times'' {{WS|shorelinetimes.com}} of [[Guilford, Connecticut|Guilford]] * Foothills Media Group {{WS|foothillsmediagroup.com}} ** ''Thomaston Express'' {{WS|thomastonexpress.com}} of [[Thomaston, Connecticut|Thomaston]] ** ''Good News About Torrington'' {{WS|goodnewsabouttorrington.com}} of [[Torrington, Connecticut|Torrington]] ** ''Granby News'' {{WS|foothillsmediagroup.com/granby}} of [[Granby, Connecticut|Granby]] ** ''Canton News'' {{WS|foothillsmediagroup.com/canton}} of [[Canton, Connecticut|Canton]] ** ''Avon News'' {{WS|foothillsmediagroup.com/avon}} of [[Avon, Connecticut|Avon]] ** ''Simsbury News'' {{WS|foothillsmediagroup.com/simsbury}} of [[Simsbury, Connecticut|Simsbury]] ** ''Litchfield News'' {{WS|foothillsmediagroup.com/litchfield}} of [[Litchfield, Connecticut|Litchfield]] ** ''Foothills Trader'' {{WS|foothillstrader.com}} of Torrington, Bristol, Canton * Other weeklies ** ''The Milford-Orange Bulletin'' {{WS|ctbulletin.com}} of [[Orange, Connecticut|Orange]] ** ''The Post-Chronicle'' {{WS|ctpostchronicle.com}} of [[North Haven, Connecticut|North Haven]] ** ''West Hartford News'' {{WS|westhartfordnews.com}} of [[West Hartford, Connecticut|West Hartford]] * Magazines ** ''The Connecticut Bride'' {{WS|connecticutmag.com}} ** ''Connecticut Magazine'' {{WS|theconnecticutbride.com}} ** ''Passport Magazine'' {{WS|passport-mag.com}} == Michigan == Four dailies, associated weeklies and [[pennysaver]]s in the state of [[Michigan]]; also [http://www.micentralhomes.com MIcentralhomes.com] and [http://www.micentralautos.com MIcentralautos.com] * ''[[Oakland Press]]'' {{WS|theoaklandpress.com}} of [[Oakland, Michigan|Oakland]] * ''Daily Tribune'' {{WS|dailytribune.com}} of [[Royal Oak, Michigan|Royal Oak]] * ''Macomb Daily'' {{WS|macombdaily.com}} of [[Mt. Clemens, Michigan|Mt. Clemens]] * ''[[Morning Sun]]'' {{WS|themorningsun.com}} of [[Mount Pleasant, Michigan|Mount Pleasant]] * Heritage Newspapers {{WS|heritage.com}} ** ''Belleville View'' {{WS|bellevilleview.com}} ** ''Ile Camera'' {{WS|thenewsherald.com/ile_camera}} ** ''Monroe Guardian'' {{WS|monreguardian.com}} ** ''Ypsilanti Courier'' {{WS|ypsilanticourier.com}} ** ''News-Herald'' {{WS|thenewsherald.com}} ** ''Press & Guide'' {{WS|pressandguide.com}} ** ''Chelsea Standard & Dexter Leader'' {{WS|chelseastandard.com}} ** ''Manchester Enterprise'' {{WS|manchesterguardian.com}} ** ''Milan News-Leader'' {{WS|milannews.com}} ** ''Saline Reporter'' {{WS|salinereporter.com}} * Independent Newspapers ** ''Advisor'' {{WS|sourcenewspapers.com}} ** ''Source'' {{WS|sourcenewspapers.com}} * Morning Star {{WS|morningstarpublishing.com}} ** ''The Leader & Kalkaskian'' {{WS|leaderandkalkaskian.com}} ** ''Grand Traverse Insider'' {{WS|grandtraverseinsider.com}} ** ''Alma Reminder'' ** ''Alpena Star'' ** ''Ogemaw/Oscoda County Star'' ** ''Presque Isle Star'' ** ''St. Johns Reminder'' * Voice Newspapers {{WS|voicenews.com}} ** ''Armada Times'' ** ''Bay Voice'' ** ''Blue Water Voice'' ** ''Downriver Voice'' ** ''Macomb Township Voice'' ** ''North Macomb Voice'' ** ''Weekend Voice'' == Mid-Hudson == One daily, associated magazines in the [[Hudson River Valley]] of [[New York]]; also [http://www.midhudsoncentral.com MidHudsonCentral.com] and [http://www.jobsinnewyork.com JobsInNewYork.com]. * ''[[Daily Freeman]]'' {{WS|dailyfreeman.com}} of [[Kingston, New York]] * ''Las Noticias'' {{WS|lasnoticiasny.com}} of [[Kingston, New York]] == Ohio == Two dailies, associated magazines and three shared Websites, all in the state of [[Ohio]]: [http://www.allaroundcleveland.com AllAroundCleveland.com], [http://www.allaroundclevelandcars.com AllAroundClevelandCars.com] and [http://www.allaroundclevelandjobs.com AllAroundClevelandJobs.com]. * ''[[The News-Herald (Ohio)|The News-Herald]]'' {{WS|news-herald.com}} of [[Willoughby, Ohio|Willoughby]] * ''[[The Morning Journal]]'' {{WS|morningjournal.com}} of [[Lorain, Ohio|Lorain]] * ''El Latino Expreso'' {{WS|lorainlatino.com}} of [[Lorain, Ohio|Lorain]] == Philadelphia area == Seven dailies and associated weeklies and magazines in [[Pennsylvania]] and [[New Jersey]], and associated Websites: [http://www.allaroundphilly.com AllAroundPhilly.com], [http://www.jobsinnj.com JobsInNJ.com], [http://www.jobsinpa.com JobsInPA.com], and [http://www.phillycarsearch.com PhillyCarSearch.com]. * ''[[The Daily Local News]]'' {{WS|dailylocal.com}} of [[West Chester, Pennsylvania|West Chester]] * ''[[Delaware County Daily and Sunday Times]] {{WS|delcotimes.com}} of Primos [[Upper Darby Township, Pennsylvania]] * ''[[The Mercury (Pennsylvania)|The Mercury]]'' {{WS|pottstownmercury.com}} of [[Pottstown, Pennsylvania|Pottstown]] * ''[[The Reporter (Lansdale)|The Reporter]]'' {{WS|thereporteronline.com}} of [[Lansdale, Pennsylvania|Lansdale]] * ''The Times Herald'' {{WS|timesherald.com}} of [[Norristown, Pennsylvania|Norristown]] * ''[[The Trentonian]]'' {{WS|trentonian.com}} of [[Trenton, New Jersey]] * Weeklies * ''The Phoenix'' {{WS|phoenixvillenews.com}} of [[Phoenixville, Pennsylvania]] ** ''El Latino Expreso'' {{WS|njexpreso.com}} of [[Trenton, New Jersey]] ** ''La Voz'' {{WS|lavozpa.com}} of [[Norristown, Pennsylvania]] ** ''The Tri County Record'' {{WS|tricountyrecord.com}} of [[Morgantown, Pennsylvania]] ** ''Penny Pincher'' {{WS|pennypincherpa.com}}of [[Pottstown, Pennsylvania]] * Chesapeake Publishing {{WS|southernchestercountyweeklies.com}} ** ''The Kennett Paper'' {{WS|kennettpaper.com}} of [[Kennett Square, Pennsylvania]] ** ''Avon Grove Sun'' {{WS|avongrovesun.com}} of [[West Grove, Pennsylvania]] ** ''The Central Record'' {{WS|medfordcentralrecord.com}} of [[Medford, New Jersey]] ** ''Maple Shade Progress'' {{WS|mapleshadeprogress.com}} of [[Maple Shade, New Jersey]] * Intercounty Newspapers {{WS|buckslocalnews.com}} {{WS|southjerseylocalnews.com}} ** ''The Pennington Post'' {{WS|penningtonpost.com}} of [[Pennington, New Jersey]] ** ''The Bristol Pilot'' {{WS|bristolpilot.com}} of [[Bristol, Pennsylvania]] ** ''Yardley News'' {{WS|yardleynews.com}} of [[Yardley, Pennsylvania]] ** ''Advance of Bucks County'' {{WS|advanceofbucks.com}} of [[Newtown, Pennsylvania]] ** ''Record Breeze'' {{WS|recordbreeze.com}} of [[Berlin, New Jersey]] ** ''Community News'' {{WS|sjcommunitynews.com}} of [[Pemberton, New Jersey]] * Montgomery Newspapers {{WS|montgomerynews.com}} ** ''Ambler Gazette'' {{WS|amblergazette.com}} of [[Ambler, Pennsylvania]] ** ''The Colonial'' {{WS|colonialnews.com}} of [[Plymouth Meeting, Pennsylvania]] ** ''Glenside News'' {{WS|glensidenews.com}} of [[Glenside, Pennsylvania]] ** ''The Globe'' {{WS|globenewspaper.com}} of [[Lower Moreland Township, Pennsylvania]] ** ''Montgomery Life'' {{WS|montgomerylife.com}} of [[Fort Washington, Pennsylvania]] ** ''North Penn Life'' {{WS|northpennlife.com}} of [[Lansdale, Pennsylvania]] ** ''Perkasie News Herald'' {{WS|perkasienewsherald.com}} of [[Perkasie, Pennsylvania]] ** ''Public Spirit'' {{WS|thepublicspirit.com}} of [[Hatboro, Pennsylvania]] ** ''Souderton Independent'' {{WS|soudertonindependent.com}} of [[Souderton, Pennsylvania]] ** ''Springfield Sun'' {{WS|springfieldsun.com}} of [[Springfield, Pennsylvania]] ** ''Spring-Ford Reporter'' {{WS|springfordreporter.com}} of [[Royersford, Pennsylvania]] ** ''Times Chronicle'' {{WS|thetimeschronicle.com}} of [[Jenkintown, Pennsylvania]] ** ''Valley Item'' {{WS|valleyitem.com}} of [[Perkiomenville, Pennsylvania]] ** ''Willow Grove Guide'' {{WS|willowgroveguide.com}} of [[Willow Grove, Pennsylvania]] ** ''The Review'' {{WS|roxreview.com}} of [[Roxborough, Philadelphia, Pennsylvania]] * Main Line Media News {{WS|mainlinemedianews.com}} ** ''Main Line Times'' {{WS|mainlinetimes.com}} of [[Ardmore, Pennsylvania]] ** ''Main Line Life'' {{WS|mainlinelife.com}} of [[Ardmore, Pennsylvania]] ** ''The King of Prussia Courier'' {{WS|kingofprussiacourier.com}} of [[King of Prussia, Pennsylvania]] * Delaware County News Network {{WS|delconewsnetwork.com}} ** ''News of Delaware County'' {{WS|newsofdelawarecounty.com}} of [[Havertown, Pennsylvania]] ** ''County Press'' {{WS|countypressonline.com}} of [[Newtown Square, Pennsylvania]] ** ''Garnet Valley Press'' {{WS|countypressonline.com}} of [[Glen Mills, Pennsylvania]] ** ''Springfield Press'' {{WS|countypressonline.com}} of [[Springfield, Pennsylvania]] ** ''Town Talk'' {{WS|towntalknews.com}} of [[Ridley, Pennsylvania]] * Berks-Mont Newspapers {{WS|berksmontnews.com}} ** ''The Boyertown Area Times'' {{WS|berksmontnews.com/boyertown_area_times}} of [[Boyertown, Pennsylvania]] ** ''The Kutztown Area Patriot'' {{WS|berksmontnews.com/kutztown_area_patriot}} of [[Kutztown, Pennsylvania]] ** ''The Hamburg Area Item'' {{WS|berksmontnews.com/hamburg_area_item}} of [[Hamburg, Pennsylvania]] ** ''The Southern Berks News'' {{WS|berksmontnews.com/southern_berks_news}} of [[Exeter Township, Berks County, Pennsylvania]] ** ''Community Connection'' {{WS|berksmontnews.com/community_connection}} of [[Boyertown, Pennsylvania]] * Magazines ** ''Bucks Co. Town & Country Living'' {{WS|buckscountymagazine.com}} ** ''Parents Express'' {{WS|parents-express.com}} ** ''Real Men, Rednecks'' {{WS|realmenredneck.com}} {{JRC}} ==References== [[Category:Journal Register publications|*]] dissimilar-1.0.2/src/find.rs010064400017500001750000000222411363422642000141530ustar0000000000000000// The strstr implementation in this file is extracted from the Rust standard // library's str::find. The algorithm works for arbitrary &[u8] haystack and // needle but is only exposed by the standard library on UTF-8 strings. // // https://github.com/rust-lang/rust/blob/1.40.0/src/libcore/str/pattern.rs // // --- // // This is the Two-Way search algorithm, which was introduced in the paper: // Crochemore, M., Perrin, D., 1991, Two-way string-matching, Journal of the ACM 38(3):651-675. // // Here's some background information. // // A *word* is a string of symbols. The *length* of a word should be a familiar // notion, and here we denote it for any word x by |x|. (We also allow for the // possibility of the *empty word*, a word of length zero.) // // If x is any non-empty word, then an integer p with 0 < p <= |x| is said to be // a *period* for x iff for all i with 0 <= i <= |x| - p - 1, we have x[i] == // x[i+p]. For example, both 1 and 2 are periods for the string "aa". As another // example, the only period of the string "abcd" is 4. // // We denote by period(x) the *smallest* period of x (provided that x is // non-empty). This is always well-defined since every non-empty word x has at // least one period, |x|. We sometimes call this *the period* of x. // // If u, v and x are words such that x = uv, where uv is the concatenation of u // and v, then we say that (u, v) is a *factorization* of x. // // Let (u, v) be a factorization for a word x. Then if w is a non-empty word // such that both of the following hold // // - either w is a suffix of u or u is a suffix of w // - either w is a prefix of v or v is a prefix of w // // then w is said to be a *repetition* for the factorization (u, v). // // Just to unpack this, there are four possibilities here. Let w = "abc". Then // we might have: // // - w is a suffix of u and w is a prefix of v. ex: ("lolabc", "abcde") // - w is a suffix of u and v is a prefix of w. ex: ("lolabc", "ab") // - u is a suffix of w and w is a prefix of v. ex: ("bc", "abchi") // - u is a suffix of w and v is a prefix of w. ex: ("bc", "a") // // Note that the word vu is a repetition for any factorization (u,v) of x = uv, // so every factorization has at least one repetition. // // If x is a string and (u, v) is a factorization for x, then a *local period* // for (u, v) is an integer r such that there is some word w such that |w| = r // and w is a repetition for (u, v). // // We denote by local_period(u, v) the smallest local period of (u, v). We // sometimes call this *the local period* of (u, v). Provided that x = uv is // non-empty, this is well-defined (because each non-empty word has at least one // factorization, as noted above). // // It can be proven that the following is an equivalent definition of a local // period for a factorization (u, v): any positive integer r such that x[i] == // x[i+r] for all i such that |u| - r <= i <= |u| - 1 and such that both x[i] // and x[i+r] are defined. (i.e., i > 0 and i + r < |x|). // // Using the above reformulation, it is easy to prove that // // 1 <= local_period(u, v) <= period(uv) // // A factorization (u, v) of x such that local_period(u,v) = period(x) is called // a *critical factorization*. // // The algorithm hinges on the following theorem, which is stated without proof: // // **Critical Factorization Theorem** Any word x has at least one critical // factorization (u, v) such that |u| < period(x). // // The purpose of maximal_suffix is to find such a critical factorization. // // If the period is short, compute another factorization x = u' v' to use for // reverse search, chosen instead so that |v'| < period(x). use std::cmp; use std::usize; pub fn find(haystack: &[u8], needle: &[u8]) -> Option { assert!(!needle.is_empty()); // crit_pos: critical factorization index let (crit_pos_false, period_false) = maximal_suffix(needle, false); let (crit_pos_true, period_true) = maximal_suffix(needle, true); let (crit_pos, mut period) = if crit_pos_false > crit_pos_true { (crit_pos_false, period_false) } else { (crit_pos_true, period_true) }; // Byteset is an extension (not part of the two way algorithm); it is a // 64-bit "fingerprint" where each set bit j corresponds to a (byte & 63) == // j present in the needle. let byteset; // Index into needle before which we have already matched. let mut memory; // A particularly readable explanation of what's going on here can be found // in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically // see the code for "Algorithm CP" on p. 323. // // What's going on is we have some critical factorization (u, v) of the // needle, and we want to determine whether u is a suffix of &v[..period]. // If it is, we use "Algorithm CP1". Otherwise we use "Algorithm CP2", which // is optimized for when the period of the needle is large. let long_period = needle[..crit_pos] != needle[period..period + crit_pos]; if long_period { // Long period case -- we have an approximation to the actual period, // and don't use memorization. // // Approximate the period by lower bound max(|u|, |v|) + 1. period = cmp::max(crit_pos, needle.len() - crit_pos) + 1; byteset = byteset_create(needle); // Dummy value to signify that the period is long. memory = usize::MAX; } else { // Short period case -- the period is exact. byteset = byteset_create(&needle[..period]); memory = 0; } // One of the main ideas of Two-Way is that we factorize the needle into two // halves, (u, v), and begin trying to find v in the haystack by scanning // left to right. If v matches, we try to match u by scanning right to left. // How far we can jump when we encounter a mismatch is all based on the fact // that (u, v) is a critical factorization for the needle. let mut position = 0; let needle_last = needle.len() - 1; 'search: loop { // Check that we have room to search in. position + needle_last cannot // overflow if we assume slices are bounded by isize's range. let tail_byte = *haystack.get(position + needle_last)?; // Quickly skip by large portions unrelated to our substring. if !byteset_contains(byteset, tail_byte) { position += needle.len(); if !long_period { memory = 0; } continue 'search; } // See if the right part of the needle matches. let start = if long_period { crit_pos } else { cmp::max(crit_pos, memory) }; for i in start..needle.len() { if needle[i] != haystack[position + i] { position += i - crit_pos + 1; if !long_period { memory = 0; } continue 'search; } } // See if the left part of the needle matches. let start = if long_period { 0 } else { memory }; for i in (start..crit_pos).rev() { if needle[i] != haystack[position + i] { position += period; if !long_period { memory = needle.len() - period; } continue 'search; } } // We have found a match! return Some(position); } } fn byteset_create(bytes: &[u8]) -> u64 { bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a) } fn byteset_contains(byteset: u64, byte: u8) -> bool { (byteset >> ((byte & 0x3f) as usize)) & 1 != 0 } // Compute the maximal suffix of `arr`. // // The maximal suffix is a possible critical factorization (u, v) of `arr`. // // Returns (`i`, `p`) where `i` is the starting index of v and `p` is the // period of v. // // `order_greater` determines if lexical order is `<` or `>`. Both // orders must be computed -- the ordering with the largest `i` gives // a critical factorization. // // For long period cases, the resulting period is not exact (it is too short). fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) { let mut left = 0; // Corresponds to i in the paper let mut right = 1; // Corresponds to j in the paper let mut offset = 0; // Corresponds to k in the paper, but starting at 0 // to match 0-based indexing. let mut period = 1; // Corresponds to p in the paper while let Some(&a) = arr.get(right + offset) { // `left` will be inbounds when `right` is. let b = arr[left + offset]; if (a < b && !order_greater) || (a > b && order_greater) { // Suffix is smaller, period is entire prefix so far. right += offset + 1; offset = 0; period = right - left; } else if a == b { // Advance through repetition of the current period. if offset + 1 == period { right += offset + 1; offset = 0; } else { offset += 1; } } else { // Suffix is larger, start over from current location. left = right; right += 1; offset = 0; period = 1; } } (left, period) } dissimilar-1.0.2/src/lib.rs010064400017500001750000001032511367221053600140050ustar0000000000000000//! [![github]](https://github.com/dtolnay/dissimilar) [![crates-io]](https://crates.io/crates/dissimilar) [![docs-rs]](https://docs.rs/dissimilar) //! //! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github //! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust //! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logoColor=white&logo= //! //!
//! //! ## Diff library with semantic cleanup, based on Google's diff-match-patch //! //! This library is a port of the Diff component of [Diff Match Patch] to Rust. //! The diff implementation is based on [Myers' diff algorithm] but includes //! some [semantic cleanups] to increase human readability by factoring out //! commonalities which are likely to be coincidental. //! //! Diff Match Patch was originally built in 2006 to power Google Docs. //! //! # Interface //! //! Here is the entire API of the Rust implementation. It operates on borrowed //! strings and the return value of the diff algorithm is a vector of chunks //! pointing into slices of those input strings. //! //! ``` //! pub enum Chunk<'a> { //! Equal(&'a str), //! Delete(&'a str), //! Insert(&'a str), //! } //! //! # const IGNORE: &str = stringify! { //! pub fn diff(text1: &str, text2: &str) -> Vec; //! # }; //! ``` //! //! [Diff Match Patch]: https://github.com/google/diff-match-patch //! [Myers' diff algorithm]: https://neil.fraser.name/writing/diff/myers.pdf //! [semantic cleanups]: https://neil.fraser.name/writing/diff/ #![doc(html_root_url = "https://docs.rs/dissimilar/1.0.2")] #![allow( clippy::blocks_in_if_conditions, clippy::collapsible_if, clippy::comparison_chain, clippy::new_without_default )] mod find; mod range; #[cfg(test)] mod tests; use crate::range::{bytes, str, Range}; use std::cmp; use std::collections::VecDeque; use std::fmt::{self, Debug}; #[derive(Copy, Clone, PartialEq, Eq)] pub enum Chunk<'a> { Equal(&'a str), Delete(&'a str), Insert(&'a str), } #[derive(Copy, Clone)] enum Diff<'a, 'b> { Equal(Range<'a>, Range<'b>), Delete(Range<'a>), Insert(Range<'b>), } impl<'tmp, 'a: 'tmp, 'b: 'tmp> Diff<'a, 'b> { fn text(&self) -> Range<'tmp> { match *self { Diff::Equal(range, _) | Diff::Delete(range) | Diff::Insert(range) => range, } } fn grow_left(&mut self, increment: usize) { self.for_each(|range| { range.offset -= increment; range.len += increment; }); } fn grow_right(&mut self, increment: usize) { self.for_each(|range| range.len += increment); } fn shift_left(&mut self, increment: usize) { self.for_each(|range| range.offset -= increment); } fn shift_right(&mut self, increment: usize) { self.for_each(|range| range.offset += increment); } fn for_each(&mut self, f: impl Fn(&mut Range)) { match self { Diff::Equal(range1, range2) => { f(range1); f(range2); } Diff::Delete(range) => f(range), Diff::Insert(range) => f(range), } } } pub fn diff<'a>(text1: &'a str, text2: &'a str) -> Vec> { let text1 = Range::new(text1, ..); let text2 = Range::new(text2, ..); let mut solution = main(text1, text2); cleanup_char_boundary(&mut solution); cleanup_semantic(&mut solution); cleanup_merge(&mut solution); solution.diffs.into_iter().map(Chunk::from).collect() } struct Solution<'a, 'b> { text1: Range<'a>, text2: Range<'b>, diffs: Vec>, utf8: bool, } fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> { let whole1 = text1; let whole2 = text2; // Trim off common prefix. let common_prefix_len = common_prefix_bytes(text1, text2); let common_prefix = Diff::Equal( text1.substring(..common_prefix_len), text2.substring(..common_prefix_len), ); text1 = text1.substring(common_prefix_len..); text2 = text2.substring(common_prefix_len..); // Trim off common suffix. let common_suffix_len = common_suffix_bytes(text1, text2); let common_suffix = Diff::Equal( text1.substring(text1.len - common_suffix_len..), text2.substring(text2.len - common_suffix_len..), ); text1 = text1.substring(..text1.len - common_suffix_len); text2 = text2.substring(..text2.len - common_suffix_len); // Compute the diff on the middle block. let mut solution = Solution { text1: whole1, text2: whole2, diffs: compute(text1, text2), utf8: false, }; // Restore the prefix and suffix. if common_prefix_len > 0 { solution.diffs.insert(0, common_prefix); } if common_suffix_len > 0 { solution.diffs.push(common_suffix); } cleanup_merge(&mut solution); solution } // Find the differences between two texts. Assumes that the texts do not have // any common prefix or suffix. fn compute<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec> { match (text1.is_empty(), text2.is_empty()) { (true, true) => return Vec::new(), (true, false) => return vec![Diff::Insert(text2)], (false, true) => return vec![Diff::Delete(text1)], (false, false) => {} } // Check for entire shorter text inside the longer text. if text1.len > text2.len { if let Some(i) = text1.find(text2) { return vec![ Diff::Delete(text1.substring(..i)), Diff::Equal(text1.substring(i..i + text2.len), text2), Diff::Delete(text1.substring(i + text2.len..)), ]; } } else { if let Some(i) = text2.find(text1) { return vec![ Diff::Insert(text2.substring(..i)), Diff::Equal(text1, text2.substring(i..i + text1.len)), Diff::Insert(text2.substring(i + text1.len..)), ]; } } if text1.len == 1 || text2.len == 1 { // Single character string. // After the previous check, the character can't be an equality. return vec![Diff::Delete(text1), Diff::Insert(text2)]; } bisect(text1, text2) } // Find the 'middle snake' of a diff, split the problem in two and return the // recursively constructed diff. // // See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. fn bisect<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec> { let max_d = (text1.len + text2.len + 1) / 2; let v_offset = max_d; let v_len = 2 * max_d; let mut v1 = vec![-1isize; v_len]; let mut v2 = vec![-1isize; v_len]; v1[v_offset + 1] = 0; v2[v_offset + 1] = 0; let delta = text1.len as isize - text2.len as isize; // If the total number of characters is odd, then the front path will // collide with the reverse path. let front = delta % 2 != 0; // Offsets for start and end of k loop. // Prevents mapping of space beyond the grid. let mut k1start = 0; let mut k1end = 0; let mut k2start = 0; let mut k2end = 0; for d in 0..max_d as isize { // Walk the front path one step. let mut k1 = -d + k1start; while k1 <= d - k1end { let k1_offset = (v_offset as isize + k1) as usize; let mut x1 = if k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]) { v1[k1_offset + 1] } else { v1[k1_offset - 1] + 1 } as usize; let mut y1 = (x1 as isize - k1) as usize; if let (Some(s1), Some(s2)) = (text1.get(x1..), text2.get(y1..)) { let advance = common_prefix_bytes(s1, s2); x1 += advance; y1 += advance; } v1[k1_offset] = x1 as isize; if x1 > text1.len { // Ran off the right of the graph. k1end += 2; } else if y1 > text2.len { // Ran off the bottom of the graph. k1start += 2; } else if front { let k2_offset = v_offset as isize + delta - k1; if k2_offset >= 0 && k2_offset < v_len as isize && v2[k2_offset as usize] != -1 { // Mirror x2 onto top-left coordinate system. let x2 = text1.len as isize - v2[k2_offset as usize]; if x1 as isize >= x2 { // Overlap detected. return bisect_split(text1, text2, x1, y1); } } } k1 += 2; } // Walk the reverse path one step. let mut k2 = -d + k2start; while k2 <= d - k2end { let k2_offset = (v_offset as isize + k2) as usize; let mut x2 = if k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) { v2[k2_offset + 1] } else { v2[k2_offset - 1] + 1 } as usize; let mut y2 = (x2 as isize - k2) as usize; if x2 < text1.len && y2 < text2.len { let advance = common_suffix_bytes( text1.substring(..text1.len - x2), text2.substring(..text2.len - y2), ); x2 += advance; y2 += advance; } v2[k2_offset] = x2 as isize; if x2 > text1.len { // Ran off the left of the graph. k2end += 2; } else if y2 > text2.len { // Ran off the top of the graph. k2start += 2; } else if !front { let k1_offset = v_offset as isize + delta - k2; if k1_offset >= 0 && k1_offset < v_len as isize && v1[k1_offset as usize] != -1 { let x1 = v1[k1_offset as usize] as usize; let y1 = v_offset + x1 - k1_offset as usize; // Mirror x2 onto top-left coordinate system. x2 = text1.len - x2; if x1 >= x2 { // Overlap detected. return bisect_split(text1, text2, x1, y1); } } } k2 += 2; } } // Number of diffs equals number of characters, no commonality at all. vec![Diff::Delete(text1), Diff::Insert(text2)] } // Given the location of the 'middle snake', split the diff in two parts and // recurse. fn bisect_split<'a, 'b>( text1: Range<'a>, text2: Range<'b>, x: usize, y: usize, ) -> Vec> { let (text1a, text1b) = text1.split_at(x); let (text2a, text2b) = text2.split_at(y); // Compute both diffs serially. let mut diffs = main(text1a, text2a).diffs; diffs.extend(main(text1b, text2b).diffs); diffs } // Determine the length of the common prefix of two strings. fn common_prefix(text1: Range, text2: Range) -> usize { for ((i, ch1), ch2) in text1.char_indices().zip(text2.chars()) { if ch1 != ch2 { return i; } } cmp::min(text1.len, text2.len) } // Determine the length of the common suffix of two strings. fn common_suffix(text1: Range, text2: Range) -> usize { for ((i, ch1), ch2) in text1.char_indices().rev().zip(text2.chars().rev()) { if ch1 != ch2 { return text1.len - i - ch1.len_utf8(); } } cmp::min(text1.len, text2.len) } fn common_prefix_bytes(text1: Range, text2: Range) -> usize { for (i, (b1, b2)) in text1.bytes().zip(text2.bytes()).enumerate() { if b1 != b2 { return i; } } cmp::min(text1.len, text2.len) } fn common_suffix_bytes(text1: Range, text2: Range) -> usize { for (i, (b1, b2)) in text1.bytes().rev().zip(text2.bytes().rev()).enumerate() { if b1 != b2 { return i; } } cmp::min(text1.len, text2.len) } // Determine if the suffix of one string is the prefix of another. // // Returns the number of characters common to the end of the first string and // the start of the second string. fn common_overlap(mut text1: Range, mut text2: Range) -> usize { // Eliminate the null case. if text1.is_empty() || text2.is_empty() { return 0; } // Truncate the longer string. if text1.len > text2.len { text1 = text1.substring(text1.len - text2.len..); } else if text1.len < text2.len { text2 = text2.substring(..text1.len); } // Quick check for the worst case. if bytes(text1) == bytes(text2) { return text1.len; } // Start by looking for a single character match // and increase length until no match is found. // Performance analysis: https://neil.fraser.name/news/2010/11/04/ let mut best = 0; let mut length = 1; loop { let pattern = text1.substring(text1.len - length..); let found = match text2.find(pattern) { Some(found) => found, None => return best, }; length += found; if found == 0 || bytes(text1.substring(text1.len - length..)) == bytes(text2.substring(..length)) { best = length; length += 1; } } } fn cleanup_char_boundary(solution: &mut Solution) { fn boundary_down(doc: &str, pos: usize) -> usize { let mut adjust = 0; while !doc.is_char_boundary(pos - adjust) { adjust += 1; } adjust } fn boundary_up(doc: &str, pos: usize) -> usize { let mut adjust = 0; while !doc.is_char_boundary(pos + adjust) { adjust += 1; } adjust } for diff in &mut solution.diffs { match diff { Diff::Equal(range1, range2) => { let adjust = boundary_up(range1.doc, range1.offset); range1.offset += adjust; range1.len -= adjust; range2.offset += adjust; range2.len -= adjust; let adjust = boundary_down(range1.doc, range1.offset + range1.len); range1.len -= adjust; range2.len -= adjust; } Diff::Delete(range) => { let adjust = boundary_down(range.doc, range.offset); range.offset -= adjust; range.len += adjust; let adjust = boundary_up(range.doc, range.offset + range.len); range.len += adjust; } Diff::Insert(range) => { let adjust = boundary_down(range.doc, range.offset); range.offset -= adjust; range.len += adjust; let adjust = boundary_up(range.doc, range.offset + range.len); range.len += adjust; } } } solution.utf8 = true; } // Reduce the number of edits by eliminating semantically trivial equalities. fn cleanup_semantic(solution: &mut Solution) { let mut diffs = &mut solution.diffs; if diffs.is_empty() { return; } let mut changes = false; let mut equalities = VecDeque::new(); // Double-ended queue of equalities. let mut last_equality = None; // Always equal to equalities.peek().text let mut pointer = 0; // Number of characters that changed prior to the equality. let mut len_insertions1 = 0; let mut len_deletions1 = 0; // Number of characters that changed after the equality. let mut len_insertions2 = 0; let mut len_deletions2 = 0; while let Some(&this_diff) = diffs.get(pointer) { match this_diff { Diff::Equal(text1, text2) => { equalities.push_back(pointer); len_insertions1 = len_insertions2; len_deletions1 = len_deletions2; len_insertions2 = 0; len_deletions2 = 0; last_equality = Some((text1, text2)); pointer += 1; continue; } Diff::Delete(text) => len_deletions2 += text.len, Diff::Insert(text) => len_insertions2 += text.len, } // Eliminate an equality that is smaller or equal to the edits on both // sides of it. if last_equality.map_or(false, |(last_equality, _)| { last_equality.len <= cmp::max(len_insertions1, len_deletions1) && last_equality.len <= cmp::max(len_insertions2, len_deletions2) }) { // Jump back to offending equality. pointer = equalities.pop_back().unwrap(); // Replace equality with a delete. diffs[pointer] = Diff::Delete(last_equality.unwrap().0); // Insert a corresponding insert. diffs.insert(pointer + 1, Diff::Insert(last_equality.unwrap().1)); len_insertions1 = 0; // Reset the counters. len_insertions2 = 0; len_deletions1 = 0; len_deletions2 = 0; last_equality = None; changes = true; // Throw away the previous equality (it needs to be reevaluated). equalities.pop_back(); if let Some(back) = equalities.back() { // There is a safe equality we can fall back to. pointer = *back; } else { // There are no previous equalities, jump back to the start. pointer = 0; continue; } } pointer += 1; } // Normalize the diff. if changes { cleanup_merge(solution); } cleanup_semantic_lossless(solution); diffs = &mut solution.diffs; // Find any overlaps between deletions and insertions. // e.g: abcxxxxxxdef // -> abcxxxdef // e.g: xxxabcdefxxx // -> defxxxabc // Only extract an overlap if it is as big as the edit ahead or behind it. let mut pointer = 1; while let Some(&this_diff) = diffs.get(pointer) { let prev_diff = diffs[pointer - 1]; if let (Diff::Delete(deletion), Diff::Insert(insertion)) = (prev_diff, this_diff) { let overlap_len1 = common_overlap(deletion, insertion); let overlap_len2 = common_overlap(insertion, deletion); let overlap_min = cmp::min(deletion.len, insertion.len); if overlap_len1 >= overlap_len2 && 2 * overlap_len1 >= overlap_min { // Overlap found. Insert an equality and trim the surrounding edits. diffs.insert( pointer, Diff::Equal( deletion.substring(deletion.len - overlap_len1..deletion.len), insertion.substring(..overlap_len1), ), ); diffs[pointer - 1] = Diff::Delete(deletion.substring(..deletion.len - overlap_len1)); diffs[pointer + 1] = Diff::Insert(insertion.substring(overlap_len1..)); } else if overlap_len1 < overlap_len2 && 2 * overlap_len2 >= overlap_min { // Reverse overlap found. // Insert an equality and swap and trim the surrounding edits. diffs.insert( pointer, Diff::Equal( deletion.substring(..overlap_len2), insertion.substring(insertion.len - overlap_len2..insertion.len), ), ); diffs[pointer - 1] = Diff::Insert(insertion.substring(..insertion.len - overlap_len2)); diffs[pointer + 1] = Diff::Delete(deletion.substring(overlap_len2..)); } pointer += 1; } pointer += 1; } } // Look for single edits surrounded on both sides by equalities which can be // shifted sideways to align the edit to a word boundary. // // e.g: The cat came. -> The cat came. fn cleanup_semantic_lossless(solution: &mut Solution) { let diffs = &mut solution.diffs; let mut pointer = 1; while let Some(&next_diff) = diffs.get(pointer + 1) { let prev_diff = diffs[pointer - 1]; if let ( Diff::Equal(mut prev_equal1, mut prev_equal2), Diff::Equal(mut next_equal1, mut next_equal2), ) = (prev_diff, next_diff) { // This is a single edit surrounded by equalities. let mut edit = diffs[pointer]; // First, shift the edit as far left as possible. let common_offset = common_suffix(prev_equal1, edit.text()); let original_prev_len = prev_equal1.len; prev_equal1.len -= common_offset; prev_equal2.len -= common_offset; edit.shift_left(common_offset); next_equal1.offset -= common_offset; next_equal1.len += common_offset; next_equal2.offset -= common_offset; next_equal2.len += common_offset; // Second, step character by character right, looking for the best fit. let mut best_prev_equal = (prev_equal1, prev_equal2); let mut best_edit = edit; let mut best_next_equal = (next_equal1, next_equal2); let mut best_score = cleanup_semantic_score(prev_equal1, edit.text()) + cleanup_semantic_score(edit.text(), next_equal1); while !edit.text().is_empty() && !next_equal1.is_empty() && edit.text().chars().next().unwrap() == next_equal1.chars().next().unwrap() { let increment = edit.text().chars().next().unwrap().len_utf8(); prev_equal1.len += increment; prev_equal2.len += increment; edit.shift_right(increment); next_equal1.offset += increment; next_equal1.len -= increment; next_equal2.offset += increment; next_equal2.len -= increment; let score = cleanup_semantic_score(prev_equal1, edit.text()) + cleanup_semantic_score(edit.text(), next_equal1); // The >= encourages trailing rather than leading whitespace on edits. if score >= best_score { best_score = score; best_prev_equal = (prev_equal1, prev_equal2); best_edit = edit; best_next_equal = (next_equal1, next_equal2); } } if original_prev_len != best_prev_equal.0.len { // We have an improvement, save it back to the diff. if best_next_equal.0.is_empty() { diffs.remove(pointer + 1); } else { diffs[pointer + 1] = Diff::Equal(best_next_equal.0, best_next_equal.1); } diffs[pointer] = best_edit; if best_prev_equal.0.is_empty() { diffs.remove(pointer - 1); pointer -= 1; } else { diffs[pointer - 1] = Diff::Equal(best_prev_equal.0, best_prev_equal.1); } } } pointer += 1; } } // Given two strings, compute a score representing whether the internal boundary // falls on logical boundaries. // // Scores range from 6 (best) to 0 (worst). fn cleanup_semantic_score(one: Range, two: Range) -> usize { if one.is_empty() || two.is_empty() { // Edges are the best. return 6; } // Each port of this function behaves slightly differently due to subtle // differences in each language's definition of things like 'whitespace'. // Since this function's purpose is largely cosmetic, the choice has been // made to use each language's native features rather than force total // conformity. let char1 = one.chars().next_back().unwrap(); let char2 = two.chars().next().unwrap(); let non_alphanumeric1 = !char1.is_ascii_alphanumeric(); let non_alphanumeric2 = !char2.is_ascii_alphanumeric(); let whitespace1 = non_alphanumeric1 && char1.is_ascii_whitespace(); let whitespace2 = non_alphanumeric2 && char2.is_ascii_whitespace(); let line_break1 = whitespace1 && char1.is_control(); let line_break2 = whitespace2 && char2.is_control(); let blank_line1 = line_break1 && (one.ends_with("\n\n") || one.ends_with("\n\r\n")); let blank_line2 = line_break2 && (two.starts_with("\n\n") || two.starts_with("\r\n\r\n")); if blank_line1 || blank_line2 { // Five points for blank lines. 5 } else if line_break1 || line_break2 { // Four points for line breaks. 4 } else if non_alphanumeric1 && !whitespace1 && whitespace2 { // Three points for end of sentences. 3 } else if whitespace1 || whitespace2 { // Two points for whitespace. 2 } else if non_alphanumeric1 || non_alphanumeric2 { // One point for non-alphanumeric. 1 } else { 0 } } // Reorder and merge like edit sections. Merge equalities. Any edit section can // move as long as it doesn't cross an equality. fn cleanup_merge(solution: &mut Solution) { let diffs = &mut solution.diffs; let common_prefix = if solution.utf8 { common_prefix } else { common_prefix_bytes }; let common_suffix = if solution.utf8 { common_suffix } else { common_suffix_bytes }; loop { if diffs.is_empty() { return; } diffs.push(Diff::Equal( solution.text1.substring(solution.text1.len..), solution.text2.substring(solution.text2.len..), )); // Add a dummy entry at the end. let mut pointer = 0; let mut count_delete = 0; let mut count_insert = 0; let mut text_delete = Range::empty(); let mut text_insert = Range::empty(); while let Some(&this_diff) = diffs.get(pointer) { match this_diff { Diff::Insert(text) => { count_insert += 1; if text_insert.is_empty() { text_insert = text; } else { text_insert.len += text.len; } } Diff::Delete(text) => { count_delete += 1; if text_delete.is_empty() { text_delete = text; } else { text_delete.len += text.len; } } Diff::Equal(text, _) => { let count_both = count_delete + count_insert; if count_both > 1 { let both_types = count_delete != 0 && count_insert != 0; // Delete the offending records. diffs.splice(pointer - count_both..pointer, None); pointer -= count_both; if both_types { // Factor out any common prefix. let common_length = common_prefix(text_insert, text_delete); if common_length != 0 { if pointer > 0 { match &mut diffs[pointer - 1] { Diff::Equal(this_diff1, this_diff2) => { this_diff1.len += common_length; this_diff2.len += common_length; } _ => unreachable!( "previous diff should have been an equality" ), } } else { diffs.insert( pointer, Diff::Equal( text_delete.substring(..common_length), text_insert.substring(..common_length), ), ); pointer += 1; } text_insert = text_insert.substring(common_length..); text_delete = text_delete.substring(common_length..); } // Factor out any common suffix. let common_length = common_suffix(text_insert, text_delete); if common_length != 0 { diffs[pointer].grow_left(common_length); text_insert.len -= common_length; text_delete.len -= common_length; } } // Insert the merged records. if !text_delete.is_empty() { diffs.insert(pointer, Diff::Delete(text_delete)); pointer += 1; } if !text_insert.is_empty() { diffs.insert(pointer, Diff::Insert(text_insert)); pointer += 1; } } else if pointer > 0 { if let Some(Diff::Equal(prev_equal1, prev_equal2)) = diffs.get_mut(pointer - 1) { // Merge this equality with the previous one. prev_equal1.len += text.len; prev_equal2.len += text.len; diffs.remove(pointer); pointer -= 1; } } count_insert = 0; count_delete = 0; text_delete = Range::empty(); text_insert = Range::empty(); } } pointer += 1; } if diffs.last().unwrap().text().is_empty() { diffs.pop(); // Remove the dummy entry at the end. } // Second pass: look for single edits surrounded on both sides by equalities // which can be shifted sideways to eliminate an equality. // e.g: ABAC -> ABAC let mut changes = false; let mut pointer = 1; // Intentionally ignore the first and last element (don't need checking). while let Some(&next_diff) = diffs.get(pointer + 1) { let prev_diff = diffs[pointer - 1]; let this_diff = diffs[pointer]; if let (Diff::Equal(prev_diff, _), Diff::Equal(next_diff, _)) = (prev_diff, next_diff) { // This is a single edit surrounded by equalities. if this_diff.text().ends_with(prev_diff) { // Shift the edit over the previous equality. diffs[pointer].shift_left(prev_diff.len); diffs[pointer + 1].grow_left(prev_diff.len); diffs.remove(pointer - 1); // Delete prev_diff. changes = true; } else if this_diff.text().starts_with(next_diff) { // Shift the edit over the next equality. diffs[pointer - 1].grow_right(next_diff.len); diffs[pointer].shift_right(next_diff.len); diffs.remove(pointer + 1); // Delete next_diff. changes = true; } } pointer += 1; } // If shifts were made, the diff needs reordering and another shift sweep. if !changes { return; } } } impl Debug for Chunk<'_> { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let (name, text) = match *self { Chunk::Equal(text) => ("Equal", text), Chunk::Delete(text) => ("Delete", text), Chunk::Insert(text) => ("Insert", text), }; write!(formatter, "{}({:?})", name, text) } } impl Debug for Diff<'_, '_> { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let (name, bytes) = match *self { Diff::Equal(range, _) => ("Equal", bytes(range)), Diff::Delete(range) => ("Delete", bytes(range)), Diff::Insert(range) => ("Insert", bytes(range)), }; let text = String::from_utf8_lossy(bytes); write!(formatter, "{}({:?})", name, text) } } impl<'a> From> for Chunk<'a> { fn from(diff: Diff<'a, 'a>) -> Self { match diff { Diff::Equal(range, _) => Chunk::Equal(str(range)), Diff::Delete(range) => Chunk::Delete(str(range)), Diff::Insert(range) => Chunk::Insert(str(range)), } } } dissimilar-1.0.2/src/range.rs010064400017500001750000000071231363422642000143310ustar0000000000000000use crate::find::find; use std::fmt::Debug; use std::ops::{self, RangeFrom, RangeFull, RangeTo}; use std::str::{CharIndices, Chars}; #[derive(Copy, Clone)] pub struct Range<'a> { pub doc: &'a str, pub offset: usize, pub len: usize, } impl<'a> Range<'a> { pub fn empty() -> Self { Range { doc: "", offset: 0, len: 0, } } pub fn new(doc: &'a str, bounds: impl RangeBounds) -> Self { let (offset, len) = bounds.index(doc.len()); Range { doc, offset, len } } pub fn is_empty(&self) -> bool { self.len == 0 } pub fn substring(&self, bounds: impl RangeBounds) -> Self { let (offset, len) = bounds.index(self.len); Range { doc: self.doc, offset: self.offset + offset, len, } } pub fn get(&self, bounds: impl RangeBounds) -> Option { let (offset, len) = bounds.try_index(self.len)?; Some(Range { doc: self.doc, offset: self.offset + offset, len, }) } pub fn split_at(&self, mid: usize) -> (Self, Self) { (self.substring(..mid), self.substring(mid..)) } pub fn chars(&self) -> Chars<'a> { str(*self).chars() } pub fn char_indices(&self) -> CharIndices<'a> { str(*self).char_indices() } pub fn bytes(&self) -> impl Iterator + DoubleEndedIterator + ExactSizeIterator + 'a { bytes(*self).iter().cloned() } pub fn starts_with(&self, prefix: impl AsRef<[u8]>) -> bool { bytes(*self).starts_with(prefix.as_ref()) } pub fn ends_with(&self, suffix: impl AsRef<[u8]>) -> bool { bytes(*self).ends_with(suffix.as_ref()) } pub fn find(&self, needle: impl AsRef<[u8]>) -> Option { find(bytes(*self), needle.as_ref()) } } pub fn str(range: Range) -> &str { if cfg!(debug) && range .doc .get(range.offset..range.offset + range.len) .is_none() { eprintln!( "doc={:?} offset={} len={}", range.doc, range.offset, range.len ); } &range.doc[range.offset..range.offset + range.len] } pub fn bytes(range: Range) -> &[u8] { &range.doc.as_bytes()[range.offset..range.offset + range.len] } impl AsRef<[u8]> for Range<'_> { fn as_ref(&self) -> &[u8] { bytes(*self) } } pub trait RangeBounds: Sized + Clone + Debug { // Returns (offset, len). fn try_index(self, len: usize) -> Option<(usize, usize)>; fn index(self, len: usize) -> (usize, usize) { match self.clone().try_index(len) { Some(range) => range, None => panic!("index out of range, index={:?}, len={}", self, len), } } } impl RangeBounds for ops::Range { fn try_index(self, len: usize) -> Option<(usize, usize)> { if self.start <= self.end && self.end <= len { Some((self.start, self.end - self.start)) } else { None } } } impl RangeBounds for RangeFrom { fn try_index(self, len: usize) -> Option<(usize, usize)> { if self.start <= len { Some((self.start, len - self.start)) } else { None } } } impl RangeBounds for RangeTo { fn try_index(self, len: usize) -> Option<(usize, usize)> { if self.end <= len { Some((0, self.end)) } else { None } } } impl RangeBounds for RangeFull { fn try_index(self, len: usize) -> Option<(usize, usize)> { Some((0, len)) } } dissimilar-1.0.2/src/tests.rs010064400017500001750000000404171363422642000144020ustar0000000000000000use super::*; macro_rules! diff_list { () => { Solution { text1: Range::empty(), text2: Range::empty(), diffs: Vec::new(), utf8: true, } }; ($($kind:ident($text:literal)),+ $(,)?) => {{ macro_rules! text1 { (Insert, $s:literal) => { "" }; (Delete, $s:literal) => { $s }; (Equal, $s:literal) => { $s }; } macro_rules! text2 { (Insert, $s:literal) => { $s }; (Delete, $s:literal) => { "" }; (Equal, $s:literal) => { $s }; } let text1 = concat!($(text1!($kind, $text)),*); let text2 = concat!($(text2!($kind, $text)),*); let (_i, _j) = (&mut 0, &mut 0); macro_rules! range { (Insert, $s:literal) => { Diff::Insert(range(text2, _j, $s)) }; (Delete, $s:literal) => { Diff::Delete(range(text1, _i, $s)) }; (Equal, $s:literal) => { Diff::Equal(range(text1, _i, $s), range(text2, _j, $s)) }; } Solution { text1: Range::new(text1, ..), text2: Range::new(text2, ..), diffs: vec![$(range!($kind, $text)),*], utf8: true, } }}; } fn range<'a>(doc: &'a str, offset: &mut usize, text: &str) -> Range<'a> { let range = Range { doc, offset: *offset, len: text.len(), }; *offset += text.len(); range } macro_rules! assert_diffs { ([$($kind:ident($text:literal)),* $(,)?], $solution:ident, $msg:expr $(,)?) => { let expected = &[$(Chunk::$kind($text)),*]; assert!( same_diffs(expected, &$solution.diffs), concat!($msg, "\nexpected={:#?}\nactual={:#?}"), expected, $solution.diffs, ); }; } fn same_diffs(expected: &[Chunk], actual: &[Diff]) -> bool { expected.len() == actual.len() && expected.iter().zip(actual).all(|pair| match pair { (Chunk::Insert(expected), Diff::Insert(actual)) => *expected == str(*actual), (Chunk::Delete(expected), Diff::Delete(actual)) => *expected == str(*actual), (Chunk::Equal(expected), Diff::Equal(actual1, actual2)) => { *expected == str(*actual1) && *expected == str(*actual2) } (_, _) => false, }) } #[test] fn test_common_prefix() { let text1 = Range::new("abc", ..); let text2 = Range::new("xyz", ..); assert_eq!(0, common_prefix_bytes(text1, text2), "Null case"); let text1 = Range::new("1234abcdef", ..); let text2 = Range::new("1234xyz", ..); assert_eq!(4, common_prefix_bytes(text1, text2), "Non-null case"); let text1 = Range::new("1234", ..); let text2 = Range::new("1234xyz", ..); assert_eq!(4, common_prefix_bytes(text1, text2), "Whole case"); } #[test] fn test_common_suffix() { let text1 = Range::new("abc", ..); let text2 = Range::new("xyz", ..); assert_eq!(0, common_suffix(text1, text2), "Null case"); assert_eq!(0, common_suffix_bytes(text1, text2), "Null case"); let text1 = Range::new("abcdef1234", ..); let text2 = Range::new("xyz1234", ..); assert_eq!(4, common_suffix(text1, text2), "Non-null case"); assert_eq!(4, common_suffix_bytes(text1, text2), "Non-null case"); let text1 = Range::new("1234", ..); let text2 = Range::new("xyz1234", ..); assert_eq!(4, common_suffix(text1, text2), "Whole case"); assert_eq!(4, common_suffix_bytes(text1, text2), "Whole case"); } #[test] fn test_common_overlap() { let text1 = Range::empty(); let text2 = Range::new("abcd", ..); assert_eq!(0, common_overlap(text1, text2), "Null case"); let text1 = Range::new("abc", ..); let text2 = Range::new("abcd", ..); assert_eq!(3, common_overlap(text1, text2), "Whole case"); let text1 = Range::new("123456", ..); let text2 = Range::new("abcd", ..); assert_eq!(0, common_overlap(text1, text2), "No overlap"); let text1 = Range::new("123456xxx", ..); let text2 = Range::new("xxxabcd", ..); assert_eq!(3, common_overlap(text1, text2), "Overlap"); // Some overly clever languages (C#) may treat ligatures as equal to their // component letters. E.g. U+FB01 == 'fi' let text1 = Range::new("fi", ..); let text2 = Range::new("\u{fb01}i", ..); assert_eq!(0, common_overlap(text1, text2), "Unicode"); } #[test] fn test_cleanup_merge() { let mut solution = diff_list![]; cleanup_merge(&mut solution); assert_diffs!([], solution, "Null case"); let mut solution = diff_list![Equal("a"), Delete("b"), Insert("c")]; cleanup_merge(&mut solution); assert_diffs!( [Equal("a"), Delete("b"), Insert("c")], solution, "No change case", ); let mut solution = diff_list![Equal("a"), Equal("b"), Equal("c")]; cleanup_merge(&mut solution); assert_diffs!([Equal("abc")], solution, "Merge equalities"); let mut solution = diff_list![Delete("a"), Delete("b"), Delete("c")]; cleanup_merge(&mut solution); assert_diffs!([Delete("abc")], solution, "Merge deletions"); let mut solution = diff_list![Insert("a"), Insert("b"), Insert("c")]; cleanup_merge(&mut solution); assert_diffs!([Insert("abc")], solution, "Merge insertions"); let mut solution = diff_list![ Delete("a"), Insert("b"), Delete("c"), Insert("d"), Equal("e"), Equal("f"), ]; cleanup_merge(&mut solution); assert_diffs!( [Delete("ac"), Insert("bd"), Equal("ef")], solution, "Merge interweave", ); let mut solution = diff_list![Delete("a"), Insert("abc"), Delete("dc")]; cleanup_merge(&mut solution); assert_diffs!( [Equal("a"), Delete("d"), Insert("b"), Equal("c")], solution, "Prefix and suffix detection", ); let mut solution = diff_list![ Equal("x"), Delete("a"), Insert("abc"), Delete("dc"), Equal("y"), ]; cleanup_merge(&mut solution); assert_diffs!( [Equal("xa"), Delete("d"), Insert("b"), Equal("cy")], solution, "Prefix and suffix detection with equalities", ); let mut solution = diff_list![Equal("a"), Insert("ba"), Equal("c")]; cleanup_merge(&mut solution); assert_diffs!([Insert("ab"), Equal("ac")], solution, "Slide edit left"); let mut solution = diff_list![Equal("c"), Insert("ab"), Equal("a")]; cleanup_merge(&mut solution); assert_diffs!([Equal("ca"), Insert("ba")], solution, "Slide edit right"); let mut solution = diff_list![ Equal("a"), Delete("b"), Equal("c"), Delete("ac"), Equal("x"), ]; cleanup_merge(&mut solution); assert_diffs!( [Delete("abc"), Equal("acx")], solution, "Slide edit left recursive", ); let mut solution = diff_list![ Equal("x"), Delete("ca"), Equal("c"), Delete("b"), Equal("a"), ]; cleanup_merge(&mut solution); assert_diffs!( [Equal("xca"), Delete("cba")], solution, "Slide edit right recursive", ); let mut solution = diff_list![Delete("b"), Insert("ab"), Equal("c")]; cleanup_merge(&mut solution); assert_diffs!([Insert("a"), Equal("bc")], solution, "Empty range"); let mut solution = diff_list![Equal(""), Insert("a"), Equal("b")]; cleanup_merge(&mut solution); assert_diffs!([Insert("a"), Equal("b")], solution, "Empty equality"); } #[test] fn test_cleanup_semantic_lossless() { let mut solution = diff_list![]; cleanup_semantic_lossless(&mut solution); assert_diffs!([], solution, "Null case"); let mut solution = diff_list![ Equal("AAA\r\n\r\nBBB"), Insert("\r\nDDD\r\n\r\nBBB"), Equal("\r\nEEE"), ]; cleanup_semantic_lossless(&mut solution); assert_diffs!( [ Equal("AAA\r\n\r\n"), Insert("BBB\r\nDDD\r\n\r\n"), Equal("BBB\r\nEEE"), ], solution, "Blank lines", ); let mut solution = diff_list![Equal("AAA\r\nBBB"), Insert(" DDD\r\nBBB"), Equal(" EEE")]; cleanup_semantic_lossless(&mut solution); assert_diffs!( [Equal("AAA\r\n"), Insert("BBB DDD\r\n"), Equal("BBB EEE")], solution, "Line boundaries", ); let mut solution = diff_list![Equal("The c"), Insert("ow and the c"), Equal("at.")]; cleanup_semantic_lossless(&mut solution); assert_diffs!( [Equal("The "), Insert("cow and the "), Equal("cat.")], solution, "Word boundaries", ); let mut solution = diff_list![Equal("The-c"), Insert("ow-and-the-c"), Equal("at.")]; cleanup_semantic_lossless(&mut solution); assert_diffs!( [Equal("The-"), Insert("cow-and-the-"), Equal("cat.")], solution, "Alphanumeric boundaries", ); let mut solution = diff_list![Equal("a"), Delete("a"), Equal("ax")]; cleanup_semantic_lossless(&mut solution); assert_diffs!([Delete("a"), Equal("aax")], solution, "Hitting the start"); let mut solution = diff_list![Equal("xa"), Delete("a"), Equal("a")]; cleanup_semantic_lossless(&mut solution); assert_diffs!([Equal("xaa"), Delete("a")], solution, "Hitting the end"); let mut solution = diff_list![Equal("The xxx. The "), Insert("zzz. The "), Equal("yyy.")]; cleanup_semantic_lossless(&mut solution); assert_diffs!( [Equal("The xxx."), Insert(" The zzz."), Equal(" The yyy.")], solution, "Sentence boundaries", ); } #[test] fn test_cleanup_semantic() { let mut solution = diff_list![]; cleanup_semantic(&mut solution); assert_diffs!([], solution, "Null case"); let mut solution = diff_list![Delete("ab"), Insert("cd"), Equal("12"), Delete("e")]; cleanup_semantic(&mut solution); assert_diffs!( [Delete("ab"), Insert("cd"), Equal("12"), Delete("e")], solution, "No elimination #1", ); let mut solution = diff_list![Delete("abc"), Insert("ABC"), Equal("1234"), Delete("wxyz")]; cleanup_semantic(&mut solution); assert_diffs!( [Delete("abc"), Insert("ABC"), Equal("1234"), Delete("wxyz")], solution, "No elimination #2", ); let mut solution = diff_list![Delete("a"), Equal("b"), Delete("c")]; cleanup_semantic(&mut solution); assert_diffs!([Delete("abc"), Insert("b")], solution, "Simple elimination",); let mut solution = diff_list![ Delete("ab"), Equal("cd"), Delete("e"), Equal("f"), Insert("g"), ]; cleanup_semantic(&mut solution); assert_diffs!( [Delete("abcdef"), Insert("cdfg")], solution, "Backpass elimination", ); let mut solution = diff_list![ Insert("1"), Equal("A"), Delete("B"), Insert("2"), Equal("_"), Insert("1"), Equal("A"), Delete("B"), Insert("2"), ]; cleanup_semantic(&mut solution); assert_diffs!( [Delete("AB_AB"), Insert("1A2_1A2")], solution, "Multiple elimination", ); let mut solution = diff_list![Equal("The c"), Delete("ow and the c"), Equal("at.")]; cleanup_semantic(&mut solution); assert_diffs!( [Equal("The "), Delete("cow and the "), Equal("cat.")], solution, "Word boundaries", ); let mut solution = diff_list![Delete("abcxx"), Insert("xxdef")]; cleanup_semantic(&mut solution); assert_diffs!( [Delete("abcxx"), Insert("xxdef")], solution, "No overlap elimination", ); let mut solution = diff_list![Delete("abcxxx"), Insert("xxxdef")]; cleanup_semantic(&mut solution); assert_diffs!( [Delete("abc"), Equal("xxx"), Insert("def")], solution, "Overlap elimination", ); let mut solution = diff_list![Delete("xxxabc"), Insert("defxxx")]; cleanup_semantic(&mut solution); assert_diffs!( [Insert("def"), Equal("xxx"), Delete("abc")], solution, "Reverse overlap elimination", ); let mut solution = diff_list![ Delete("abcd1212"), Insert("1212efghi"), Equal("----"), Delete("A3"), Insert("3BC"), ]; cleanup_semantic(&mut solution); assert_diffs!( [ Delete("abcd"), Equal("1212"), Insert("efghi"), Equal("----"), Delete("A"), Equal("3"), Insert("BC"), ], solution, "Two overlap eliminations", ); } #[test] fn test_bisect() { let text1 = Range::new("cat", ..); let text2 = Range::new("map", ..); let solution = Solution { text1, text2, diffs: bisect(text1, text2), utf8: false, }; assert_diffs!( [ Delete("c"), Insert("m"), Equal("a"), Delete("t"), Insert("p"), ], solution, "Normal", ); } #[test] fn test_main() { let solution = main(Range::empty(), Range::empty()); assert_diffs!([], solution, "Null case"); let solution = main(Range::new("abc", ..), Range::new("abc", ..)); assert_diffs!([Equal("abc")], solution, "Equality"); let solution = main(Range::new("abc", ..), Range::new("ab123c", ..)); assert_diffs!( [Equal("ab"), Insert("123"), Equal("c")], solution, "Simple insertion", ); let solution = main(Range::new("a123bc", ..), Range::new("abc", ..)); assert_diffs!( [Equal("a"), Delete("123"), Equal("bc")], solution, "Simple deletion", ); let solution = main(Range::new("abc", ..), Range::new("a123b456c", ..)); assert_diffs!( [ Equal("a"), Insert("123"), Equal("b"), Insert("456"), Equal("c"), ], solution, "Two insertions", ); let solution = main(Range::new("a123b456c", ..), Range::new("abc", ..)); assert_diffs!( [ Equal("a"), Delete("123"), Equal("b"), Delete("456"), Equal("c"), ], solution, "Two deletions", ); let solution = main(Range::new("a", ..), Range::new("b", ..)); assert_diffs!([Delete("a"), Insert("b")], solution, "Simple case #1"); let solution = main( Range::new("Apples are a fruit.", ..), Range::new("Bananas are also fruit.", ..), ); assert_diffs!( [ Delete("Apple"), Insert("Banana"), Equal("s are a"), Insert("lso"), Equal(" fruit."), ], solution, "Simple case #2", ); let solution = main(Range::new("ax\t", ..), Range::new("\u{0680}x\000", ..)); assert_diffs!( [ Delete("a"), Insert("\u{0680}"), Equal("x"), Delete("\t"), Insert("\000"), ], solution, "Simple case #3", ); let solution = main(Range::new("1ayb2", ..), Range::new("abxab", ..)); assert_diffs!( [ Delete("1"), Equal("a"), Delete("y"), Equal("b"), Delete("2"), Insert("xab"), ], solution, "Overlap #1", ); let solution = main(Range::new("abcy", ..), Range::new("xaxcxabc", ..)); assert_diffs!( [Insert("xaxcx"), Equal("abc"), Delete("y")], solution, "Overlap #2", ); let solution = main( Range::new("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", ..), Range::new("a-bcd-efghijklmnopqrs", ..), ); assert_diffs!( [ Delete("ABCD"), Equal("a"), Delete("="), Insert("-"), Equal("bcd"), Delete("="), Insert("-"), Equal("efghijklmnopqrs"), Delete("EFGHIJKLMNOefg"), ], solution, "Overlap #3", ); let solution = main( Range::new("a [[Pennsylvania]] and [[New", ..), Range::new(" and [[Pennsylvania]]", ..), ); assert_diffs!( [ Insert(" "), Equal("a"), Insert("nd"), Equal(" [[Pennsylvania]]"), Delete(" and [[New"), ], solution, "Large equality", ); } dissimilar-1.0.2/tests/test.rs010064400017500001750000000012701363422642000145640ustar0000000000000000// Upstream diff-match-patch's test suite is imported as unit tests in // src/tests.rs, as they test APIs which are private in the Rust implementation. // // This directory is for Rust-specific integration tests and regression tests. use dissimilar::{diff, Chunk}; #[test] fn test_unicode() { // Unicode snowman and unicode comet have the same first two bytes. A // byte-based diff would produce a 2-byte Equal followed by 1-byte Delete // and Insert. let snowman = "\u{2603}"; let comet = "\u{2604}"; assert_eq!(snowman.as_bytes()[..2], comet.as_bytes()[..2]); let d = diff(snowman, comet); assert_eq!(d, vec![Chunk::Delete(snowman), Chunk::Insert(comet)]); }