diffr-0.1.5/.cargo_vcs_info.json0000644000000001360000000000100121310ustar { "git": { "sha1": "9ccebfc99a758bd0672536f9ccae74933717b814" }, "path_in_vcs": "" }diffr-0.1.5/.gitattributes000064400000000000000000000000541046102023000136130ustar 00000000000000*.rs text *.toml text *.lock text *.md text diffr-0.1.5/.gitignore000064400000000000000000000000361046102023000127100ustar 00000000000000**/target/** **/*.rs.bk *.cmd diffr-0.1.5/CHANGELOG.md000064400000000000000000000016241046102023000125350ustar 00000000000000## 0.1.3 (2020/03/19) - diffr: add --line-numbers flag to display the line numbers (Github #44, Athir Saleem). - diffr: --colors: allow to display italic faces (Github #45). ## 0.1.3 (2019/12/07) - diffr-lib: optimize_partition: new function. Postprocessing of the results of the LCS algorithm to reduce the number of segments of consecutive shared tokens. ## 0.1.2 (2019/09/07) - Split in two crates: diffr-lib contains reusable parts, while diffr only contains application logic. - Fix a bug in display code that messed up the colors in diffs with lines starting with dashes. - Configuration: default to use 16 colors everywhere (Github #16). ## 0.1.1 (2019/07/15) - Add --colors flag to customize faces propertized by diffr (Github #3). This changes the default colors used on linux and macOS. The default still works on windows. ## 0.1.0 (2019/07/01) Initial release. - Initial release. diffr-0.1.5/Cargo.lock0000644000000051400000000000100101040ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "atty" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" dependencies = [ "libc", "termion", "winapi", ] [[package]] name = "diffr" version = "0.1.5" dependencies = [ "atty", "termcolor", ] [[package]] name = "libc" version = "0.2.58" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6281b86796ba5e4366000be6e9e18bf35580adf9e63fbe2294aadb587613a319" [[package]] name = "numtoa" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" [[package]] name = "redox_syscall" version = "0.1.54" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12229c14a0f65c4f1cb046a3b52047cdd9da1f4b30f8a39c5063c8bae515e252" [[package]] name = "redox_termios" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" dependencies = [ "redox_syscall", ] [[package]] name = "termcolor" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f" dependencies = [ "winapi-util", ] [[package]] name = "termion" version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8fb22f7cde82c8220e5aeacb3258ed7ce996142c77cba193f203515e26c330" dependencies = [ "libc", "numtoa", "redox_syscall", "redox_termios", ] [[package]] name = "winapi" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80" dependencies = [ "winapi", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diffr-0.1.5/Cargo.toml0000644000000020200000000000100101210ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "diffr" version = "0.1.5" authors = ["Nathan Moreau "] description = """ An LCS based diff highlighting tool to ease code review from your terminal. """ homepage = "https://github.com/mookid/diffr" readme = "README.md" keywords = [ "diff", "code-review", "git", "console", "cli", ] categories = ["command-line-utilities"] license = "MIT" repository = "https://github.com/mookid/diffr" [profile.release] debug = true [dependencies.atty] version = "0.2" [dependencies.termcolor] version = "1.1" diffr-0.1.5/Cargo.toml.orig000064400000000000000000000010061046102023000136050ustar 00000000000000[package] name = "diffr" version = "0.1.5" authors = ["Nathan Moreau "] description = """ An LCS based diff highlighting tool to ease code review from your terminal. 
""" categories = ["command-line-utilities"] edition = "2018" homepage = "https://github.com/mookid/diffr" repository = "https://github.com/mookid/diffr" keywords = ["diff", "code-review", "git", "console", "cli"] license = "MIT" readme = "README.md" [profile.release] debug = true [dependencies] termcolor = "1.1" atty = "0.2" diffr-0.1.5/LICENSE.txt000064400000000000000000000020641046102023000125460ustar 00000000000000The MIT License (MIT) Copyright 2019 Nathan Moreau Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diffr-0.1.5/README.md000064400000000000000000000057101046102023000122030ustar 00000000000000## diffr Reviewing changes involves reading diffs. Sometimes, a line-oriented presentation of changes is not precise enough, especially when changes involve long lines or very similar consecutive lines. This program processes such diffs, and outputs them (in the console) with additional diff information on top of the unified diff format, using text attributes. 
It works hunk by hunk, recomputing the diff on a word-by-word basis. The current implementation uses [Myers' longest common subsequence](http://www.xmailserver.org/diff2.pdf) algorithm. [![crates.io](https://img.shields.io/crates/v/diffr.svg)](https://crates.io/crates/diffr) [![crates.io](https://img.shields.io/crates/d/diffr.svg)](https://crates.io/crates/diffr) [![Build Status](https://dev.azure.com/nathanmoreau/diffr/_apis/build/status/mookid.diffr?branchName=master)](https://dev.azure.com/nathanmoreau/diffr/_build/latest?definitionId=4&branchName=master) ![Demo](screenshots/example_simple_mac.png) ![Demo](screenshots/example_nonconsecutive.png) ![Demo](screenshots/example_cross_lines_common_tokens.png) ### Installation #### Arch Linux Install from the [AUR](https://aur.archlinux.org/packages/diffr/): ``` git clone https://aur.archlinux.org/diffr.git cd diffr makepkg -si ``` #### Homebrew ``` brew install diffr ``` #### From source You will need the [Rust compiler installed](https://www.rust-lang.org/tools/install). To install the latest published version: ``` cargo install diffr ``` Alternatively, you can build the development version: ``` git clone https://github.com/mookid/diffr.git cd diffr cargo install --path . ``` ### How to use it? diffr tries to be a well behaved Unix program: it reads its input from stdin and writes to stdout. #### One-off usage ``` git show HEAD | diffr ``` #### Integration with git Add the following section to your `.gitconfig` file: ``` [core] pager = diffr | less -R [interactive] diffFilter = diffr ``` Alternatively, you can run from the command line: ``` git config --global core.pager 'diffr | less -R' git config --global interactive.difffilter diffr ``` #### Color customization Use the --colors flag. You can customize the display of diffing and common segments of added and removed lines. 
For example, ``` diffr --colors refine-removed:background:200,0,0:foreground:white:bold ``` tweaks the red used for uniquely removed text; The configuration used in the first screenshot is ``` diffr --colors refine-added:none:background:0x33,0x99,0x33:bold --colors added:none:background:0x33,0x55,0x33 --colors refine-removed:none:background:0x99,0x33,0x33:bold --colors removed:none:background:0x55,0x33,0x33 ``` #### Display line numbers The ` --line-numbers` displays the line numbers of the hunk. ### Related projects This is improvement on the [diff-highlight](https://github.com/git/git/tree/master/contrib/diff-highlight) script distributed with git. git itself provides both `--word-diff` and `--color-words` options to several commands. diffr-0.1.5/assets/h.txt000064400000000000000000000011361046102023000132140ustar 00000000000000diffr $VERSION Nathan Moreau diffr adds word-level diff on top of unified diffs. word-level diff information is displayed using text attributes. USAGE: diffr reads from standard input and writes to standard output. Typical usage is for interactive use of diff: diff -u | diffr git show | diffr OPTIONS: --colors ... Configure color settings. --line-numbers Display line numbers. -h, --help Prints help information -V, --version Prints version information diffr-0.1.5/assets/help.txt000064400000000000000000000047571046102023000137310ustar 00000000000000diffr $VERSION Nathan Moreau diffr adds word-level diff on top of unified diffs. word-level diff information is displayed using text attributes. USAGE: diffr reads from standard input and writes to standard output. Typical usage is for interactive use of diff: diff -u | diffr git show | diffr OPTIONS: --colors ... Configure color settings for console ouput. 
There are four faces to customize: +----------------+--------------+----------------+ | line prefix | + | - | +----------------+--------------+----------------+ | common segment | added | removed | | unique segment | refine-added | refine-removed | +----------------+--------------+----------------+ The customization allows - to change the foreground or background color; - to set or unset the attributes 'bold', 'intense', 'underline'; - to clear all attributes. Customization is done passing a color_spec argument. This flag may be provided multiple times. The syntax is the following: color_spec = face-name + ':' + attributes attributes = attribute | attribute + ':' + attributes attribute = ('foreground' | 'background') + ':' + color | ( | 'no') + font-flag | 'none' font-flag = 'italic' | 'bold' | 'intense' | 'underline' color = 'none' | [0-255] | [0-255] + ',' + [0-255] + ',' + [0-255] | ('black', 'blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'white') For example, the color_spec 'refine-added:background:blue:bold' sets the color of unique added segments with a blue background, written with a bold font. --line-numbers Display line numbers. Style is optional. When style = 'compact', take as little width as possible. When style = 'aligned', align to tab stops (useful if tab is used for indentation). [default: compact] -h, --help Prints help information -V, --version Prints version information diffr-0.1.5/assets/usage.txt000064400000000000000000000002661046102023000140740ustar 00000000000000USAGE: diffr reads from standard input and writes to standard output. 
Typical usage is for interactive use of diff: diff -u | diffr git show | diffr diffr-0.1.5/src/cli_args.rs000064400000000000000000000234711046102023000136500ustar 00000000000000use super::AppConfig; use super::LineNumberStyle; use std::fmt::Display; use std::fmt::Error as FmtErr; use std::fmt::Formatter; use std::io::Write; use std::iter::Peekable; use std::process; use std::str::FromStr; use termcolor::Color; use termcolor::ColorSpec; use termcolor::ParseColorError; const FLAG_DEBUG: &str = "--debug"; const FLAG_HTML: &str = "--html"; const FLAG_COLOR: &str = "--colors"; const FLAG_LINE_NUMBERS: &str = "--line-numbers"; const BIN_NAME: &str = env!("CARGO_PKG_NAME"); const VERSION: &str = env!("CARGO_PKG_VERSION"); const USAGE: &str = include_str!("../assets/usage.txt"); const HELP_SHORT: &str = include_str!("../assets/h.txt"); const HELP_LONG: &str = include_str!("../assets/help.txt"); fn show_version() -> ! { eprintln!("{} {}", BIN_NAME, VERSION); process::exit(0); } #[derive(Debug, Clone, Copy)] enum FaceName { Added, RefineAdded, Removed, RefineRemoved, } fn missing_arg(arg: impl std::fmt::Display) -> ! { eprintln!("option requires an argument: '{}'", arg); process::exit(2); } fn interpolate(s: &str) -> String { s.replace("$VERSION", VERSION) } fn usage(code: i32) -> ! { let txt = interpolate(USAGE); let _ = std::io::stderr().write(txt.as_bytes()); process::exit(code); } fn help(long: bool) -> ! 
{ let txt = if long { HELP_LONG } else { HELP_SHORT }; let txt = interpolate(txt); let _ = std::io::stdout().write(txt.as_bytes()); process::exit(0); } impl EnumString for FaceName { fn data() -> &'static [(&'static str, Self)] { use FaceName::*; &[ ("added", Added), ("refine-added", RefineAdded), ("removed", Removed), ("refine-removed", RefineRemoved), ] } } impl Display for FaceName { fn fmt(&self, f: &mut Formatter) -> Result<(), FmtErr> { use FaceName::*; match self { Added => write!(f, "added"), RefineAdded => write!(f, "refine-added"), Removed => write!(f, "removed"), RefineRemoved => write!(f, "refine-removed"), } } } impl FaceName { fn get_face_mut<'a, 'b>(&'a self, config: &'b mut super::AppConfig) -> &'b mut ColorSpec { use FaceName::*; match self { Added => &mut config.added_face, RefineAdded => &mut config.refine_added_face, Removed => &mut config.removed_face, RefineRemoved => &mut config.refine_removed_face, } } } // custom parsing of Option struct ColorOpt(Option); impl FromStr for ColorOpt { type Err = ArgParsingError; fn from_str(input: &str) -> Result { if input == "none" { Ok(ColorOpt(None)) } else { match input.parse() { Ok(color) => Ok(ColorOpt(Some(color))), Err(err) => Err(ArgParsingError::Color(err)), } } } } trait EnumString: Copy { fn data() -> &'static [(&'static str, Self)]; } fn tryparse(input: &str) -> Result where T: EnumString + 'static, { T::data() .iter() .find(|p| p.0 == input) .map(|&p| p.1) .ok_or_else(|| { format!( "got '{}', expected {}", input, T::data().iter().map(|p| p.0).collect::>().join("|") ) }) } #[derive(Debug, Clone, Copy)] struct LineNumberStyleOpt(LineNumberStyle); impl EnumString for LineNumberStyleOpt { fn data() -> &'static [(&'static str, Self)] { use LineNumberStyle::*; &[ ("aligned", LineNumberStyleOpt(Aligned)), ("compact", LineNumberStyleOpt(Compact)), ] } } #[derive(Debug, Clone, Copy)] enum FaceColor { Foreground, Background, } #[derive(Debug, Clone, Copy)] enum AttributeName { Color(FaceColor), 
Italic(bool), Bold(bool), Intense(bool), Underline(bool), Reset, } impl EnumString for AttributeName { fn data() -> &'static [(&'static str, Self)] { use AttributeName::*; &[ ("foreground", Color(FaceColor::Foreground)), ("background", Color(FaceColor::Background)), ("italic", Italic(true)), ("noitalic", Italic(false)), ("bold", Bold(true)), ("nobold", Bold(false)), ("intense", Intense(true)), ("nointense", Intense(false)), ("underline", Underline(true)), ("nounderline", Underline(false)), ("none", Reset), ] } } #[derive(Debug)] enum ArgParsingError { FaceName(String), AttributeName(String), Color(ParseColorError), MissingValue(FaceName), LineNumberStyle(String), } impl Display for ArgParsingError { fn fmt(&self, f: &mut Formatter) -> Result<(), FmtErr> { match self { ArgParsingError::FaceName(err) => write!(f, "unexpected face name: {}", err), ArgParsingError::AttributeName(err) => write!(f, "unexpected attribute name: {}", err), ArgParsingError::Color(err) => write!(f, "unexpected color value: {}", err), ArgParsingError::MissingValue(face_name) => write!( f, "error parsing color: missing color value for face '{}'", face_name ), ArgParsingError::LineNumberStyle(err) => { write!(f, "unexpected line number style: {}", err) } } } } impl FromStr for FaceName { type Err = ArgParsingError; fn from_str(input: &str) -> Result { tryparse(input).map_err(ArgParsingError::FaceName) } } impl FromStr for AttributeName { type Err = ArgParsingError; fn from_str(input: &str) -> Result { tryparse(input).map_err(ArgParsingError::AttributeName) } } impl FromStr for LineNumberStyleOpt { type Err = ArgParsingError; fn from_str(input: &str) -> Result { tryparse(input).map_err(ArgParsingError::LineNumberStyle) } } fn ignore(_: T) {} fn parse_line_number_style<'a>( config: &mut AppConfig, value: Option<&'a str>, ) -> Result<(), ArgParsingError> { let style = if let Some(style) = value { style.parse::()?.0 } else { LineNumberStyle::Compact }; config.line_numbers_style = Some(style); Ok(()) 
} fn parse_color_attributes<'a, Values>( config: &mut AppConfig, mut values: Values, face_name: FaceName, ) -> Result<(), ArgParsingError> where Values: Iterator, { use AttributeName::*; let face = face_name.get_face_mut(config); while let Some(value) = values.next() { let attribute_name = value.parse::()?; match attribute_name { Color(kind) => { if let Some(value) = values.next() { let ColorOpt(color) = value.parse::()?; match kind { FaceColor::Foreground => face.set_fg(color), FaceColor::Background => face.set_bg(color), }; } else { return Err(ArgParsingError::MissingValue(face_name)); } } Italic(italic) => ignore(face.set_italic(italic)), Bold(bold) => ignore(face.set_bold(bold)), Intense(intense) => ignore(face.set_intense(intense)), Underline(underline) => ignore(face.set_underline(underline)), Reset => *face = Default::default(), } } Ok(()) } fn parse_color_arg(value: &str, config: &mut AppConfig) -> Result<(), ArgParsingError> { let mut pieces = value.split(':'); Ok(if let Some(piece) = pieces.next() { let face_name = piece.parse::()?; parse_color_attributes(config, pieces, face_name)?; }) } fn die_error(result: Result) -> bool { if let Err(err) = result { eprintln!("{}", err); process::exit(-1); } true } fn color(config: &mut AppConfig, args: &mut Peekable>) -> bool { let arg = args.next().unwrap(); if let Some(spec) = args.next() { die_error(parse_color_arg(&spec, config)) } else { missing_arg(arg) } } fn line_numbers(config: &mut AppConfig, args: &mut Peekable>) -> bool { args.next(); let spec = if let Some(spec) = args.next() { parse_line_number_style(config, Some(&*spec)) } else { parse_line_number_style(config, None) }; die_error(spec) } fn html(config: &mut AppConfig, args: &mut Peekable>) -> bool { config.html = true; args.next(); true } fn debug(config: &mut AppConfig, args: &mut Peekable>) -> bool { config.debug = true; args.next(); true } fn bad_arg(arg: &str) -> ! 
{ eprintln!("bad argument: '{}'", arg); usage(2); } fn parse_options( config: &mut AppConfig, args: &mut Peekable>, ) -> bool { if let Some(arg) = args.peek() { match &arg[..] { // generic flags "-h" | "--help" => help(&arg[..] == "--help"), "-V" | "--version" => show_version(), // documented flags FLAG_COLOR => color(config, args), FLAG_LINE_NUMBERS => line_numbers(config, args), // hidden flags FLAG_DEBUG => debug(config, args), FLAG_HTML => html(config, args), arg => bad_arg(arg), } } else { false } } pub fn parse_config() -> AppConfig { let mut config = AppConfig::default(); let mut args = std::env::args().skip(1).peekable(); while parse_options(&mut config, &mut args) {} if atty::is(atty::Stream::Stdin) { usage(-1); } config } diffr-0.1.5/src/diffr_lib/best_projection.rs000064400000000000000000000153651046102023000172010ustar 00000000000000use std::collections::hash_map::Entry::*; use std::collections::HashMap; use std::convert::TryFrom; use super::TokenId; use super::Tokenization; #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy, Hash)] struct Coord { next_lcs: usize, next_seq: usize, } #[derive(Debug)] struct Context { seq_index: HashMap>, } impl Context { fn new<'a>(seq: &'a Tokenization<'a>, lcs: &'a Tokenization<'a>) -> Self { let mut seq_index = HashMap::new(); for v in lcs.tokens() { match seq_index.entry(*v) { Occupied(_) => (), Vacant(e) => { e.insert(vec![]); } } } for (i, v) in seq.tokens().iter().enumerate() { match seq_index.entry(*v) { Occupied(e) => { e.into_mut().push(i); } Vacant(_) => (), } } Context { seq_index } } fn get_indexes(&self, tok: TokenId, min_value: usize) -> &[usize] { match self.seq_index.get(&tok) { Some(values) => { let min_idx = match values.binary_search(&min_value) { Ok(i) | Err(i) => i, }; &values[min_idx..] } None => &[], } } } /// The result of `optimize_partition`. This is mostly used by `shared_segments`. 
#[derive(Debug)] pub struct NormalizationResult { pub path: Vec, pub starts_with_shared: bool, } impl NormalizationResult { /// The shared segments between both inputs of `optimize_partition`. /// The `seq` argument is the longest of the two inputs. pub fn shared_segments<'a>( &'a self, seq: &'a Tokenization, ) -> impl Iterator + 'a { SharedSegments::new(self, seq) } } fn snake_len(seq: &Tokenization, lcs: &Tokenization, start_lcs: usize, start_seq: usize) -> usize { let lcs_len = lcs.nb_tokens() - start_lcs; let seq_len = seq.nb_tokens() - start_seq; let max_snake_len = lcs_len.min(seq_len); let mut snake_len = 0; let seq = &seq.tokens()[start_seq..start_seq + max_snake_len]; let lcs = &lcs.tokens()[start_lcs..start_lcs + max_snake_len]; while snake_len < max_snake_len && lcs[snake_len] == seq[snake_len] { snake_len += 1 } snake_len } /// Minimize the number of elements when partitioning `seq` according to `lcs`. /// `lcs` is a subsequence of `seq`. pub fn optimize_partition(seq: &Tokenization, lcs: &Tokenization) -> NormalizationResult { let context = Context::new(&seq, &lcs); let root = Coord { next_lcs: 0, next_seq: 0, }; let target = Coord { next_lcs: lcs.nb_tokens(), next_seq: seq.nb_tokens(), }; let mut frontier = vec![root]; let mut new_frontier = vec![]; let mut prev = HashMap::new(); let mut found_seq = None; while !frontier.is_empty() && found_seq == None { new_frontier.clear(); for &coord in frontier.iter() { if coord.next_lcs == target.next_lcs { found_seq = Some(coord.next_seq); if coord.next_seq == target.next_seq { break; } else { // TODO do something more clever here continue; } } let start_lcs = coord.next_lcs; let lcs_len = lcs.nb_tokens() - start_lcs; let mut last_enqueued_snake_len = 0; for start_seq in context.get_indexes(lcs.nth_token(to_isize(coord.next_lcs)), coord.next_seq) { if start_seq + lcs_len > seq.nb_tokens() { break; } let snake_len = 1 + snake_len(&seq, &lcs, start_lcs + 1, start_seq + 1); let next_coord = Coord { next_lcs: 
start_lcs + snake_len, next_seq: start_seq + snake_len, }; if last_enqueued_snake_len < snake_len || next_coord == target { if next_coord.next_lcs == target.next_lcs && (next_coord.next_seq == target.next_seq || found_seq == None) { found_seq = Some(next_coord.next_seq); } match prev.entry(next_coord) { Occupied(_) => continue, Vacant(e) => e.insert(coord), }; new_frontier.push(next_coord); last_enqueued_snake_len = snake_len; } } } std::mem::swap(&mut frontier, &mut new_frontier) } let target = found_seq.map(|next_seq| Coord { next_lcs: lcs.nb_tokens(), next_seq, }); let mut path = vec![]; let mut starts_with_shared = false; let mut coord = target.as_ref(); let mut seq = seq.nb_tokens(); let mut lcs = lcs.nb_tokens(); while let Some(&coord_content) = coord { let next_seq = coord_content.next_seq; let next_lcs = coord_content.next_lcs; let snake_len = lcs - next_lcs; push_if_not_last(&mut path, to_isize(seq - snake_len)); starts_with_shared = !push_if_not_last(&mut path, to_isize(next_seq)); coord = prev.get(&coord_content); seq = next_seq; lcs = next_lcs; } path.reverse(); NormalizationResult { path, starts_with_shared, } } fn push_if_not_last(v: &mut Vec, val: isize) -> bool { let should_push = v.last() != Some(&val); if should_push { v.push(val); } should_push } fn to_isize(input: usize) -> isize { isize::try_from(input).unwrap() } /// The shared segments between both inputs of `optimize_partition`. 
struct SharedSegments<'a> { index: usize, normalization: &'a Vec, seq: &'a Tokenization<'a>, } impl<'a> SharedSegments<'a> { fn new(normalization: &'a NormalizationResult, seq: &'a Tokenization) -> Self { SharedSegments { index: if normalization.starts_with_shared { 0 } else { 1 }, normalization: &normalization.path, seq, } } } impl<'a> Iterator for SharedSegments<'a> { type Item = (usize, usize); fn next(&mut self) -> Option { if self.index + 1 < self.normalization.len() { let prev = self.normalization[self.index]; let curr = self.normalization[self.index + 1]; let from = self.seq.nth_span(prev).0; let to = self.seq.nth_span(curr - 1).1; self.index += 2; Some((from, to)) } else { None } } } diffr-0.1.5/src/diffr_lib/mod.rs000064400000000000000000000433051046102023000145620ustar 00000000000000//! Algorithms to compute diffs. //! //! This module implements various algorithms described in E. Myers //! paper: [An O(ND) Difference Algorithm and Its //! Variations](http://www.xmailserver.org/diff2.pdf). //! //! The main entrypoint is `diff`, which allows to compute the longest //! common subsequence between two sequences of byte slices. 
use std::collections::hash_map::Entry::*; use std::collections::HashMap; use std::convert::TryFrom; use std::fmt::Debug; use std::fmt::{Error as FmtErr, Formatter}; mod best_projection; pub use best_projection::optimize_partition; type Span = (usize, usize); type TokenId = u64; pub struct TokenMap<'a>(HashMap<&'a [u8], TokenId>); impl<'a> TokenMap<'a> { pub fn new(input: &mut [(impl Iterator, &'a [u8])]) -> Self { let mut m = HashMap::new(); let mut counter = 0; for (spans, data) in input.iter_mut() { for span in spans { let data = &data[span.0..span.1]; match m.entry(data) { Vacant(e) => { e.insert(counter); counter += 1 } Occupied(_) => {} } } } TokenMap(m) } fn get(&self, slice: &'a [u8]) -> TokenId { self.0.get(slice).unwrap().clone() } } pub struct Tokenization<'a> { data: &'a [u8], spans: &'a [Span], token_ids: Vec, } impl Debug for Tokenization<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtErr> { let Self { data, spans, .. } = self; let data_pp = String::from_utf8_lossy(data); let tokens_pp = spans .iter() .map(|sref| String::from_utf8_lossy(&data[sref.0..sref.1])) .collect::>(); f.debug_struct("Tokenization") .field("data", &data_pp) .field("tokens", &tokens_pp) .finish() } } struct TokenizationRange<'a> { t: &'a Tokenization<'a>, start_index: isize, one_past_end_index: isize, } impl<'a> Debug for TokenizationRange<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtErr> { let Self { t: Tokenization { data, spans, .. 
}, start_index, one_past_end_index, } = self; let data_pp = String::from_utf8_lossy(data); let tokens_pp = spans[to_usize(*start_index)..to_usize(*one_past_end_index)] .iter() .map(|sref| String::from_utf8_lossy(&data[sref.0..sref.1])) .collect::>(); f.debug_struct("TokenizationRange") .field("data", &data_pp) .field("tokens", &tokens_pp) .finish() } } impl<'a> Tokenization<'a> { pub fn new(data: &'a [u8], spans: &'a [Span], token_map: &TokenMap) -> Self { let mut token_ids = Vec::with_capacity(spans.len()); for span in spans { token_ids.push(token_map.get(&data[span.0..span.1])); } Tokenization { data, spans, token_ids, } } pub fn data(&self) -> &[u8] { self.data } pub fn nb_tokens(&self) -> usize { self.spans.len() } pub fn nth_span(&self, n: isize) -> Span { self.spans[to_usize(n)] } pub fn tokens(&self) -> &[TokenId] { &self.token_ids } pub fn nth_token(&self, n: isize) -> TokenId { self.token_ids[to_usize(n)] } } impl<'a> TokenizationRange<'a> { fn new(t: &'a Tokenization<'a>) -> Self { TokenizationRange { t, start_index: 0, one_past_end_index: to_isize(t.spans.len()), } } /// Split `self` in two tokenizations: /// * the first one from the start to `lo`; /// * the second one from `hi` to the end. fn split_at(&self, lo: isize, hi: isize) -> (Self, Self) { let start = self.start_index; let end = self.one_past_end_index; assert!(start <= lo); assert!(lo <= hi); assert!(hi <= end); ( TokenizationRange { one_past_end_index: lo, ..*self }, TokenizationRange { start_index: hi, ..*self }, ) } /// Get `self`'s number of tokens. fn nb_tokens(&self) -> usize { to_usize(self.one_past_end_index - self.start_index) } /// Get `self`'s `n`th token. fn nth_token(&self, n: isize) -> TokenId { self.t.token_ids[to_usize(self.start_index + n)] } } /// A pair of `TokenizationRange`s to compare. 
#[derive(Debug)] pub struct DiffInput<'a> { added: TokenizationRange<'a>, removed: TokenizationRange<'a>, } impl<'a> DiffInput<'a> { pub fn new(added: &'a Tokenization<'a>, removed: &'a Tokenization<'a>) -> Self { DiffInput { added: TokenizationRange::new(added), removed: TokenizationRange::new(removed), } } pub fn to_owned(&'a self) -> Self { Self::new(self.added(), self.removed()) } pub fn added(&self) -> &Tokenization<'a> { self.added.t } pub fn removed(&self) -> &Tokenization<'a> { self.removed.t } fn split_at(&self, (x0, y0): (isize, isize), (x1, y1): (isize, isize)) -> (Self, Self) { let (removed1, removed2) = self.removed.split_at(x0, x1); let (added1, added2) = self.added.split_at(y0, y1); ( DiffInput { added: added1, removed: removed1, }, DiffInput { added: added2, removed: removed2, }, ) } fn n(&self) -> usize { self.removed.nb_tokens() } fn m(&self) -> usize { self.added.nb_tokens() } fn seq_a(&self, index: isize) -> TokenId { self.removed.nth_token(index) } fn seq_b(&self, index: isize) -> TokenId { self.added.nth_token(index) } } struct DiffTraversal<'a> { v: &'a mut [isize], max: usize, _end: (isize, isize), } impl<'a> DiffTraversal<'a> { fn from_slice(input: &'a DiffInput<'a>, v: &'a mut [isize], forward: bool, max: usize) -> Self { let start = (input.removed.start_index, input.added.start_index); let end = ( input.removed.one_past_end_index, input.added.one_past_end_index, ); assert!(max * 2 + 1 <= v.len()); let (start, end) = if forward { (start, end) } else { (end, start) }; let mut res = DiffTraversal { v, max, _end: end }; if max != 0 { *res.v_mut(1) = start.0 - input.removed.start_index } res } #[cfg(test)] fn from_vector( input: &'a DiffInput<'a>, v: &'a mut Vec, forward: bool, max: usize, ) -> Self { v.resize(max * 2 + 1, 0); Self::from_slice(input, v, forward, max) } fn v(&self, index: isize) -> isize { self.v[to_usize(index + to_isize(self.max))] } fn v_mut(&mut self, index: isize) -> &mut isize { &mut self.v[to_usize(index + 
to_isize(self.max))] } } #[cfg(test)] fn diff_sequences_kernel_forward( input: &DiffInput, ctx: &mut DiffTraversal, d: usize, ) -> Option { let n = to_isize(input.n()); let m = to_isize(input.m()); assert!(d < ctx.max); let d = to_isize(d); for k in (-d..=d).step_by(2) { let mut x = if k == -d || k != d && ctx.v(k - 1) < ctx.v(k + 1) { ctx.v(k + 1) } else { ctx.v(k - 1) + 1 }; let mut y = x - k; while x < n && y < m && input.seq_a(x) == input.seq_b(y) { x += 1; y += 1; } *ctx.v_mut(k) = x; if ctx._end == (x, y) { return Some(to_usize(d)); } } None } #[cfg(test)] fn diff_sequences_kernel_backward( input: &DiffInput, ctx: &mut DiffTraversal, d: usize, ) -> Option { let n = to_isize(input.n()); let m = to_isize(input.m()); let delta = n - m; assert!(d < ctx.max); let d = to_isize(d); for k in (-d..=d).step_by(2) { let mut x = if k == -d || k != d && ctx.v(k + 1) < ctx.v(k - 1) { ctx.v(k + 1) } else { ctx.v(k - 1) + 1 }; let mut y = x - (k + delta); while 0 < x && 0 < y && input.seq_a(x - 1) == input.seq_b(y - 1) { x -= 1; y -= 1; } *ctx.v_mut(k) = x - 1; if ctx._end == (x, y) { return Some(to_usize(d)); } } None } /// A wrapper around a vector of bytes that keeps track of end of lines. 
#[derive(Debug, Default)] pub struct LineSplit { data: Vec, line_lengths: Vec, } impl LineSplit { pub fn iter<'a>(&'a self) -> impl Iterator + 'a { LineSplitIter { line_split: &self, index: 0, start_of_slice: 0, } } pub fn data<'a>(&'a self) -> &'a [u8] { &self.data } pub fn append_line(&mut self, line: &[u8]) { if self.data.last().cloned() == Some(b'\n') { self.line_lengths.push(line.len()); } else { match self.line_lengths.last_mut() { Some(len) => *len += line.len(), None => self.line_lengths.push(line.len()), } } self.data.extend_from_slice(line) } pub fn clear(&mut self) { self.data.clear(); self.line_lengths.clear(); } pub fn len(&self) -> usize { self.data.len() } } struct LineSplitIter<'a> { line_split: &'a LineSplit, start_of_slice: usize, index: usize, } impl<'a> Iterator for LineSplitIter<'a> { type Item = (usize, usize); fn next(&mut self) -> Option { let &mut LineSplitIter { line_split: LineSplit { data: _, line_lengths, }, index, start_of_slice, } = self; if index < line_lengths.len() { let len = line_lengths[index]; self.start_of_slice += len; self.index += 1; Some((start_of_slice, start_of_slice + len)) } else { None } } } /// A pair of spans with the same content in two different slices. #[derive(Clone, Debug, Default)] pub struct Snake { /// The start of the span in the removed bytes. pub x0: isize, /// The start of the span in the added bytes. pub y0: isize, /// The length of the span. 
pub len: isize, } impl Snake { fn from(mut self, x0: isize, y0: isize) -> Self { self.x0 = x0; self.y0 = y0; self } fn len(mut self, len: isize) -> Self { self.len = len; self } } fn diff_sequences_kernel_bidirectional( input: &DiffInput, ctx_fwd: &mut DiffTraversal, ctx_bwd: &mut DiffTraversal, d: usize, ) -> Option<(Snake, isize)> { let n = to_isize(input.n()); let m = to_isize(input.m()); let delta = n - m; let odd = delta % 2 != 0; assert!(d < ctx_fwd.max); assert!(d < ctx_bwd.max); let d = to_isize(d); for k in (-d..=d).step_by(2) { let mut x = if k == -d || k != d && ctx_fwd.v(k - 1) < ctx_fwd.v(k + 1) { ctx_fwd.v(k + 1) } else { ctx_fwd.v(k - 1) + 1 }; let mut y = x - k; let (x0, y0) = (x, y); while x < n && y < m && input.seq_a(x) == input.seq_b(y) { x += 1; y += 1; } if odd && (k - delta).abs() <= d - 1 && x > ctx_bwd.v(k - delta) { return Some((Snake::default().from(x0, y0).len(x - x0), 2 * d - 1)); } *ctx_fwd.v_mut(k) = x; } for k in (-d..=d).step_by(2) { let mut x = if k == -d || k != d && ctx_bwd.v(k + 1) < ctx_bwd.v(k - 1) { ctx_bwd.v(k + 1) } else { ctx_bwd.v(k - 1) + 1 }; let mut y = x - (k + delta); let x1 = x; while 0 < x && 0 < y && input.seq_a(x - 1) == input.seq_b(y - 1) { x -= 1; y -= 1; } if !odd && (k + delta).abs() <= d && x - 1 < ctx_fwd.v(k + delta) { return Some((Snake::default().from(x, y).len(x1 - x), 2 * d)); } *ctx_bwd.v_mut(k) = x - 1; } None } /// Compute the length of the edit script for `input`. /// This is the forward version. #[cfg(test)] fn diff_sequences_simple_forward(input: &DiffInput, v: &mut Vec) -> usize { diff_sequences_simple(input, v, true) } /// Compute the length of the edit script for `input`. /// This is the backward version. 
#[cfg(test)]
fn diff_sequences_simple_backward(input: &DiffInput, v: &mut Vec<isize>) -> usize {
    diff_sequences_simple(input, v, false)
}

/// Run the unidirectional search in the requested direction and return
/// the length of the edit script.
#[cfg(test)]
fn diff_sequences_simple(input: &DiffInput, v: &mut Vec<isize>, forward: bool) -> usize {
    let max_result = input.n() + input.m();
    let ctx = &mut DiffTraversal::from_vector(input, v, forward, max_result);
    (0..max_result)
        .filter_map(|d| {
            if forward {
                diff_sequences_kernel_forward(input, ctx, d)
            } else {
                diff_sequences_kernel_backward(input, ctx, d)
            }
        })
        .next()
        .unwrap_or(max_result)
}

/// Compute the longest common subsequence for `input` into `dst`.
pub fn diff(input: &DiffInput, v: &mut Vec<isize>, dst: &mut Vec<Snake>) {
    dst.clear();
    // Explicit worklist instead of recursion: either a subproblem to
    // diff, or a middle snake whose place in `dst` is already decided.
    enum Task<'a> {
        Diff(DiffInput<'a>),
        PushSnake(Snake),
    }
    use Task::*;
    let mut todo = vec![Diff(input.to_owned())];
    while let Some(task) = todo.pop() {
        match task {
            Diff(input) => {
                let n = to_isize(input.n());
                fn trivial_diff(tok: &TokenizationRange) -> bool {
                    tok.one_past_end_index <= tok.start_index
                }
                if trivial_diff(&input.removed) || trivial_diff(&input.added) {
                    continue;
                }
                let (snake, d) = diff_sequences_bidirectional_snake(&input, v);
                let &Snake { x0, y0, len } = &snake;
                if 1 < d {
                    // Recurse on both sides of the middle snake; pushed
                    // in reverse so that `input1` is processed first.
                    let (input1, input2) = input.split_at((x0, y0), (x0 + len, y0 + len));
                    todo.push(Diff(input2));
                    if len != 0 {
                        todo.push(PushSnake(snake));
                    }
                    todo.push(Diff(input1));
                } else {
                    // Edit distance 0 or 1: the sequences differ by at
                    // most one token; emit shared prefix/suffix directly.
                    let SplittingPoint { sp, dx, dy } = find_splitting_point(&input);
                    let x0 = input.removed.start_index;
                    let y0 = input.added.start_index;
                    if sp != 0 {
                        dst.push(Snake::default().from(x0, y0).len(sp));
                    }
                    let len = n - sp - dx;
                    if len != 0 {
                        dst.push(Snake::default().from(x0 + sp + dx, y0 + sp + dy).len(len));
                    }
                }
            }
            PushSnake(snake) => dst.push(snake),
        }
    }
}

struct SplittingPoint {
    sp: isize,
    dx: isize,
    dy: isize,
}

// Find the splitting point when two sequences differ by one element.
fn find_splitting_point(input: &DiffInput) -> SplittingPoint {
    let n = to_isize(input.n());
    let m = to_isize(input.m());
    // `dx`/`dy` record on which side the extra token lives.
    let (short, long, nb_tokens, dx, dy) = if n < m {
        (&input.removed, &input.added, n, 0, 1)
    } else if m < n {
        (&input.added, &input.removed, m, 1, 0)
    } else {
        (&input.added, &input.removed, m, 0, 0)
    };
    let mut sp = nb_tokens;
    for i in 0..nb_tokens {
        if long.nth_token(i) != short.nth_token(i) {
            sp = i;
            break;
        }
    }
    SplittingPoint { sp, dx, dy }
}

/// Compute the length of the edit script for `input`.
/// This is the bidirectional version.
#[cfg(test)]
fn diff_sequences_bidirectional(input: &DiffInput, v: &mut Vec<isize>) -> usize {
    if input.n() + input.m() == 0 {
        return 0;
    }
    to_usize(diff_sequences_bidirectional_snake(input, v).1)
}

/// Find the middle snake and the edit-script length for `input`,
/// translating the snake back into absolute token indices.
fn diff_sequences_bidirectional_snake(input: &DiffInput, v: &mut Vec<isize>) -> (Snake, isize) {
    let max = (input.n() + input.m() + 1) / 2 + 1;
    let iter_len = 2 * max + 1;
    v.resize(2 * iter_len, 0);
    // First half of `v` holds the forward state, second half the backward state.
    let (v1, v2) = v.split_at_mut(iter_len);
    let ctx_fwd = &mut DiffTraversal::from_slice(input, v1, true, max);
    let ctx_bwd = &mut DiffTraversal::from_slice(input, v2, false, max);
    let mut result = (0..max)
        .filter_map(|d| diff_sequences_kernel_bidirectional(input, ctx_fwd, ctx_bwd, d))
        .next()
        .expect("snake not found");
    result.0.x0 += input.removed.start_index;
    result.0.y0 += input.added.start_index;
    result
}

fn to_isize(input: usize) -> isize {
    isize::try_from(input).unwrap()
}

fn to_usize(input: isize) -> usize {
    usize::try_from(input).unwrap()
}

#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum TokenKind {
    Other,
    Word,
    Spaces,
}

/// Tokenize data from `src` from the position `ofs` into `tokens`.
pub fn tokenize(src: &[u8], ofs: usize, tokens: &mut Vec) { let mut push = |lo: usize, hi: usize| { if lo < hi { tokens.push((lo, hi)) } }; let mut lo = ofs; let mut kind = TokenKind::Other; for hi in ofs..src.len() { let oldkind = kind; kind = classify_byte(src[hi]); if kind != oldkind || oldkind == TokenKind::Other { push(lo, hi); lo = hi } } push(lo, src.len()); } fn classify_byte(b: u8) -> TokenKind { match b { b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' => TokenKind::Word, b'\t' | b' ' => TokenKind::Spaces, _ => TokenKind::Other, } } #[cfg(test)] mod tests_lib; diffr-0.1.5/src/diffr_lib/tests_lib.rs000064400000000000000000000503001046102023000157640ustar 00000000000000use super::*; use DiffKind::*; impl<'a> TokenizationRange<'a> { fn nth_span(&self, n: isize) -> Span { self.t.spans[to_usize(self.start_index + n)] } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] enum DiffKind { Keep, Added, Removed, } fn string_of_bytes(buf: &[u8]) -> String { String::from_utf8_lossy(buf).into() } fn to_strings(buf: &[u8], tokens: It) -> Vec where It: Iterator, { mk_vec(tokens.map(|range| string_of_bytes(&buf[range.0..range.1]))) } fn mk_vec(it: It) -> Vec where It: Iterator, { it.collect() } fn nth_token<'a>(input: &'a TokenizationRange, idx: isize) -> &'a [u8] { let span = input.nth_span(idx); &input.t.data()[span.0..span.1] } fn compress_path(values: &Vec<(Vec, DiffKind)>) -> Vec<(Vec, DiffKind)> { let mut values = values.clone(); let mut it = values.iter_mut(); let mut result = vec![]; let mut current = it.next(); while let Some(next) = it.next() { match current { Some(ref mut c) => { if c.1 == next.1 { c.0.extend_from_slice(&*next.0) } else { result.push(c.clone()); *c = next; } } None => panic!(), } } if let Some(last) = current { result.push(last.clone()); } result } fn dummy_tokenize<'a>(data: &'a [u8]) -> Vec { let mut toks = vec![]; for i in 0..data.len() { toks.push((i, i + 1)); } toks } fn really_tokenize<'a>(data: &'a [u8]) -> Vec { let mut toks = vec![]; 
tokenize(data, 0, &mut toks); toks } fn diff_sequences_test(expected: &[(&[u8], DiffKind)], seq_a: &[u8], seq_b: &[u8]) { diff_sequences_test_aux(expected, seq_a, seq_b, dummy_tokenize) } fn diff_sequences_test_tokenized(expected: &[(&[u8], DiffKind)], seq_a: &[u8], seq_b: &[u8]) { diff_sequences_test_aux(expected, seq_a, seq_b, really_tokenize) } fn diff_sequences_test_aux( expected: &[(&[u8], DiffKind)], seq_a: &[u8], seq_b: &[u8], tok: impl Fn(&[u8]) -> Vec, ) { let toks_a = tok(&seq_a); let toks_b = tok(&seq_b); let m = TokenMap::new(&mut [(toks_a.iter(), &seq_a), (toks_b.iter(), &seq_b)]); let tok_a = Tokenization::new(seq_a, &toks_a, &m); let tok_b = Tokenization::new(seq_b, &toks_b, &m); let input = DiffInput::new(&tok_b, &tok_a); let input_r = DiffInput::new(&tok_a, &tok_b); let mut v = vec![]; let result = diff_sequences_simple_forward(&input, &mut v); let result_bwd = diff_sequences_simple_backward(&input, &mut v); let result_bidi = diff_sequences_bidirectional(&input, &mut v); let result_r = diff_sequences_simple(&input_r, &mut v, true); let result_r_bwd = diff_sequences_simple(&input_r, &mut v, false); let result_r_bidi = diff_sequences_bidirectional(&input_r, &mut v); let mut result_complete = vec![]; diff(&input, &mut v, &mut result_complete); let mut result_r_complete = vec![]; diff(&input_r, &mut v, &mut result_r_complete); let d = expected .iter() .map(|(buf, kind)| match kind { Added | Removed => tok(buf).len(), Keep => 0, }) .fold(0, |acc, len| acc + len); assert_eq!(d, result); assert_eq!(d, result_r); assert_eq!(d, result_bwd); assert_eq!(d, result_r_bwd); assert_eq!(d, result_bidi); assert_eq!(d, result_r_bidi); for complete in &[&result_complete, &result_r_complete] { let all_snakes = complete.iter().fold(0, |acc, s| acc + s.len); let d_calc = input.n() + input.m() - 2 * to_usize(all_snakes); assert_eq!(d, d_calc); } // construct edit script let mut x0 = 0; let mut y0 = 0; let mut script = vec![]; for snake in result_complete { let Snake { 
x0: x, y0: y, len, .. } = snake; if x0 != x { assert!(x0 < x); let lo = input.removed.nth_span(x0).0; let hi = input.removed.nth_span(x - 1).1; script.push((input.removed.t.data[lo..hi].to_vec(), Removed)); } if y0 != y { assert!(y0 < y); let lo = input.added.nth_span(y0).0; let hi = input.added.nth_span(y - 1).1; script.push((input.added.t.data[lo..hi].to_vec(), Added)); } let mut added = vec![]; let mut removed = vec![]; for i in 0..len { let r = input.removed.nth_span(x + i); removed.extend_from_slice(&input.removed.t.data[r.0..r.1]); let r = input.added.nth_span(y + i); added.extend_from_slice(&input.added.t.data[r.0..r.1]); } assert_eq!(added, removed, "{:?}", snake); script.push((added.to_vec(), Keep)); x0 = x + len; y0 = y + len; } let x = input.removed.nb_tokens(); let x0 = to_usize(x0); if x0 != x { assert!(x0 < x); script.push((input.removed.t.data[x0..x].to_vec(), Removed)); } let y = input.added.nb_tokens(); let y0 = to_usize(y0); if y0 != y { assert!(y0 < y); script.push((input.added.t.data[y0..y].to_vec(), Added)); } assert_eq!( &*mk_vec(expected.iter().map(|p| (string_of_bytes(p.0), p.1))), &*mk_vec(script.iter().map(|p| (string_of_bytes(&p.0), p.1))), ); } #[test] fn compress_path_test() { let test = |expected: Vec<(Vec, DiffKind)>, input| { assert_eq!(expected, compress_path(&input)); }; test(vec![], vec![]); test( vec![(b"abc".to_vec(), Added)], vec![(b"abc".to_vec(), Added)], ); test( vec![(b"abcdef".to_vec(), Added)], vec![(b"abc".to_vec(), Added), (b"def".to_vec(), Added)], ); test( vec![(b"abc".to_vec(), Added), (b"def".to_vec(), Removed)], vec![(b"abc".to_vec(), Added), (b"def".to_vec(), Removed)], ); test( vec![ (b"abc".to_vec(), Added), (b"defghijkl".to_vec(), Removed), (b"xyz".to_vec(), Keep), ], vec![ (b"abc".to_vec(), Added), (b"def".to_vec(), Removed), (b"ghi".to_vec(), Removed), (b"jkl".to_vec(), Removed), (b"xyz".to_vec(), Keep), ], ); } #[test] fn diff_sequences_test_1() { diff_sequences_test( &[ (b"a", Removed), (b"c", Added), 
(b"b", Keep), (b"c", Removed), (b"ab", Keep), (b"b", Removed), (b"a", Keep), (b"c", Added), ], b"abcabba", b"cbabac", ) } #[test] fn diff_sequences_test_2() { diff_sequences_test( &[(b"xaxbx", Added), (b"abc", Keep), (b"y", Removed)], b"abcy", b"xaxbxabc", ) } #[test] fn diff_sequences_test_3() { diff_sequences_test(&[(b"abc", Removed), (b"defgh", Added)], b"abc", b"defgh") } #[test] fn diff_sequences_test_4() { diff_sequences_test( &[(b"abc", Removed), (b"defg", Added), (b"zzz", Keep)], b"abczzz", b"defgzzz", ) } #[test] fn diff_sequences_test_5() { diff_sequences_test( &[(b"zzz", Keep), (b"abcd", Removed), (b"efgh", Added)], b"zzzabcd", b"zzzefgh", ) } #[test] fn diff_sequences_test_6() { diff_sequences_test(&[(b"abcd", Added)], b"", b"abcd") } #[test] fn diff_sequences_test_7() { diff_sequences_test(&[], b"", b"") } #[test] fn diff_sequences_test_8() { // This tests the recursion in diff diff_sequences_test( &[ (b"a", Removed), (b"c", Added), (b"b", Keep), (b"c", Removed), (b"a", Keep), (b"b", Removed), (b"ba", Keep), (b"a", Removed), (b"cc", Added), (b"b", Keep), (b"c", Removed), (b"ab", Keep), (b"b", Removed), (b"a", Keep), (b"a", Removed), (b"cc", Added), (b"b", Keep), (b"c", Removed), // this is weird; the 2 next should be combined? 
(b"a", Keep), (b"b", Keep), (b"b", Removed), (b"a", Keep), (b"c", Added), ], b"abcabbaabcabbaabcabba", b"cbabaccbabaccbabac", ) } #[test] fn range_equality_test() { let range_a = [1, 2, 3]; let range_b = [1, 2, 3]; let range_c = [1, 2, 4]; assert!(range_a == range_b); assert!(range_a != range_c); } #[test] fn tokenize_test() { fn test(expected: &[&str], buf: &[u8]) { let mut tokens = vec![]; tokenize(buf, 0, &mut tokens); assert_eq!( buf.len(), tokens.iter().map(|range| range.1 - range.0).sum() ); for token in &tokens { assert!(token.0 < token.1) } assert_eq!( mk_vec(buf.iter()), mk_vec(tokens.iter().flat_map(|range| &buf[range.0..range.1])) ); let foo = mk_vec( tokens .iter() .map(|range| &buf[range.0..range.1]) .map(|buf| string_of_bytes(buf)), ); let foo = mk_vec(foo.iter().map(|str| &**str)); assert_eq!(&*expected, &*foo); // TODO let tokens = tokens.iter().map(|hsr| (hsr.0, hsr.1)); assert_eq!(expected, &to_strings(&buf, tokens)[..]); } test(&[], b""); test(&[" "], b" "); test(&["a"], b"a"); test(&["abcd", " ", "defg", " "], b"abcd defg "); test(&["abcd", " ", "defg"], b"abcd defg"); test(&["abcd", " ", "defg"], b"abcd defg"); test(&["abcd", "\t ", "defg"], b"abcd\t defg"); test( &["*", "(", "abcd", ")", " ", "#", "[", "efgh", "]"], b"*(abcd) #[efgh]", ); } #[test] fn find_splitting_point_test() { fn test(expected: isize, seq_a: &[u8], seq_b: &[u8]) { let toks_a = dummy_tokenize(&seq_a); let toks_b = dummy_tokenize(&seq_b); let m = TokenMap::new(&mut [(toks_a.iter(), &seq_a), (toks_b.iter(), &seq_b)]); let tok_a = Tokenization::new(seq_a, &toks_a, &m); let tok_b = Tokenization::new(seq_b, &toks_b, &m); let input = DiffInput::new(&tok_b, &tok_a); assert_eq!(expected, find_splitting_point(&input).sp); for i in 0..expected { assert_eq!(input.removed.nth_token(i), input.added.nth_token(i)); } for i in expected..to_isize(input.removed.nb_tokens()) { assert_eq!(input.removed.nth_token(i), input.added.nth_token(i + 1)); } } test(0, b"abc", b"zabc"); test(1, b"abc", 
b"azbc"); test(2, b"abc", b"abzc"); test(3, b"abc", b"abcz"); } fn get_lcs(seq_a: &[u8], seq_b: &[u8]) -> Vec> { fn subsequences(seq_a: &[u8]) -> Vec> { let res: Vec> = { if seq_a.len() == 0 { vec![vec![]] } else if seq_a.len() == 1 { vec![vec![], seq_a.to_owned()] } else { let (seq_a1, seq_a2) = seq_a.split_at(seq_a.len() / 2); let mut res = vec![]; for part1 in subsequences(&seq_a1) { for part2 in subsequences(seq_a2) { let mut nth_token = vec![]; nth_token.extend_from_slice(&part1); nth_token.extend_from_slice(&part2); res.push(nth_token); } } res } }; assert_eq!(res.len(), 1 << seq_a.len()); res } fn is_subseq(subseq: &[u8], nth_token: &[u8]) -> bool { if subseq.len() == 0 { true } else { let target = subseq[0]; for i in 0..nth_token.len() { if nth_token[i] == target { return is_subseq(&subseq[1..], &nth_token[i + 1..]); } } false } } let mut bests = vec![]; let mut best_len = 0; for subseq in subsequences(seq_a) { if subseq.len() < best_len || !is_subseq(&*subseq, seq_b) { continue; } if best_len < subseq.len() { bests.clear(); best_len = subseq.len(); } if best_len <= subseq.len() { bests.push(subseq) } } bests } #[test] fn test_get_lcs() { dbg!(get_lcs(b"abcd", b"cdef")); let expected: &[u8] = b"cd"; assert_eq!( expected, &**get_lcs(b"abcd", b"cdef").iter().next().unwrap() ) } #[test] fn test_lcs_random() { fn test_lcs(seq_a: &[u8], seq_b: &[u8]) { let toks_a = dummy_tokenize(&seq_a); let toks_b = dummy_tokenize(&seq_b); let m = TokenMap::new(&mut [(toks_a.iter(), &seq_a), (toks_b.iter(), &seq_b)]); let tok_a = Tokenization::new(seq_a, &toks_a, &m); let tok_b = Tokenization::new(seq_b, &toks_b, &m); let input = DiffInput::new(&tok_a, &tok_b); let mut v = vec![]; let mut dst = vec![]; diff(&input, &mut v, &mut dst); // check that dst content defines a subsequence of seq_a and seq_b let mut diff_lcs = vec![]; for Snake { x0, y0, len, .. 
} in dst { let part_seq_a = (x0..x0 + len) .flat_map(|idx| nth_token(&input.removed, idx).iter().cloned()) .collect::>(); let part_seq_b = (y0..y0 + len) .flat_map(|idx| nth_token(&input.added, idx).iter().cloned()) .collect::>(); assert_eq!(&*part_seq_a, &*part_seq_b); diff_lcs.extend_from_slice(&*part_seq_a); } // bruteforce check that it is the longest assert!(get_lcs(seq_a, seq_b) .iter() .filter(|nth_token| **nth_token == diff_lcs) .next() .is_some()); } let len_a = 6; let len_b = 6; let nletters = 3_u8; let mut seq_a = vec![b'1'; len_a]; let mut seq_b = vec![b'1'; len_b]; for i in 0..len_a { for j in 0..len_b { for la in 0..nletters { for lb in 0..nletters { seq_a[i] = la; seq_b[j] = lb; test_lcs(&seq_a, &seq_b); } } } } } #[should_panic] #[test] fn to_usize_checked_negative_test() { to_usize(-1_isize); } #[test] fn split_lines_test() { let input: &[u8] = b"abcd\nefgh\nij"; let split = LineSplit { data: input.to_vec(), line_lengths: vec![5, 5, 2], }; check_split(input, &split) } #[test] fn split_lines_append_test() { let input: &[u8] = b"abcd\nefgh\nij"; let mut split = LineSplit::default(); split.append_line(&input[..3]); split.append_line(&input[3..6]); split.append_line(&input[6..]); check_split(input, &split) } fn check_split(input: &[u8], split: &LineSplit) { assert_eq!( input, &*split.iter().fold(vec![], |mut acc, (lo, hi)| { acc.extend_from_slice(&input[lo..hi]); acc }) ); } #[test] fn issue15() { diff_sequences_test_tokenized( &[ (b"+ ", Added), (b"-", Keep), (b" -", Removed), (b"01234;\r\n", Keep), (b"+ ", Added), (b"-", Keep), (b" ", Removed), (b"-", Keep), (b"-", Removed), (b"abc;\r\n", Keep), (b"- ", Removed), (b"+ ", Added), (b"--", Keep), (b"def;\r\n", Keep), (b"- ", Removed), (b"+ ", Added), (b"--jkl;\r\n", Keep), (b"+ ", Added), (b"-", Keep), (b" ", Removed), (b"-", Keep), (b"-", Removed), (b"poi;\r\n", Keep), ], b"- -01234;\r\n- --abc;\r\n- --def;\r\n- --jkl;\r\n- --poi;\r\n", b"+ -01234;\r\n+ --abc;\r\n+ --def;\r\n+ --jkl;\r\n+ --poi;\r\n", 
) } #[test] fn issue15_2() { diff_sequences_test_tokenized( &[ (b"-", Removed), (b"+", Added), (b" --include \'+ */\'", Keep), (b" ", Added), (b"\r\n", Keep), ], b"- --include '+ */'\r\n", b"+ --include '+ */' \r\n", ) } #[test] fn issue27() { diff_sequences_test( &[ (b"note: ", Keep), (b"AAA", Removed), (b"BBB CCC", Added), (b"\r\n", Keep), ], b"note: AAA\r\n", b"note: BBB CCC\r\n", ); diff_sequences_test( &[(b"^", Added), (b"^^^^^^^^^^", Keep), (b"^^^^", Added)], b"^^^^^^^^^^", b"^^^^^^^^^^^^^^^", ); diff_sequences_test( &[ (b"a", Keep), (b"cbc", Added), (b"bcz", Keep), (b"c", Added), (b"z", Keep), (b"abz", Added), ], b"abczz", b"acbcbczczabz", ); } #[derive(Debug)] struct TestNormalizePartitionExpected<'a> { expected: &'a [&'a [u8]], expected_starts_with_shared: bool, } fn test_optimize_alternatives( alternatives: &[TestNormalizePartitionExpected], seq: &[u8], lcs: &[u8], ) { let toks_seq = dummy_tokenize(&seq); let toks_lcs = dummy_tokenize(&lcs); let m = TokenMap::new(&mut [(toks_seq.iter(), &seq), (toks_lcs.iter(), &lcs)]); let seq = Tokenization::new(&seq, &toks_seq, &m); let lcs = Tokenization::new(&lcs, &toks_lcs, &m); let opt_result = optimize_partition(&seq, &lcs); let seq = TokenizationRange::new(&seq); let mut it = opt_result.path.iter().cloned(); let mut prev = match it.next() { None => { assert!(alternatives.iter().any(|e| e.expected.is_empty())); return; } Some(val) => val, }; let mut partition = vec![]; for i in it { let mut part = vec![]; for j in prev..i { part.extend_from_slice(&nth_token(&seq, j as isize)); } partition.push(part); prev = i; } assert!( alternatives.iter().any(|e| { let expected = e .expected .iter() .map(|slice| slice.to_vec()) .collect::>(); expected == &*partition && e.expected_starts_with_shared == opt_result.starts_with_shared }), "alternatives:\n\t{:?}\n\nactual:\n\t{:?}", &alternatives, (&partition, opt_result.starts_with_shared), ) } fn test_optimize_partition1( expected: &[&[u8]], expected_starts_with_shared: bool, seq: 
&[u8], lcs: &[u8], ) { let expected = vec![TestNormalizePartitionExpected { expected, expected_starts_with_shared, }]; test_optimize_alternatives(&expected, seq, lcs) } #[test] fn test_optimize_partition() { test_optimize_partition1(&[b"abcd"], true, b"abcd", b"abcd"); test_optimize_partition1(&[b"abcd"], false, b"abcd", b""); test_optimize_partition1(&[b"a", b"xyz", b"bc"], true, b"axyzbc", b"abc"); test_optimize_partition1(&[b"zab", b"a"], false, b"zaba", b"a"); test_optimize_partition1(&[b"k", b"a", b"xyz", b"bc"], false, b"kaxyzbc", b"abc"); test_optimize_partition1( &[b"k", b"a", b"xyz", b"bc", b"x"], false, b"kaxyzbcx", b"abc", ); test_optimize_partition1( &[b"a", b"cbc", b"bcz", b"czab", b"z"], true, b"acbcbczczabz", b"abczz", ); test_optimize_alternatives( &[ TestNormalizePartitionExpected { expected: &[b"^^^^^^^^^^", b"^^^^^"], expected_starts_with_shared: true, }, TestNormalizePartitionExpected { expected: &[b"^^^^^", b"^^^^^^^^^^"], expected_starts_with_shared: false, }, ], b"^^^^^^^^^^^^^^^", b"^^^^^^^^^^", ); test_optimize_partition1( &[b"note: ", b"AAA", b"\r\n"], true, b"note: AAA\r\n", b"note: \r\n", ); test_optimize_partition1( &[b"note: ", b"BBB CCC", b"\r\n"], true, b"note: BBB CCC\r\n", b"note: \r\n", ); } diffr-0.1.5/src/main.rs000064400000000000000000000664361046102023000130210ustar 00000000000000use std::fmt::{Debug, Display, Error as FmtErr, Formatter}; use std::io::{self, BufRead, Write}; use std::iter::Peekable; use std::time::SystemTime; use termcolor::{ Color::{self, Green, Red, Rgb}, ColorChoice, ColorSpec, StandardStream, WriteColor, }; use diffr_lib::*; mod cli_args; mod diffr_lib; #[derive(Debug, Clone, Copy)] pub enum LineNumberStyle { Compact, Aligned, } #[derive(Debug)] pub struct AppConfig { debug: bool, html: bool, line_numbers_style: Option, added_face: ColorSpec, refine_added_face: ColorSpec, removed_face: ColorSpec, refine_removed_face: ColorSpec, } impl Default for AppConfig { fn default() -> Self { // The ANSI white is 
actually gray on many implementations. The actual white // that seem to work on all implementations is "bright white". `termcolor` // crate has no enum member for it, so we create it with Rgb. let bright_white = Rgb(255, 255, 255); AppConfig { debug: false, html: false, line_numbers_style: None, added_face: color_spec(Some(Green), None, false), refine_added_face: color_spec(Some(bright_white), Some(Green), true), removed_face: color_spec(Some(Red), None, false), refine_removed_face: color_spec(Some(bright_white), Some(Red), true), } } } impl AppConfig { fn has_line_numbers(&self) -> bool { self.line_numbers_style.is_some() } fn line_numbers_aligned(&self) -> bool { if let Some(LineNumberStyle::Aligned) = self.line_numbers_style { return true; } false } } struct HtmlColorWriter { inner: W, current_color: Option, } impl HtmlColorWriter { fn new(inner: W) -> Self { HtmlColorWriter { inner, current_color: None, } } } fn write_color(w: &mut W, color: &Color) -> std::io::Result<()> where W: Write, { use Color::*; match color { Black => { w.write(b"black")?; } Blue => { w.write(b"blue")?; } Green => { w.write(b"green")?; } Red => { w.write(b"red")?; } Cyan => { w.write(b"cyan")?; } Magenta => { w.write(b"magenta")?; } Yellow => { w.write(b"yellow")?; } White => { w.write(b"white")?; } Rgb(r, g, b) => { write!(w, "#{:x}{:x}{:x}", r, g, b)?; } _ => panic!("not implemented"), } Ok(()) } impl WriteColor for HtmlColorWriter where W: Write, { fn supports_color(&self) -> bool { true } fn set_color(&mut self, spec: &ColorSpec) -> std::io::Result<()> { if spec.is_none() { self.current_color = None; } else { let w = &mut self.inner; w.write(b"")?; self.current_color = Some(spec.clone()); } Ok(()) } fn reset(&mut self) -> std::io::Result<()> { if self.current_color.is_some() { self.inner.write(b"")?; self.current_color = None; } Ok(()) } } impl Write for HtmlColorWriter where W: Write, { fn write(&mut self, buf: &[u8]) -> std::io::Result { let w = &mut self.inner; let n = buf.len(); 
let table: [(u8, &[u8]); 5] = [ (b'<', b"<"), (b'>', b">"), (b'"', b"""), (b'\'', b"'"), (b'&', b"&"), ]; for b in buf { if let Some((_, escaped)) = table.iter().find(|p| p.0 == *b) { w.write(escaped)?; } else { w.write(&[*b])?; } } w.flush()?; Ok(n) } fn flush(&mut self) -> std::io::Result<()> { self.inner.flush() } } fn main() { let config = cli_args::parse_config(); let mut hunk_buffer = HunkBuffer::new(&config); match hunk_buffer.run() { Ok(()) => (), Err(ref err) if err.kind() == io::ErrorKind::BrokenPipe => (), Err(ref err) => { eprintln!("io error: {}", err); std::process::exit(-1) } } } fn now(do_timings: bool) -> Option { if do_timings { Some(SystemTime::now()) } else { None } } fn duration_ms_since(time: &Option) -> u128 { if let Some(time) = time { if let Ok(elapsed) = time.elapsed() { elapsed.as_millis() } else { // some non monotonically increasing clock // this is a short period of time anyway, // let us map it to 0 0 } } else { 0 } } fn color_spec(fg: Option, bg: Option, bold: bool) -> ColorSpec { let mut colorspec: ColorSpec = ColorSpec::default(); colorspec.set_fg(fg); colorspec.set_bg(bg); colorspec.set_bold(bold); colorspec } #[derive(Default)] struct ExecStats { time_computing_diff_ms: u128, time_lcs_ms: u128, time_opt_lcs_ms: u128, total_time_ms: u128, program_start: Option, } impl ExecStats { fn new(debug: bool) -> Self { ExecStats { time_computing_diff_ms: 0, time_lcs_ms: 0, time_opt_lcs_ms: 0, total_time_ms: 0, program_start: now(debug), } } /// Should we call SystemTime::now at all? 
fn do_timings(&self) -> bool { self.program_start.is_some() } fn stop(&mut self) { if self.do_timings() { self.total_time_ms = duration_ms_since(&self.program_start); } } fn report(&self) -> std::io::Result<()> { self.report_into(&mut std::io::stderr()) } fn report_into(&self, w: &mut W) -> std::io::Result<()> where W: std::io::Write, { const WORD_PADDING: usize = 35; const FIELD_PADDING: usize = 15; if self.do_timings() { let format_header = |name| format!("{} (ms)", name); let format_ratio = |dt: u128| { format!( "({:3.3}%)", 100.0 * (dt as f64) / (self.total_time_ms as f64) ) }; let mut report = |name: &'static str, dt: u128| { writeln!( w, "{:>w$} {:>f$} {:>f$}", format_header(name), dt, format_ratio(dt), w = WORD_PADDING, f = FIELD_PADDING, ) }; report("hunk processing time", self.time_computing_diff_ms)?; report("-- compute lcs", self.time_lcs_ms)?; report("-- optimize lcs", self.time_opt_lcs_ms)?; writeln!( w, "{:>w$} {:>f$}", format_header("total processing time"), self.total_time_ms, w = WORD_PADDING, f = FIELD_PADDING, )?; } Ok(()) } } struct HunkBuffer<'a> { v: Vec, diff_buffer: Vec, added_tokens: Vec<(usize, usize)>, removed_tokens: Vec<(usize, usize)>, line_number_info: Option, lines: LineSplit, config: &'a AppConfig, margin: Vec, warning_lines: Vec, stats: ExecStats, } #[derive(Default)] struct Margin<'a> { lino_minus: usize, lino_plus: usize, margin: &'a mut [u8], half_margin: usize, } const MARGIN_TAB_STOP: usize = 8; impl<'a> Margin<'a> { fn new(header: &'a HunkHeader, margin: &'a mut [u8]) -> Self { let full_margin = header.width(); let half_margin = full_margin / 2; // If line number is 0, the column is empty and // shouldn't be printed let margin_size = if header.minus_range.0 == 0 || header.plus_range.0 == 0 { half_margin } else { full_margin }; assert!(margin.len() >= margin_size); Margin { lino_plus: header.plus_range.0, lino_minus: header.minus_range.0, margin: &mut margin[..margin_size], half_margin, } } fn write_margin_padding(&mut self, 
out: &mut impl WriteColor) -> io::Result<()> { if self.margin.len() % MARGIN_TAB_STOP != 0 { write!(out, "\t")?; } Ok(()) } fn write_margin_changed( &mut self, is_plus: bool, config: &AppConfig, out: &mut impl WriteColor, ) -> io::Result<()> { let mut margin_buf = &mut self.margin[..]; let color; if is_plus { color = &config.added_face; if self.lino_minus != 0 { write!(margin_buf, "{:w$} ", ' ', w = self.half_margin)?; } write!(margin_buf, "{:w$}", self.lino_plus, w = self.half_margin)?; self.lino_plus += 1; } else { color = &config.removed_face; write!(margin_buf, "{:w$}", self.lino_minus, w = self.half_margin)?; if self.lino_plus != 0 { write!(margin_buf, " {:w$}", ' ', w = self.half_margin)?; } self.lino_minus += 1; }; output(self.margin, 0, self.margin.len(), color, out)?; if config.line_numbers_aligned() { self.write_margin_padding(out)?; } Ok(()) } fn write_margin_context( &mut self, config: &AppConfig, out: &mut impl WriteColor, ) -> io::Result<()> { if self.lino_minus != self.lino_plus { write!(out, "{:w$}", self.lino_minus, w = self.half_margin)?; } else { write!(out, "{:w$}", ' ', w = self.half_margin)?; } write!(out, " {:w$}", self.lino_plus, w = self.half_margin)?; if config.line_numbers_aligned() { self.write_margin_padding(out)?; } self.lino_minus += 1; self.lino_plus += 1; Ok(()) } } fn shared_spans(added_tokens: &Tokenization, diff_buffer: &Vec) -> Vec<(usize, usize)> { let mut shared_spans = vec![]; for snake in diff_buffer.iter() { for i in 0..snake.len { shared_spans.push(added_tokens.nth_span(snake.y0 + i)); } } shared_spans } const MAX_MARGIN: usize = 41; impl<'a> HunkBuffer<'a> { fn new(config: &'a AppConfig) -> Self { let debug = config.debug; HunkBuffer { v: vec![], diff_buffer: vec![], added_tokens: vec![], removed_tokens: vec![], line_number_info: None, lines: Default::default(), config, margin: vec![0; MAX_MARGIN], warning_lines: vec![], stats: ExecStats::new(debug), } } // Returns the number of completely printed snakes fn paint_line( 
data: &[u8], &(data_lo, data_hi): &(usize, usize), no_highlight: &ColorSpec, highlight: &ColorSpec, shared: &mut Peekable, out: &mut Stream, ) -> io::Result<()> where Stream: WriteColor, Positions: Iterator, { let mut y = data_lo + 1; // XXX: skip leading token and leading spaces while y < data_hi && data[y].is_ascii_whitespace() { y += 1 } let mut pending = (data_lo, y, false); let mut trailing_ws = ColorSpec::new(); trailing_ws.set_bg(Some(Color::Red)); let color = |h| if h { &highlight } else { &no_highlight }; let mut output1 = |lo, hi, highlighted| -> std::io::Result<()> { if lo == hi { return Ok(()); } let (lo1, hi1, highlighted1) = pending; let color = if &data[lo..hi] == b"\n" && data[lo1..hi1].iter().all(|b| b.is_ascii_whitespace()) { &trailing_ws } else { color(highlighted1) }; output(data, lo1, hi1, color, out)?; pending = (lo, hi, highlighted); Ok(()) }; // special case: all whitespaces if y == data_hi { output(data, data_lo, data_lo + 1, &no_highlight, out)?; output(data, data_lo + 1, data_hi, &trailing_ws, out)?; return Ok(()); } while let Some((lo, hi)) = shared.peek() { if data_hi <= y { break; } let last_iter = data_hi <= *hi; let lo = (*lo).min(data_hi).max(y); let hi = (*hi).min(data_hi); if hi <= data_lo { shared.next(); continue; } if hi < lo { continue; } output1(y, lo, true)?; output1(lo, hi, false)?; y = hi; if last_iter { break; } else { shared.next(); } } output1(y, data_hi, true)?; let (lo1, hi1, highlighted1) = pending; output(data, lo1, hi1, color(highlighted1), out)?; Ok(()) } fn process_with_stats(&mut self, out: &mut Stream) -> io::Result<()> where Stream: WriteColor, { let start = now(self.stats.do_timings()); let result = self.process(out); self.stats.time_computing_diff_ms += duration_ms_since(&start); result } fn process(&mut self, out: &mut Stream) -> io::Result<()> where Stream: WriteColor, { let Self { v, diff_buffer, added_tokens, removed_tokens, line_number_info, lines, config, margin, warning_lines, stats, } = self; let mut 
margin = match line_number_info { Some(lni) => Margin::new(lni, margin), None => Default::default(), }; let data = lines.data(); let m = TokenMap::new(&mut [(removed_tokens.iter(), data), (added_tokens.iter(), data)]); let removed = Tokenization::new(data, removed_tokens, &m); let added = Tokenization::new(data, added_tokens, &m); let tokens = DiffInput::new(&added, &removed); let start = now(stats.do_timings()); diffr_lib::diff(&tokens, v, diff_buffer); // TODO output the lcs directly out of `diff` instead let shared_spans = shared_spans(&added, &diff_buffer); let lcs = Tokenization::new(data, &shared_spans, &m); stats.time_lcs_ms += duration_ms_since(&start); let start = now(stats.do_timings()); let normalized_lcs_added = optimize_partition(&added, &lcs); let normalized_lcs_removed = optimize_partition(&removed, &lcs); stats.time_opt_lcs_ms += duration_ms_since(&start); let mut shared_added = normalized_lcs_added.shared_segments(&added).peekable(); let mut shared_removed = normalized_lcs_removed.shared_segments(&removed).peekable(); let mut warnings = warning_lines.iter().peekable(); let defaultspec = ColorSpec::default(); for (i, range) in lines.iter().enumerate() { if let Some(&&nline) = warnings.peek() { if nline == i { let w = &lines.data()[range.0..range.1]; output(w, 0, w.len(), &defaultspec, out)?; warnings.next(); continue; } } let first = data[range.0]; match first { b'-' | b'+' => { let is_plus = first == b'+'; let (nhl, hl, toks, shared) = if is_plus { ( &config.added_face, &config.refine_added_face, tokens.added(), &mut shared_added, ) } else { ( &config.removed_face, &config.refine_removed_face, tokens.removed(), &mut shared_removed, ) }; if config.has_line_numbers() { margin.write_margin_changed(is_plus, config, out)? } Self::paint_line(toks.data(), &range, nhl, hl, shared, out)?; } _ => { if config.has_line_numbers() { margin.write_margin_context(config, out)? } output(data, range.0, range.1, &defaultspec, out)? 
} } } assert!(warnings.peek() == None); drop(shared_removed); drop(shared_added); lines.clear(); added_tokens.clear(); removed_tokens.clear(); warning_lines.clear(); Ok(()) } fn push_added(&mut self, line: &[u8]) { self.push_aux(line, true) } fn push_removed(&mut self, line: &[u8]) { self.push_aux(line, false) } fn push_aux(&mut self, line: &[u8], added: bool) { // XXX: skip leading token let mut ofs = self.lines.len() + 1; add_raw_line(&mut self.lines, line); // get back the line sanitized from escape codes: let line = &self.lines.data()[ofs..]; // skip leading spaces ofs += line .iter() .take_while(|ch| ch.is_ascii_whitespace()) .count(); diffr_lib::tokenize( &self.lines.data(), ofs, if added { &mut self.added_tokens } else { &mut self.removed_tokens }, ); } fn run(&mut self) -> io::Result<()> { let stdin = io::stdin(); let stdout = StandardStream::stdout(ColorChoice::Always); let mut buffer = vec![]; let mut stdin = stdin.lock(); let mut stdout = stdout.lock(); let mut stdout: Box = if self.config.html { write!(stdout, "
")?;
            Box::new(HtmlColorWriter::new(stdout))
        } else {
            Box::new(stdout)
        };
        let mut in_hunk = false;
        let mut hunk_line_number = 0;

        // process hunks
        loop {
            stdin.read_until(b'\n', &mut buffer)?;
            if buffer.is_empty() {
                break;
            }

            let first = first_after_escape(&buffer);
            if in_hunk {
                hunk_line_number += 1;
                match first {
                    Some(b'+') => self.push_added(&buffer),
                    Some(b'-') => self.push_removed(&buffer),
                    Some(b' ') => add_raw_line(&mut self.lines, &buffer),
                    Some(b'\\') => {
                        add_raw_line(&mut self.lines, &buffer);
                        self.warning_lines.push(hunk_line_number - 1);
                    }
                    _ => {
                        self.process_with_stats(&mut stdout)?;
                        in_hunk = false;
                    }
                }
            }
            if !in_hunk {
                hunk_line_number = 0;
                in_hunk = first == Some(b'@');
                if self.config.has_line_numbers() && in_hunk {
                    self.line_number_info = parse_line_number(&buffer);
                }
                output(&buffer, 0, buffer.len(), &ColorSpec::default(), &mut stdout)?;
            }

            buffer.clear();
        }

        // flush remaining hunk
        self.process_with_stats(&mut stdout)?;
        self.stats.stop();
        self.stats.report()?;
        Ok(())
    }
}

// TODO count whitespace characters as well here
/// Append `line` to `dst`, stripping ANSI escape sequences along the way.
fn add_raw_line(dst: &mut LineSplit, line: &[u8]) {
    let mut i = 0;
    let len = line.len();
    while i < len {
        let esc = skip_all_escape_code(&line[i..]);
        i += esc;
        let tok_len = skip_token(&line[i..]);
        if esc == 0 && tok_len == 0 {
            // A truncated escape sequence ("\x1b[" with no terminating
            // 'm'): neither helper makes progress here, which previously
            // looped forever. Copy the rest of the line verbatim instead.
            dst.append_line(&line[i..]);
            break;
        }
        dst.append_line(&line[i..i + tok_len]);
        i += tok_len;
    }
}

/// Print `buf[from..to]` to `out` with the given color.
///
/// The range is clamped to the buffer; an empty range is a no-op. A
/// trailing newline, if present, is written after the color reset so
/// that the escape codes never span a line break.
fn output<Stream>(
    buf: &[u8],
    from: usize,
    to: usize,
    colorspec: &ColorSpec,
    out: &mut Stream,
) -> io::Result<()>
where
    Stream: WriteColor,
{
    let to = to.min(buf.len());
    if from >= to {
        return Ok(());
    }
    let slice = &buf[from..to];
    let has_newline = slice.ends_with(b"\n");
    let body = if has_newline {
        &slice[..slice.len() - 1]
    } else {
        slice
    };
    out.set_color(colorspec)?;
    out.write_all(body)?;
    out.reset()?;
    if has_newline {
        out.write_all(b"\n")?;
    }
    Ok(())
}

/// Returns the number of bytes of escape code that start the slice.
fn skip_all_escape_code(buf: &[u8]) -> usize {
    // Skip one sequence
    fn skip_escape_code(buf: &[u8]) -> Option {
        if 2 <= buf.len() && &buf[..2] == b"\x1b[" {
            // "\x1b[" + sequence body + "m" => 3 additional bytes
            Some(index_of(&buf[2..], b'm')? + 3)
        } else {
            None
        }
    }
    let mut buf = buf;
    let mut sum = 0;
    while let Some(nbytes) = skip_escape_code(&buf) {
        buf = &buf[nbytes..];
        sum += nbytes
    }
    sum
}

/// Returns the first byte of the slice, after skipping the escape
/// code bytes.
fn first_after_escape(buf: &[u8]) -> Option {
    let nbytes = skip_all_escape_code(&buf);
    buf.iter().skip(nbytes).cloned().next()
}

/// Scan the slice looking for the given byte, returning the index of
/// its first appearance.
fn index_of(buf: &[u8], target: u8) -> Option<usize> {
    // Iterator::position expresses the manual enumerate loop directly.
    buf.iter().position(|&c| c == target)
}

/// Computes the number of bytes until either the next escape code, or
/// the end of buf.
fn skip_token(buf: &[u8]) -> usize {
    // Find the first "\x1b[" pair; windows(2) yields nothing for slices
    // shorter than two bytes, which mirrors the original 0..len-1 scan.
    buf.windows(2)
        .position(|w| w == b"\x1b[")
        .unwrap_or(buf.len())
}

// TODO: extend to the multiple range case
/// Parsed hunk header of the form "@@ -a,b +c,d @@".
#[derive(Default, PartialEq, Eq)]
struct HunkHeader {
    // range are (ofs,len) for the interval [ofs, ofs + len)
    minus_range: (usize, usize),
    plus_range: (usize, usize),
}

/// WIDTH[i] is the largest value whose decimal representation fits in
/// i digits (with 0 mapped to width 0, i.e. an empty column).
const WIDTH: [u64; 20] = [
    0,
    9,
    99,
    999,
    9999,
    99999,
    999999,
    9999999,
    99999999,
    999999999,
    9999999999,
    99999999999,
    999999999999,
    9999999999999,
    99999999999999,
    999999999999999,
    9999999999999999,
    99999999999999999,
    999999999999999999,
    9999999999999999999,
];

/// Decimal print width of `x` (0 has width 0: an empty margin column).
fn width1(x: u64) -> usize {
    // Exact hit or insertion point — either way the index is the width.
    WIDTH.binary_search(&x).unwrap_or_else(|i| i)
}

impl HunkHeader {
    /// Build a header from the two (offset, length) ranges.
    fn new(minus_range: (usize, usize), plus_range: (usize, usize)) -> Self {
        HunkHeader {
            minus_range,
            plus_range,
        }
    }

    /// Margin width needed to print both line-number columns at the
    /// width of the wider one, plus one separator character.
    fn width(&self) -> usize {
        let minus_width = width1((self.minus_range.0 + self.minus_range.1) as u64);
        let plus_width = width1((self.plus_range.0 + self.plus_range.1) as u64);
        2 * minus_width.max(plus_width) + 1
    }
}

impl Debug for HunkHeader {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtErr> {
        // "-ofs,len +ofs,len", matching diff's hunk header notation.
        write!(
            f,
            "-{},{} +{},{}",
            self.minus_range.0, self.minus_range.1, self.plus_range.0, self.plus_range.1
        )
    }
}

impl Display for HunkHeader {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtErr> {
        // Display and Debug share the same textual form.
        Debug::fmt(self, f)
    }
}

/// Cursor-based parser for hunk headers ("@@ -a,b +c,d @@"),
/// tolerant of ANSI escape codes interleaved anywhere in the input.
struct LineNumberParser<'a> {
    buf: &'a [u8],
    // current read position in `buf`
    i: usize,
}

impl<'a> LineNumberParser<'a> {
    fn new(buf: &'a [u8]) -> Self {
        LineNumberParser { buf, i: 0 }
    }

    fn skip_escape_code(&mut self) {
        if self.i < self.buf.len() {
            let to_skip = skip_all_escape_code(&self.buf[self.i..]);
            self.i += to_skip;
        }
    }

    fn looking_at(&mut self, matcher: M) -> bool
    where
        M: Fn(u8) -> bool,
    {
        self.skip_escape_code();
        self.i < self.buf.len() && matcher(self.buf[self.i])
    }

    fn read_digit(&mut self) -> Option {
        if self.looking_at(|x| x.is_ascii_digit()) {
            let cur = self.buf[self.i];
            self.i += 1;
            Some((cur - b'0') as usize)
        } else {
            None
        }
    }

    fn skip_whitespaces(&mut self) {
        while self.looking_at(|x| x.is_ascii_whitespace()) {
            self.i += 1;
        }
    }

    fn expect_multiple(&mut self, matcher: M) -> Option
    where
        M: Fn(u8) -> bool,
    {
        self.skip_escape_code();
        let iorig = self.i;
        while self.looking_at(&matcher) {
            self.i += 1;
        }
        if self.i == iorig {
            None
        } else {
            Some(self.i - iorig)
        }
    }

    fn expect(&mut self, target: u8) -> Option<()> {
        if self.looking_at(|x| x == target) {
            self.i += 1;
            Some(())
        } else {
            None
        }
    }

    fn parse_usize(&mut self) -> Option {
        let mut res = 0usize;
        let mut any = false;
        while let Some(digit) = self.read_digit() {
            any = true;
            res = res.checked_mul(10)?;
            res = res.checked_add(digit)?;
        }
        if any {
            Some(res)
        } else {
            None
        }
    }

    fn parse_pair(&mut self) -> Option<(usize, usize)> {
        let p0 = self.parse_usize()?;
        if self.expect(b',').is_none() {
            return Some((p0, 1));
        }
        let p1 = self.parse_usize()?;
        Some((p0, p1))
    }

    fn expect_multiple_minus_ranges(&mut self) -> Option<(usize, usize)> {
        let next = |that: &mut Self| {
            that.expect(b'-')?;
            that.parse_pair()
        };
        let mut res = None;
        for i in 0.. {
            if i != 0 {
                self.expect_multiple(|x| x.is_ascii_whitespace())?;
            }
            match next(self) {
                next @ Some(_) => res = next,
                None => break,
            }
        }
        res
    }

    fn parse_line_number(&mut self) -> Option {
        self.skip_whitespaces();
        self.expect_multiple(|x| x == b'@')?;
        self.expect_multiple(|x| x.is_ascii_whitespace())?;
        let minus_range = self.expect_multiple_minus_ranges()?;
        self.expect(b'+')?;
        let plus_range = self.parse_pair()?;
        self.expect_multiple(|x| x.is_ascii_whitespace())?;
        self.expect_multiple(|x| x == b'@')?;
        Some(HunkHeader::new(minus_range, plus_range))
    }
}

fn parse_line_number(buf: &[u8]) -> Option {
    LineNumberParser::new(&buf).parse_line_number()
}

#[cfg(test)]
mod tests_app;

#[cfg(test)]
mod tests_cli;
diffr-0.1.5/src/tests_app.rs000064400000000000000000000054001046102023000140570ustar  00000000000000use super::*;

/// Only complete "\x1b[...m" sequences count toward the skipped length.
#[test]
fn skip_all_escape_code_test() {
    assert_eq!(5, skip_all_escape_code(b"\x1b[42m@@@"));
    assert_eq!(10, skip_all_escape_code(b"\x1b[42m\x1b[33m@@@"));
    // unterminated sequence (no trailing 'm') is not counted
    assert_eq!(0, skip_all_escape_code(b"\x1b[42@@@"));
}

/// The first payload byte is found even behind several escape codes.
#[test]
fn first_after_escape_test() {
    assert_eq!(Some(b'+'), first_after_escape(b"+abc"));
    assert_eq!(Some(b'+'), first_after_escape(b"\x1b[42m\x1b[33m+abc"));
    // nothing after the escape code
    assert_eq!(None, first_after_escape(b"\x1b[42m"));
}

// TODO test index_of?

/// Token length runs up to the next "\x1b[" pair or the end of input.
#[test]
fn skip_token_test() {
    assert_eq!(4, skip_token(b"abc\x1b"));
    assert_eq!(3, skip_token(b"abc\x1b["));
    assert_eq!(3, skip_token(b"abc"));
    assert_eq!(1, skip_token(b"\x1b"));
    assert_eq!(0, skip_token(b""));
}

/// Hunk-header parsing: accepted and rejected forms.
#[test]
fn parse_line_number_test() {
    let test_ok = |ofs1, len1, ofs2, len2, input| {
        eprintln!("test_ok {}...", String::from_utf8_lossy(input));
        assert_eq!(
            Some(HunkHeader {
                minus_range: (ofs1, len1),
                plus_range: (ofs2, len2),
            }),
            parse_line_number(input)
        );
    };
    let test_fail = |input| {
        eprintln!("test_fail {}...", String::from_utf8_lossy(input));
        assert_eq!(None, parse_line_number(input));
    };
    test_ok(133, 6, 133, 8, b"@@ -133,6 +133,8 @@");
    test_ok(0, 0, 1, 1, b"@@ -0,0 +1 @@");
    test_ok(0, 0, 1, 1, b"  @@ -0,0 +1 @@");
    test_ok(0, 0, 1, 1, b"@@   -0,0 +1 @@");
    // last one wins
    test_ok(0, 2, 0, 3, b"@@@ -0,0 -0,2 +0,3 @@@");
    test_fail(b"@@-0,0 +1 @@");
    test_fail(b"@@ -0,0+1 @@");
    test_fail(b"@@ -0,0 +1@@");
    test_fail(b"@@ -0,0 +1 ");
    test_fail(b"-0,0 +1");
    test_fail(b"@@ 0,0 +1 @@");
    test_fail(b"@@ -0,0 1 @@");
    test_fail(b"@@@ -0,0 +0,2 +0,3 @@@");

    // overflow
    test_fail(b"@@ -0,0 +19999999999999999999 @@");

    // with escape code
    test_ok(0, 0, 1, 1, b"\x1b[42;43m@\x1b[42;43m@\x1b[42;43m \x1b[42;43m-\x1b[42;43m0\x1b[42;43m,\x1b[42;43m0\x1b[42;43m \x1b[42;43m+1 @@");
}

/// WIDTH, width1 and HunkHeader::width agree with the actual decimal
/// formatting width of u64 values.
#[test]
fn test_width() {
    for (i, x) in WIDTH.iter().enumerate() {
        if x < &u64::max_value() {
            assert_eq!(format!("{}", x + 1).len(), i + 1);
        }
    }
    assert_eq!(0, width1(0));
    fn test(x: u64) {
        assert_eq!(format!("{}", x).len(), width1(x));
    }
    for i in 1..=10000 {
        test(i);
    }
    test(9999999999);
    test(10000000000);
    test(14284238234);
    for i in 0..64 {
        test(1 << i);
    }
    test(u64::max_value());

    assert_eq!("123:456".len(), HunkHeader::new((123, 5), (456, 9)).width());
    assert_eq!(
        "1122: 456".len(),
        HunkHeader::new((123, 999), (456, 9)).width()
    );
    assert_eq!("   :456".len(), HunkHeader::new((0, 0), (456, 9)).width());
    assert_eq!(MAX_MARGIN, 2 * width1(u64::max_value()) + 1);
}
diffr-0.1.5/src/tests_cli.rs000064400000000000000000000142271046102023000140550ustar  00000000000000use std::env;
use std::path::PathBuf;
use std::process::{Command, Stdio};
use StringTest::*;

/// Expected shape of a captured output stream.
enum StringTest {
    // stream must be empty
    Empty,
    // stream must contain the given substring
    AtLeast(&'static str),
    // stream must equal the given string (modulo surrounding whitespace)
    Exactly(&'static str),
}

/// Wrap `msg` in double quotes for error messages; the empty string
/// stays empty so failure output is not cluttered with `""`.
fn quote_or_empty(msg: &str) -> String {
    match msg {
        "" => String::new(),
        other => format!("\"{}\"", other),
    }
}

impl StringTest {
    fn test(&self, actual: &str, prefix: &str) {
        match self {
            Empty => assert!(
                actual.is_empty(),
                format!(
                    "{}: expected empty, got\n\n{}",
                    quote_or_empty(prefix),
                    quote_or_empty(actual)
                )
            ),
            AtLeast(exp) => assert!(
                actual.contains(exp),
                format!(
                    "{}: expected at least\n\n{}\n\ngot\n\n{}",
                    prefix,
                    quote_or_empty(exp),
                    quote_or_empty(actual)
                )
            ),
            Exactly(exp) => assert!(
                actual.trim() == exp.trim(),
                format!(
                    "{}: expected\n\n{}\n\ngot\n\n{}",
                    prefix,
                    quote_or_empty(exp),
                    quote_or_empty(actual)
                )
            ),
        }
    }
}

/// One end-to-end CLI scenario: arguments to pass to the diffr binary
/// and the expected outcome.
struct ProcessTest {
    args: &'static [&'static str],
    // expectation on captured stdout
    out: StringTest,
    // expectation on captured stderr
    err: StringTest,
    // expected exit status
    is_success: bool,
}

/// Location of the locally built diffr binary under target/debug.
fn diffr_path_default() -> PathBuf {
    let exe = if cfg!(windows) { "diffr.exe" } else { "diffr" };
    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("target")
        .join("debug")
        .join(exe)
}

/// Path of the binary under test, overridable via the
/// DIFFR_TESTS_BINARY_PATH environment variable.
fn diffr_path() -> PathBuf {
    env::var("DIFFR_TESTS_BINARY_PATH")
        .map(PathBuf::from)
        .unwrap_or_else(|_| diffr_path_default())
}

fn test_cli(descr: ProcessTest) {
    let mut cmd = Command::new(diffr_path());
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());
    cmd.stdin(Stdio::piped());
    for arg in descr.args {
        cmd.arg(&*arg);
    }
    let child = cmd.spawn().expect("spawn");
    let output = child.wait_with_output().expect("wait_with_output");
    fn string_of_status(code: bool) -> &'static str {
        if code {
            "success"
        } else {
            "failure"
        }
    };
    assert!(
        descr.is_success == output.status.success(),
        format!(
            "unexpected status: expected {} got {}",
            string_of_status(descr.is_success),
            string_of_status(output.status.success()),
        )
    );
    descr
        .out
        .test(&String::from_utf8_lossy(&output.stdout), "stdout");
    descr
        .err
        .test(&String::from_utf8_lossy(&output.stderr), "stderr");
}

// --debug prints timing statistics to stderr.
#[test]
fn debug_flag() {
    test_cli(ProcessTest {
        args: &["--debug"],
        out: Empty,
        err: AtLeast("hunk processing time (ms)"),
        is_success: true,
    })
}

// An unknown face name after --colors is rejected with a fatal error.
#[test]
fn color_invalid_face_name() {
    test_cli(ProcessTest {
        args: &["--colors", "notafacename"],
        out: Empty,
        err: Exactly("unexpected face name: got 'notafacename', expected added|refine-added|removed|refine-removed"),
        is_success: false,
    })
}

// A face name with no attributes is accepted as-is.
#[test]
fn color_only_face_name() {
    test_cli(ProcessTest {
        args: &["--colors", "added"],
        out: Empty,
        err: Exactly(""),
        is_success: true,
    })
}

// An unknown attribute name within a face spec is rejected.
#[test]
fn color_invalid_attribute_name() {
    test_cli(ProcessTest {
        args: &["--colors", "added:bar"],
        out: Empty,
        err: Exactly("unexpected attribute name: got 'bar', expected foreground|background|italic|noitalic|bold|nobold|intense|nointense|underline|nounderline|none"),
        is_success: false,
    })
}

// An unknown symbolic color value is rejected.
#[test]
fn color_invalid_color_value_name() {
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:baz"],
        out: Empty,
        err: Exactly("unexpected color value: unrecognized color name 'baz'. Choose from: black, blue, green, red, cyan, magenta, yellow, white"),
        is_success: false,
    })
}

// An ansi256 color number out of range is rejected.
#[test]
fn color_invalid_color_value_ansi() {
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:777"],
        out: Empty,
        err: AtLeast("unexpected color value: unrecognized ansi256 color number"),
        is_success: false,
    })
}

// An RGB triple with an out-of-range component is rejected.
#[test]
fn color_invalid_color_value_rgb() {
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:0,0,777"],
        out: Empty,
        err: AtLeast("unexpected color value: unrecognized RGB color triple"),
        is_success: false,
    })
}

// A color attribute with no value is rejected.
#[test]
fn color_invalid_color_not_done() {
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground"],
        out: Empty,
        err: Exactly("error parsing color: missing color value for face 'added'"),
        is_success: false,
    })
}

// A complete, valid face spec is accepted silently.
#[test]
fn color_ok() {
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:0"],
        out: Empty,
        err: Exactly(""),
        is_success: true,
    })
}

// --colors may be given several times, once per face.
#[test]
fn color_ok_multiple() {
    test_cli(ProcessTest {
        args: &[
            "--colors",
            "added:foreground:0",
            "--colors",
            "removed:background:red",
        ],
        out: Empty,
        err: Exactly(""),
        is_success: true,
    })
}

// --line-numbers accepts no value, "compact" or "aligned"; anything
// else is a fatal error.
#[test]
fn line_numbers_style() {
    // TODO  check config?

    // ok
    test_cli(ProcessTest {
        args: &["--line-numbers"],
        out: Empty,
        err: Empty,
        is_success: true,
    });
    test_cli(ProcessTest {
        args: &["--line-numbers", "compact"],
        out: Empty,
        err: Empty,
        is_success: true,
    });
    test_cli(ProcessTest {
        args: &["--line-numbers", "aligned"],
        out: Empty,
        err: Empty,
        is_success: true,
    });

    // fail
    test_cli(ProcessTest {
        args: &["--line-numbers", "foo"],
        out: Empty,
        err: Exactly("unexpected line number style: got 'foo', expected aligned|compact"),
        is_success: false,
    });
}

// An unrecognized flag is reported and exits with failure.
#[test]
fn test_bad_argument() {
    test_cli(ProcessTest {
        args: &["--invalid-option"],
        out: Empty,
        err: AtLeast("bad argument: '--invalid-option'"),
        is_success: false,
    });
}