grep-printer-0.1.6/.cargo_vcs_info.json0000644000000001120000000000100134500ustar { "git": { "sha1": "57ce623a57b304a72381872e5671f52671d593ff" } } grep-printer-0.1.6/Cargo.toml0000644000000030730000000000100114570ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "grep-printer" version = "0.1.6" authors = ["Andrew Gallant "] description = "An implementation of the grep crate's Sink trait that provides standard\nprinting of search results, similar to grep itself.\n" homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer" documentation = "https://docs.rs/grep-printer" readme = "README.md" keywords = ["grep", "pattern", "print", "printer", "sink"] license = "Unlicense/MIT" repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer" [dependencies.base64] version = "0.13.0" optional = true [dependencies.bstr] version = "0.2.0" [dependencies.grep-matcher] version = "0.1.5" [dependencies.grep-searcher] version = "0.1.8" [dependencies.serde] version = "1.0.77" features = ["derive"] optional = true [dependencies.serde_json] version = "1.0.27" optional = true [dependencies.termcolor] version = "1.0.4" [dev-dependencies.grep-regex] version = "0.1.9" [features] default = ["serde1"] serde1 = ["base64", "serde", "serde_json"] grep-printer-0.1.6/Cargo.toml.orig000064400000000000000000000020640072674642500151670ustar 00000000000000[package] name = "grep-printer" version = "0.1.6" #:version authors = ["Andrew Gallant "] description = 
""" An implementation of the grep crate's Sink trait that provides standard printing of search results, similar to grep itself. """ documentation = "https://docs.rs/grep-printer" homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer" repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer" readme = "README.md" keywords = ["grep", "pattern", "print", "printer", "sink"] license = "Unlicense/MIT" edition = "2018" [features] default = ["serde1"] serde1 = ["base64", "serde", "serde_json"] [dependencies] base64 = { version = "0.13.0", optional = true } bstr = "0.2.0" grep-matcher = { version = "0.1.5", path = "../matcher" } grep-searcher = { version = "0.1.8", path = "../searcher" } termcolor = "1.0.4" serde = { version = "1.0.77", optional = true, features = ["derive"] } serde_json = { version = "1.0.27", optional = true } [dev-dependencies] grep-regex = { version = "0.1.9", path = "../regex" } grep-printer-0.1.6/LICENSE-MIT000064400000000000000000000020710072674642500137320ustar 00000000000000The MIT License (MIT) Copyright (c) 2015 Andrew Gallant Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. grep-printer-0.1.6/README.md000064400000000000000000000013460072674642500135610ustar 00000000000000grep-printer ------------ Print results from line oriented searching in a human readable, aggregate or JSON Lines format. [![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions) [![](https://img.shields.io/crates/v/grep-printer.svg)](https://crates.io/crates/grep-printer) Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/). ### Documentation [https://docs.rs/grep-printer](https://docs.rs/grep-printer) **NOTE:** You probably don't want to use this crate directly. Instead, you should prefer the facade defined in the [`grep`](https://docs.rs/grep) crate. ### Usage Add this to your `Cargo.toml`: ```toml [dependencies] grep-printer = "0.1" ``` grep-printer-0.1.6/UNLICENSE000064400000000000000000000022730072674642500135520ustar 00000000000000This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to grep-printer-0.1.6/src/color.rs000064400000000000000000000311710072674642500145540ustar 00000000000000use std::error; use std::fmt; use std::str::FromStr; use termcolor::{Color, ColorSpec, ParseColorError}; /// Returns a default set of color specifications. /// /// This may change over time, but the color choices are meant to be fairly /// conservative that work across terminal themes. /// /// Additional color specifications can be added to the list returned. More /// recently added specifications override previously added specifications. pub fn default_color_specs() -> Vec { vec![ #[cfg(unix)] "path:fg:magenta".parse().unwrap(), #[cfg(windows)] "path:fg:cyan".parse().unwrap(), "line:fg:green".parse().unwrap(), "match:fg:red".parse().unwrap(), "match:style:bold".parse().unwrap(), ] } /// An error that can occur when parsing color specifications. #[derive(Clone, Debug, Eq, PartialEq)] pub enum ColorError { /// This occurs when an unrecognized output type is used. UnrecognizedOutType(String), /// This occurs when an unrecognized spec type is used. UnrecognizedSpecType(String), /// This occurs when an unrecognized color name is used. UnrecognizedColor(String, String), /// This occurs when an unrecognized style attribute is used. UnrecognizedStyle(String), /// This occurs when the format of a color specification is invalid. 
InvalidFormat(String), } impl error::Error for ColorError { fn description(&self) -> &str { match *self { ColorError::UnrecognizedOutType(_) => "unrecognized output type", ColorError::UnrecognizedSpecType(_) => "unrecognized spec type", ColorError::UnrecognizedColor(_, _) => "unrecognized color name", ColorError::UnrecognizedStyle(_) => "unrecognized style attribute", ColorError::InvalidFormat(_) => "invalid color spec", } } } impl ColorError { fn from_parse_error(err: ParseColorError) -> ColorError { ColorError::UnrecognizedColor( err.invalid().to_string(), err.to_string(), ) } } impl fmt::Display for ColorError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { ColorError::UnrecognizedOutType(ref name) => write!( f, "unrecognized output type '{}'. Choose from: \ path, line, column, match.", name, ), ColorError::UnrecognizedSpecType(ref name) => write!( f, "unrecognized spec type '{}'. Choose from: \ fg, bg, style, none.", name, ), ColorError::UnrecognizedColor(_, ref msg) => write!(f, "{}", msg), ColorError::UnrecognizedStyle(ref name) => write!( f, "unrecognized style attribute '{}'. Choose from: \ nobold, bold, nointense, intense, nounderline, \ underline.", name, ), ColorError::InvalidFormat(ref original) => write!( f, "invalid color spec format: '{}'. Valid format \ is '(path|line|column|match):(fg|bg|style):(value)'.", original, ), } } } /// A merged set of color specifications. /// /// This set of color specifications represents the various color types that /// are supported by the printers in this crate. A set of color specifications /// can be created from a sequence of /// [`UserColorSpec`s](struct.UserColorSpec.html). #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct ColorSpecs { path: ColorSpec, line: ColorSpec, column: ColorSpec, matched: ColorSpec, } /// A single color specification provided by the user. /// /// ## Format /// /// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. 
Each /// component is defined as follows: /// /// * `{type}` can be one of `path`, `line`, `column` or `match`. /// * `{attribute}` can be one of `fg`, `bg` or `style`. `{attribute}` may also /// be the special value `none`, in which case, `{value}` can be omitted. /// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction. /// /// `{type}` controls which part of the output should be styled. /// /// When `{attribute}` is `none`, then this should cause any existing style /// settings to be cleared for the specified `type`. /// /// `{value}` should be a color when `{attribute}` is `fg` or `bg`, or it /// should be a style instruction when `{attribute}` is `style`. When /// `{attribute}` is `none`, `{value}` must be omitted. /// /// Valid colors are `black`, `blue`, `green`, `red`, `cyan`, `magenta`, /// `yellow`, `white`. Extended colors can also be specified, and are formatted /// as `x` (for 256-bit colors) or `x,x,x` (for 24-bit true color), where /// `x` is a number between 0 and 255 inclusive. `x` may be given as a normal /// decimal number of a hexadecimal number, where the latter is prefixed by /// `0x`. /// /// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`, /// `underline`, `nounderline`. /// /// ## Example /// /// The standard way to build a `UserColorSpec` is to parse it from a string. /// Once multiple `UserColorSpec`s have been constructed, they can be provided /// to the standard printer where they will automatically be applied to the /// output. 
/// /// A `UserColorSpec` can also be converted to a `termcolor::ColorSpec`: /// /// ```rust /// # fn main() { /// use termcolor::{Color, ColorSpec}; /// use grep_printer::UserColorSpec; /// /// let user_spec1: UserColorSpec = "path:fg:blue".parse().unwrap(); /// let user_spec2: UserColorSpec = "match:bg:0xff,0x7f,0x00".parse().unwrap(); /// /// let spec1 = user_spec1.to_color_spec(); /// let spec2 = user_spec2.to_color_spec(); /// /// assert_eq!(spec1.fg(), Some(&Color::Blue)); /// assert_eq!(spec2.bg(), Some(&Color::Rgb(0xFF, 0x7F, 0x00))); /// # } /// ``` #[derive(Clone, Debug, Eq, PartialEq)] pub struct UserColorSpec { ty: OutType, value: SpecValue, } impl UserColorSpec { /// Convert this user provided color specification to a specification that /// can be used with `termcolor`. This drops the type of this specification /// (where the type indicates where the color is applied in the standard /// printer, e.g., to the file path or the line numbers, etc.). pub fn to_color_spec(&self) -> ColorSpec { let mut spec = ColorSpec::default(); self.value.merge_into(&mut spec); spec } } /// The actual value given by the specification. #[derive(Clone, Debug, Eq, PartialEq)] enum SpecValue { None, Fg(Color), Bg(Color), Style(Style), } /// The set of configurable portions of ripgrep's output. #[derive(Clone, Debug, Eq, PartialEq)] enum OutType { Path, Line, Column, Match, } /// The specification type. #[derive(Clone, Debug, Eq, PartialEq)] enum SpecType { Fg, Bg, Style, None, } /// The set of available styles for use in the terminal. #[derive(Clone, Debug, Eq, PartialEq)] enum Style { Bold, NoBold, Intense, NoIntense, Underline, NoUnderline, } impl ColorSpecs { /// Create color specifications from a list of user supplied /// specifications. 
pub fn new(specs: &[UserColorSpec]) -> ColorSpecs { let mut merged = ColorSpecs::default(); for spec in specs { match spec.ty { OutType::Path => spec.merge_into(&mut merged.path), OutType::Line => spec.merge_into(&mut merged.line), OutType::Column => spec.merge_into(&mut merged.column), OutType::Match => spec.merge_into(&mut merged.matched), } } merged } /// Create a default set of specifications that have color. /// /// This is distinct from `ColorSpecs`'s `Default` implementation in that /// this provides a set of default color choices, where as the `Default` /// implementation provides no color choices. pub fn default_with_color() -> ColorSpecs { ColorSpecs::new(&default_color_specs()) } /// Return the color specification for coloring file paths. pub fn path(&self) -> &ColorSpec { &self.path } /// Return the color specification for coloring line numbers. pub fn line(&self) -> &ColorSpec { &self.line } /// Return the color specification for coloring column numbers. pub fn column(&self) -> &ColorSpec { &self.column } /// Return the color specification for coloring matched text. pub fn matched(&self) -> &ColorSpec { &self.matched } } impl UserColorSpec { /// Merge this spec into the given color specification. fn merge_into(&self, cspec: &mut ColorSpec) { self.value.merge_into(cspec); } } impl SpecValue { /// Merge this spec value into the given color specification. 
fn merge_into(&self, cspec: &mut ColorSpec) { match *self { SpecValue::None => cspec.clear(), SpecValue::Fg(ref color) => { cspec.set_fg(Some(color.clone())); } SpecValue::Bg(ref color) => { cspec.set_bg(Some(color.clone())); } SpecValue::Style(ref style) => match *style { Style::Bold => { cspec.set_bold(true); } Style::NoBold => { cspec.set_bold(false); } Style::Intense => { cspec.set_intense(true); } Style::NoIntense => { cspec.set_intense(false); } Style::Underline => { cspec.set_underline(true); } Style::NoUnderline => { cspec.set_underline(false); } }, } } } impl FromStr for UserColorSpec { type Err = ColorError; fn from_str(s: &str) -> Result { let pieces: Vec<&str> = s.split(':').collect(); if pieces.len() <= 1 || pieces.len() > 3 { return Err(ColorError::InvalidFormat(s.to_string())); } let otype: OutType = pieces[0].parse()?; match pieces[1].parse()? { SpecType::None => { Ok(UserColorSpec { ty: otype, value: SpecValue::None }) } SpecType::Style => { if pieces.len() < 3 { return Err(ColorError::InvalidFormat(s.to_string())); } let style: Style = pieces[2].parse()?; Ok(UserColorSpec { ty: otype, value: SpecValue::Style(style) }) } SpecType::Fg => { if pieces.len() < 3 { return Err(ColorError::InvalidFormat(s.to_string())); } let color: Color = pieces[2].parse().map_err(ColorError::from_parse_error)?; Ok(UserColorSpec { ty: otype, value: SpecValue::Fg(color) }) } SpecType::Bg => { if pieces.len() < 3 { return Err(ColorError::InvalidFormat(s.to_string())); } let color: Color = pieces[2].parse().map_err(ColorError::from_parse_error)?; Ok(UserColorSpec { ty: otype, value: SpecValue::Bg(color) }) } } } } impl FromStr for OutType { type Err = ColorError; fn from_str(s: &str) -> Result { match &*s.to_lowercase() { "path" => Ok(OutType::Path), "line" => Ok(OutType::Line), "column" => Ok(OutType::Column), "match" => Ok(OutType::Match), _ => Err(ColorError::UnrecognizedOutType(s.to_string())), } } } impl FromStr for SpecType { type Err = ColorError; fn from_str(s: 
&str) -> Result { match &*s.to_lowercase() { "fg" => Ok(SpecType::Fg), "bg" => Ok(SpecType::Bg), "style" => Ok(SpecType::Style), "none" => Ok(SpecType::None), _ => Err(ColorError::UnrecognizedSpecType(s.to_string())), } } } impl FromStr for Style { type Err = ColorError; fn from_str(s: &str) -> Result { match &*s.to_lowercase() { "bold" => Ok(Style::Bold), "nobold" => Ok(Style::NoBold), "intense" => Ok(Style::Intense), "nointense" => Ok(Style::NoIntense), "underline" => Ok(Style::Underline), "nounderline" => Ok(Style::NoUnderline), _ => Err(ColorError::UnrecognizedStyle(s.to_string())), } } } grep-printer-0.1.6/src/counter.rs000064400000000000000000000041120072674642500151100ustar 00000000000000use std::io::{self, Write}; use termcolor::{ColorSpec, WriteColor}; /// A writer that counts the number of bytes that have been successfully /// written. #[derive(Clone, Debug)] pub struct CounterWriter { wtr: W, count: u64, total_count: u64, } impl CounterWriter { pub fn new(wtr: W) -> CounterWriter { CounterWriter { wtr: wtr, count: 0, total_count: 0 } } } impl CounterWriter { /// Returns the total number of bytes written since construction or the /// last time `reset` was called. pub fn count(&self) -> u64 { self.count } /// Returns the total number of bytes written since construction. pub fn total_count(&self) -> u64 { self.total_count + self.count } /// Resets the number of bytes written to `0`. pub fn reset_count(&mut self) { self.total_count += self.count; self.count = 0; } /// Clear resets all counting related state for this writer. /// /// After this call, the total count of bytes written to the underlying /// writer is erased and reset. 
#[allow(dead_code)] pub fn clear(&mut self) { self.count = 0; self.total_count = 0; } #[allow(dead_code)] pub fn get_ref(&self) -> &W { &self.wtr } pub fn get_mut(&mut self) -> &mut W { &mut self.wtr } pub fn into_inner(self) -> W { self.wtr } } impl Write for CounterWriter { fn write(&mut self, buf: &[u8]) -> Result { let n = self.wtr.write(buf)?; self.count += n as u64; Ok(n) } fn flush(&mut self) -> Result<(), io::Error> { self.wtr.flush() } } impl WriteColor for CounterWriter { fn supports_color(&self) -> bool { self.wtr.supports_color() } fn set_color(&mut self, spec: &ColorSpec) -> io::Result<()> { self.wtr.set_color(spec) } fn reset(&mut self) -> io::Result<()> { self.wtr.reset() } fn is_synchronous(&self) -> bool { self.wtr.is_synchronous() } } grep-printer-0.1.6/src/json.rs000064400000000000000000001071770072674642500144210ustar 00000000000000use std::io::{self, Write}; use std::path::Path; use std::time::Instant; use grep_matcher::{Match, Matcher}; use grep_searcher::{ Searcher, Sink, SinkContext, SinkContextKind, SinkFinish, SinkMatch, }; use serde_json as json; use crate::counter::CounterWriter; use crate::jsont; use crate::stats::Stats; use crate::util::find_iter_at_in_context; /// The configuration for the JSON printer. /// /// This is manipulated by the JSONBuilder and then referenced by the actual /// implementation. Once a printer is build, the configuration is frozen and /// cannot changed. #[derive(Debug, Clone)] struct Config { pretty: bool, max_matches: Option, always_begin_end: bool, } impl Default for Config { fn default() -> Config { Config { pretty: false, max_matches: None, always_begin_end: false } } } /// A builder for a JSON lines printer. /// /// The builder permits configuring how the printer behaves. The JSON printer /// has fewer configuration options than the standard printer because it is /// a structured format, and the printer always attempts to find the most /// information possible. 
/// /// Some configuration options, such as whether line numbers are included or /// whether contextual lines are shown, are drawn directly from the /// `grep_searcher::Searcher`'s configuration. /// /// Once a `JSON` printer is built, its configuration cannot be changed. #[derive(Clone, Debug)] pub struct JSONBuilder { config: Config, } impl JSONBuilder { /// Return a new builder for configuring the JSON printer. pub fn new() -> JSONBuilder { JSONBuilder { config: Config::default() } } /// Create a JSON printer that writes results to the given writer. pub fn build(&self, wtr: W) -> JSON { JSON { config: self.config.clone(), wtr: CounterWriter::new(wtr), matches: vec![], } } /// Print JSON in a pretty printed format. /// /// Enabling this will no longer produce a "JSON lines" format, in that /// each JSON object printed may span multiple lines. /// /// This is disabled by default. pub fn pretty(&mut self, yes: bool) -> &mut JSONBuilder { self.config.pretty = yes; self } /// Set the maximum amount of matches that are printed. /// /// If multi line search is enabled and a match spans multiple lines, then /// that match is counted exactly once for the purposes of enforcing this /// limit, regardless of how many lines it spans. pub fn max_matches(&mut self, limit: Option) -> &mut JSONBuilder { self.config.max_matches = limit; self } /// When enabled, the `begin` and `end` messages are always emitted, even /// when no match is found. /// /// When disabled, the `begin` and `end` messages are only shown if there /// is at least one `match` or `context` message. /// /// This is disabled by default. pub fn always_begin_end(&mut self, yes: bool) -> &mut JSONBuilder { self.config.always_begin_end = yes; self } } /// The JSON printer, which emits results in a JSON lines format. /// /// This type is generic over `W`, which represents any implementation of /// the standard library `io::Write` trait. 
/// /// # Format /// /// This section describes the JSON format used by this printer. /// /// To skip the rigamarole, take a look at the /// [example](#example) /// at the end. /// /// ## Overview /// /// The format of this printer is the [JSON Lines](https://jsonlines.org/) /// format. Specifically, this printer emits a sequence of messages, where /// each message is encoded as a single JSON value on a single line. There are /// four different types of messages (and this number may expand over time): /// /// * **begin** - A message that indicates a file is being searched. /// * **end** - A message the indicates a file is done being searched. This /// message also include summary statistics about the search. /// * **match** - A message that indicates a match was found. This includes /// the text and offsets of the match. /// * **context** - A message that indicates a contextual line was found. /// This includes the text of the line, along with any match information if /// the search was inverted. /// /// Every message is encoded in the same envelope format, which includes a tag /// indicating the message type along with an object for the payload: /// /// ```json /// { /// "type": "{begin|end|match|context}", /// "data": { ... } /// } /// ``` /// /// The message itself is encoded in the envelope's `data` key. /// /// ## Text encoding /// /// Before describing each message format, we first must briefly discuss text /// encoding, since it factors into every type of message. In particular, JSON /// may only be encoded in UTF-8, UTF-16 or UTF-32. For the purposes of this /// printer, we need only worry about UTF-8. The problem here is that searching /// is not limited to UTF-8 exclusively, which in turn implies that matches /// may be reported that contain invalid UTF-8. Moreover, this printer may /// also print file paths, and the encoding of file paths is itself not /// guarnateed to be valid UTF-8. 
Therefore, this printer must deal with the /// presence of invalid UTF-8 somehow. The printer could silently ignore such /// things completely, or even lossily transcode invalid UTF-8 to valid UTF-8 /// by replacing all invalid sequences with the Unicode replacement character. /// However, this would prevent consumers of this format from accessing the /// original data in a non-lossy way. /// /// Therefore, this printer will emit valid UTF-8 encoded bytes as normal /// JSON strings and otherwise base64 encode data that isn't valid UTF-8. To /// communicate whether this process occurs or not, strings are keyed by the /// name `text` where as arbitrary bytes are keyed by `bytes`. /// /// For example, when a path is included in a message, it is formatted like so, /// if and only if the path is valid UTF-8: /// /// ```json /// { /// "path": { /// "text": "/home/ubuntu/lib.rs" /// } /// } /// ``` /// /// If instead our path was `/home/ubuntu/lib\xFF.rs`, where the `\xFF` byte /// makes it invalid UTF-8, the path would instead be encoded like so: /// /// ```json /// { /// "path": { /// "bytes": "L2hvbWUvdWJ1bnR1L2xpYv8ucnM=" /// } /// } /// ``` /// /// This same representation is used for reporting matches as well. /// /// The printer guarantees that the `text` field is used whenever the /// underlying bytes are valid UTF-8. /// /// ## Wire format /// /// This section documents the wire format emitted by this printer, starting /// with the four types of messages. /// /// Each message has its own format, and is contained inside an envelope that /// indicates the type of message. The envelope has these fields: /// /// * **type** - A string indicating the type of this message. It may be one /// of four possible strings: `begin`, `end`, `match` or `context`. This /// list may expand over time. /// * **data** - The actual message data. The format of this field depends on /// the value of `type`. 
The possible message formats are /// [`begin`](#message-begin), /// [`end`](#message-end), /// [`match`](#message-match), /// [`context`](#message-context). /// /// #### Message: **begin** /// /// This message indicates that a search has begun. It has these fields: /// /// * **path** - An /// [arbitrary data object](#object-arbitrary-data) /// representing the file path corresponding to the search, if one is /// present. If no file path is available, then this field is `null`. /// /// #### Message: **end** /// /// This message indicates that a search has finished. It has these fields: /// /// * **path** - An /// [arbitrary data object](#object-arbitrary-data) /// representing the file path corresponding to the search, if one is /// present. If no file path is available, then this field is `null`. /// * **binary_offset** - The absolute offset in the data searched /// corresponding to the place at which binary data was detected. If no /// binary data was detected (or if binary detection was disabled), then this /// field is `null`. /// * **stats** - A [`stats` object](#object-stats) that contains summary /// statistics for the previous search. /// /// #### Message: **match** /// /// This message indicates that a match has been found. A match generally /// corresponds to a single line of text, although it may correspond to /// multiple lines if the search can emit matches over multiple lines. It /// has these fields: /// /// * **path** - An /// [arbitrary data object](#object-arbitrary-data) /// representing the file path corresponding to the search, if one is /// present. If no file path is available, then this field is `null`. /// * **lines** - An /// [arbitrary data object](#object-arbitrary-data) /// representing one or more lines contained in this match. /// * **line_number** - If the searcher has been configured to report line /// numbers, then this corresponds to the line number of the first line /// in `lines`. 
If no line numbers are available, then this is `null`. /// * **absolute_offset** - The absolute byte offset corresponding to the start /// of `lines` in the data being searched. /// * **submatches** - An array of [`submatch` objects](#object-submatch) /// corresponding to matches in `lines`. The offsets included in each /// `submatch` correspond to byte offsets into `lines`. (If `lines` is base64 /// encoded, then the byte offsets correspond to the data after base64 /// decoding.) The `submatch` objects are guaranteed to be sorted by their /// starting offsets. Note that it is possible for this array to be empty, /// for example, when searching reports inverted matches. /// /// #### Message: **context** /// /// This message indicates that a contextual line has been found. A contextual /// line is a line that doesn't contain a match, but is generally adjacent to /// a line that does contain a match. The precise way in which contextual lines /// are reported is determined by the searcher. It has these fields, which are /// exactly the same fields found in a [`match`](#message-match): /// /// * **path** - An /// [arbitrary data object](#object-arbitrary-data) /// representing the file path corresponding to the search, if one is /// present. If no file path is available, then this field is `null`. /// * **lines** - An /// [arbitrary data object](#object-arbitrary-data) /// representing one or more lines contained in this context. This includes /// line terminators, if they're present. /// * **line_number** - If the searcher has been configured to report line /// numbers, then this corresponds to the line number of the first line /// in `lines`. If no line numbers are available, then this is `null`. /// * **absolute_offset** - The absolute byte offset corresponding to the start /// of `lines` in the data being searched. /// * **submatches** - An array of [`submatch` objects](#object-submatch) /// corresponding to matches in `lines`. 
The offsets included in each /// `submatch` correspond to byte offsets into `lines`. (If `lines` is base64 /// encoded, then the byte offsets correspond to the data after base64 /// decoding.) The `submatch` objects are guaranteed to be sorted by /// their starting offsets. Note that it is possible for this array to be /// non-empty, for example, when searching reports inverted matches such that /// the original matcher could match things in the contextual lines. /// /// #### Object: **submatch** /// /// This object describes submatches found within `match` or `context` /// messages. The `start` and `end` fields indicate the half-open interval on /// which the match occurs (`start` is included, but `end` is not). It is /// guaranteed that `start <= end`. It has these fields: /// /// * **match** - An /// [arbitrary data object](#object-arbitrary-data) /// corresponding to the text in this submatch. /// * **start** - A byte offset indicating the start of this match. This offset /// is generally reported in terms of the parent object's data. For example, /// the `lines` field in the /// [`match`](#message-match) or [`context`](#message-context) /// messages. /// * **end** - A byte offset indicating the end of this match. This offset /// is generally reported in terms of the parent object's data. For example, /// the `lines` field in the /// [`match`](#message-match) or [`context`](#message-context) /// messages. /// /// #### Object: **stats** /// /// This object is included in messages and contains summary statistics about /// a search. It has these fields: /// /// * **elapsed** - A [`duration` object](#object-duration) describing the /// length of time that elapsed while performing the search. /// * **searches** - The number of searches that have run. For this printer, /// this value is always `1`. (Implementations may emit additional message /// types that use this same `stats` object that represents summary /// statistics over multiple searches.) 
/// * **searches_with_match** - The number of searches that have run that have /// found at least one match. This is never more than `searches`. /// * **bytes_searched** - The total number of bytes that have been searched. /// * **bytes_printed** - The total number of bytes that have been printed. /// This includes everything emitted by this printer. /// * **matched_lines** - The total number of lines that participated in a /// match. When matches may contain multiple lines, then this includes every /// line that is part of every match. /// * **matches** - The total number of matches. There may be multiple matches /// per line. When matches may contain multiple lines, each match is counted /// only once, regardless of how many lines it spans. /// /// #### Object: **duration** /// /// This object includes a few fields for describing a duration. Two of its /// fields, `secs` and `nanos`, can be combined to give nanosecond precision /// on systems that support it. It has these fields: /// /// * **secs** - A whole number of seconds indicating the length of this /// duration. /// * **nanos** - A fractional part of this duration represent by nanoseconds. /// If nanosecond precision isn't supported, then this is typically rounded /// up to the nearest number of nanoseconds. /// * **human** - A human readable string describing the length of the /// duration. The format of the string is itself unspecified. /// /// #### Object: **arbitrary data** /// /// This object is used whenever arbitrary data needs to be represented as a /// JSON value. This object contains two fields, where generally only one of /// the fields is present: /// /// * **text** - A normal JSON string that is UTF-8 encoded. This field is /// populated if and only if the underlying data is valid UTF-8. /// * **bytes** - A normal JSON string that is a base64 encoding of the /// underlying bytes. 
///
/// More information on the motivation for this representation can be seen in
/// the section [text encoding](#text-encoding) above.
///
/// ## Example
///
/// This section shows a small example that includes all message types.
///
/// Here's the file we want to search, located at `/home/andrew/sherlock`:
///
/// ```text
/// For the Doctor Watsons of this world, as opposed to the Sherlock
/// Holmeses, success in the province of detective work must always
/// be, to a very large extent, the result of luck. Sherlock Holmes
/// can extract a clew from a wisp of straw or a flake of cigar ash;
/// but Doctor Watson has to have it taken out for him and dusted,
/// and exhibited clearly, with a label attached.
/// ```
///
/// Searching for `Watson` with a `before_context` of `1` with line numbers
/// enabled shows something like this using the standard printer:
///
/// ```text
/// sherlock:1:For the Doctor Watsons of this world, as opposed to the Sherlock
/// --
/// sherlock-4-can extract a clew from a wisp of straw or a flake of cigar ash;
/// sherlock:5:but Doctor Watson has to have it taken out for him and dusted,
/// ```
///
/// Here's what the same search looks like using the JSON wire format described
/// above, wherein we show semi-prettified JSON (instead of a strict JSON
/// Lines format), for illustrative purposes:
///
/// ```json
/// {
///   "type": "begin",
///   "data": {
///     "path": {"text": "/home/andrew/sherlock"}
///   }
/// }
/// {
///   "type": "match",
///   "data": {
///     "path": {"text": "/home/andrew/sherlock"},
///     "lines": {"text": "For the Doctor Watsons of this world, as opposed to the Sherlock\n"},
///     "line_number": 1,
///     "absolute_offset": 0,
///     "submatches": [
///       {"match": {"text": "Watson"}, "start": 15, "end": 21}
///     ]
///   }
/// }
/// {
///   "type": "context",
///   "data": {
///     "path": {"text": "/home/andrew/sherlock"},
///     "lines": {"text": "can extract a clew from a wisp of straw or a flake of cigar ash;\n"},
///     "line_number": 4,
///     "absolute_offset": 193,
///     "submatches": []
///   }
/// }
/// {
///   "type": "match",
///   "data": {
///     "path": {"text": "/home/andrew/sherlock"},
///     "lines": {"text": "but Doctor Watson has to have it taken out for him and dusted,\n"},
///     "line_number": 5,
///     "absolute_offset": 258,
///     "submatches": [
///       {"match": {"text": "Watson"}, "start": 11, "end": 17}
///     ]
///   }
/// }
/// {
///   "type": "end",
///   "data": {
///     "path": {"text": "/home/andrew/sherlock"},
///     "binary_offset": null,
///     "stats": {
///       "elapsed": {"secs": 0, "nanos": 36296, "human": "0.0000s"},
///       "searches": 1,
///       "searches_with_match": 1,
///       "bytes_searched": 367,
///       "bytes_printed": 1151,
///       "matched_lines": 2,
///       "matches": 2
///     }
///   }
/// }
/// ```
#[derive(Debug)]
pub struct JSON<W> {
    // Printer settings (`pretty`, `max_matches`, `always_begin_end`, ...).
    config: Config,
    // The destination writer, wrapped so the bytes emitted can be counted.
    wtr: CounterWriter<W>,
    // Scratch buffer holding the submatches of the line(s) currently being
    // printed. It is cleared and refilled for every message.
    matches: Vec<Match>,
}

impl<W: io::Write> JSON<W> {
    /// Return a JSON lines printer with a default configuration that writes
    /// matches to the given writer.
    pub fn new(wtr: W) -> JSON<W> {
        JSONBuilder::new().build(wtr)
    }

    /// Return an implementation of `Sink` for the JSON printer.
    ///
    /// This does not associate the printer with a file path, which means this
    /// implementation will never print a file path along with the matches.
    pub fn sink<'s, M: Matcher>(
        &'s mut self,
        matcher: M,
    ) -> JSONSink<'static, 's, M, W> {
        JSONSink {
            matcher,
            json: self,
            path: None,
            start_time: Instant::now(),
            match_count: 0,
            after_context_remaining: 0,
            binary_byte_offset: None,
            begin_printed: false,
            stats: Stats::new(),
        }
    }

    /// Return an implementation of `Sink` associated with a file path.
    ///
    /// When the printer is associated with a path, then it may, depending on
    /// its configuration, print the path along with the matches found.
    pub fn sink_with_path<'p, 's, M, P>(
        &'s mut self,
        matcher: M,
        path: &'p P,
    ) -> JSONSink<'p, 's, M, W>
    where
        M: Matcher,
        P: ?Sized + AsRef<Path>,
    {
        JSONSink {
            matcher,
            json: self,
            path: Some(path.as_ref()),
            start_time: Instant::now(),
            match_count: 0,
            after_context_remaining: 0,
            binary_byte_offset: None,
            begin_printed: false,
            stats: Stats::new(),
        }
    }

    /// Serialize the given message to the underlying writer, followed by a
    /// single `\n` byte.
    ///
    /// The message is pretty-printed when `config.pretty` is set and written
    /// in compact form otherwise. The terminator is always `\n`; it does not
    /// depend on any searcher configuration.
    ///
    /// # Errors
    ///
    /// Returns any error reported while serializing the message or while
    /// writing to the underlying writer.
    fn write_message(
        &mut self,
        message: &jsont::Message<'_>,
    ) -> io::Result<()> {
        if self.config.pretty {
            json::to_writer_pretty(&mut self.wtr, message)?;
        } else {
            json::to_writer(&mut self.wtr, message)?;
        }
        // `Write::write` is allowed to write fewer bytes than requested
        // (including zero), which would silently drop the terminator and
        // glue two messages together. `write_all` retries until the byte is
        // written or an error occurs.
        self.wtr.write_all(b"\n")?;
        Ok(())
    }
}

impl<W> JSON<W> {
    /// Returns true if and only if this printer has written at least one byte
    /// to the underlying writer during any of the previous searches.
    pub fn has_written(&self) -> bool {
        self.wtr.total_count() > 0
    }

    /// Return a mutable reference to the underlying writer.
    pub fn get_mut(&mut self) -> &mut W {
        self.wtr.get_mut()
    }

    /// Consume this printer and return back ownership of the underlying
    /// writer.
    pub fn into_inner(self) -> W {
        self.wtr.into_inner()
    }
}

/// An implementation of `Sink` associated with a matcher and an optional file
/// path for the JSON printer.
///
/// This type is generic over a few type parameters:
///
/// * `'p` refers to the lifetime of the file path, if one is provided. When
///   no file path is given, then this is `'static`.
/// * `'s` refers to the lifetime of the
///   [`JSON`](struct.JSON.html)
///   printer that this type borrows.
/// * `M` refers to the type of matcher used by
///   `grep_searcher::Searcher` that is reporting results to this sink.
/// * `W` refers to the underlying writer that this printer is writing its
///   output to.
#[derive(Debug)]
pub struct JSONSink<'p, 's, M: Matcher, W> {
    // The matcher used to locate individual submatches inside reported lines.
    matcher: M,
    // The printer this sink writes through.
    json: &'s mut JSON<W>,
    // The file path attached to every message, if any.
    path: Option<&'p Path>,
    // When the current search started; used for the `elapsed` statistic.
    start_time: Instant,
    // Number of times `matched` has been called in the current search.
    match_count: u64,
    // Number of "after" context lines still allowed once the match limit has
    // been reached.
    after_context_remaining: u64,
    // Offset of the first binary data seen, as reported when a search ends.
    binary_byte_offset: Option<u64>,
    // Whether the "begin" message has already been written.
    begin_printed: bool,
    // Aggregate statistics accumulated by this sink.
    stats: Stats,
}

impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
    /// Returns true if and only if this printer received a match in the
    /// previous search.
    ///
    /// This is unaffected by the result of searches before the previous
    /// search.
    pub fn has_match(&self) -> bool {
        self.match_count > 0
    }

    /// Return the total number of matches reported to this sink.
    ///
    /// This corresponds to the number of times `Sink::matched` is called.
    pub fn match_count(&self) -> u64 {
        self.match_count
    }

    /// If binary data was found in the previous search, this returns the
    /// offset at which the binary data was first detected.
    ///
    /// The offset returned is an absolute offset relative to the entire
    /// set of bytes searched.
    ///
    /// This is unaffected by the result of searches before the previous
    /// search. e.g., If the search prior to the previous search found binary
    /// data but the previous search found no binary data, then this will
    /// return `None`.
    pub fn binary_byte_offset(&self) -> Option<u64> {
        self.binary_byte_offset
    }

    /// Return a reference to the stats produced by the printer for all
    /// searches executed on this sink.
    pub fn stats(&self) -> &Stats {
        &self.stats
    }

    /// Execute the matcher over the given bytes and record the match
    /// locations if the current configuration demands match granularity.
    ///
    /// `range` selects the portion of `bytes` being reported. The recorded
    /// matches are stored in `self.json.matches` with offsets relative to
    /// `range.start`, replacing whatever was recorded before.
    fn record_matches(
        &mut self,
        searcher: &Searcher,
        bytes: &[u8],
        range: std::ops::Range<usize>,
    ) -> io::Result<()> {
        self.json.matches.clear();
        // If printing requires knowing the location of each individual match,
        // then compute and store those right now for use later. While this
        // adds an extra copy for storing the matches, we do amortize the
        // allocation for it and this greatly simplifies the printing logic to
        // the extent that it's easy to ensure that we never do more than
        // one search to find the matches.
        let range_len = range.end - range.start;
        let matches = &mut self.json.matches;
        find_iter_at_in_context(
            searcher,
            &self.matcher,
            bytes,
            range.clone(),
            |m| {
                // Rebase onto the start of the reported range so that the
                // offsets index into the emitted `lines` field.
                let (s, e) = (m.start() - range.start, m.end() - range.start);
                matches.push(Match::new(s, e));
                true
            },
        )?;
        // Don't report empty matches appearing at the end of the bytes.
        //
        // The recorded offsets are relative to `range.start`, so the bound
        // to compare against is the length of the range and not the length
        // of the whole buffer.
        let drop_last = matches
            .last()
            .map_or(false, |m| m.is_empty() && m.start() >= range_len);
        if drop_last {
            matches.pop();
        }
        Ok(())
    }

    /// Returns true if this printer should quit.
    ///
    /// This implements the logic for handling quitting after seeing a certain
    /// amount of matches. In most cases, the logic is simple, but we must
    /// permit all "after" contextual lines to print after reaching the limit.
    fn should_quit(&self) -> bool {
        let limit = match self.json.config.max_matches {
            None => return false,
            Some(limit) => limit,
        };
        if self.match_count < limit {
            return false;
        }
        self.after_context_remaining == 0
    }

    /// Returns whether the current match count exceeds the configured limit.
    /// If there is no limit, then this always returns false.
    fn match_more_than_limit(&self) -> bool {
        let limit = match self.json.config.max_matches {
            None => return false,
            Some(limit) => limit,
        };
        self.match_count > limit
    }

    /// Write the "begin" message.
fn write_begin_message(&mut self) -> io::Result<()> {
        // Once the message has been written, further calls are no-ops.
        if self.begin_printed {
            return Ok(());
        }
        let msg = jsont::Message::Begin(jsont::Begin { path: self.path });
        self.json.write_message(&msg)?;
        self.begin_printed = true;
        Ok(())
    }
}

impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
    type Error = io::Error;

    /// Emit a "match" message for the given match.
    ///
    /// Returns `Ok(false)` when the search should stop because the
    /// configured match limit has been reached and no "after" context lines
    /// remain to be printed.
    fn matched(
        &mut self,
        searcher: &Searcher,
        mat: &SinkMatch<'_>,
    ) -> Result<bool, io::Error> {
        self.write_begin_message()?;

        self.match_count += 1;
        // When we've exceeded our match count, then the remaining context
        // lines should not be reset, but instead, decremented. This avoids a
        // bug where we display more matches than a configured limit. The main
        // idea here is that 'matched' might be called again while printing
        // an after-context line. In that case, we should treat this as a
        // contextual line rather than a matching line for the purposes of
        // termination.
        if self.match_more_than_limit() {
            self.after_context_remaining =
                self.after_context_remaining.saturating_sub(1);
        } else {
            self.after_context_remaining = searcher.after_context() as u64;
        }

        self.record_matches(
            searcher,
            mat.buffer(),
            mat.bytes_range_in_buffer(),
        )?;
        self.stats.add_matches(self.json.matches.len() as u64);
        self.stats.add_matched_lines(mat.lines().count() as u64);

        let submatches = SubMatches::new(mat.bytes(), &self.json.matches);
        let msg = jsont::Message::Match(jsont::Match {
            path: self.path,
            lines: mat.bytes(),
            line_number: mat.line_number(),
            absolute_offset: mat.absolute_byte_offset(),
            submatches: submatches.as_slice(),
        });
        self.json.write_message(&msg)?;
        Ok(!self.should_quit())
    }

    /// Emit a "context" message for the given contextual line.
    ///
    /// Submatches are only computed when the searcher inverts matches, since
    /// that is the only case handled here in which a contextual line is
    /// searched with the matcher.
    fn context(
        &mut self,
        searcher: &Searcher,
        ctx: &SinkContext<'_>,
    ) -> Result<bool, io::Error> {
        self.write_begin_message()?;
        self.json.matches.clear();

        if ctx.kind() == &SinkContextKind::After {
            self.after_context_remaining =
                self.after_context_remaining.saturating_sub(1);
        }
        let submatches = if searcher.invert_match() {
            self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
            SubMatches::new(ctx.bytes(), &self.json.matches)
        } else {
            SubMatches::empty()
        };
        let msg = jsont::Message::Context(jsont::Context {
            path: self.path,
            lines: ctx.bytes(),
            line_number: ctx.line_number(),
            absolute_offset: ctx.absolute_byte_offset(),
            submatches: submatches.as_slice(),
        });
        self.json.write_message(&msg)?;
        Ok(!self.should_quit())
    }

    /// Reset the per-search state and, if `always_begin_end` is configured,
    /// emit the "begin" message right away.
    ///
    /// Returns `Ok(false)` (do not search at all) when the match limit is
    /// zero.
    fn begin(&mut self, _searcher: &Searcher) -> Result<bool, io::Error> {
        self.json.wtr.reset_count();
        self.start_time = Instant::now();
        self.match_count = 0;
        self.after_context_remaining = 0;
        self.binary_byte_offset = None;
        if self.json.config.max_matches == Some(0) {
            return Ok(false);
        }

        if !self.json.config.always_begin_end {
            return Ok(true);
        }
        self.write_begin_message()?;
        Ok(true)
    }

    /// Record the statistics of the finished search and, if a "begin"
    /// message was written, emit the matching "end" message.
    fn finish(
        &mut self,
        _searcher: &Searcher,
        finish: &SinkFinish,
    ) -> Result<(), io::Error> {
        // Do the bookkeeping before the early return below. Otherwise a
        // search that printed nothing is never counted in `stats()`, and
        // `binary_byte_offset()` never reports its binary data, which
        // contradicts the documentation of both accessors.
        self.binary_byte_offset = finish.binary_byte_offset();
        self.stats.add_elapsed(self.start_time.elapsed());
        self.stats.add_searches(1);
        if self.match_count > 0 {
            self.stats.add_searches_with_match(1);
        }
        self.stats.add_bytes_searched(finish.byte_count());
        self.stats.add_bytes_printed(self.json.wtr.count());

        // An "end" message is only meaningful when paired with a "begin".
        if !self.begin_printed {
            return Ok(());
        }

        let msg = jsont::Message::End(jsont::End {
            path: self.path,
            binary_offset: finish.binary_byte_offset(),
            stats: self.stats.clone(),
        });
        self.json.write_message(&msg)?;
        Ok(())
    }
}

/// SubMatches represents a set of matches in a contiguous range of bytes.
///
/// A simpler representation for this would just simply be `Vec<SubMatch>`,
/// but the common case is exactly one match per range of bytes, which we
/// specialize here using a fixed size array without any allocation.
enum SubMatches<'a> {
    Empty,
    Small([jsont::SubMatch<'a>; 1]),
    Big(Vec<jsont::SubMatch<'a>>),
}

impl<'a> SubMatches<'a> {
    /// Create a new set of match ranges from a set of matches and the
    /// corresponding bytes that those matches apply to.
fn new(bytes: &'a [u8], matches: &[Match]) -> SubMatches<'a> { if matches.len() == 1 { let mat = matches[0]; SubMatches::Small([jsont::SubMatch { m: &bytes[mat], start: mat.start(), end: mat.end(), }]) } else { let mut match_ranges = vec![]; for &mat in matches { match_ranges.push(jsont::SubMatch { m: &bytes[mat], start: mat.start(), end: mat.end(), }); } SubMatches::Big(match_ranges) } } /// Create an empty set of match ranges. fn empty() -> SubMatches<'static> { SubMatches::Empty } /// Return this set of match ranges as a slice. fn as_slice(&self) -> &[jsont::SubMatch<'_>] { match *self { SubMatches::Empty => &[], SubMatches::Small(ref x) => x, SubMatches::Big(ref x) => x, } } } #[cfg(test)] mod tests { use grep_matcher::LineTerminator; use grep_regex::{RegexMatcher, RegexMatcherBuilder}; use grep_searcher::SearcherBuilder; use super::{JSONBuilder, JSON}; const SHERLOCK: &'static [u8] = b"\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always be, to a very large extent, the result of luck. Sherlock Holmes can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached. "; fn printer_contents(printer: &mut JSON>) -> String { String::from_utf8(printer.get_mut().to_owned()).unwrap() } #[test] fn binary_detection() { use grep_searcher::BinaryDetection; const BINARY: &'static [u8] = b"\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always be, to a very large extent, the result of luck. 
Sherlock Holmes can extract a clew \x00 from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached.\ "; let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = JSONBuilder::new().build(vec![]); SearcherBuilder::new() .binary_detection(BinaryDetection::quit(b'\x00')) .heap_limit(Some(80)) .build() .search_reader(&matcher, BINARY, printer.sink(&matcher)) .unwrap(); let got = printer_contents(&mut printer); assert_eq!(got.lines().count(), 3); let last = got.lines().last().unwrap(); assert!(last.contains(r#""binary_offset":212,"#)); } #[test] fn max_matches() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = JSONBuilder::new().max_matches(Some(1)).build(vec![]); SearcherBuilder::new() .build() .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)) .unwrap(); let got = printer_contents(&mut printer); assert_eq!(got.lines().count(), 3); } #[test] fn max_matches_after_context() { let haystack = "\ a b c d e d e d e d e "; let matcher = RegexMatcher::new(r"d").unwrap(); let mut printer = JSONBuilder::new().max_matches(Some(1)).build(vec![]); SearcherBuilder::new() .after_context(2) .build() .search_reader( &matcher, haystack.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq!(got.lines().count(), 5); } #[test] fn no_match() { let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap(); let mut printer = JSONBuilder::new().build(vec![]); SearcherBuilder::new() .build() .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)) .unwrap(); let got = printer_contents(&mut printer); assert!(got.is_empty()); } #[test] fn always_begin_end_no_match() { let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap(); let mut printer = JSONBuilder::new().always_begin_end(true).build(vec![]); SearcherBuilder::new() .build() .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)) .unwrap(); let got = 
printer_contents(&mut printer); assert_eq!(got.lines().count(), 2); assert!(got.contains("begin") && got.contains("end")); } #[test] fn missing_crlf() { let haystack = "test\r\n".as_bytes(); let matcher = RegexMatcherBuilder::new().build("test").unwrap(); let mut printer = JSONBuilder::new().build(vec![]); SearcherBuilder::new() .build() .search_reader(&matcher, haystack, printer.sink(&matcher)) .unwrap(); let got = printer_contents(&mut printer); assert_eq!(got.lines().count(), 3); assert!( got.lines().nth(1).unwrap().contains(r"test\r\n"), r"missing 'test\r\n' in '{}'", got.lines().nth(1).unwrap(), ); let matcher = RegexMatcherBuilder::new().crlf(true).build("test").unwrap(); let mut printer = JSONBuilder::new().build(vec![]); SearcherBuilder::new() .line_terminator(LineTerminator::crlf()) .build() .search_reader(&matcher, haystack, printer.sink(&matcher)) .unwrap(); let got = printer_contents(&mut printer); assert_eq!(got.lines().count(), 3); assert!( got.lines().nth(1).unwrap().contains(r"test\r\n"), r"missing 'test\r\n' in '{}'", got.lines().nth(1).unwrap(), ); } } grep-printer-0.1.6/src/jsont.rs000064400000000000000000000077500072674642500146010ustar 00000000000000// This module defines the types we use for JSON serialization. We specifically // omit deserialization, partially because there isn't a clear use case for // them at this time, but also because deserialization will complicate things. // Namely, the types below are designed in a way that permits JSON // serialization with little or no allocation. Allocation is often quite // convenient for deserialization however, so these types would become a bit // more complex. 
use std::borrow::Cow;
use std::path::Path;
use std::str;

use base64;
use serde::{Serialize, Serializer};

use crate::stats::Stats;

/// A single message of the JSON wire format.
///
/// Serialized as an object with a `type` tag (the snake_case variant name)
/// and the variant's payload under `data`.
#[derive(Serialize)]
#[serde(tag = "type", content = "data")]
#[serde(rename_all = "snake_case")]
pub enum Message<'a> {
    Begin(Begin<'a>),
    End(End<'a>),
    Match(Match<'a>),
    Context(Context<'a>),
}

/// Payload of a `begin` message.
#[derive(Serialize)]
pub struct Begin<'a> {
    /// The path being searched, if one was given to the sink.
    #[serde(serialize_with = "ser_path")]
    pub path: Option<&'a Path>,
}

/// Payload of an `end` message.
#[derive(Serialize)]
pub struct End<'a> {
    /// The path that was searched, if one was given to the sink.
    #[serde(serialize_with = "ser_path")]
    pub path: Option<&'a Path>,
    /// The offset at which binary data was detected, if any.
    pub binary_offset: Option<u64>,
    /// Summary statistics for the search.
    pub stats: Stats,
}

/// Payload of a `match` message.
#[derive(Serialize)]
pub struct Match<'a> {
    /// The path being searched, if one was given to the sink.
    #[serde(serialize_with = "ser_path")]
    pub path: Option<&'a Path>,
    /// The raw bytes of the matching line(s).
    #[serde(serialize_with = "ser_bytes")]
    pub lines: &'a [u8],
    /// The line number of the first line in `lines`, if available.
    pub line_number: Option<u64>,
    /// The absolute byte offset of the start of `lines`.
    pub absolute_offset: u64,
    /// The individual matches found within `lines`.
    pub submatches: &'a [SubMatch<'a>],
}

/// Payload of a `context` message. It has the same shape as `Match`.
#[derive(Serialize)]
pub struct Context<'a> {
    /// The path being searched, if one was given to the sink.
    #[serde(serialize_with = "ser_path")]
    pub path: Option<&'a Path>,
    /// The raw bytes of the contextual line(s).
    #[serde(serialize_with = "ser_bytes")]
    pub lines: &'a [u8],
    /// The line number of the first line in `lines`, if available.
    pub line_number: Option<u64>,
    /// The absolute byte offset of the start of `lines`.
    pub absolute_offset: u64,
    /// The individual matches found within `lines`.
    pub submatches: &'a [SubMatch<'a>],
}

/// A single match inside the `lines` of a `match` or `context` message.
#[derive(Serialize)]
pub struct SubMatch<'a> {
    /// The matched bytes. Serialized under the key `match`.
    #[serde(rename = "match")]
    #[serde(serialize_with = "ser_bytes")]
    pub m: &'a [u8],
    /// Start of the match as a byte offset.
    pub start: usize,
    /// End of the match as a byte offset.
    pub end: usize,
}

/// Data represents things that look like strings, but may actually not be
/// valid UTF-8. To handle this, `Data` is serialized as an object with one
/// of two keys: `text` (for valid UTF-8) or `bytes` (for invalid UTF-8).
///
/// The happy path is valid UTF-8, which streams right through as-is, since
/// it is natively supported by JSON. When invalid UTF-8 is found, then it is
/// represented as arbitrary bytes and base64 encoded.
#[derive(Clone, Debug, Hash, PartialEq, Eq, Serialize)]
#[serde(untagged)]
enum Data<'a> {
    // Valid UTF-8, serialized as `{"text": "..."}`.
    Text {
        text: Cow<'a, str>,
    },
    // Anything else, serialized as `{"bytes": "<base64>"}`.
    Bytes {
        #[serde(serialize_with = "to_base64")]
        bytes: &'a [u8],
    },
}

impl<'a> Data<'a> {
    /// Wrap raw bytes, borrowing them as text when they are valid UTF-8.
    fn from_bytes(bytes: &[u8]) -> Data<'_> {
        match str::from_utf8(bytes) {
            Ok(text) => Data::Text { text: Cow::Borrowed(text) },
            Err(_) => Data::Bytes { bytes },
        }
    }

    /// Wrap a file path, falling back to its raw bytes when it is not valid
    /// UTF-8.
    #[cfg(unix)]
    fn from_path(path: &Path) -> Data<'_> {
        use std::os::unix::ffi::OsStrExt;

        match path.to_str() {
            Some(text) => Data::Text { text: Cow::Borrowed(text) },
            None => Data::Bytes { bytes: path.as_os_str().as_bytes() },
        }
    }

    /// Wrap a file path using a lossy conversion to text.
    #[cfg(not(unix))]
    fn from_path(path: &Path) -> Data<'_> {
        // Using lossy conversion means some paths won't round trip precisely,
        // but it's not clear what we should actually do. Serde rejects
        // non-UTF-8 paths, and OsStr's are serialized as a sequence of UTF-16
        // code units on Windows. Neither seem appropriate for this use case,
        // so we do the easy thing for now.
        Data::Text { text: path.to_string_lossy() }
    }
}

/// Serialize the given bytes as a base64 encoded JSON string.
fn to_base64<T, S>(bytes: T, ser: S) -> Result<S::Ok, S::Error>
where
    T: AsRef<[u8]>,
    S: Serializer,
{
    ser.serialize_str(&base64::encode(&bytes))
}

/// Serialize the given bytes as a `Data` object (`text` or `bytes`).
fn ser_bytes<T, S>(bytes: T, ser: S) -> Result<S::Ok, S::Error>
where
    T: AsRef<[u8]>,
    S: Serializer,
{
    Data::from_bytes(bytes.as_ref()).serialize(ser)
}

/// Serialize an optional path as an optional `Data` object.
fn ser_path<P, S>(path: &Option<P>, ser: S) -> Result<S::Ok, S::Error>
where
    P: AsRef<Path>,
    S: Serializer,
{
    path.as_ref().map(|p| Data::from_path(p.as_ref())).serialize(ser)
}
grep-printer-0.1.6/src/lib.rs000064400000000000000000000066740072674642500142160ustar 00000000000000/*! This crate provides featureful and fast printers that interoperate with the [`grep-searcher`](https://docs.rs/grep-searcher) crate. # Brief overview The [`Standard`](struct.Standard.html) printer shows results in a human readable format, and is modeled after the formats used by standard grep-like tools. Features include, but are not limited to, cross platform terminal coloring, search & replace, multi-line result handling and reporting summary statistics. The [`JSON`](struct.JSON.html) printer shows results in a machine readable format. To facilitate a stream of search results, the format uses [JSON Lines](https://jsonlines.org/) by emitting a series of messages as search results are found. The [`Summary`](struct.Summary.html) printer shows *aggregate* results for a single search in a human readable format, and is modeled after similar formats found in standard grep-like tools. This printer is useful for showing the total number of matches and/or printing file paths that either contain or don't contain matches. # Example This example shows how to create a "standard" printer and execute a search. ``` use std::error::Error; use grep_regex::RegexMatcher; use grep_printer::Standard; use grep_searcher::Searcher; const SHERLOCK: &'static [u8] = b"\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always be, to a very large extent, the result of luck. Sherlock Holmes can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached.
"; # fn main() { example().unwrap(); } fn example() -> Result<(), Box> { let matcher = RegexMatcher::new(r"Sherlock")?; let mut printer = Standard::new_no_color(vec![]); Searcher::new().search_slice(&matcher, SHERLOCK, printer.sink(&matcher))?; // into_inner gives us back the underlying writer we provided to // new_no_color, which is wrapped in a termcolor::NoColor. Thus, a second // into_inner gives us back the actual buffer. let output = String::from_utf8(printer.into_inner().into_inner())?; let expected = "\ 1:For the Doctor Watsons of this world, as opposed to the Sherlock 3:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(output, expected); Ok(()) } ``` */ #![deny(missing_docs)] pub use crate::color::{ default_color_specs, ColorError, ColorSpecs, UserColorSpec, }; #[cfg(feature = "serde1")] pub use crate::json::{JSONBuilder, JSONSink, JSON}; pub use crate::standard::{Standard, StandardBuilder, StandardSink}; pub use crate::stats::Stats; pub use crate::summary::{Summary, SummaryBuilder, SummaryKind, SummarySink}; pub use crate::util::PrinterPath; // The maximum number of bytes to execute a search to account for look-ahead. // // This is an unfortunate kludge since PCRE2 doesn't provide a way to search // a substring of some input while accounting for look-ahead. In theory, we // could refactor the various 'grep' interfaces to account for it, but it would // be a large change. So for now, we just let PCRE2 go looking a bit for a // match without searching the entire rest of the contents. // // Note that this kludge is only active in multi-line mode. const MAX_LOOK_AHEAD: usize = 128; #[macro_use] mod macros; mod color; mod counter; #[cfg(feature = "serde1")] mod json; #[cfg(feature = "serde1")] mod jsont; mod standard; mod stats; mod summary; mod util; grep-printer-0.1.6/src/macros.rs000064400000000000000000000012360072674642500147210ustar 00000000000000/// Like assert_eq, but nicer output for long strings. 
#[cfg(test)] #[macro_export] macro_rules! assert_eq_printed { ($expected:expr, $got:expr) => { let expected = &*$expected; let got = &*$got; if expected != got { panic!(" printed outputs differ! expected: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ {} ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ got: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ {} ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ", expected, got); } } } grep-printer-0.1.6/src/standard.rs000064400000000000000000003663210072674642500152460ustar 00000000000000use std::cell::{Cell, RefCell}; use std::cmp; use std::io::{self, Write}; use std::path::Path; use std::sync::Arc; use std::time::Instant; use bstr::ByteSlice; use grep_matcher::{Match, Matcher}; use grep_searcher::{ LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkFinish, SinkMatch, }; use termcolor::{ColorSpec, NoColor, WriteColor}; use crate::color::ColorSpecs; use crate::counter::CounterWriter; use crate::stats::Stats; use crate::util::{ find_iter_at_in_context, trim_ascii_prefix, trim_line_terminator, PrinterPath, Replacer, Sunk, }; /// The configuration for the standard printer. /// /// This is manipulated by the StandardBuilder and then referenced by the /// actual implementation. Once a printer is build, the configuration is frozen /// and cannot changed. 
#[derive(Debug, Clone)] struct Config { colors: ColorSpecs, stats: bool, heading: bool, path: bool, only_matching: bool, per_match: bool, per_match_one_line: bool, replacement: Arc>>, max_columns: Option, max_columns_preview: bool, max_matches: Option, column: bool, byte_offset: bool, trim_ascii: bool, separator_search: Arc>>, separator_context: Arc>>, separator_field_match: Arc>, separator_field_context: Arc>, separator_path: Option, path_terminator: Option, } impl Default for Config { fn default() -> Config { Config { colors: ColorSpecs::default(), stats: false, heading: false, path: true, only_matching: false, per_match: false, per_match_one_line: false, replacement: Arc::new(None), max_columns: None, max_columns_preview: false, max_matches: None, column: false, byte_offset: false, trim_ascii: false, separator_search: Arc::new(None), separator_context: Arc::new(Some(b"--".to_vec())), separator_field_match: Arc::new(b":".to_vec()), separator_field_context: Arc::new(b"-".to_vec()), separator_path: None, path_terminator: None, } } } /// A builder for the "standard" grep-like printer. /// /// The builder permits configuring how the printer behaves. Configurable /// behavior includes, but is not limited to, limiting the number of matches, /// tweaking separators, executing pattern replacements, recording statistics /// and setting colors. /// /// Some configuration options, such as the display of line numbers or /// contextual lines, are drawn directly from the /// `grep_searcher::Searcher`'s configuration. /// /// Once a `Standard` printer is built, its configuration cannot be changed. #[derive(Clone, Debug)] pub struct StandardBuilder { config: Config, } impl StandardBuilder { /// Return a new builder for configuring the standard printer. pub fn new() -> StandardBuilder { StandardBuilder { config: Config::default() } } /// Build a printer using any implementation of `termcolor::WriteColor`. 
/// /// The implementation of `WriteColor` used here controls whether colors /// are used or not when colors have been configured using the /// `color_specs` method. /// /// For maximum portability, callers should generally use either /// `termcolor::StandardStream` or `termcolor::BufferedStandardStream` /// where appropriate, which will automatically enable colors on Windows /// when possible. /// /// However, callers may also provide an arbitrary writer using the /// `termcolor::Ansi` or `termcolor::NoColor` wrappers, which always enable /// colors via ANSI escapes or always disable colors, respectively. /// /// As a convenience, callers may use `build_no_color` to automatically /// select the `termcolor::NoColor` wrapper to avoid needing to import /// from `termcolor` explicitly. pub fn build(&self, wtr: W) -> Standard { Standard { config: self.config.clone(), wtr: RefCell::new(CounterWriter::new(wtr)), matches: vec![], } } /// Build a printer from any implementation of `io::Write` and never emit /// any colors, regardless of the user color specification settings. /// /// This is a convenience routine for /// `StandardBuilder::build(termcolor::NoColor::new(wtr))`. pub fn build_no_color( &self, wtr: W, ) -> Standard> { self.build(NoColor::new(wtr)) } /// Set the user color specifications to use for coloring in this printer. /// /// A [`UserColorSpec`](struct.UserColorSpec.html) can be constructed from /// a string in accordance with the color specification format. See the /// `UserColorSpec` type documentation for more details on the format. /// A [`ColorSpecs`](struct.ColorSpecs.html) can then be generated from /// zero or more `UserColorSpec`s. /// /// Regardless of the color specifications provided here, whether color /// is actually used or not is determined by the implementation of /// `WriteColor` provided to `build`. 
For example, if `termcolor::NoColor` /// is provided to `build`, then no color will ever be printed regardless /// of the color specifications provided here. /// /// This completely overrides any previous color specifications. This does /// not add to any previously provided color specifications on this /// builder. pub fn color_specs(&mut self, specs: ColorSpecs) -> &mut StandardBuilder { self.config.colors = specs; self } /// Enable the gathering of various aggregate statistics. /// /// When this is enabled (it's disabled by default), statistics will be /// gathered for all uses of `Standard` printer returned by `build`, /// including but not limited to, the total number of matches, the total /// number of bytes searched and the total number of bytes printed. /// /// Aggregate statistics can be accessed via the sink's /// [`StandardSink::stats`](struct.StandardSink.html#method.stats) /// method. /// /// When this is enabled, this printer may need to do extra work in order /// to compute certain statistics, which could cause the search to take /// longer. /// /// For a complete description of available statistics, see /// [`Stats`](struct.Stats.html). pub fn stats(&mut self, yes: bool) -> &mut StandardBuilder { self.config.stats = yes; self } /// Enable the use of "headings" in the printer. /// /// When this is enabled, and if a file path has been given to the printer, /// then the file path will be printed once on its own line before showing /// any matches. If the heading is not the first thing emitted by the /// printer, then a line terminator is printed before the heading. /// /// By default, this option is disabled. When disabled, the printer will /// not show any heading and will instead print the file path (if one is /// given) on the same line as each matching (or context) line. 
pub fn heading(&mut self, yes: bool) -> &mut StandardBuilder { self.config.heading = yes; self } /// When enabled, if a path was given to the printer, then it is shown in /// the output (either as a heading or as a prefix to each matching line). /// When disabled, then no paths are ever included in the output even when /// a path is provided to the printer. /// /// This is enabled by default. pub fn path(&mut self, yes: bool) -> &mut StandardBuilder { self.config.path = yes; self } /// Only print the specific matches instead of the entire line containing /// each match. Each match is printed on its own line. When multi line /// search is enabled, then matches spanning multiple lines are printed /// such that only the matching portions of each line are shown. pub fn only_matching(&mut self, yes: bool) -> &mut StandardBuilder { self.config.only_matching = yes; self } /// Print at least one line for every match. /// /// This is similar to the `only_matching` option, except the entire line /// is printed for each match. This is typically useful in conjunction with /// the `column` option, which will show the starting column number for /// every match on every line. /// /// When multi-line mode is enabled, each match is printed, including every /// line in the match. As with single line matches, if a line contains /// multiple matches (even if only partially), then that line is printed /// once for each match it participates in, assuming it's the first line in /// that match. In multi-line mode, column numbers only indicate the start /// of a match. Subsequent lines in a multi-line match always have a column /// number of `1`. /// /// When a match contains multiple lines, enabling `per_match_one_line` /// will cause only the first line each in match to be printed. pub fn per_match(&mut self, yes: bool) -> &mut StandardBuilder { self.config.per_match = yes; self } /// Print at most one line per match when `per_match` is enabled. 
///
    /// By default, every line in each match found is printed when `per_match`
    /// is enabled. However, this is sometimes undesirable, e.g., when you
    /// only ever want one line per match.
    ///
    /// This is only applicable when multi-line matching is enabled, since
    /// otherwise, matches are guaranteed to span one line.
    ///
    /// This is disabled by default.
    pub fn per_match_one_line(&mut self, yes: bool) -> &mut StandardBuilder {
        self.config.per_match_one_line = yes;
        self
    }

    /// Set the bytes that will be used to replace each occurrence of a match
    /// found.
    ///
    /// The replacement bytes given may include references to capturing groups,
    /// which may either be in index form (e.g., `$2`) or can reference named
    /// capturing groups if present in the original pattern (e.g., `$foo`).
    ///
    /// Passing `None` disables replacement.
    ///
    /// For documentation on the full format, please see the `Captures` trait's
    /// `interpolate` method in the
    /// [grep-matcher](https://docs.rs/grep-matcher) crate.
    pub fn replacement(
        &mut self,
        replacement: Option<Vec<u8>>,
    ) -> &mut StandardBuilder {
        self.config.replacement = Arc::new(replacement);
        self
    }

    /// Set the maximum number of columns allowed for each line printed. A
    /// single column is heuristically defined as a single byte.
    ///
    /// If a line is found which exceeds this maximum, then it is replaced
    /// with a message indicating that the line has been omitted.
    ///
    /// The default is to not specify a limit, in which case each matching or
    /// contextual line is printed regardless of how long it is.
    pub fn max_columns(&mut self, limit: Option<u64>) -> &mut StandardBuilder {
        self.config.max_columns = limit;
        self
    }

    /// When enabled, if a line is found to be over the configured maximum
    /// column limit (measured in terms of bytes), then a preview of the long
    /// line will be printed instead.
    ///
    /// The preview will correspond to the first `N` *grapheme clusters* of
    /// the line, where `N` is the limit configured by `max_columns`.
    ///
    /// If no limit is set, then enabling this has no effect.
///
    /// This is disabled by default.
    pub fn max_columns_preview(&mut self, yes: bool) -> &mut StandardBuilder {
        self.config.max_columns_preview = yes;
        self
    }

    /// Set the maximum amount of matching lines that are printed.
    ///
    /// If multi line search is enabled and a match spans multiple lines, then
    /// that match is counted exactly once for the purposes of enforcing this
    /// limit, regardless of how many lines it spans.
    ///
    /// `None` means that no limit is enforced.
    pub fn max_matches(&mut self, limit: Option<u64>) -> &mut StandardBuilder {
        self.config.max_matches = limit;
        self
    }

    /// Print the column number of the first match in a line.
    ///
    /// This option is convenient for use with `per_match` which will print a
    /// line for every match along with the starting offset for that match.
    ///
    /// Column numbers are computed in terms of bytes from the start of the
    /// line being printed.
    ///
    /// This is disabled by default.
    pub fn column(&mut self, yes: bool) -> &mut StandardBuilder {
        self.config.column = yes;
        self
    }

    /// Print the absolute byte offset of the beginning of each line printed.
    ///
    /// The absolute byte offset starts from the beginning of each search and
    /// is zero based.
    ///
    /// If the `only_matching` option is set, then this will print the absolute
    /// byte offset of the beginning of each match.
    pub fn byte_offset(&mut self, yes: bool) -> &mut StandardBuilder {
        self.config.byte_offset = yes;
        self
    }

    /// When enabled, all lines will have prefix ASCII whitespace trimmed
    /// before being written.
    ///
    /// This is disabled by default.
    pub fn trim_ascii(&mut self, yes: bool) -> &mut StandardBuilder {
        self.config.trim_ascii = yes;
        self
    }

    /// Set the separator used between sets of search results.
    ///
    /// When this is set, then it will be printed on its own line immediately
    /// before the results for a single search if and only if a previous search
    /// had already printed results.
In effect, this permits showing a divider /// between sets of search results that does not appear at the beginning /// or end of all search results. /// /// To reproduce the classic grep format, this is typically set to `--` /// (the same as the context separator) if and only if contextual lines /// have been requested, but disabled otherwise. /// /// By default, this is disabled. pub fn separator_search( &mut self, sep: Option>, ) -> &mut StandardBuilder { self.config.separator_search = Arc::new(sep); self } /// Set the separator used between discontiguous runs of search context, /// but only when the searcher is configured to report contextual lines. /// /// The separator is always printed on its own line, even if it's empty. /// /// If no separator is set, then nothing is printed when a context break /// occurs. /// /// By default, this is set to `--`. pub fn separator_context( &mut self, sep: Option>, ) -> &mut StandardBuilder { self.config.separator_context = Arc::new(sep); self } /// Set the separator used between fields emitted for matching lines. /// /// For example, when the searcher has line numbers enabled, this printer /// will print the line number before each matching line. The bytes given /// here will be written after the line number but before the matching /// line. /// /// By default, this is set to `:`. pub fn separator_field_match( &mut self, sep: Vec, ) -> &mut StandardBuilder { self.config.separator_field_match = Arc::new(sep); self } /// Set the separator used between fields emitted for context lines. /// /// For example, when the searcher has line numbers enabled, this printer /// will print the line number before each context line. The bytes given /// here will be written after the line number but before the context /// line. /// /// By default, this is set to `-`. 
pub fn separator_field_context( &mut self, sep: Vec, ) -> &mut StandardBuilder { self.config.separator_field_context = Arc::new(sep); self } /// Set the path separator used when printing file paths. /// /// When a printer is configured with a file path, and when a match is /// found, that file path will be printed (either as a heading or as a /// prefix to each matching or contextual line, depending on other /// configuration settings). Typically, printing is done by emitting the /// file path as is. However, this setting provides the ability to use a /// different path separator from what the current environment has /// configured. /// /// A typical use for this option is to permit cygwin users on Windows to /// set the path separator to `/` instead of using the system default of /// `\`. pub fn separator_path(&mut self, sep: Option) -> &mut StandardBuilder { self.config.separator_path = sep; self } /// Set the path terminator used. /// /// The path terminator is a byte that is printed after every file path /// emitted by this printer. /// /// If no path terminator is set (the default), then paths are terminated /// by either new lines (for when `heading` is enabled) or the match or /// context field separators (e.g., `:` or `-`). pub fn path_terminator( &mut self, terminator: Option, ) -> &mut StandardBuilder { self.config.path_terminator = terminator; self } } /// The standard printer, which implements grep-like formatting, including /// color support. /// /// A default printer can be created with either of the `Standard::new` or /// `Standard::new_no_color` constructors. However, there are a considerable /// number of options that configure this printer's output. Those options can /// be configured using [`StandardBuilder`](struct.StandardBuilder.html). /// /// This type is generic over `W`, which represents any implementation /// of the `termcolor::WriteColor` trait. 
If colors are not desired, /// then the `new_no_color` constructor can be used, or, alternatively, /// the `termcolor::NoColor` adapter can be used to wrap any `io::Write` /// implementation without enabling any colors. #[derive(Debug)] pub struct Standard { config: Config, wtr: RefCell>, matches: Vec, } impl Standard { /// Return a standard printer with a default configuration that writes /// matches to the given writer. /// /// The writer should be an implementation of `termcolor::WriteColor` /// and not just a bare implementation of `io::Write`. To use a normal /// `io::Write` implementation (simultaneously sacrificing colors), use /// the `new_no_color` constructor. pub fn new(wtr: W) -> Standard { StandardBuilder::new().build(wtr) } } impl Standard> { /// Return a standard printer with a default configuration that writes /// matches to the given writer. /// /// The writer can be any implementation of `io::Write`. With this /// constructor, the printer will never emit colors. pub fn new_no_color(wtr: W) -> Standard> { StandardBuilder::new().build_no_color(wtr) } } impl Standard { /// Return an implementation of `Sink` for the standard printer. /// /// This does not associate the printer with a file path, which means this /// implementation will never print a file path along with the matches. pub fn sink<'s, M: Matcher>( &'s mut self, matcher: M, ) -> StandardSink<'static, 's, M, W> { let stats = if self.config.stats { Some(Stats::new()) } else { None }; let needs_match_granularity = self.needs_match_granularity(); StandardSink { matcher: matcher, standard: self, replacer: Replacer::new(), path: None, start_time: Instant::now(), match_count: 0, after_context_remaining: 0, binary_byte_offset: None, stats: stats, needs_match_granularity: needs_match_granularity, } } /// Return an implementation of `Sink` associated with a file path. 
/// /// When the printer is associated with a path, then it may, depending on /// its configuration, print the path along with the matches found. pub fn sink_with_path<'p, 's, M, P>( &'s mut self, matcher: M, path: &'p P, ) -> StandardSink<'p, 's, M, W> where M: Matcher, P: ?Sized + AsRef, { if !self.config.path { return self.sink(matcher); } let stats = if self.config.stats { Some(Stats::new()) } else { None }; let ppath = PrinterPath::with_separator( path.as_ref(), self.config.separator_path, ); let needs_match_granularity = self.needs_match_granularity(); StandardSink { matcher: matcher, standard: self, replacer: Replacer::new(), path: Some(ppath), start_time: Instant::now(), match_count: 0, after_context_remaining: 0, binary_byte_offset: None, stats: stats, needs_match_granularity: needs_match_granularity, } } /// Returns true if and only if the configuration of the printer requires /// us to find each individual match in the lines reported by the searcher. /// /// We care about this distinction because finding each individual match /// costs more, so we only do it when we need to. fn needs_match_granularity(&self) -> bool { let supports_color = self.wtr.borrow().supports_color(); let match_colored = !self.config.colors.matched().is_none(); // Coloring requires identifying each individual match. (supports_color && match_colored) // The column feature requires finding the position of the first match. || self.config.column // Requires finding each match for performing replacement. || self.config.replacement.is_some() // Emitting a line for each match requires finding each match. || self.config.per_match // Emitting only the match requires finding each match. || self.config.only_matching // Computing certain statistics requires finding each match. || self.config.stats } } impl Standard { /// Returns true if and only if this printer has written at least one byte /// to the underlying writer during any of the previous searches. 
pub fn has_written(&self) -> bool {
        self.wtr.borrow().total_count() > 0
    }

    /// Return a mutable reference to the underlying writer.
    pub fn get_mut(&mut self) -> &mut W {
        // Unwrap two layers: the `RefCell`, then the writer wrapper inside it.
        self.wtr.get_mut().get_mut()
    }

    /// Consume this printer and return back ownership of the underlying
    /// writer.
    pub fn into_inner(self) -> W {
        // Unwrap two layers: the `RefCell`, then the writer wrapper inside it.
        self.wtr.into_inner().into_inner()
    }
}

/// An implementation of `Sink` associated with a matcher and an optional file
/// path for the standard printer.
///
/// A `Sink` can be created via the
/// [`Standard::sink`](struct.Standard.html#method.sink)
/// or
/// [`Standard::sink_with_path`](struct.Standard.html#method.sink_with_path)
/// methods, depending on whether you want to include a file path in the
/// printer's output.
///
/// Building a `StandardSink` is cheap, and callers should create a new one
/// for each thing that is searched. After a search has completed, callers may
/// query this sink for information such as whether a match occurred or whether
/// binary data was found (and if so, the offset at which it occurred).
///
/// This type is generic over a few type parameters:
///
/// * `'p` refers to the lifetime of the file path, if one is provided. When
///   no file path is given, then this is `'static`.
/// * `'s` refers to the lifetime of the
///   [`Standard`](struct.Standard.html)
///   printer that this type borrows.
/// * `M` refers to the type of matcher used by
///   `grep_searcher::Searcher` that is reporting results to this sink.
/// * `W` refers to the underlying writer that this printer is writing its
///   output to.
#[derive(Debug)]
pub struct StandardSink<'p, 's, M: Matcher, W> {
    matcher: M,
    standard: &'s mut Standard<W>,
    replacer: Replacer<M>,
    path: Option<PrinterPath<'p>>,
    start_time: Instant,
    match_count: u64,
    after_context_remaining: u64,
    binary_byte_offset: Option<u64>,
    stats: Option<Stats>,
    needs_match_granularity: bool,
}

impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
    /// Returns true if and only if this printer received a match in the
    /// previous search.
    ///
    /// This is unaffected by the result of searches before the previous
    /// search on this sink.
    pub fn has_match(&self) -> bool {
        self.match_count > 0
    }

    /// Return the total number of matches reported to this sink.
    ///
    /// This corresponds to the number of times `Sink::matched` is called
    /// on the previous search.
    ///
    /// This is unaffected by the result of searches before the previous
    /// search on this sink.
    pub fn match_count(&self) -> u64 {
        self.match_count
    }

    /// If binary data was found in the previous search, this returns the
    /// offset at which the binary data was first detected.
    ///
    /// The offset returned is an absolute offset relative to the entire
    /// set of bytes searched.
    ///
    /// This is unaffected by the result of searches before the previous
    /// search. e.g., If the search prior to the previous search found binary
    /// data but the previous search found no binary data, then this will
    /// return `None`.
    pub fn binary_byte_offset(&self) -> Option<u64> {
        self.binary_byte_offset
    }

    /// Return a reference to the stats produced by the printer for all
    /// searches executed on this sink.
    ///
    /// This only returns stats if they were requested via the
    /// [`StandardBuilder`](struct.StandardBuilder.html)
    /// configuration.
    pub fn stats(&self) -> Option<&Stats> {
        self.stats.as_ref()
    }

    /// Execute the matcher over the given bytes and record the match
    /// locations if the current configuration demands match granularity.
fn record_matches( &mut self, searcher: &Searcher, bytes: &[u8], range: std::ops::Range, ) -> io::Result<()> { self.standard.matches.clear(); if !self.needs_match_granularity { return Ok(()); } // If printing requires knowing the location of each individual match, // then compute and stored those right now for use later. While this // adds an extra copy for storing the matches, we do amortize the // allocation for it and this greatly simplifies the printing logic to // the extent that it's easy to ensure that we never do more than // one search to find the matches (well, for replacements, we do one // additional search to perform the actual replacement). let matches = &mut self.standard.matches; find_iter_at_in_context( searcher, &self.matcher, bytes, range.clone(), |m| { let (s, e) = (m.start() - range.start, m.end() - range.start); matches.push(Match::new(s, e)); true }, )?; // Don't report empty matches appearing at the end of the bytes. if !matches.is_empty() && matches.last().unwrap().is_empty() && matches.last().unwrap().start() >= range.end { matches.pop().unwrap(); } Ok(()) } /// If the configuration specifies a replacement, then this executes the /// replacement, lazily allocating memory if necessary. /// /// To access the result of a replacement, use `replacer.replacement()`. fn replace( &mut self, searcher: &Searcher, bytes: &[u8], range: std::ops::Range, ) -> io::Result<()> { self.replacer.clear(); if self.standard.config.replacement.is_some() { let replacement = (*self.standard.config.replacement) .as_ref() .map(|r| &*r) .unwrap(); self.replacer.replace_all( searcher, &self.matcher, bytes, range, replacement, )?; } Ok(()) } /// Returns true if this printer should quit. /// /// This implements the logic for handling quitting after seeing a certain /// amount of matches. In most cases, the logic is simple, but we must /// permit all "after" contextual lines to print after reaching the limit. 
fn should_quit(&self) -> bool {
        let limit = match self.standard.config.max_matches {
            None => return false,
            Some(limit) => limit,
        };
        if self.match_count < limit {
            return false;
        }
        // The limit has been reached, but any outstanding "after" context
        // lines must still be printed before quitting.
        self.after_context_remaining == 0
    }

    /// Returns whether the current match count exceeds the configured limit.
    /// If there is no limit, then this always returns false.
    fn match_more_than_limit(&self) -> bool {
        let limit = match self.standard.config.max_matches {
            None => return false,
            Some(limit) => limit,
        };
        self.match_count > limit
    }
}

impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
    type Error = io::Error;

    /// Handle a match reported by the searcher: update counters, record
    /// individual matches and the replacement if needed, then print it.
    ///
    /// Returns `Ok(false)` when the search should stop.
    fn matched(
        &mut self,
        searcher: &Searcher,
        mat: &SinkMatch<'_>,
    ) -> Result<bool, io::Error> {
        self.match_count += 1;
        // When we've exceeded our match count, then the remaining context
        // lines should not be reset, but instead, decremented. This avoids a
        // bug where we display more matches than a configured limit. The main
        // idea here is that 'matched' might be called again while printing
        // an after-context line. In that case, we should treat this as a
        // contextual line rather than a matching line for the purposes of
        // termination.
        if self.match_more_than_limit() {
            self.after_context_remaining =
                self.after_context_remaining.saturating_sub(1);
        } else {
            self.after_context_remaining = searcher.after_context() as u64;
        }

        self.record_matches(
            searcher,
            mat.buffer(),
            mat.bytes_range_in_buffer(),
        )?;
        self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?;

        if let Some(ref mut stats) = self.stats {
            stats.add_matches(self.standard.matches.len() as u64);
            stats.add_matched_lines(mat.lines().count() as u64);
        }
        // Stop without printing once binary data has been seen while the
        // searcher has a binary conversion byte configured.
        if searcher.binary_detection().convert_byte().is_some()
            && self.binary_byte_offset.is_some()
        {
            return Ok(false);
        }

        StandardImpl::from_match(searcher, self, mat).sink()?;
        Ok(!self.should_quit())
    }

    /// Handle a contextual line reported by the searcher and print it.
    ///
    /// Returns `Ok(false)` when the search should stop.
    fn context(
        &mut self,
        searcher: &Searcher,
        ctx: &SinkContext<'_>,
    ) -> Result<bool, io::Error> {
        self.standard.matches.clear();
        self.replacer.clear();

        if ctx.kind() == &SinkContextKind::After {
            self.after_context_remaining =
                self.after_context_remaining.saturating_sub(1);
        }
        // With inverted matching, individual matches are looked up inside
        // the context lines themselves.
        if searcher.invert_match() {
            self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
            self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
        }
        if searcher.binary_detection().convert_byte().is_some()
            && self.binary_byte_offset.is_some()
        {
            return Ok(false);
        }

        StandardImpl::from_context(searcher, self, ctx).sink()?;
        Ok(!self.should_quit())
    }

    /// Write the context separator when a break between runs of context
    /// occurs. Always asks the searcher to continue.
    fn context_break(
        &mut self,
        searcher: &Searcher,
    ) -> Result<bool, io::Error> {
        StandardImpl::new(searcher, self).write_context_separator()?;
        Ok(true)
    }

    /// Remember the offset at which binary data was reported. Always asks
    /// the searcher to continue.
    fn binary_data(
        &mut self,
        _searcher: &Searcher,
        binary_byte_offset: u64,
    ) -> Result<bool, io::Error> {
        self.binary_byte_offset = Some(binary_byte_offset);
        Ok(true)
    }

    /// Reset all per-search state. Returns `Ok(false)` (i.e., do not search
    /// at all) when the configured match limit is zero.
    fn begin(&mut self, _searcher: &Searcher) -> Result<bool, io::Error> {
        self.standard.wtr.borrow_mut().reset_count();
        self.start_time = Instant::now();
        self.match_count = 0;
        self.after_context_remaining = 0;
        self.binary_byte_offset = None;
        Ok(self.standard.config.max_matches != Some(0))
    }

    /// Finish a search: emit the binary data message if binary data was
    /// found and fold this search's numbers into the aggregate statistics
    /// (when statistics are enabled).
    fn finish(
        &mut self,
        searcher: &Searcher,
        finish: &SinkFinish,
    ) -> Result<(), io::Error> {
        if let Some(offset) = self.binary_byte_offset {
            StandardImpl::new(searcher, self).write_binary_message(offset)?;
        }
        if let Some(stats) = self.stats.as_mut() {
            stats.add_elapsed(self.start_time.elapsed());
            stats.add_searches(1);
            if self.match_count > 0 {
                stats.add_searches_with_match(1);
            }
            stats.add_bytes_searched(finish.byte_count());
            stats.add_bytes_printed(self.standard.wtr.borrow().count());
        }
        Ok(())
    }
}

/// The actual implementation of the standard printer. This couples together
/// the searcher, the sink implementation and information about the match.
///
/// A StandardImpl is initialized every time a match or a contextual line is
/// reported.
#[derive(Debug)]
struct StandardImpl<'a, M: Matcher, W> {
    searcher: &'a Searcher,
    sink: &'a StandardSink<'a, 'a, M, W>,
    sunk: Sunk<'a>,
    /// Set to true if and only if we are writing a match with color.
    in_color_match: Cell<bool>,
}

impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
    /// Bundle self with a searcher and return the core implementation of Sink.
    fn new(
        searcher: &'a Searcher,
        sink: &'a StandardSink<'_, '_, M, W>,
    ) -> StandardImpl<'a, M, W> {
        StandardImpl {
            searcher,
            sink,
            sunk: Sunk::empty(),
            in_color_match: Cell::new(false),
        }
    }

    /// Bundle self with a searcher and return the core implementation of Sink
    /// for use with handling matching lines.
    fn from_match(
        searcher: &'a Searcher,
        sink: &'a StandardSink<'_, '_, M, W>,
        mat: &'a SinkMatch<'a>,
    ) -> StandardImpl<'a, M, W> {
        let sunk = Sunk::from_sink_match(
            mat,
            &sink.standard.matches,
            sink.replacer.replacement(),
        );
        StandardImpl { sunk, ..StandardImpl::new(searcher, sink) }
    }

    /// Bundle self with a searcher and return the core implementation of Sink
    /// for use with handling contextual lines.
fn from_context(
        searcher: &'a Searcher,
        sink: &'a StandardSink<'_, '_, M, W>,
        ctx: &'a SinkContext<'a>,
    ) -> StandardImpl<'a, M, W> {
        let sunk = Sunk::from_sink_context(
            ctx,
            &sink.standard.matches,
            sink.replacer.replacement(),
        );
        StandardImpl { sunk: sunk, ..StandardImpl::new(searcher, sink) }
    }

    /// Write the search prelude and then dispatch to one of four printing
    /// routines.
    ///
    /// The "fast" routines are used when no individual matches were recorded
    /// and the "slow" routines otherwise. The multi line variants are chosen
    /// only when `self.multi_line()` is true and this is not a context line.
    fn sink(&self) -> io::Result<()> {
        self.write_search_prelude()?;
        if self.sunk.matches().is_empty() {
            if self.multi_line() && !self.is_context() {
                self.sink_fast_multi_line()
            } else {
                self.sink_fast()
            }
        } else {
            if self.multi_line() && !self.is_context() {
                self.sink_slow_multi_line()
            } else {
                self.sink_slow()
            }
        }
    }

    /// Print matches (limited to one line) quickly by avoiding the detection
    /// of each individual match in the lines reported in the given
    /// `SinkMatch`.
    ///
    /// This should only be used when the configuration does not demand match
    /// granularity and the searcher is not in multi line mode.
    fn sink_fast(&self) -> io::Result<()> {
        debug_assert!(self.sunk.matches().is_empty());
        debug_assert!(!self.multi_line() || self.is_context());

        self.write_prelude(
            self.sunk.absolute_byte_offset(),
            self.sunk.line_number(),
            None,
        )?;
        self.write_line(self.sunk.bytes())
    }

    /// Print matches (possibly spanning more than one line) quickly by
    /// avoiding the detection of each individual match in the lines reported
    /// in the given `SinkMatch`.
    ///
    /// This should only be used when the configuration does not demand match
    /// granularity. This may be used when the searcher is in multi line mode.
    fn sink_fast_multi_line(&self) -> io::Result<()> {
        debug_assert!(self.sunk.matches().is_empty());
        // This isn't actually a required invariant for using this method,
        // but if we wind up here and multi line mode is disabled, then we
        // should still treat it as a bug since we should be using sink_fast
        // instead.
        debug_assert!(self.multi_line());

        let line_term = self.searcher.line_terminator().as_byte();
        let mut absolute_byte_offset = self.sunk.absolute_byte_offset();
        for (i, line) in self.sunk.lines(line_term).enumerate() {
            self.write_prelude(
                absolute_byte_offset,
                self.sunk.line_number().map(|n| n + i as u64),
                None,
            )?;
            // Advance the running offset by the length of the line just
            // handled so the next prelude reports the next line's offset.
            absolute_byte_offset += line.len() as u64;

            self.write_line(line)?;
        }
        Ok(())
    }

    /// Print a matching line where the configuration of the printer requires
    /// finding each individual match (e.g., for coloring).
    fn sink_slow(&self) -> io::Result<()> {
        debug_assert!(!self.sunk.matches().is_empty());
        debug_assert!(!self.multi_line() || self.is_context());

        if self.config().only_matching {
            // One output line per match, containing only the matched bytes.
            for &m in self.sunk.matches() {
                self.write_prelude(
                    self.sunk.absolute_byte_offset() + m.start() as u64,
                    self.sunk.line_number(),
                    Some(m.start() as u64 + 1),
                )?;

                let buf = &self.sunk.bytes()[m];
                self.write_colored_line(&[Match::new(0, buf.len())], buf)?;
            }
        } else if self.config().per_match {
            // One output line per match, each showing the whole line with
            // only that match passed along for coloring.
            for &m in self.sunk.matches() {
                self.write_prelude(
                    self.sunk.absolute_byte_offset() + m.start() as u64,
                    self.sunk.line_number(),
                    Some(m.start() as u64 + 1),
                )?;
                self.write_colored_line(&[m], self.sunk.bytes())?;
            }
        } else {
            // A single output line; the column comes from the first match.
            self.write_prelude(
                self.sunk.absolute_byte_offset(),
                self.sunk.line_number(),
                Some(self.sunk.matches()[0].start() as u64 + 1),
            )?;
            self.write_colored_line(self.sunk.matches(), self.sunk.bytes())?;
        }
        Ok(())
    }

    /// Print every line of the sunk bytes with individual matches passed
    /// along for coloring, for use in multi line mode.
    ///
    /// When `only_matching` or `per_match` is enabled, this defers to the
    /// dedicated routine for that mode. Note that the column given to the
    /// prelude of every line is derived from the first recorded match.
    fn sink_slow_multi_line(&self) -> io::Result<()> {
        debug_assert!(!self.sunk.matches().is_empty());
        debug_assert!(self.multi_line());

        if self.config().only_matching {
            return self.sink_slow_multi_line_only_matching();
        } else if self.config().per_match {
            return self.sink_slow_multi_per_match();
        }

        let line_term = self.searcher.line_terminator().as_byte();
        let bytes = self.sunk.bytes();
        let matches = self.sunk.matches();
        // `midx` is the index of the current match; it is shared across
        // lines so that a match spanning lines is resumed on the next line.
        let mut midx = 0;
        let mut count = 0;
        let mut stepper = LineStep::new(line_term, 0, bytes.len());
        while let Some((start, end)) = stepper.next(bytes) {
            let line = Match::new(start, end);
            self.write_prelude(
                self.sunk.absolute_byte_offset() + line.start() as u64,
                self.sunk.line_number().map(|n| n + count),
                Some(matches[0].start() as u64 + 1),
            )?;
            count += 1;
            if self.exceeds_max_columns(&bytes[line]) {
                self.write_exceeded_line(bytes, line, matches, &mut midx)?;
            } else {
                self.write_colored_matches(bytes, line, matches, &mut midx)?;
                self.write_line_term()?;
            }
        }
        Ok(())
    }

    /// Print only the matched portions of the sunk bytes in multi line mode.
    ///
    /// Each matched segment is written on its own output line; a match that
    /// spans several lines yields one output line per line it touches.
    fn sink_slow_multi_line_only_matching(&self) -> io::Result<()> {
        let line_term = self.searcher.line_terminator().as_byte();
        let spec = self.config().colors.matched();
        let bytes = self.sunk.bytes();
        let matches = self.sunk.matches();
        let mut midx = 0;
        let mut count = 0;
        let mut stepper = LineStep::new(line_term, 0, bytes.len());
        while let Some((start, end)) = stepper.next(bytes) {
            let mut line = Match::new(start, end);
            self.trim_line_terminator(bytes, &mut line);
            self.trim_ascii_prefix(bytes, &mut line);
            while !line.is_empty() {
                // The current match ends before the unprocessed part of this
                // line: move to the next match, or stop if there is none.
                if matches[midx].end() <= line.start() {
                    if midx + 1 < matches.len() {
                        midx += 1;
                        continue;
                    } else {
                        break;
                    }
                }
                let m = matches[midx];

                if line.start() < m.start() {
                    // Skip the unmatched bytes preceding the match.
                    let upto = cmp::min(line.end(), m.start());
                    line = line.with_start(upto);
                } else {
                    // Emit the part of the match that lies on this line.
                    let upto = cmp::min(line.end(), m.end());
                    self.write_prelude(
                        self.sunk.absolute_byte_offset() + m.start() as u64,
                        self.sunk.line_number().map(|n| n + count),
                        Some(m.start() as u64 + 1),
                    )?;

                    let this_line = line.with_end(upto);
                    line = line.with_start(upto);
                    if self.exceeds_max_columns(&bytes[this_line]) {
                        self.write_exceeded_line(
                            bytes, this_line, matches, &mut midx,
                        )?;
                    } else {
                        self.write_spec(spec, &bytes[this_line])?;
                        self.write_line_term()?;
                    }
                }
            }
            count += 1;
        }
        Ok(())
    }

    /// Print, for each recorded match, the lines that the match overlaps,
    /// applying the match color spec only to that match.
    ///
    /// When `per_match_one_line` is enabled, only the first overlapping line
    /// of each match is printed.
    fn sink_slow_multi_per_match(&self) -> io::Result<()> {
        let line_term = self.searcher.line_terminator().as_byte();
        let spec = self.config().colors.matched();
        let bytes = self.sunk.bytes();
        for &m in self.sunk.matches() {
            // Lines are re-scanned from the start for every match; `count`
            // tracks the index of the current line within the sunk bytes.
            let mut count = 0;
            let mut stepper = LineStep::new(line_term, 0, bytes.len());
            while let Some((start, end)) = stepper.next(bytes) {
                let mut line = Match::new(start, end);
                if line.start() >= m.end() {
                    // Past the match: nothing more to print for it.
                    break;
                } else if line.end() <= m.start() {
                    // Before the match: skip, but keep counting lines.
                    count += 1;
                    continue;
                }
                self.write_prelude(
                    self.sunk.absolute_byte_offset() + line.start() as u64,
                    self.sunk.line_number().map(|n| n + count),
                    Some(m.start().saturating_sub(line.start()) as u64 + 1),
                )?;
                count += 1;
                if self.exceeds_max_columns(&bytes[line]) {
                    self.write_exceeded_line(bytes, line, &[m], &mut 0)?;
                    continue;
                }
                self.trim_line_terminator(bytes, &mut line);
                self.trim_ascii_prefix(bytes, &mut line);

                while !line.is_empty() {
                    if m.end() <= line.start() {
                        // Remainder of the line after the match.
                        self.write(&bytes[line])?;
                        line = line.with_start(line.end());
                    } else if line.start() < m.start() {
                        // Bytes preceding the match.
                        let upto = cmp::min(line.end(), m.start());
                        self.write(&bytes[line.with_end(upto)])?;
                        line = line.with_start(upto);
                    } else {
                        // Bytes inside the match.
                        let upto = cmp::min(line.end(), m.end());
                        self.write_spec(spec, &bytes[line.with_end(upto)])?;
                        line = line.with_start(upto);
                    }
                }
                self.write_line_term()?;
                // It turns out that vimgrep really only wants one line per
                // match, even when a match spans multiple lines. So when
                // that option is enabled, we just quit after printing the
                // first line.
                //
                // See: https://github.com/BurntSushi/ripgrep/issues/1866
                if self.config().per_match_one_line {
                    break;
                }
            }
        }
        Ok(())
    }

    /// Write the beginning part of a matching line. This (may) include things
    /// like the file path, line number among others, depending on the
    /// configuration and the parameters given.
#[inline(always)] fn write_prelude( &self, absolute_byte_offset: u64, line_number: Option, column: Option, ) -> io::Result<()> { let sep = self.separator_field(); if !self.config().heading { self.write_path_field(sep)?; } if let Some(n) = line_number { self.write_line_number(n, sep)?; } if let Some(n) = column { if self.config().column { self.write_column_number(n, sep)?; } } if self.config().byte_offset { self.write_byte_offset(absolute_byte_offset, sep)?; } Ok(()) } #[inline(always)] fn write_line(&self, line: &[u8]) -> io::Result<()> { if self.exceeds_max_columns(line) { let range = Match::new(0, line.len()); self.write_exceeded_line( line, range, self.sunk.matches(), &mut 0, )?; } else { self.write_trim(line)?; if !self.has_line_terminator(line) { self.write_line_term()?; } } Ok(()) } fn write_colored_line( &self, matches: &[Match], bytes: &[u8], ) -> io::Result<()> { // If we know we aren't going to emit color, then we can go faster. let spec = self.config().colors.matched(); if !self.wtr().borrow().supports_color() || spec.is_none() { return self.write_line(bytes); } let line = Match::new(0, bytes.len()); if self.exceeds_max_columns(bytes) { self.write_exceeded_line(bytes, line, matches, &mut 0) } else { self.write_colored_matches(bytes, line, matches, &mut 0)?; self.write_line_term()?; Ok(()) } } /// Write the `line` portion of `bytes`, with appropriate coloring for /// each `match`, starting at `match_index`. /// /// This accounts for trimming any whitespace prefix and will *never* print /// a line terminator. If a match exceeds the range specified by `line`, /// then only the part of the match within `line` (if any) is printed. 
fn write_colored_matches(
    &self,
    bytes: &[u8],
    mut line: Match,
    matches: &[Match],
    match_index: &mut usize,
) -> io::Result<()> {
    self.trim_line_terminator(bytes, &mut line);
    self.trim_ascii_prefix(bytes, &mut line);
    if matches.is_empty() {
        self.write(&bytes[line])?;
        return Ok(());
    }
    // `line` shrinks from the front as pieces of it are written out.
    while !line.is_empty() {
        if matches[*match_index].end() <= line.start() {
            // The current match lies entirely before what is left of the
            // line: move on to the next match, or, if there is none, write
            // the remainder uncolored and stop.
            if *match_index + 1 < matches.len() {
                *match_index += 1;
                continue;
            } else {
                self.end_color_match()?;
                self.write(&bytes[line])?;
                break;
            }
        }

        let m = matches[*match_index];
        if line.start() < m.start() {
            // Uncolored text leading up to the match.
            let upto = cmp::min(line.end(), m.start());
            self.end_color_match()?;
            self.write(&bytes[line.with_end(upto)])?;
            line = line.with_start(upto);
        } else {
            // Inside the match: color up to the end of the match or the
            // end of the line, whichever comes first.
            let upto = cmp::min(line.end(), m.end());
            self.start_color_match()?;
            self.write(&bytes[line.with_end(upto)])?;
            line = line.with_start(upto);
        }
    }
    self.end_color_match()?;
    Ok(())
}

/// Write the replacement output for a line that exceeds the configured
/// maximum number of columns, followed by a line terminator.
///
/// When `max_columns_preview` is enabled, the first `max_columns` graphemes
/// of `line` are written (with match coloring), followed by a note saying
/// either that the end of the line was omitted or how many more matches
/// start in the omitted part. Otherwise only an "[Omitted long ...]"
/// placeholder is written.
fn write_exceeded_line(
    &self,
    bytes: &[u8],
    mut line: Match,
    matches: &[Match],
    match_index: &mut usize,
) -> io::Result<()> {
    if self.config().max_columns_preview {
        let original = line;
        // End offset of the last grapheme kept by the preview; `unwrap_or(0)`
        // yields an empty preview when no grapheme is taken.
        let end = bytes[line]
            .grapheme_indices()
            .map(|(_, end, _)| end)
            .take(self.config().max_columns.unwrap_or(0) as usize)
            .last()
            .unwrap_or(0)
            + line.start();
        line = line.with_end(end);
        self.write_colored_matches(bytes, line, matches, match_index)?;

        if matches.is_empty() {
            self.write(b" [... omitted end of long line]")?;
        } else {
            // Matches that start in the part of the line cut off by the
            // preview.
            let remaining = matches
                .iter()
                .filter(|m| {
                    m.start() >= line.end() && m.start() < original.end()
                })
                .count();
            let tense = if remaining == 1 { "match" } else { "matches" };
            write!(
                self.wtr().borrow_mut(),
                " [... {} more {}]",
                remaining,
                tense,
            )?;
        }
        self.write_line_term()?;
        return Ok(());
    }

    let original_matches = self.sunk.original_matches();
    if original_matches.is_empty() || self.config().only_matching {
        // No match count to report (or only-matching mode): use the generic
        // placeholder for the kind of line being printed.
        let placeholder: &[u8] = if self.is_context() {
            b"[Omitted long context line]"
        } else {
            b"[Omitted long matching line]"
        };
        self.write(placeholder)?;
    } else {
        write!(
            self.wtr().borrow_mut(),
            "[Omitted long line with {} matches]",
            original_matches.len(),
        )?;
    }
    self.write_line_term()?;
    Ok(())
}

/// If this printer has a file path associated with it, then this will
/// write that path to the underlying writer followed by a line terminator.
/// (If a path terminator is set, then that is used instead of the line
/// terminator.)
fn write_path_line(&self) -> io::Result<()> {
    if let Some(path) = self.path() {
        self.write_spec(self.config().colors.path(), path.as_bytes())?;
        if let Some(term) = self.config().path_terminator {
            self.write(&[term])?;
        } else {
            self.write_line_term()?;
        }
    }
    Ok(())
}

/// If this printer has a file path associated with it, then this will
/// write that path to the underlying writer followed by the given field
/// separator. (If a path terminator is set, then that is used instead of
/// the field separator.)
fn write_path_field(&self, field_separator: &[u8]) -> io::Result<()> { if let Some(path) = self.path() { self.write_spec(self.config().colors.path(), path.as_bytes())?; if let Some(term) = self.config().path_terminator { self.write(&[term])?; } else { self.write(field_separator)?; } } Ok(()) } fn write_search_prelude(&self) -> io::Result<()> { let this_search_written = self.wtr().borrow().count() > 0; if this_search_written { return Ok(()); } if let Some(ref sep) = *self.config().separator_search { let ever_written = self.wtr().borrow().total_count() > 0; if ever_written { self.write(sep)?; self.write_line_term()?; } } if self.config().heading { self.write_path_line()?; } Ok(()) } fn write_binary_message(&self, offset: u64) -> io::Result<()> { if self.sink.match_count == 0 { return Ok(()); } let bin = self.searcher.binary_detection(); if let Some(byte) = bin.quit_byte() { if let Some(path) = self.path() { self.write_spec(self.config().colors.path(), path.as_bytes())?; self.write(b": ")?; } let remainder = format!( "WARNING: stopped searching binary file after match \ (found {:?} byte around offset {})\n", [byte].as_bstr(), offset, ); self.write(remainder.as_bytes())?; } else if let Some(byte) = bin.convert_byte() { if let Some(path) = self.path() { self.write_spec(self.config().colors.path(), path.as_bytes())?; self.write(b": ")?; } let remainder = format!( "binary file matches (found {:?} byte around offset {})\n", [byte].as_bstr(), offset, ); self.write(remainder.as_bytes())?; } Ok(()) } fn write_context_separator(&self) -> io::Result<()> { if let Some(ref sep) = *self.config().separator_context { self.write(sep)?; self.write_line_term()?; } Ok(()) } fn write_line_number( &self, line_number: u64, field_separator: &[u8], ) -> io::Result<()> { let n = line_number.to_string(); self.write_spec(self.config().colors.line(), n.as_bytes())?; self.write(field_separator)?; Ok(()) } fn write_column_number( &self, column_number: u64, field_separator: &[u8], ) -> 
io::Result<()> { let n = column_number.to_string(); self.write_spec(self.config().colors.column(), n.as_bytes())?; self.write(field_separator)?; Ok(()) } fn write_byte_offset( &self, offset: u64, field_separator: &[u8], ) -> io::Result<()> { let n = offset.to_string(); self.write_spec(self.config().colors.column(), n.as_bytes())?; self.write(field_separator)?; Ok(()) } fn write_line_term(&self) -> io::Result<()> { self.write(self.searcher.line_terminator().as_bytes()) } fn write_spec(&self, spec: &ColorSpec, buf: &[u8]) -> io::Result<()> { let mut wtr = self.wtr().borrow_mut(); wtr.set_color(spec)?; wtr.write_all(buf)?; wtr.reset()?; Ok(()) } fn start_color_match(&self) -> io::Result<()> { if self.in_color_match.get() { return Ok(()); } self.wtr().borrow_mut().set_color(self.config().colors.matched())?; self.in_color_match.set(true); Ok(()) } fn end_color_match(&self) -> io::Result<()> { if !self.in_color_match.get() { return Ok(()); } self.wtr().borrow_mut().reset()?; self.in_color_match.set(false); Ok(()) } fn write_trim(&self, buf: &[u8]) -> io::Result<()> { if !self.config().trim_ascii { return self.write(buf); } let mut range = Match::new(0, buf.len()); self.trim_ascii_prefix(buf, &mut range); self.write(&buf[range]) } fn write(&self, buf: &[u8]) -> io::Result<()> { self.wtr().borrow_mut().write_all(buf) } fn trim_line_terminator(&self, buf: &[u8], line: &mut Match) { trim_line_terminator(&self.searcher, buf, line); } fn has_line_terminator(&self, buf: &[u8]) -> bool { self.searcher.line_terminator().is_suffix(buf) } fn is_context(&self) -> bool { self.sunk.context_kind().is_some() } /// Return the underlying configuration for this printer. fn config(&self) -> &'a Config { &self.sink.standard.config } /// Return the underlying writer that we are printing to. fn wtr(&self) -> &'a RefCell> { &self.sink.standard.wtr } /// Return the path associated with this printer, if one exists. 
fn path(&self) -> Option<&'a PrinterPath<'a>> { self.sink.path.as_ref() } /// Return the appropriate field separator based on whether we are emitting /// matching or contextual lines. fn separator_field(&self) -> &[u8] { if self.is_context() { &self.config().separator_field_context } else { &self.config().separator_field_match } } /// Returns true if and only if the given line exceeds the maximum number /// of columns set. If no maximum is set, then this always returns false. fn exceeds_max_columns(&self, line: &[u8]) -> bool { self.config().max_columns.map_or(false, |m| line.len() as u64 > m) } /// Returns true if and only if the searcher may report matches over /// multiple lines. /// /// Note that this doesn't just return whether the searcher is in multi /// line mode, but also checks if the mater can match over multiple lines. /// If it can't, then we don't need multi line handling, even if the /// searcher has multi line mode enabled. fn multi_line(&self) -> bool { self.searcher.multi_line_with_matcher(&self.sink.matcher) } /// Trim prefix ASCII spaces from the given slice and return the /// corresponding range. /// /// This stops trimming a prefix as soon as it sees non-whitespace or a /// line terminator. fn trim_ascii_prefix(&self, slice: &[u8], range: &mut Match) { if !self.config().trim_ascii { return; } let lineterm = self.searcher.line_terminator(); *range = trim_ascii_prefix(lineterm, slice, *range) } } #[cfg(test)] mod tests { use grep_matcher::LineTerminator; use grep_regex::{RegexMatcher, RegexMatcherBuilder}; use grep_searcher::SearcherBuilder; use termcolor::{Ansi, NoColor}; use super::{ColorSpecs, Standard, StandardBuilder}; const SHERLOCK: &'static str = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always be, to a very large extent, the result of luck. 
Sherlock Holmes can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached.\ "; #[allow(dead_code)] const SHERLOCK_CRLF: &'static str = "\ For the Doctor Watsons of this world, as opposed to the Sherlock\r Holmeses, success in the province of detective work must always\r be, to a very large extent, the result of luck. Sherlock Holmes\r can extract a clew from a wisp of straw or a flake of cigar ash;\r but Doctor Watson has to have it taken out for him and dusted,\r and exhibited clearly, with a label attached.\ "; fn printer_contents(printer: &mut Standard>>) -> String { String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap() } fn printer_contents_ansi(printer: &mut Standard>>) -> String { String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap() } #[test] fn reports_match() { let matcher = RegexMatcher::new("Sherlock").unwrap(); let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); let mut sink = printer.sink(&matcher); SearcherBuilder::new() .line_number(false) .build() .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) .unwrap(); assert!(sink.has_match()); let matcher = RegexMatcher::new("zzzzz").unwrap(); let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); let mut sink = printer.sink(&matcher); SearcherBuilder::new() .line_number(false) .build() .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) .unwrap(); assert!(!sink.has_match()); } #[test] fn reports_binary() { use grep_searcher::BinaryDetection; let matcher = RegexMatcher::new("Sherlock").unwrap(); let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); let mut sink = printer.sink(&matcher); SearcherBuilder::new() .line_number(false) .build() .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) .unwrap(); assert!(sink.binary_byte_offset().is_none()); let matcher = RegexMatcher::new(".+").unwrap(); let mut 
printer = StandardBuilder::new().build(NoColor::new(vec![])); let mut sink = printer.sink(&matcher); SearcherBuilder::new() .line_number(false) .binary_detection(BinaryDetection::quit(b'\x00')) .build() .search_reader(&matcher, &b"abc\x00"[..], &mut sink) .unwrap(); assert_eq!(sink.binary_byte_offset(), Some(3)); } #[test] fn reports_stats() { use std::time::Duration; let matcher = RegexMatcher::new("Sherlock|opposed").unwrap(); let mut printer = StandardBuilder::new().stats(true).build(NoColor::new(vec![])); let stats = { let mut sink = printer.sink(&matcher); SearcherBuilder::new() .line_number(false) .build() .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) .unwrap(); sink.stats().unwrap().clone() }; let buf = printer_contents(&mut printer); assert!(stats.elapsed() > Duration::default()); assert_eq!(stats.searches(), 1); assert_eq!(stats.searches_with_match(), 1); assert_eq!(stats.bytes_searched(), SHERLOCK.len() as u64); assert_eq!(stats.bytes_printed(), buf.len() as u64); assert_eq!(stats.matched_lines(), 2); assert_eq!(stats.matches(), 3); } #[test] fn reports_stats_multiple() { use std::time::Duration; let matcher = RegexMatcher::new("Sherlock|opposed").unwrap(); let mut printer = StandardBuilder::new().stats(true).build(NoColor::new(vec![])); let stats = { let mut sink = printer.sink(&matcher); SearcherBuilder::new() .line_number(false) .build() .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) .unwrap(); SearcherBuilder::new() .line_number(false) .build() .search_reader(&matcher, &b"zzzzzzzzzz"[..], &mut sink) .unwrap(); SearcherBuilder::new() .line_number(false) .build() .search_reader(&matcher, SHERLOCK.as_bytes(), &mut sink) .unwrap(); sink.stats().unwrap().clone() }; let buf = printer_contents(&mut printer); assert!(stats.elapsed() > Duration::default()); assert_eq!(stats.searches(), 3); assert_eq!(stats.searches_with_match(), 2); assert_eq!(stats.bytes_searched(), 10 + 2 * SHERLOCK.len() as u64); assert_eq!(stats.bytes_printed(), 
buf.len() as u64); assert_eq!(stats.matched_lines(), 4); assert_eq!(stats.matches(), 6); } #[test] fn context_break() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new() .separator_context(Some(b"--abc--".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .before_context(1) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always --abc-- can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached. "; assert_eq_printed!(expected, got); } #[test] fn context_break_multiple_no_heading() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new() .separator_search(Some(b"--xyz--".to_vec())) .separator_context(Some(b"--abc--".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .before_context(1) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); SearcherBuilder::new() .line_number(false) .before_context(1) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always --abc-- can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached. 
--xyz-- For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always --abc-- can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached. "; assert_eq_printed!(expected, got); } #[test] fn context_break_multiple_heading() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new() .heading(true) .separator_search(Some(b"--xyz--".to_vec())) .separator_context(Some(b"--abc--".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .before_context(1) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); SearcherBuilder::new() .line_number(false) .before_context(1) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always --abc-- can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached. --xyz-- For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always --abc-- can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached. 
"; assert_eq_printed!(expected, got); } #[test] fn path() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new().path(false).build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:For the Doctor Watsons of this world, as opposed to the Sherlock 5:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn separator_field() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new() .separator_field_match(b"!!".to_vec()) .separator_field_context(b"^^".to_vec()) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .before_context(1) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ sherlock!!For the Doctor Watsons of this world, as opposed to the Sherlock sherlock^^Holmeses, success in the province of detective work must always -- sherlock^^can extract a clew from a wisp of straw or a flake of cigar ash; sherlock!!but Doctor Watson has to have it taken out for him and dusted, sherlock^^and exhibited clearly, with a label attached. 
"; assert_eq_printed!(expected, got); } #[test] fn separator_path() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new() .separator_path(Some(b'Z')) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink_with_path(&matcher, "books/sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ booksZsherlock:For the Doctor Watsons of this world, as opposed to the Sherlock booksZsherlock:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn path_terminator() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new() .path_terminator(Some(b'Z')) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink_with_path(&matcher, "books/sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ books/sherlockZFor the Doctor Watsons of this world, as opposed to the Sherlock books/sherlockZbut Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn heading() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new().heading(true).build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ sherlock For the Doctor Watsons of this world, as opposed to the Sherlock but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn no_heading() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new().heading(false).build(NoColor::new(vec![])); SearcherBuilder::new() 
.line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock sherlock:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn no_heading_multiple() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new().heading(false).build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let matcher = RegexMatcher::new("Sherlock").unwrap(); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock sherlock:but Doctor Watson has to have it taken out for him and dusted, sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes "; assert_eq_printed!(expected, got); } #[test] fn heading_multiple() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new().heading(true).build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let matcher = RegexMatcher::new("Sherlock").unwrap(); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ sherlock For the Doctor Watsons of this world, as opposed to the Sherlock but Doctor Watson has to have it taken out for him and dusted, sherlock For the Doctor Watsons of this world, as opposed to the Sherlock be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq_printed!(expected, got); } #[test] fn trim_ascii() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new() .trim_ascii(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, " Watson".as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ Watson "; assert_eq_printed!(expected, got); } #[test] fn trim_ascii_multi_line() { let matcher = RegexMatcher::new("(?s:.{0})Watson").unwrap(); let mut printer = StandardBuilder::new() .trim_ascii(true) .stats(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) .build() .search_reader( &matcher, " Watson".as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ Watson "; assert_eq_printed!(expected, got); } #[test] fn trim_ascii_with_line_term() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new() .trim_ascii(true) 
.build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .before_context(1) .build() .search_reader( &matcher, "\n Watson".as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1- 2:Watson "; assert_eq_printed!(expected, got); } #[test] fn line_number() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:For the Doctor Watsons of this world, as opposed to the Sherlock 5:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn line_number_multi_line() { let matcher = RegexMatcher::new("(?s)Watson.+Watson").unwrap(); let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .multi_line(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:For the Doctor Watsons of this world, as opposed to the Sherlock 2:Holmeses, success in the province of detective work must always 3:be, to a very large extent, the result of luck. 
Sherlock Holmes 4:can extract a clew from a wisp of straw or a flake of cigar ash; 5:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn column_number() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new().column(true).build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 16:For the Doctor Watsons of this world, as opposed to the Sherlock 12:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn column_number_multi_line() { let matcher = RegexMatcher::new("(?s)Watson.+Watson").unwrap(); let mut printer = StandardBuilder::new().column(true).build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 16:For the Doctor Watsons of this world, as opposed to the Sherlock 16:Holmeses, success in the province of detective work must always 16:be, to a very large extent, the result of luck. 
Sherlock Holmes 16:can extract a clew from a wisp of straw or a flake of cigar ash; 16:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn byte_offset() { let matcher = RegexMatcher::new("Watson").unwrap(); let mut printer = StandardBuilder::new() .byte_offset(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 0:For the Doctor Watsons of this world, as opposed to the Sherlock 258:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn byte_offset_multi_line() { let matcher = RegexMatcher::new("(?s)Watson.+Watson").unwrap(); let mut printer = StandardBuilder::new() .byte_offset(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 0:For the Doctor Watsons of this world, as opposed to the Sherlock 65:Holmeses, success in the province of detective work must always 129:be, to a very large extent, the result of luck. 
Sherlock Holmes 193:can extract a clew from a wisp of straw or a flake of cigar ash; 258:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn max_columns() { let matcher = RegexMatcher::new("ash|dusted").unwrap(); let mut printer = StandardBuilder::new() .max_columns(Some(63)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ [Omitted long matching line] but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn max_columns_preview() { let matcher = RegexMatcher::new("exhibited|dusted").unwrap(); let mut printer = StandardBuilder::new() .max_columns(Some(46)) .max_columns_preview(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ but Doctor Watson has to have it taken out for [... omitted end of long line] and exhibited clearly, with a label attached. 
"; assert_eq_printed!(expected, got); } #[test] fn max_columns_with_count() { let matcher = RegexMatcher::new("cigar|ash|dusted").unwrap(); let mut printer = StandardBuilder::new() .stats(true) .max_columns(Some(63)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ [Omitted long line with 2 matches] but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn max_columns_with_count_preview_no_match() { let matcher = RegexMatcher::new("exhibited|has to have it").unwrap(); let mut printer = StandardBuilder::new() .stats(true) .max_columns(Some(46)) .max_columns_preview(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ but Doctor Watson has to have it taken out for [... 0 more matches] and exhibited clearly, with a label attached. "; assert_eq_printed!(expected, got); } #[test] fn max_columns_with_count_preview_one_match() { let matcher = RegexMatcher::new("exhibited|dusted").unwrap(); let mut printer = StandardBuilder::new() .stats(true) .max_columns(Some(46)) .max_columns_preview(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ but Doctor Watson has to have it taken out for [... 1 more match] and exhibited clearly, with a label attached. 
"; assert_eq_printed!(expected, got); } #[test] fn max_columns_with_count_preview_two_matches() { let matcher = RegexMatcher::new("exhibited|dusted|has to have it").unwrap(); let mut printer = StandardBuilder::new() .stats(true) .max_columns(Some(46)) .max_columns_preview(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ but Doctor Watson has to have it taken out for [... 1 more match] and exhibited clearly, with a label attached. "; assert_eq_printed!(expected, got); } #[test] fn max_columns_multi_line() { let matcher = RegexMatcher::new("(?s)ash.+dusted").unwrap(); let mut printer = StandardBuilder::new() .max_columns(Some(63)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ [Omitted long matching line] but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn max_columns_multi_line_preview() { let matcher = RegexMatcher::new("(?s)clew|cigar ash.+have it|exhibited") .unwrap(); let mut printer = StandardBuilder::new() .stats(true) .max_columns(Some(46)) .max_columns_preview(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ can extract a clew from a wisp of straw or a f [... 1 more match] but Doctor Watson has to have it taken out for [... 0 more matches] and exhibited clearly, with a label attached. 
"; assert_eq_printed!(expected, got); } #[test] fn max_matches() { let matcher = RegexMatcher::new("Sherlock").unwrap(); let mut printer = StandardBuilder::new() .max_matches(Some(1)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ For the Doctor Watsons of this world, as opposed to the Sherlock "; assert_eq_printed!(expected, got); } #[test] fn max_matches_context() { // after context: 1 let matcher = RegexMatcher::new("Doctor Watsons").unwrap(); let mut printer = StandardBuilder::new() .max_matches(Some(1)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always "; assert_eq_printed!(expected, got); // after context: 4 let mut printer = StandardBuilder::new() .max_matches(Some(1)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .after_context(4) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always be, to a very large extent, the result of luck. 
Sherlock Holmes can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); // after context: 1, max matches: 2 let matcher = RegexMatcher::new("Doctor Watsons|but Doctor").unwrap(); let mut printer = StandardBuilder::new() .max_matches(Some(2)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always -- but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached. "; assert_eq_printed!(expected, got); // after context: 4, max matches: 2 let mut printer = StandardBuilder::new() .max_matches(Some(2)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .after_context(4) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always be, to a very large extent, the result of luck. Sherlock Holmes can extract a clew from a wisp of straw or a flake of cigar ash; but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached. 
"; assert_eq_printed!(expected, got); } #[test] fn max_matches_multi_line1() { let matcher = RegexMatcher::new("(?s:.{0})Sherlock").unwrap(); let mut printer = StandardBuilder::new() .max_matches(Some(1)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ For the Doctor Watsons of this world, as opposed to the Sherlock "; assert_eq_printed!(expected, got); } #[test] fn max_matches_multi_line2() { let matcher = RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap(); let mut printer = StandardBuilder::new() .max_matches(Some(1)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .multi_line(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ For the Doctor Watsons of this world, as opposed to the Sherlock Holmeses, success in the province of detective work must always "; assert_eq_printed!(expected, got); } #[test] fn only_matching() { let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:9:Doctor Watsons 1:57:Sherlock 3:49:Sherlock "; assert_eq_printed!(expected, got); } #[test] fn only_matching_multi_line1() { let matcher = RegexMatcher::new(r"(?s:.{0})(Doctor Watsons|Sherlock)").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), 
printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:9:Doctor Watsons 1:57:Sherlock 3:49:Sherlock "; assert_eq_printed!(expected, got); } #[test] fn only_matching_multi_line2() { let matcher = RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:16:Watsons of this world, as opposed to the Sherlock 2:16:Holmeses 5:12:Watson has to have it taken out for him and dusted, 6:12:and exhibited clearly "; assert_eq_printed!(expected, got); } #[test] fn only_matching_max_columns() { let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .max_columns(Some(10)) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:9:[Omitted long matching line] 1:57:Sherlock 3:49:Sherlock "; assert_eq_printed!(expected, got); } #[test] fn only_matching_max_columns_preview() { let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .max_columns(Some(10)) .max_columns_preview(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:9:Doctor Wat [... 
0 more matches] 1:57:Sherlock 3:49:Sherlock "; assert_eq_printed!(expected, got); } #[test] fn only_matching_max_columns_multi_line1() { // The `(?s:.{0})` trick fools the matcher into thinking that it // can match across multiple lines without actually doing so. This is // so we can test multi-line handling in the case of a match on only // one line. let matcher = RegexMatcher::new(r"(?s:.{0})(Doctor Watsons|Sherlock)").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .max_columns(Some(10)) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:9:[Omitted long matching line] 1:57:Sherlock 3:49:Sherlock "; assert_eq_printed!(expected, got); } #[test] fn only_matching_max_columns_preview_multi_line1() { // The `(?s:.{0})` trick fools the matcher into thinking that it // can match across multiple lines without actually doing so. This is // so we can test multi-line handling in the case of a match on only // one line. let matcher = RegexMatcher::new(r"(?s:.{0})(Doctor Watsons|Sherlock)").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .max_columns(Some(10)) .max_columns_preview(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:9:Doctor Wat [... 
0 more matches] 1:57:Sherlock 3:49:Sherlock "; assert_eq_printed!(expected, got); } #[test] fn only_matching_max_columns_multi_line2() { let matcher = RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .max_columns(Some(50)) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:16:Watsons of this world, as opposed to the Sherlock 2:16:Holmeses 5:12:[Omitted long matching line] 6:12:and exhibited clearly "; assert_eq_printed!(expected, got); } #[test] fn only_matching_max_columns_preview_multi_line2() { let matcher = RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .max_columns(Some(50)) .max_columns_preview(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:16:Watsons of this world, as opposed to the Sherlock 2:16:Holmeses 5:12:Watson has to have it taken out for him and dusted [... 
0 more matches] 6:12:and exhibited clearly "; assert_eq_printed!(expected, got); } #[test] fn per_match() { let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap(); let mut printer = StandardBuilder::new() .per_match(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:9:For the Doctor Watsons of this world, as opposed to the Sherlock 1:57:For the Doctor Watsons of this world, as opposed to the Sherlock 3:49:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq_printed!(expected, got); } #[test] fn per_match_multi_line1() { let matcher = RegexMatcher::new(r"(?s:.{0})(Doctor Watsons|Sherlock)").unwrap(); let mut printer = StandardBuilder::new() .per_match(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:9:For the Doctor Watsons of this world, as opposed to the Sherlock 1:57:For the Doctor Watsons of this world, as opposed to the Sherlock 3:49:be, to a very large extent, the result of luck. 
Sherlock Holmes "; assert_eq_printed!(expected, got); } #[test] fn per_match_multi_line2() { let matcher = RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap(); let mut printer = StandardBuilder::new() .per_match(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:16:For the Doctor Watsons of this world, as opposed to the Sherlock 2:1:Holmeses, success in the province of detective work must always 5:12:but Doctor Watson has to have it taken out for him and dusted, 6:1:and exhibited clearly, with a label attached. "; assert_eq_printed!(expected, got); } #[test] fn per_match_multi_line3() { let matcher = RegexMatcher::new(r"(?s)Watson.+?Holmeses|always.+?be").unwrap(); let mut printer = StandardBuilder::new() .per_match(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:16:For the Doctor Watsons of this world, as opposed to the Sherlock 2:1:Holmeses, success in the province of detective work must always 2:58:Holmeses, success in the province of detective work must always 3:1:be, to a very large extent, the result of luck. 
Sherlock Holmes "; assert_eq_printed!(expected, got); } #[test] fn per_match_multi_line1_only_first_line() { let matcher = RegexMatcher::new(r"(?s:.{0})(Doctor Watsons|Sherlock)").unwrap(); let mut printer = StandardBuilder::new() .per_match(true) .per_match_one_line(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:9:For the Doctor Watsons of this world, as opposed to the Sherlock 1:57:For the Doctor Watsons of this world, as opposed to the Sherlock 3:49:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq_printed!(expected, got); } #[test] fn per_match_multi_line2_only_first_line() { let matcher = RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap(); let mut printer = StandardBuilder::new() .per_match(true) .per_match_one_line(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:16:For the Doctor Watsons of this world, as opposed to the Sherlock 5:12:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn per_match_multi_line3_only_first_line() { let matcher = RegexMatcher::new(r"(?s)Watson.+?Holmeses|always.+?be").unwrap(); let mut printer = StandardBuilder::new() .per_match(true) .per_match_one_line(true) .column(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:16:For the Doctor Watsons of this world, as opposed to the Sherlock 2:58:Holmeses, success in 
the province of detective work must always "; assert_eq_printed!(expected, got); } #[test] fn replacement_passthru() { let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); let mut printer = StandardBuilder::new() .replacement(Some(b"doctah $1 MD".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .passthru(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:For the doctah Watsons MD of this world, as opposed to the doctah MD 2-Holmeses, success in the province of detective work must always 3:be, to a very large extent, the result of luck. doctah MD Holmes 4-can extract a clew from a wisp of straw or a flake of cigar ash; 5:but doctah Watson MD has to have it taken out for him and dusted, 6-and exhibited clearly, with a label attached. "; assert_eq_printed!(expected, got); } #[test] fn replacement() { let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); let mut printer = StandardBuilder::new() .replacement(Some(b"doctah $1 MD".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:For the doctah Watsons MD of this world, as opposed to the doctah MD 3:be, to a very large extent, the result of luck. doctah MD Holmes 5:but doctah Watson MD has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } // This is a somewhat weird test that checks the behavior of attempting // to replace a line terminator with something else. 
// // See: https://github.com/BurntSushi/ripgrep/issues/1311 #[test] fn replacement_multi_line() { let matcher = RegexMatcher::new(r"\n").unwrap(); let mut printer = StandardBuilder::new() .replacement(Some(b"?".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .multi_line(true) .build() .search_reader( &matcher, "hello\nworld\n".as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "1:hello?world?\n"; assert_eq_printed!(expected, got); } #[test] fn replacement_multi_line_diff_line_term() { let matcher = RegexMatcherBuilder::new() .line_terminator(Some(b'\x00')) .build(r"\n") .unwrap(); let mut printer = StandardBuilder::new() .replacement(Some(b"?".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_terminator(LineTerminator::byte(b'\x00')) .line_number(true) .multi_line(true) .build() .search_reader( &matcher, "hello\nworld\n".as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "1:hello?world?\x00"; assert_eq_printed!(expected, got); } #[test] fn replacement_multi_line_combine_lines() { let matcher = RegexMatcher::new(r"\n(.)?").unwrap(); let mut printer = StandardBuilder::new() .replacement(Some(b"?$1".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .multi_line(true) .build() .search_reader( &matcher, "hello\nworld\n".as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "1:hello?world?\n"; assert_eq_printed!(expected, got); } #[test] fn replacement_max_columns() { let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); let mut printer = StandardBuilder::new() .max_columns(Some(67)) .replacement(Some(b"doctah $1 MD".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = 
printer_contents(&mut printer); let expected = "\ 1:[Omitted long line with 2 matches] 3:be, to a very large extent, the result of luck. doctah MD Holmes 5:but doctah Watson MD has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn replacement_max_columns_preview1() { let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); let mut printer = StandardBuilder::new() .max_columns(Some(67)) .max_columns_preview(true) .replacement(Some(b"doctah $1 MD".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:For the doctah Watsons MD of this world, as opposed to the doctah [... 0 more matches] 3:be, to a very large extent, the result of luck. doctah MD Holmes 5:but doctah Watson MD has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn replacement_max_columns_preview2() { let matcher = RegexMatcher::new("exhibited|dusted|has to have it").unwrap(); let mut printer = StandardBuilder::new() .max_columns(Some(43)) .max_columns_preview(true) .replacement(Some(b"xxx".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(false) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ but Doctor Watson xxx taken out for him and [... 1 more match] and xxx clearly, with a label attached. 
"; assert_eq_printed!(expected, got); } #[test] fn replacement_only_matching() { let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .replacement(Some(b"doctah $1 MD".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:doctah Watsons MD 1:doctah MD 3:doctah MD 5:doctah Watson MD "; assert_eq_printed!(expected, got); } #[test] fn replacement_per_match() { let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); let mut printer = StandardBuilder::new() .per_match(true) .replacement(Some(b"doctah $1 MD".to_vec())) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1:For the doctah Watsons MD of this world, as opposed to the doctah MD 1:For the doctah Watsons MD of this world, as opposed to the doctah MD 3:be, to a very large extent, the result of luck. doctah MD Holmes 5:but doctah Watson MD has to have it taken out for him and dusted, "; assert_eq_printed!(expected, got); } #[test] fn invert() { let matcher = RegexMatcher::new(r"Sherlock").unwrap(); let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .invert_match(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 2:Holmeses, success in the province of detective work must always 4:can extract a clew from a wisp of straw or a flake of cigar ash; 5:but Doctor Watson has to have it taken out for him and dusted, 6:and exhibited clearly, with a label attached. 
"; assert_eq_printed!(expected, got); } #[test] fn invert_multi_line() { let matcher = RegexMatcher::new(r"(?s:.{0})Sherlock").unwrap(); let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .invert_match(true) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 2:Holmeses, success in the province of detective work must always 4:can extract a clew from a wisp of straw or a flake of cigar ash; 5:but Doctor Watson has to have it taken out for him and dusted, 6:and exhibited clearly, with a label attached. "; assert_eq_printed!(expected, got); } #[test] fn invert_context() { let matcher = RegexMatcher::new(r"Sherlock").unwrap(); let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .invert_match(true) .before_context(1) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1-For the Doctor Watsons of this world, as opposed to the Sherlock 2:Holmeses, success in the province of detective work must always 3-be, to a very large extent, the result of luck. Sherlock Holmes 4:can extract a clew from a wisp of straw or a flake of cigar ash; 5:but Doctor Watson has to have it taken out for him and dusted, 6:and exhibited clearly, with a label attached. 
"; assert_eq_printed!(expected, got); } #[test] fn invert_context_multi_line() { let matcher = RegexMatcher::new(r"(?s:.{0})Sherlock").unwrap(); let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .invert_match(true) .before_context(1) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1-For the Doctor Watsons of this world, as opposed to the Sherlock 2:Holmeses, success in the province of detective work must always 3-be, to a very large extent, the result of luck. Sherlock Holmes 4:can extract a clew from a wisp of straw or a flake of cigar ash; 5:but Doctor Watson has to have it taken out for him and dusted, 6:and exhibited clearly, with a label attached. "; assert_eq_printed!(expected, got); } #[test] fn invert_context_only_matching() { let matcher = RegexMatcher::new(r"Sherlock").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .invert_match(true) .before_context(1) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1-Sherlock 2:Holmeses, success in the province of detective work must always 3-Sherlock 4:can extract a clew from a wisp of straw or a flake of cigar ash; 5:but Doctor Watson has to have it taken out for him and dusted, 6:and exhibited clearly, with a label attached. 
"; assert_eq_printed!(expected, got); } #[test] fn invert_context_only_matching_multi_line() { let matcher = RegexMatcher::new(r"(?s:.{0})Sherlock").unwrap(); let mut printer = StandardBuilder::new() .only_matching(true) .build(NoColor::new(vec![])); SearcherBuilder::new() .multi_line(true) .line_number(true) .invert_match(true) .before_context(1) .after_context(1) .build() .search_reader( &matcher, SHERLOCK.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "\ 1-Sherlock 2:Holmeses, success in the province of detective work must always 3-Sherlock 4:can extract a clew from a wisp of straw or a flake of cigar ash; 5:but Doctor Watson has to have it taken out for him and dusted, 6:and exhibited clearly, with a label attached. "; assert_eq_printed!(expected, got); } #[test] fn regression_search_empty_with_crlf() { let matcher = RegexMatcherBuilder::new().crlf(true).build(r"x?").unwrap(); let mut printer = StandardBuilder::new() .color_specs(ColorSpecs::default_with_color()) .build(Ansi::new(vec![])); SearcherBuilder::new() .line_terminator(LineTerminator::crlf()) .build() .search_reader(&matcher, &b"\n"[..], printer.sink(&matcher)) .unwrap(); let got = printer_contents_ansi(&mut printer); assert!(!got.is_empty()); } #[test] fn regression_after_context_with_match() { let haystack = "\ a b c d e d e d e d e "; let matcher = RegexMatcherBuilder::new().build(r"d").unwrap(); let mut printer = StandardBuilder::new() .max_matches(Some(1)) .build(NoColor::new(vec![])); SearcherBuilder::new() .line_number(true) .after_context(2) .build() .search_reader( &matcher, haystack.as_bytes(), printer.sink(&matcher), ) .unwrap(); let got = printer_contents(&mut printer); let expected = "4:d\n5-e\n6:d\n"; assert_eq_printed!(expected, got); } } grep-printer-0.1.6/src/stats.rs000064400000000000000000000077760072674642500146120ustar 00000000000000use std::ops::{Add, AddAssign}; use std::time::Duration; use crate::util::NiceDuration; /// 
Summary statistics produced at the end of a search. /// /// When statistics are reported by a printer, they correspond to all searches /// executed with that printer. #[derive(Clone, Debug, Default, PartialEq, Eq)] #[cfg_attr(feature = "serde1", derive(serde::Serialize))] pub struct Stats { elapsed: NiceDuration, searches: u64, searches_with_match: u64, bytes_searched: u64, bytes_printed: u64, matched_lines: u64, matches: u64, } impl Add for Stats { type Output = Stats; fn add(self, rhs: Stats) -> Stats { self + &rhs } } impl<'a> Add<&'a Stats> for Stats { type Output = Stats; fn add(self, rhs: &'a Stats) -> Stats { Stats { elapsed: NiceDuration(self.elapsed.0 + rhs.elapsed.0), searches: self.searches + rhs.searches, searches_with_match: self.searches_with_match + rhs.searches_with_match, bytes_searched: self.bytes_searched + rhs.bytes_searched, bytes_printed: self.bytes_printed + rhs.bytes_printed, matched_lines: self.matched_lines + rhs.matched_lines, matches: self.matches + rhs.matches, } } } impl AddAssign for Stats { fn add_assign(&mut self, rhs: Stats) { *self += &rhs; } } impl<'a> AddAssign<&'a Stats> for Stats { fn add_assign(&mut self, rhs: &'a Stats) { self.elapsed.0 += rhs.elapsed.0; self.searches += rhs.searches; self.searches_with_match += rhs.searches_with_match; self.bytes_searched += rhs.bytes_searched; self.bytes_printed += rhs.bytes_printed; self.matched_lines += rhs.matched_lines; self.matches += rhs.matches; } } impl Stats { /// Return a new value for tracking aggregate statistics across searches. /// /// All statistics are set to `0`. pub fn new() -> Stats { Stats::default() } /// Return the total amount of time elapsed. pub fn elapsed(&self) -> Duration { self.elapsed.0 } /// Return the total number of searches executed. pub fn searches(&self) -> u64 { self.searches } /// Return the total number of searches that found at least one match. 
pub fn searches_with_match(&self) -> u64 { self.searches_with_match } /// Return the total number of bytes searched. pub fn bytes_searched(&self) -> u64 { self.bytes_searched } /// Return the total number of bytes printed. pub fn bytes_printed(&self) -> u64 { self.bytes_printed } /// Return the total number of lines that participated in a match. /// /// When matches may contain multiple lines then this includes every line /// that is part of every match. pub fn matched_lines(&self) -> u64 { self.matched_lines } /// Return the total number of matches. /// /// There may be multiple matches per line. pub fn matches(&self) -> u64 { self.matches } /// Add to the elapsed time. pub fn add_elapsed(&mut self, duration: Duration) { self.elapsed.0 += duration; } /// Add to the number of searches executed. pub fn add_searches(&mut self, n: u64) { self.searches += n; } /// Add to the number of searches that found at least one match. pub fn add_searches_with_match(&mut self, n: u64) { self.searches_with_match += n; } /// Add to the total number of bytes searched. pub fn add_bytes_searched(&mut self, n: u64) { self.bytes_searched += n; } /// Add to the total number of bytes printed. pub fn add_bytes_printed(&mut self, n: u64) { self.bytes_printed += n; } /// Add to the total number of lines that participated in a match. pub fn add_matched_lines(&mut self, n: u64) { self.matched_lines += n; } /// Add to the total number of matches. 
pub fn add_matches(&mut self, n: u64) { self.matches += n; } } grep-printer-0.1.6/src/summary.rs000064400000000000000000001125730072674642500151410ustar 00000000000000use std::cell::RefCell; use std::io::{self, Write}; use std::path::Path; use std::sync::Arc; use std::time::Instant; use grep_matcher::Matcher; use grep_searcher::{Searcher, Sink, SinkError, SinkFinish, SinkMatch}; use termcolor::{ColorSpec, NoColor, WriteColor}; use crate::color::ColorSpecs; use crate::counter::CounterWriter; use crate::stats::Stats; use crate::util::{find_iter_at_in_context, PrinterPath}; /// The configuration for the summary printer. /// /// This is manipulated by the SummaryBuilder and then referenced by the actual /// implementation. Once a printer is build, the configuration is frozen and /// cannot changed. #[derive(Debug, Clone)] struct Config { kind: SummaryKind, colors: ColorSpecs, stats: bool, path: bool, max_matches: Option, exclude_zero: bool, separator_field: Arc>, separator_path: Option, path_terminator: Option, } impl Default for Config { fn default() -> Config { Config { kind: SummaryKind::Count, colors: ColorSpecs::default(), stats: false, path: true, max_matches: None, exclude_zero: true, separator_field: Arc::new(b":".to_vec()), separator_path: None, path_terminator: None, } } } /// The type of summary output (if any) to print. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum SummaryKind { /// Show only a count of the total number of matches (counting each line /// at most once) found. /// /// If the `path` setting is enabled, then the count is prefixed by the /// corresponding file path. Count, /// Show only a count of the total number of matches (counting possibly /// many matches on each line) found. /// /// If the `path` setting is enabled, then the count is prefixed by the /// corresponding file path. CountMatches, /// Show only the file path if and only if a match was found. /// /// This ignores the `path` setting and always shows the file path. 
If no /// file path is provided, then searching will immediately stop and return /// an error. PathWithMatch, /// Show only the file path if and only if a match was found. /// /// This ignores the `path` setting and always shows the file path. If no /// file path is provided, then searching will immediately stop and return /// an error. PathWithoutMatch, /// Don't show any output and the stop the search once a match is found. /// /// Note that if `stats` is enabled, then searching continues in order to /// compute statistics. Quiet, } impl SummaryKind { /// Returns true if and only if this output mode requires a file path. /// /// When an output mode requires a file path, then the summary printer /// will report an error at the start of every search that lacks a file /// path. fn requires_path(&self) -> bool { use self::SummaryKind::*; match *self { PathWithMatch | PathWithoutMatch => true, Count | CountMatches | Quiet => false, } } /// Returns true if and only if this output mode requires computing /// statistics, regardless of whether they have been enabled or not. fn requires_stats(&self) -> bool { use self::SummaryKind::*; match *self { CountMatches => true, Count | PathWithMatch | PathWithoutMatch | Quiet => false, } } /// Returns true if and only if a printer using this output mode can /// quit after seeing the first match. fn quit_early(&self) -> bool { use self::SummaryKind::*; match *self { PathWithMatch | Quiet => true, Count | CountMatches | PathWithoutMatch => false, } } } /// A builder for summary printer. /// /// The builder permits configuring how the printer behaves. The summary /// printer has fewer configuration options than the standard printer because /// it aims to produce aggregate output about a single search (typically just /// one line) instead of output for each match. /// /// Once a `Summary` printer is built, its configuration cannot be changed. 
#[derive(Clone, Debug)]
pub struct SummaryBuilder {
    config: Config,
}

impl SummaryBuilder {
    /// Return a new builder for configuring the summary printer.
    pub fn new() -> SummaryBuilder {
        SummaryBuilder { config: Config::default() }
    }

    /// Build a printer using any implementation of `termcolor::WriteColor`.
    ///
    /// The implementation of `WriteColor` used here controls whether colors
    /// are used or not when colors have been configured using the
    /// `color_specs` method.
    ///
    /// For maximum portability, callers should generally use either
    /// `termcolor::StandardStream` or `termcolor::BufferedStandardStream`
    /// where appropriate, which will automatically enable colors on Windows
    /// when possible.
    ///
    /// However, callers may also provide an arbitrary writer using the
    /// `termcolor::Ansi` or `termcolor::NoColor` wrappers, which always enable
    /// colors via ANSI escapes or always disable colors, respectively.
    ///
    /// As a convenience, callers may use `build_no_color` to automatically
    /// select the `termcolor::NoColor` wrapper to avoid needing to import
    /// from `termcolor` explicitly.
    pub fn build<W: WriteColor>(&self, wtr: W) -> Summary<W> {
        Summary {
            config: self.config.clone(),
            wtr: RefCell::new(CounterWriter::new(wtr)),
        }
    }

    /// Build a printer from any implementation of `io::Write` and never emit
    /// any colors, regardless of the user color specification settings.
    ///
    /// This is a convenience routine for
    /// `SummaryBuilder::build(termcolor::NoColor::new(wtr))`.
    pub fn build_no_color<W: io::Write>(
        &self,
        wtr: W,
    ) -> Summary<NoColor<W>> {
        self.build(NoColor::new(wtr))
    }

    /// Set the output mode for this printer.
    ///
    /// The output mode controls how aggregate results of a search are printed.
    ///
    /// By default, this printer uses the `Count` mode.
    pub fn kind(&mut self, kind: SummaryKind) -> &mut SummaryBuilder {
        self.config.kind = kind;
        self
    }

    /// Set the user color specifications to use for coloring in this printer.
    ///
    /// A [`UserColorSpec`](struct.UserColorSpec.html) can be constructed from
    /// a string in accordance with the color specification format. See the
    /// `UserColorSpec` type documentation for more details on the format.
    /// A [`ColorSpecs`](struct.ColorSpecs.html) can then be generated from
    /// zero or more `UserColorSpec`s.
    ///
    /// Regardless of the color specifications provided here, whether color
    /// is actually used or not is determined by the implementation of
    /// `WriteColor` provided to `build`. For example, if `termcolor::NoColor`
    /// is provided to `build`, then no color will ever be printed regardless
    /// of the color specifications provided here.
    ///
    /// This completely overrides any previous color specifications. This does
    /// not add to any previously provided color specifications on this
    /// builder.
    ///
    /// The default color specifications provide no styling.
    pub fn color_specs(&mut self, specs: ColorSpecs) -> &mut SummaryBuilder {
        self.config.colors = specs;
        self
    }

    /// Enable the gathering of various aggregate statistics.
    ///
    /// When this is enabled (it's disabled by default), statistics will be
    /// gathered for all uses of `Summary` printer returned by `build`,
    /// including but not limited to, the total number of matches, the total
    /// number of bytes searched and the total number of bytes printed.
    ///
    /// Aggregate statistics can be accessed via the sink's
    /// [`SummarySink::stats`](struct.SummarySink.html#method.stats)
    /// method.
    ///
    /// When this is enabled, this printer may need to do extra work in order
    /// to compute certain statistics, which could cause the search to take
    /// longer. For example, in `Quiet` mode, a search can quit after finding
    /// the first match, but if `stats` is enabled, then the search will
    /// continue after the first match in order to compute statistics.
    ///
    /// For a complete description of available statistics, see
    /// [`Stats`](struct.Stats.html).
    ///
    /// Note that some output modes, such as `CountMatches`, automatically
    /// enable this option even if it has been explicitly disabled.
    pub fn stats(&mut self, yes: bool) -> &mut SummaryBuilder {
        self.config.stats = yes;
        self
    }

    /// When enabled, if a path was given to the printer, then it is shown in
    /// the output (either as a heading or as a prefix to each matching line).
    /// When disabled, then no paths are ever included in the output even when
    /// a path is provided to the printer.
    ///
    /// This setting has no effect in `PathWithMatch` and `PathWithoutMatch`
    /// modes.
    ///
    /// This is enabled by default.
    pub fn path(&mut self, yes: bool) -> &mut SummaryBuilder {
        self.config.path = yes;
        self
    }

    /// Set the maximum amount of matches that are printed.
    ///
    /// If multi line search is enabled and a match spans multiple lines, then
    /// that match is counted exactly once for the purposes of enforcing this
    /// limit, regardless of how many lines it spans.
    ///
    /// This is disabled by default.
    pub fn max_matches(&mut self, limit: Option<u64>) -> &mut SummaryBuilder {
        self.config.max_matches = limit;
        self
    }

    /// Exclude count-related summary results with no matches.
    ///
    /// When enabled and the mode is either `Count` or `CountMatches`, then
    /// results are not printed if no matches were found. Otherwise, every
    /// search prints a result with a possibly `0` number of matches.
    ///
    /// This is enabled by default.
    pub fn exclude_zero(&mut self, yes: bool) -> &mut SummaryBuilder {
        self.config.exclude_zero = yes;
        self
    }

    /// Set the separator used between fields for the `Count` and
    /// `CountMatches` modes.
    ///
    /// By default, this is set to `:`.
    pub fn separator_field(&mut self, sep: Vec<u8>) -> &mut SummaryBuilder {
        self.config.separator_field = Arc::new(sep);
        self
    }

    /// Set the path separator used when printing file paths.
    ///
    /// Typically, printing is done by emitting the file path as is. However,
    /// this setting provides the ability to use a different path separator
    /// from what the current environment has configured.
    ///
    /// A typical use for this option is to permit cygwin users on Windows to
    /// set the path separator to `/` instead of using the system default of
    /// `\`.
    ///
    /// This is disabled by default.
    pub fn separator_path(&mut self, sep: Option<u8>) -> &mut SummaryBuilder {
        self.config.separator_path = sep;
        self
    }

    /// Set the path terminator used.
    ///
    /// The path terminator is a byte that is printed after every file path
    /// emitted by this printer.
    ///
    /// If no path terminator is set (the default), then paths are terminated
    /// by either new lines or the configured field separator.
    pub fn path_terminator(
        &mut self,
        terminator: Option<u8>,
    ) -> &mut SummaryBuilder {
        self.config.path_terminator = terminator;
        self
    }
}

/// The summary printer, which emits aggregate results from a search.
///
/// Aggregate results generally correspond to file paths and/or the number of
/// matches found.
///
/// A default printer can be created with either of the `Summary::new` or
/// `Summary::new_no_color` constructors. However, there are a number of
/// options that configure this printer's output. Those options can be
/// configured using [`SummaryBuilder`](struct.SummaryBuilder.html).
///
/// This type is generic over `W`, which represents any implementation of
/// the `termcolor::WriteColor` trait.
#[derive(Debug)]
pub struct Summary<W> {
    config: Config,
    wtr: RefCell<CounterWriter<W>>,
}

impl<W: WriteColor> Summary<W> {
    /// Return a summary printer with a default configuration that writes
    /// matches to the given writer.
    ///
    /// The writer should be an implementation of `termcolor::WriteColor`
    /// and not just a bare implementation of `io::Write`. To use a normal
    /// `io::Write` implementation (simultaneously sacrificing colors), use
    /// the `new_no_color` constructor.
    ///
    /// The default configuration uses the `Count` summary mode.
pub fn new(wtr: W) -> Summary<W> {
        SummaryBuilder::new().build(wtr)
    }
}

impl<W: io::Write> Summary<NoColor<W>> {
    /// Return a summary printer with a default configuration that writes
    /// matches to the given writer.
    ///
    /// The writer can be any implementation of `io::Write`. With this
    /// constructor, the printer will never emit colors.
    ///
    /// The default configuration uses the `Count` summary mode.
    pub fn new_no_color(wtr: W) -> Summary<NoColor<W>> {
        SummaryBuilder::new().build_no_color(wtr)
    }
}

impl<W: WriteColor> Summary<W> {
    /// Return an implementation of `Sink` for the summary printer.
    ///
    /// This does not associate the printer with a file path, which means this
    /// implementation will never print a file path. If the output mode of
    /// this summary printer does not make sense without a file path (such as
    /// `PathWithMatch` or `PathWithoutMatch`), then any searches executed
    /// using this sink will immediately quit with an error.
    pub fn sink<'s, M: Matcher>(
        &'s mut self,
        matcher: M,
    ) -> SummarySink<'static, 's, M, W> {
        // Some output modes need statistics even when the caller did not
        // ask for them.
        let stats = if self.config.stats || self.config.kind.requires_stats()
        {
            Some(Stats::new())
        } else {
            None
        };
        SummarySink {
            matcher: matcher,
            summary: self,
            path: None,
            start_time: Instant::now(),
            match_count: 0,
            binary_byte_offset: None,
            stats: stats,
        }
    }

    /// Return an implementation of `Sink` associated with a file path.
    ///
    /// When the printer is associated with a path, then it may, depending on
    /// its configuration, print the path.
    pub fn sink_with_path<'p, 's, M, P>(
        &'s mut self,
        matcher: M,
        path: &'p P,
    ) -> SummarySink<'p, 's, M, W>
    where
        M: Matcher,
        P: ?Sized + AsRef<Path>,
    {
        // When paths are disabled and the output mode does not demand one,
        // behave exactly like a sink that was never given a path.
        if !self.config.path && !self.config.kind.requires_path() {
            return self.sink(matcher);
        }
        let stats = if self.config.stats || self.config.kind.requires_stats()
        {
            Some(Stats::new())
        } else {
            None
        };
        let ppath = PrinterPath::with_separator(
            path.as_ref(),
            self.config.separator_path,
        );
        SummarySink {
            matcher: matcher,
            summary: self,
            path: Some(ppath),
            start_time: Instant::now(),
            match_count: 0,
            binary_byte_offset: None,
            stats: stats,
        }
    }
}

impl<W> Summary<W> {
    /// Returns true if and only if this printer has written at least one byte
    /// to the underlying writer during any of the previous searches.
    pub fn has_written(&self) -> bool {
        self.wtr.borrow().total_count() > 0
    }

    /// Return a mutable reference to the underlying writer.
    pub fn get_mut(&mut self) -> &mut W {
        self.wtr.get_mut().get_mut()
    }

    /// Consume this printer and return back ownership of the underlying
    /// writer.
    pub fn into_inner(self) -> W {
        self.wtr.into_inner().into_inner()
    }
}

/// An implementation of `Sink` associated with a matcher and an optional file
/// path for the summary printer.
///
/// This type is generic over a few type parameters:
///
/// * `'p` refers to the lifetime of the file path, if one is provided. When
///   no file path is given, then this is `'static`.
/// * `'s` refers to the lifetime of the
///   [`Summary`](struct.Summary.html)
///   printer that this type borrows.
/// * `M` refers to the type of matcher used by
///   `grep_searcher::Searcher` that is reporting results to this sink.
/// * `W` refers to the underlying writer that this printer is writing its
///   output to.
#[derive(Debug)]
pub struct SummarySink<'p, 's, M: Matcher, W> {
    matcher: M,
    summary: &'s mut Summary<W>,
    path: Option<PrinterPath<'p>>,
    start_time: Instant,
    match_count: u64,
    binary_byte_offset: Option<u64>,
    stats: Option<Stats>,
}

impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
    /// Returns true if and only if this printer received a match in the
    /// previous search.
    ///
    /// In `PathWithoutMatch` mode the sense is inverted: this returns true
    /// if and only if the previous search recorded no match.
    ///
    /// This is unaffected by the result of searches before the previous
    /// search.
    pub fn has_match(&self) -> bool {
        match self.summary.config.kind {
            SummaryKind::PathWithoutMatch => self.match_count == 0,
            _ => self.match_count > 0,
        }
    }

    /// If binary data was found in the previous search, this returns the
    /// offset at which the binary data was first detected.
    ///
    /// The offset returned is an absolute offset relative to the entire
    /// set of bytes searched.
    ///
    /// This is unaffected by the result of searches before the previous
    /// search. e.g., If the search prior to the previous search found binary
    /// data but the previous search found no binary data, then this will
    /// return `None`.
    pub fn binary_byte_offset(&self) -> Option<u64> {
        self.binary_byte_offset
    }

    /// Return a reference to the stats produced by the printer for all
    /// searches executed on this sink.
    ///
    /// This only returns stats if they were requested via the
    /// [`SummaryBuilder`](struct.SummaryBuilder.html)
    /// configuration.
    pub fn stats(&self) -> Option<&Stats> {
        self.stats.as_ref()
    }

    /// Returns true if and only if the searcher may report matches over
    /// multiple lines.
    ///
    /// Note that this doesn't just return whether the searcher is in multi
    /// line mode, but also checks if the matcher can match over multiple
    /// lines. If it can't, then we don't need multi line handling, even if
    /// the searcher has multi line mode enabled.
    fn multi_line(&self, searcher: &Searcher) -> bool {
        searcher.multi_line_with_matcher(&self.matcher)
    }

    /// Returns true if this printer should quit.
    ///
    /// This implements the logic for handling quitting after seeing a certain
    /// amount of matches: when a `max_matches` limit is configured, the
    /// printer should quit once the running match count has reached it. With
    /// no limit configured, this always returns false.
    fn should_quit(&self) -> bool {
        let limit = match self.summary.config.max_matches {
            None => return false,
            Some(limit) => limit,
        };
        self.match_count >= limit
    }

    /// If this printer has a file path associated with it, then this will
    /// write that path to the underlying writer followed by a line terminator.
    /// (If a path terminator is set, then that is used instead of the line
    /// terminator.)
    fn write_path_line(&self, searcher: &Searcher) -> io::Result<()> {
        if let Some(ref path) = self.path {
            self.write_spec(
                self.summary.config.colors.path(),
                path.as_bytes(),
            )?;
            if let Some(term) = self.summary.config.path_terminator {
                self.write(&[term])?;
            } else {
                self.write_line_term(searcher)?;
            }
        }
        Ok(())
    }

    /// If this printer has a file path associated with it, then this will
    /// write that path to the underlying writer followed by the field
    /// separator. (If a path terminator is set, then that is used instead of
    /// the field separator.)
    fn write_path_field(&self) -> io::Result<()> {
        if let Some(ref path) = self.path {
            self.write_spec(
                self.summary.config.colors.path(),
                path.as_bytes(),
            )?;
            if let Some(term) = self.summary.config.path_terminator {
                self.write(&[term])?;
            } else {
                self.write(&self.summary.config.separator_field)?;
            }
        }
        Ok(())
    }

    /// Write the line terminator configured on the given searcher.
    fn write_line_term(&self, searcher: &Searcher) -> io::Result<()> {
        self.write(searcher.line_terminator().as_bytes())
    }

    /// Write the given bytes using the given style.
    fn write_spec(&self, spec: &ColorSpec, buf: &[u8]) -> io::Result<()> {
        // Each `borrow_mut` is a separate statement so that the `RefCell`
        // borrow is released before `write` borrows the writer again.
        self.summary.wtr.borrow_mut().set_color(spec)?;
        self.write(buf)?;
        self.summary.wtr.borrow_mut().reset()?;
        Ok(())
    }

    /// Write all of the given bytes.
    fn write(&self, buf: &[u8]) -> io::Result<()> {
        self.summary.wtr.borrow_mut().write_all(buf)
    }
}

impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
    type Error = io::Error;

    /// Record a match reported by the searcher, returning whether the
    /// search should continue.
    fn matched(
        &mut self,
        searcher: &Searcher,
        mat: &SinkMatch<'_>,
    ) -> Result<bool, io::Error> {
        let is_multi_line = self.multi_line(searcher);
        // Individual matches only need to be counted when stats are being
        // gathered or when a single sink match may hold several matches.
        let sink_match_count = if self.stats.is_none() && !is_multi_line {
            1
        } else {
            // This gives us as many bytes as the searcher can offer. This
            // isn't guaranteed to hold the necessary context to get match
            // detection correct (because of look-around), but it does in
            // practice.
            let buf = mat.buffer();
            let range = mat.bytes_range_in_buffer();
            let mut count = 0;
            find_iter_at_in_context(
                searcher,
                &self.matcher,
                buf,
                range,
                |_| {
                    count += 1;
                    true
                },
            )?;
            count
        };
        if is_multi_line {
            self.match_count += sink_match_count;
        } else {
            self.match_count += 1;
        }
        if let Some(ref mut stats) = self.stats {
            stats.add_matches(sink_match_count);
            stats.add_matched_lines(mat.lines().count() as u64);
        } else if self.summary.config.kind.quit_early() {
            return Ok(false);
        }
        Ok(!self.should_quit())
    }

    /// Reset the per-search state, returning whether the search should run
    /// at all.
    fn begin(&mut self, _searcher: &Searcher) -> Result<bool, io::Error> {
        if self.path.is_none() && self.summary.config.kind.requires_path() {
            return Err(io::Error::error_message(format!(
                "output kind {:?} requires a file path",
                self.summary.config.kind,
            )));
        }
        self.summary.wtr.borrow_mut().reset_count();
        self.start_time = Instant::now();
        self.match_count = 0;
        self.binary_byte_offset = None;
        // A limit of zero matches means there is nothing to search for.
        if self.summary.config.max_matches == Some(0) {
            return Ok(false);
        }

        Ok(true)
    }

    /// Update statistics and print the summary for the search that just
    /// ended, according to the configured output mode.
    fn finish(
        &mut self,
        searcher: &Searcher,
        finish: &SinkFinish,
    ) -> Result<(), io::Error> {
        self.binary_byte_offset = finish.binary_byte_offset();
        if let Some(ref mut stats) = self.stats {
            stats.add_elapsed(self.start_time.elapsed());
            stats.add_searches(1);
            if self.match_count > 0 {
                stats.add_searches_with_match(1);
            }
            stats.add_bytes_searched(finish.byte_count());
            stats.add_bytes_printed(self.summary.wtr.borrow().count());
        }
        // If our binary detection method says to quit after seeing binary
        // data, then we shouldn't print any results at all, even if we've
        // found a match before detecting binary data. The intent here is to
        // keep BinaryDetection::quit as a form of filter. Otherwise, we can
        // present a matching file with a smaller number of matches than
        // there might be, which can be quite misleading.
        //
        // If our binary detection method is to convert binary data, then we
        // don't quit and therefore search the entire contents of the file.
        //
        // There is an unfortunate inconsistency here. Namely, when using
        // Quiet or PathWithMatch, then the printer can quit after the first
        // match seen, which could be long before seeing binary data. This
        // means that using PathWithMatch can print a path whereas using
        // Count might not print it at all because of binary data.
        //
        // It's not possible to fix this without also potentially significantly
        // impacting the performance of Quiet or PathWithMatch, so we accept
        // the bug.
        if self.binary_byte_offset.is_some()
            && searcher.binary_detection().quit_byte().is_some()
        {
            // Squash the match count. The statistics reported will still
            // contain the match count, but the "official" match count should
            // be zero.
            self.match_count = 0;
            return Ok(());
        }

        let show_count =
            !self.summary.config.exclude_zero || self.match_count > 0;
        match self.summary.config.kind {
            SummaryKind::Count => {
                if show_count {
                    self.write_path_field()?;
                    self.write(self.match_count.to_string().as_bytes())?;
                    self.write_line_term(searcher)?;
                }
            }
            SummaryKind::CountMatches => {
                if show_count {
                    let stats = self
                        .stats
                        .as_ref()
                        .expect("CountMatches should enable stats tracking");
                    self.write_path_field()?;
                    self.write(stats.matches().to_string().as_bytes())?;
                    self.write_line_term(searcher)?;
                }
            }
            SummaryKind::PathWithMatch => {
                if self.match_count > 0 {
                    self.write_path_line(searcher)?;
                }
            }
            SummaryKind::PathWithoutMatch => {
                if self.match_count == 0 {
                    self.write_path_line(searcher)?;
                }
            }
            SummaryKind::Quiet => {}
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use grep_regex::RegexMatcher;
    use grep_searcher::SearcherBuilder;
    use termcolor::NoColor;

    use super::{Summary, SummaryBuilder, SummaryKind};

    // The line breaks inside this fixture matter: the tests below count
    // matching lines, not just matches.
    const SHERLOCK: &'static [u8] = b"\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
"; fn printer_contents(printer: &mut Summary>>) -> String { String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap() } #[test] fn path_with_match_error() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::PathWithMatch) .build_no_color(vec![]); let res = SearcherBuilder::new().build().search_reader( &matcher, SHERLOCK, printer.sink(&matcher), ); assert!(res.is_err()); } #[test] fn path_without_match_error() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::PathWithoutMatch) .build_no_color(vec![]); let res = SearcherBuilder::new().build().search_reader( &matcher, SHERLOCK, printer.sink(&matcher), ); assert!(res.is_err()); } #[test] fn count_no_path() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::Count) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("2\n", got); } #[test] fn count_no_path_even_with_path() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::Count) .path(false) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("2\n", got); } #[test] fn count_path() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::Count) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("sherlock:2\n", got); } #[test] fn count_path_with_zero() { let matcher = RegexMatcher::new(r"NO MATCH").unwrap(); 
let mut printer = SummaryBuilder::new() .kind(SummaryKind::Count) .exclude_zero(false) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("sherlock:0\n", got); } #[test] fn count_path_without_zero() { let matcher = RegexMatcher::new(r"NO MATCH").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::Count) .exclude_zero(true) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("", got); } #[test] fn count_path_field_separator() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::Count) .separator_field(b"ZZ".to_vec()) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("sherlockZZ2\n", got); } #[test] fn count_path_terminator() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::Count) .path_terminator(Some(b'\x00')) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("sherlock\x002\n", got); } #[test] fn count_path_separator() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::Count) .separator_path(Some(b'\\')) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "/home/andrew/sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); 
assert_eq_printed!("\\home\\andrew\\sherlock:2\n", got); } #[test] fn count_max_matches() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::Count) .max_matches(Some(1)) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader(&matcher, SHERLOCK, printer.sink(&matcher)) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("1\n", got); } #[test] fn count_matches() { let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::CountMatches) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("sherlock:4\n", got); } #[test] fn path_with_match_found() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::PathWithMatch) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("sherlock\n", got); } #[test] fn path_with_match_not_found() { let matcher = RegexMatcher::new(r"ZZZZZZZZ").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::PathWithMatch) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("", got); } #[test] fn path_without_match_found() { let matcher = RegexMatcher::new(r"ZZZZZZZZZ").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::PathWithoutMatch) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); 
assert_eq_printed!("sherlock\n", got); } #[test] fn path_without_match_not_found() { let matcher = RegexMatcher::new(r"Watson").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::PathWithoutMatch) .build_no_color(vec![]); SearcherBuilder::new() .build() .search_reader( &matcher, SHERLOCK, printer.sink_with_path(&matcher, "sherlock"), ) .unwrap(); let got = printer_contents(&mut printer); assert_eq_printed!("", got); } #[test] fn quiet() { let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::Quiet) .build_no_color(vec![]); let match_count = { let mut sink = printer.sink_with_path(&matcher, "sherlock"); SearcherBuilder::new() .build() .search_reader(&matcher, SHERLOCK, &mut sink) .unwrap(); sink.match_count }; let got = printer_contents(&mut printer); assert_eq_printed!("", got); // There is actually more than one match, but Quiet should quit after // finding the first one. assert_eq!(1, match_count); } #[test] fn quiet_with_stats() { let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap(); let mut printer = SummaryBuilder::new() .kind(SummaryKind::Quiet) .stats(true) .build_no_color(vec![]); let match_count = { let mut sink = printer.sink_with_path(&matcher, "sherlock"); SearcherBuilder::new() .build() .search_reader(&matcher, SHERLOCK, &mut sink) .unwrap(); sink.match_count }; let got = printer_contents(&mut printer); assert_eq_printed!("", got); // There is actually more than one match, and Quiet will usually quit // after finding the first one, but since we request stats, it will // mush on to find all matches. 
assert_eq!(3, match_count);
    }
}
grep-printer-0.1.6/src/util.rs000064400000000000000000000365730072674642500144260ustar 00000000000000
use std::borrow::Cow;
use std::fmt;
use std::io;
use std::path::Path;
use std::time;

use bstr::{ByteSlice, ByteVec};
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
use grep_searcher::{
    LineIter, Searcher, SinkContext, SinkContextKind, SinkError, SinkMatch,
};
#[cfg(feature = "serde1")]
use serde::{Serialize, Serializer};

use crate::MAX_LOOK_AHEAD;

/// A type for handling replacements while amortizing allocation.
pub struct Replacer<M: Matcher> {
    space: Option<Space<M>>,
}

struct Space<M: Matcher> {
    /// The place to store capture locations.
    caps: M::Captures,
    /// The place to write a replacement to.
    dst: Vec<u8>,
    /// The place to store match offsets in terms of `dst`.
    matches: Vec<Match>,
}

impl<M: Matcher> fmt::Debug for Replacer<M> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (dst, matches) = self.replacement().unwrap_or((&[], &[]));
        f.debug_struct("Replacer")
            .field("dst", &dst)
            .field("matches", &matches)
            .finish()
    }
}

impl<M: Matcher> Replacer<M> {
    /// Create a new replacer for use with a particular matcher.
    ///
    /// This constructor does not allocate. Instead, space for dealing with
    /// replacements is allocated lazily only when needed.
    pub fn new() -> Replacer<M> {
        Replacer { space: None }
    }

    /// Executes a replacement on the given subject string by replacing all
    /// matches with the given replacement. To access the result of the
    /// replacement, use the `replacement` method.
    ///
    /// This can fail if the underlying matcher reports an error.
    pub fn replace_all<'a>(
        &'a mut self,
        searcher: &Searcher,
        matcher: &M,
        mut subject: &[u8],
        range: std::ops::Range<usize>,
        replacement: &[u8],
    ) -> io::Result<()> {
        // See the giant comment in 'find_iter_at_in_context' below for why we
        // do this dance.
        let is_multi_line = searcher.multi_line_with_matcher(&matcher);
        if is_multi_line {
            if subject[range.end..].len() >= MAX_LOOK_AHEAD {
                subject = &subject[..range.end + MAX_LOOK_AHEAD];
            }
        } else {
            // When searching a single line, we should remove the line
            // terminator. Otherwise, it's possible for the regex (via
            // look-around) to observe the line terminator and not match
            // because of it.
            let mut m = Match::new(0, range.end);
            trim_line_terminator(searcher, subject, &mut m);
            subject = &subject[..m.end()];
        }
        {
            let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
                self.allocate(matcher)?;
            dst.clear();
            matches.clear();

            matcher
                .replace_with_captures_at(
                    subject,
                    range.start,
                    caps,
                    dst,
                    |caps, dst| {
                        // Record where this replacement lands in `dst`.
                        let start = dst.len();
                        caps.interpolate(
                            |name| matcher.capture_index(name),
                            subject,
                            replacement,
                            dst,
                        );
                        let end = dst.len();
                        matches.push(Match::new(start, end));
                        true
                    },
                )
                .map_err(io::Error::error_message)?;
        }
        Ok(())
    }

    /// Return the result of the prior replacement and the match offsets for
    /// all replacement occurrences within the returned replacement buffer.
    ///
    /// If no replacement has occurred then `None` is returned.
    pub fn replacement<'a>(&'a self) -> Option<(&'a [u8], &'a [Match])> {
        match self.space {
            None => None,
            Some(ref space) => {
                if space.matches.is_empty() {
                    None
                } else {
                    Some((&space.dst, &space.matches))
                }
            }
        }
    }

    /// Clear space used for performing a replacement.
    ///
    /// Subsequent calls to `replacement` after calling `clear` (but before
    /// executing another replacement) will always return `None`.
    pub fn clear(&mut self) {
        if let Some(ref mut space) = self.space {
            space.dst.clear();
            space.matches.clear();
        }
    }

    /// Allocate space for replacements when used with the given matcher and
    /// return a mutable reference to that space.
    ///
    /// This can fail if allocating space for capture locations from the given
    /// matcher fails.
    fn allocate(&mut self, matcher: &M) -> io::Result<&mut Space<M>> {
        if self.space.is_none() {
            let caps =
                matcher.new_captures().map_err(io::Error::error_message)?;
            self.space =
                Some(Space { caps: caps, dst: vec![], matches: vec![] });
        }
        // The branch above guarantees `space` is populated at this point.
        Ok(self.space.as_mut().unwrap())
    }
}

/// A simple layer of abstraction over either a match or a contextual line
/// reported by the searcher.
///
/// In particular, this provides an API that unions the `SinkMatch` and
/// `SinkContext` types while also exposing a list of all individual match
/// locations.
///
/// While this serves as a convenient mechanism to abstract over `SinkMatch`
/// and `SinkContext`, this also provides a way to abstract over replacements.
/// Namely, after a replacement, a `Sunk` value can be constructed using the
/// results of the replacement instead of the bytes reported directly by the
/// searcher.
#[derive(Debug)]
pub struct Sunk<'a> {
    bytes: &'a [u8],
    absolute_byte_offset: u64,
    line_number: Option<u64>,
    context_kind: Option<&'a SinkContextKind>,
    matches: &'a [Match],
    original_matches: &'a [Match],
}

impl<'a> Sunk<'a> {
    /// Create a `Sunk` with no bytes, no matches and no line number.
    #[inline]
    pub fn empty() -> Sunk<'static> {
        Sunk {
            bytes: &[],
            absolute_byte_offset: 0,
            line_number: None,
            context_kind: None,
            matches: &[],
            original_matches: &[],
        }
    }

    /// Create a `Sunk` from a match reported by the searcher.
    ///
    /// When `replacement` is given, its bytes and match offsets are used in
    /// place of the bytes of `sunk` and `original_matches`.
    #[inline]
    pub fn from_sink_match(
        sunk: &'a SinkMatch<'a>,
        original_matches: &'a [Match],
        replacement: Option<(&'a [u8], &'a [Match])>,
    ) -> Sunk<'a> {
        let (bytes, matches) =
            replacement.unwrap_or_else(|| (sunk.bytes(), original_matches));
        Sunk {
            bytes: bytes,
            absolute_byte_offset: sunk.absolute_byte_offset(),
            line_number: sunk.line_number(),
            context_kind: None,
            matches: matches,
            original_matches: original_matches,
        }
    }

    /// Create a `Sunk` from a contextual line reported by the searcher.
    ///
    /// When `replacement` is given, its bytes and match offsets are used in
    /// place of the bytes of `sunk` and `original_matches`.
    #[inline]
    pub fn from_sink_context(
        sunk: &'a SinkContext<'a>,
        original_matches: &'a [Match],
        replacement: Option<(&'a [u8], &'a [Match])>,
    ) -> Sunk<'a> {
        let (bytes, matches) =
            replacement.unwrap_or_else(|| (sunk.bytes(), original_matches));
        Sunk {
            bytes: bytes,
            absolute_byte_offset: sunk.absolute_byte_offset(),
            line_number: sunk.line_number(),
            context_kind: Some(sunk.kind()),
            matches: matches,
            original_matches: original_matches,
        }
    }

    /// The kind of context this value was built from, or `None` when it was
    /// built from a match.
    #[inline]
    pub fn context_kind(&self) -> Option<&'a SinkContextKind> {
        self.context_kind
    }

    /// The bytes of this value, which are the replacement bytes when a
    /// replacement was supplied.
    #[inline]
    pub fn bytes(&self) -> &'a [u8] {
        self.bytes
    }

    /// The match offsets that correspond to `bytes`.
    #[inline]
    pub fn matches(&self) -> &'a [Match] {
        self.matches
    }

    /// The match offsets as given before any replacement was applied.
    #[inline]
    pub fn original_matches(&self) -> &'a [Match] {
        self.original_matches
    }

    /// Iterate over the lines in `bytes` using the given line terminator.
    #[inline]
    pub fn lines(&self, line_term: u8) -> LineIter<'a> {
        LineIter::new(line_term, self.bytes())
    }

    /// The absolute byte offset reported by the searcher.
    #[inline]
    pub fn absolute_byte_offset(&self) -> u64 {
        self.absolute_byte_offset
    }

    /// The line number reported by the searcher, if any.
    #[inline]
    pub fn line_number(&self) -> Option<u64> {
        self.line_number
    }
}

/// A simple encapsulation of a file path used by a printer.
///
/// This represents any transforms that we might want to perform on the path,
/// such as converting it to valid UTF-8 and/or replacing its separator with
/// something else. This allows us to amortize work if we are printing the
/// file path for every match.
///
/// In the common case, no transformation is needed, which lets us avoid the
/// allocation. Typically, only Windows requires a transform, since we can't
/// access the raw bytes of a path directly and first need to lossily convert
/// to UTF-8. Windows is also typically where the path separator replacement
/// is used, e.g., in cygwin environments to use `/` instead of `\`.
///
/// Users of this type are expected to construct it from a normal `Path`
/// found in the standard library. It can then be written to any `io::Write`
/// implementation using the `as_bytes` method. This achieves platform
/// portability with a small cost: on Windows, paths that are not valid UTF-16
/// will not roundtrip correctly.
#[derive(Clone, Debug)]
pub struct PrinterPath<'a>(Cow<'a, [u8]>);

impl<'a> PrinterPath<'a> {
    /// Create a new path suitable for printing.
pub fn new(path: &'a Path) -> PrinterPath<'a> {
        PrinterPath(Vec::from_path_lossy(path))
    }

    /// Create a new printer path from the given path which can be efficiently
    /// written to a writer without allocation.
    ///
    /// If the given separator is present, then any separators in `path` are
    /// replaced with it.
    pub fn with_separator(path: &'a Path, sep: Option<u8>) -> PrinterPath<'a> {
        let mut ppath = PrinterPath::new(path);
        if let Some(sep) = sep {
            ppath.replace_separator(sep);
        }
        ppath
    }

    /// Replace the path separator in this path with the given separator
    /// and do it in place. On Windows, both `/` and `\` are treated as
    /// path separators that are both replaced by `new_sep`. In all other
    /// environments, only `/` is treated as a path separator.
    fn replace_separator(&mut self, new_sep: u8) {
        let transformed_path: Vec<u8> = self
            .0
            .bytes()
            .map(|b| {
                if b == b'/' || (cfg!(windows) && b == b'\\') {
                    new_sep
                } else {
                    b
                }
            })
            .collect();
        self.0 = Cow::Owned(transformed_path);
    }

    /// Return the raw bytes for this path.
    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }
}

/// A type that provides "nicer" Display and Serialize impls for
/// std::time::Duration. The serialization format should actually be compatible
/// with the Deserialize impl for std::time::Duration, since this type only
/// adds new fields.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct NiceDuration(pub time::Duration);

impl fmt::Display for NiceDuration {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:0.6}s", self.fractional_seconds())
    }
}

impl NiceDuration {
    /// Returns the number of seconds in this duration in fraction form.
    /// The number to the left of the decimal point is the number of whole
    /// seconds, and the number to the right is the fraction of a second
    /// derived from the sub-second nanoseconds.
    fn fractional_seconds(&self) -> f64 {
        let fractional = (self.0.subsec_nanos() as f64) / 1_000_000_000.0;
        self.0.as_secs() as f64 + fractional
    }
}

#[cfg(feature = "serde1")]
impl Serialize for NiceDuration {
    fn serialize<S: Serializer>(&self, ser: S) -> Result<S::Ok, S::Error> {
        use serde::ser::SerializeStruct;

        // Three fields are written below, so that is the length declared
        // to the serializer.
        let mut state = ser.serialize_struct("Duration", 3)?;
        state.serialize_field("secs", &self.0.as_secs())?;
        state.serialize_field("nanos", &self.0.subsec_nanos())?;
        state.serialize_field("human", &self.to_string())?;
        state.end()
    }
}

/// Trim prefix ASCII spaces from the given slice and return the corresponding
/// range.
///
/// This stops trimming a prefix as soon as it sees non-whitespace or a line
/// terminator.
pub fn trim_ascii_prefix(
    line_term: LineTerminator,
    slice: &[u8],
    range: Match,
) -> Match {
    fn is_space(b: u8) -> bool {
        match b {
            b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ' => true,
            _ => false,
        }
    }

    let count = slice[range]
        .iter()
        .take_while(|&&b| -> bool {
            is_space(b) && !line_term.as_bytes().contains(&b)
        })
        .count();
    range.with_start(range.start() + count)
}

/// Run `matched` on every match of `matcher` that starts inside `range` of
/// `bytes`, stopping early when `matched` returns false.
///
/// Matcher errors are converted into `io::Error`.
pub fn find_iter_at_in_context<M, F>(
    searcher: &Searcher,
    matcher: M,
    mut bytes: &[u8],
    range: std::ops::Range<usize>,
    mut matched: F,
) -> io::Result<()>
where
    M: Matcher,
    F: FnMut(Match) -> bool,
{
    // This strange dance is to account for the possibility of look-ahead in
    // the regex. The problem here is that mat.bytes() doesn't include the
    // lines beyond the match boundaries in multi-line mode, which means that
    // when we try to rediscover the full set of matches here, the regex may no
    // longer match if it required some look-ahead beyond the matching lines.
    //
    // PCRE2 (and the grep-matcher interfaces) has no way of specifying an end
    // bound of the search. So we kludge it and let the regex engine search the
    // rest of the buffer... But to avoid things getting too crazy, we cap the
    // buffer.
    //
    // If it weren't for multi-line mode, then none of this would be needed.
    // Alternatively, if we refactored the grep interfaces to pass along the
    // full set of matches (if available) from the searcher, then that might
    // also help here. But that winds up paying an upfront unavoidable cost for
    // the case where matches don't need to be counted. So then you'd have to
    // introduce a way to pass along matches conditionally, only when needed.
    // Yikes.
    //
    // Maybe the bigger picture thing here is that the searcher should be
    // responsible for finding matches when necessary, and the printer
    // shouldn't be involved in this business in the first place. Sigh. Live
    // and learn. Abstraction boundaries are hard.
    let is_multi_line = searcher.multi_line_with_matcher(&matcher);
    if is_multi_line {
        if bytes[range.end..].len() >= MAX_LOOK_AHEAD {
            bytes = &bytes[..range.end + MAX_LOOK_AHEAD];
        }
    } else {
        // When searching a single line, we should remove the line terminator.
        // Otherwise, it's possible for the regex (via look-around) to observe
        // the line terminator and not match because of it.
        let mut m = Match::new(0, range.end);
        trim_line_terminator(searcher, bytes, &mut m);
        bytes = &bytes[..m.end()];
    }
    matcher
        .find_iter_at(bytes, range.start, |m| {
            // Matches found in the look-ahead region past the requested
            // range are not reported.
            if m.start() >= range.end {
                return false;
            }
            matched(m)
        })
        .map_err(io::Error::error_message)
}

/// Given a buf and some bounds, if there is a line terminator at the end of
/// the given bounds in buf, then the bounds are trimmed to remove the line
/// terminator.
pub fn trim_line_terminator(
    searcher: &Searcher,
    buf: &[u8],
    line: &mut Match,
) {
    let lineterm = searcher.line_terminator();
    if lineterm.is_suffix(&buf[*line]) {
        let mut end = line.end() - 1;
        // For CRLF terminators, also drop the `\r` that precedes the `\n`.
        if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') {
            end -= 1;
        }
        *line = line.with_end(end);
    }
}