grep-cli-0.1.6/.cargo_vcs_info.json0000644000000001120000000000100125340ustar { "git": { "sha1": "0cf2b98df2447589d9cf1d6c2d18265100327fa1" } } grep-cli-0.1.6/Cargo.toml0000644000000025740000000000100105500ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "grep-cli" version = "0.1.6" authors = ["Andrew Gallant "] description = "Utilities for search oriented command line applications.\n" homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli" documentation = "https://docs.rs/grep-cli" readme = "README.md" keywords = ["regex", "grep", "cli", "utility", "util"] license = "Unlicense/MIT" repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli" [dependencies.atty] version = "0.2.11" [dependencies.bstr] version = "0.2.0" [dependencies.globset] version = "0.4.7" [dependencies.lazy_static] version = "1.1.0" [dependencies.log] version = "0.4.5" [dependencies.regex] version = "1.1" [dependencies.same-file] version = "1.0.4" [dependencies.termcolor] version = "1.0.4" [target."cfg(windows)".dependencies.winapi-util] version = "0.1.1" grep-cli-0.1.6/Cargo.toml.orig000064400000000000000000000013630072674642500142540ustar 00000000000000[package] name = "grep-cli" version = "0.1.6" #:version authors = ["Andrew Gallant "] description = """ Utilities for search oriented command line applications. 
""" documentation = "https://docs.rs/grep-cli" homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli" repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli" readme = "README.md" keywords = ["regex", "grep", "cli", "utility", "util"] license = "Unlicense/MIT" edition = "2018" [dependencies] atty = "0.2.11" bstr = "0.2.0" globset = { version = "0.4.7", path = "../globset" } lazy_static = "1.1.0" log = "0.4.5" regex = "1.1" same-file = "1.0.4" termcolor = "1.0.4" [target.'cfg(windows)'.dependencies.winapi-util] version = "0.1.1" grep-cli-0.1.6/LICENSE-MIT000064400000000000000000000020710072674642500130160ustar 00000000000000The MIT License (MIT) Copyright (c) 2015 Andrew Gallant Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. grep-cli-0.1.6/README.md000064400000000000000000000016300072674642500126410ustar 00000000000000grep-cli -------- A utility library that provides common routines desired in search oriented command line applications. 
This includes, but is not limited to, parsing hex escapes, detecting whether stdin is readable and more. To the extent possible, this crate strives for compatibility across Windows, macOS and Linux. [![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions) [![](https://img.shields.io/crates/v/grep-cli.svg)](https://crates.io/crates/grep-cli) Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/). ### Documentation [https://docs.rs/grep-cli](https://docs.rs/grep-cli) **NOTE:** You probably don't want to use this crate directly. Instead, you should prefer the facade defined in the [`grep`](https://docs.rs/grep) crate. ### Usage Add this to your `Cargo.toml`: ```toml [dependencies] grep-cli = "0.1" ``` grep-cli-0.1.6/UNLICENSE000064400000000000000000000022730072674642500126360ustar 00000000000000This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to grep-cli-0.1.6/src/decompress.rs000064400000000000000000000456300072674642500146730ustar 00000000000000use std::ffi::{OsStr, OsString}; use std::fs::File; use std::io; use std::path::{Path, PathBuf}; use std::process::Command; use globset::{Glob, GlobSet, GlobSetBuilder}; use crate::process::{CommandError, CommandReader, CommandReaderBuilder}; /// A builder for a matcher that determines which files get decompressed. #[derive(Clone, Debug)] pub struct DecompressionMatcherBuilder { /// The commands for each matching glob. commands: Vec, /// Whether to include the default matching rules. defaults: bool, } /// A representation of a single command for decompressing data /// out-of-proccess. #[derive(Clone, Debug)] struct DecompressionCommand { /// The glob that matches this command. glob: String, /// The command or binary name. bin: PathBuf, /// The arguments to invoke with the command. args: Vec, } impl Default for DecompressionMatcherBuilder { fn default() -> DecompressionMatcherBuilder { DecompressionMatcherBuilder::new() } } impl DecompressionMatcherBuilder { /// Create a new builder for configuring a decompression matcher. pub fn new() -> DecompressionMatcherBuilder { DecompressionMatcherBuilder { commands: vec![], defaults: true } } /// Build a matcher for determining how to decompress files. /// /// If there was a problem compiling the matcher, then an error is /// returned. 
pub fn build(&self) -> Result<DecompressionMatcher, CommandError> { let defaults = if !self.defaults { vec![] } else { default_decompression_commands() }; let mut glob_builder = GlobSetBuilder::new(); let mut commands = vec![]; for decomp_cmd in defaults.iter().chain(&self.commands) { let glob = Glob::new(&decomp_cmd.glob).map_err(|err| { CommandError::io(io::Error::new(io::ErrorKind::Other, err)) })?; glob_builder.add(glob); commands.push(decomp_cmd.clone()); } let globs = glob_builder.build().map_err(|err| { CommandError::io(io::Error::new(io::ErrorKind::Other, err)) })?; Ok(DecompressionMatcher { globs, commands }) } /// When enabled, the default matching rules will be compiled into this /// matcher before any other associations. When disabled, only the /// rules explicitly given to this builder will be used. /// /// This is enabled by default. pub fn defaults(&mut self, yes: bool) -> &mut DecompressionMatcherBuilder { self.defaults = yes; self } /// Associates a glob with a command to decompress files matching the glob. /// /// If multiple globs match the same file, then the most recently added /// glob takes precedence. /// /// The syntax for the glob is documented in the /// [`globset` crate](https://docs.rs/globset/#syntax). /// /// The `program` given is resolved with respect to `PATH` and turned /// into an absolute path internally before being executed by the current /// platform. Notably, on Windows, this avoids a security problem where /// passing a relative path to `CreateProcess` will automatically search /// the current directory for a matching program. If the program could /// not be resolved, then it is silently ignored and the association is /// dropped. For this reason, callers should prefer `try_associate`. pub fn associate<P, I, A>( &mut self, glob: &str, program: P, args: I, ) -> &mut DecompressionMatcherBuilder where P: AsRef<OsStr>, I: IntoIterator<Item = A>, A: AsRef<OsStr>, { let _ = self.try_associate(glob, program, args); self } /// Associates a glob with a command to decompress files matching the glob. 
/// /// If multiple globs match the same file, then the most recently added /// glob takes precedence. /// /// The syntax for the glob is documented in the /// [`globset` crate](https://docs.rs/globset/#syntax). /// /// The `program` given is resolved with respect to `PATH` and turned /// into an absolute path internally before being executed by the current /// platform. Notably, on Windows, this avoids a security problem where /// passing a relative path to `CreateProcess` will automatically search /// the current directory for a matching program. If the program could not /// be resolved, then an error is returned. pub fn try_associate<P, I, A>( &mut self, glob: &str, program: P, args: I, ) -> Result<&mut DecompressionMatcherBuilder, CommandError> where P: AsRef<OsStr>, I: IntoIterator<Item = A>, A: AsRef<OsStr>, { let glob = glob.to_string(); let bin = resolve_binary(Path::new(program.as_ref()))?; let args = args.into_iter().map(|a| a.as_ref().to_os_string()).collect(); self.commands.push(DecompressionCommand { glob, bin, args }); Ok(self) } } /// A matcher for determining how to decompress files. #[derive(Clone, Debug)] pub struct DecompressionMatcher { /// The set of globs to match. Each glob has a corresponding entry in /// `commands`. When a glob matches, the corresponding command should be /// used to perform out-of-process decompression. globs: GlobSet, /// The commands for each matching glob. commands: Vec<DecompressionCommand>, } impl Default for DecompressionMatcher { fn default() -> DecompressionMatcher { DecompressionMatcher::new() } } impl DecompressionMatcher { /// Create a new matcher with default rules. /// /// To add more matching rules, build a matcher with /// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html). pub fn new() -> DecompressionMatcher { DecompressionMatcherBuilder::new() .build() .expect("built-in matching rules should always compile") } /// Return a pre-built command based on the given file path that can /// decompress its contents. 
If no such decompressor is known, then this /// returns `None`. /// /// If there are multiple possible commands matching the given path, then /// the command added last takes precedence. pub fn command>(&self, path: P) -> Option { for i in self.globs.matches(path).into_iter().rev() { let decomp_cmd = &self.commands[i]; let mut cmd = Command::new(&decomp_cmd.bin); cmd.args(&decomp_cmd.args); return Some(cmd); } None } /// Returns true if and only if the given file path has at least one /// matching command to perform decompression on. pub fn has_command>(&self, path: P) -> bool { self.globs.is_match(path) } } /// Configures and builds a streaming reader for decompressing data. #[derive(Clone, Debug, Default)] pub struct DecompressionReaderBuilder { matcher: DecompressionMatcher, command_builder: CommandReaderBuilder, } impl DecompressionReaderBuilder { /// Create a new builder with the default configuration. pub fn new() -> DecompressionReaderBuilder { DecompressionReaderBuilder::default() } /// Build a new streaming reader for decompressing data. /// /// If decompression is done out-of-process and if there was a problem /// spawning the process, then its error is logged at the debug level and a /// passthru reader is returned that does no decompression. This behavior /// typically occurs when the given file path matches a decompression /// command, but is executing in an environment where the decompression /// command is not available. /// /// If the given file path could not be matched with a decompression /// strategy, then a passthru reader is returned that does no /// decompression. 
pub fn build>( &self, path: P, ) -> Result { let path = path.as_ref(); let mut cmd = match self.matcher.command(path) { None => return DecompressionReader::new_passthru(path), Some(cmd) => cmd, }; cmd.arg(path); match self.command_builder.build(&mut cmd) { Ok(cmd_reader) => Ok(DecompressionReader { rdr: Ok(cmd_reader) }), Err(err) => { log::debug!( "{}: error spawning command '{:?}': {} \ (falling back to uncompressed reader)", path.display(), cmd, err, ); DecompressionReader::new_passthru(path) } } } /// Set the matcher to use to look up the decompression command for each /// file path. /// /// A set of sensible rules is enabled by default. Setting this will /// completely replace the current rules. pub fn matcher( &mut self, matcher: DecompressionMatcher, ) -> &mut DecompressionReaderBuilder { self.matcher = matcher; self } /// Get the underlying matcher currently used by this builder. pub fn get_matcher(&self) -> &DecompressionMatcher { &self.matcher } /// When enabled, the reader will asynchronously read the contents of the /// command's stderr output. When disabled, stderr is only read after the /// stdout stream has been exhausted (or if the process quits with an error /// code). /// /// Note that when enabled, this may require launching an additional /// thread in order to read stderr. This is done so that the process being /// executed is never blocked from writing to stdout or stderr. If this is /// disabled, then it is possible for the process to fill up the stderr /// buffer and deadlock. /// /// This is enabled by default. pub fn async_stderr( &mut self, yes: bool, ) -> &mut DecompressionReaderBuilder { self.command_builder.async_stderr(yes); self } } /// A streaming reader for decompressing the contents of a file. /// /// The purpose of this reader is to provide a seamless way to decompress the /// contents of file using existing tools in the current environment. 
This is /// meant to be an alternative to using decompression libraries in favor of the /// simplicity and portability of using external commands such as `gzip` and /// `xz`. This does impose the overhead of spawning a process, so other means /// for performing decompression should be sought if this overhead isn't /// acceptable. /// /// A decompression reader comes with a default set of matching rules that are /// meant to associate file paths with the corresponding command to use to /// decompress them. For example, a glob like `*.gz` matches gzip compressed /// files with the command `gzip -d -c`. If a file path does not match any /// existing rules, or if it matches a rule whose command does not exist in the /// current environment, then the decompression reader passes through the /// contents of the underlying file without doing any decompression. /// /// The default matching rules are probably good enough for most cases, and if /// they require revision, pull requests are welcome. In cases where they must /// be changed or extended, they can be customized through the use of /// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html) /// and /// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html). /// /// By default, this reader will asynchronously read the process's stderr. /// This prevents subtle deadlocking bugs for noisy processes that write a lot /// to stderr. Currently, the entire contents of stderr is read onto the heap. /// /// # Example /// /// This example shows how to read the decompressed contents of a file without /// needing to explicitly choose the decompression command to run. /// /// Note that if you need to decompress multiple files, it is better to use /// `DecompressionReaderBuilder`, which will amortize the cost of compiling the /// matcher. 
/// /// ```no_run /// use std::io::Read; /// use std::process::Command; /// use grep_cli::DecompressionReader; /// /// # fn example() -> Result<(), Box<::std::error::Error>> { /// let mut rdr = DecompressionReader::new("/usr/share/man/man1/ls.1.gz")?; /// let mut contents = vec![]; /// rdr.read_to_end(&mut contents)?; /// # Ok(()) } /// ``` #[derive(Debug)] pub struct DecompressionReader { /// `Ok` holds the reader for an out-of-process decompression command, /// while `Err` holds the plain file used for passthru reading. rdr: Result<CommandReader, File>, } impl DecompressionReader { /// Build a new streaming reader for decompressing data. /// /// If decompression is done out-of-process and if there was a problem /// spawning the process, then its error is logged at the debug level and a /// passthru reader is returned that does no decompression. /// /// If the given file path could not be matched with a decompression /// strategy, then a passthru reader is returned that does no /// decompression. /// /// An error is returned only when the passthru reader cannot open the /// file. /// /// This uses the default matching rules for determining how to decompress /// the given file. To change those matching rules, use /// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html) /// and /// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html). /// /// When creating readers for many paths, it is better to use the builder /// since it will amortize the cost of constructing the matcher. pub fn new<P: AsRef<Path>>( path: P, ) -> Result<DecompressionReader, CommandError> { DecompressionReaderBuilder::new().build(path) } /// Creates a new "passthru" decompression reader that reads from the file /// corresponding to the given path without doing decompression and without /// executing another process. fn new_passthru(path: &Path) -> Result<DecompressionReader, CommandError> { let file = File::open(path)?; Ok(DecompressionReader { rdr: Err(file) }) } /// Closes this reader, freeing any resources used by its underlying child /// process, if one was used. If the child process exits with a nonzero /// exit code, the returned Err value will include its stderr. /// /// `close` is idempotent, meaning it can be safely called multiple times. /// The first call closes the CommandReader and any subsequent calls do /// nothing. 
/// /// This method should be called after partially reading a file to prevent /// resource leakage. However, there is no need to call `close` explicitly /// if your code always calls `read` to EOF, as `read` takes care of /// calling `close` in this case. /// /// `close` is also called in `drop` as a last line of defense against /// resource leakage. Any error from the child process is then printed as a /// warning to stderr. This can be avoided by explicitly calling `close` /// before the CommandReader is dropped. /// /// For a passthru reader there is no child process, so this always returns /// `Ok(())`. pub fn close(&mut self) -> io::Result<()> { match self.rdr { Ok(ref mut rdr) => rdr.close(), Err(_) => Ok(()), } } } impl io::Read for DecompressionReader { fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { match self.rdr { Ok(ref mut rdr) => rdr.read(buf), Err(ref mut rdr) => rdr.read(buf), } } } /// Resolves a path to a program to a path by searching for the program in /// `PATH`. /// /// If the program could not be resolved, then an error is returned. /// /// The purpose of doing this instead of passing the path to the program /// directly to Command::new is that Command::new will hand relative paths /// to CreateProcess on Windows, which will implicitly search the current /// working directory for the executable. This could be undesirable for /// security reasons. For example, running ripgrep with the -z/--search-zip /// flag on an untrusted directory tree could result in arbitrary programs /// executing on Windows. /// /// Note that this could still return a relative path if PATH contains a /// relative path. We permit this since it is assumed that the user has set /// this explicitly, and thus, desires this behavior. /// /// On non-Windows, this is a no-op. 
pub fn resolve_binary>( prog: P, ) -> Result { use std::env; fn is_exe(path: &Path) -> bool { let md = match path.metadata() { Err(_) => return false, Ok(md) => md, }; !md.is_dir() } let prog = prog.as_ref(); if !cfg!(windows) || prog.is_absolute() { return Ok(prog.to_path_buf()); } let syspaths = match env::var_os("PATH") { Some(syspaths) => syspaths, None => { let msg = "system PATH environment variable not found"; return Err(CommandError::io(io::Error::new( io::ErrorKind::Other, msg, ))); } }; for syspath in env::split_paths(&syspaths) { if syspath.as_os_str().is_empty() { continue; } let abs_prog = syspath.join(prog); if is_exe(&abs_prog) { return Ok(abs_prog.to_path_buf()); } if abs_prog.extension().is_none() { let abs_prog = abs_prog.with_extension("exe"); if is_exe(&abs_prog) { return Ok(abs_prog.to_path_buf()); } } } let msg = format!("{}: could not find executable in PATH", prog.display()); return Err(CommandError::io(io::Error::new(io::ErrorKind::Other, msg))); } fn default_decompression_commands() -> Vec { const ARGS_GZIP: &[&str] = &["gzip", "-d", "-c"]; const ARGS_BZIP: &[&str] = &["bzip2", "-d", "-c"]; const ARGS_XZ: &[&str] = &["xz", "-d", "-c"]; const ARGS_LZ4: &[&str] = &["lz4", "-d", "-c"]; const ARGS_LZMA: &[&str] = &["xz", "--format=lzma", "-d", "-c"]; const ARGS_BROTLI: &[&str] = &["brotli", "-d", "-c"]; const ARGS_ZSTD: &[&str] = &["zstd", "-q", "-d", "-c"]; const ARGS_UNCOMPRESS: &[&str] = &["uncompress", "-c"]; fn add(glob: &str, args: &[&str], cmds: &mut Vec) { let bin = match resolve_binary(Path::new(args[0])) { Ok(bin) => bin, Err(err) => { log::debug!("{}", err); return; } }; cmds.push(DecompressionCommand { glob: glob.to_string(), bin, args: args .iter() .skip(1) .map(|s| OsStr::new(s).to_os_string()) .collect(), }); } let mut cmds = vec![]; add("*.gz", ARGS_GZIP, &mut cmds); add("*.tgz", ARGS_GZIP, &mut cmds); add("*.bz2", ARGS_BZIP, &mut cmds); add("*.tbz2", ARGS_BZIP, &mut cmds); add("*.xz", ARGS_XZ, &mut cmds); add("*.txz", ARGS_XZ, 
&mut cmds); add("*.lz4", ARGS_LZ4, &mut cmds); add("*.lzma", ARGS_LZMA, &mut cmds); add("*.br", ARGS_BROTLI, &mut cmds); add("*.zst", ARGS_ZSTD, &mut cmds); add("*.zstd", ARGS_ZSTD, &mut cmds); add("*.Z", ARGS_UNCOMPRESS, &mut cmds); cmds } grep-cli-0.1.6/src/escape.rs000064400000000000000000000170100072674642500137560ustar 00000000000000use std::ffi::OsStr; use std::str; use bstr::{ByteSlice, ByteVec}; /// A single state in the state machine used by `unescape`. #[derive(Clone, Copy, Eq, PartialEq)] enum State { /// The state after seeing a `\`. Escape, /// The state after seeing a `\x`. HexFirst, /// The state after seeing a `\x[0-9A-Fa-f]`. HexSecond(char), /// Default state. Literal, } /// Escapes arbitrary bytes into a human readable string. /// /// This converts `\t`, `\r`, `\n` and `\` into their escaped forms. It also /// converts the non-printable subset of ASCII in addition to invalid UTF-8 /// bytes to hexadecimal escape sequences. Note that the space character is /// also written as a hexadecimal escape (`\x20`). Everything else is left /// as is. /// /// The dual of this routine is [`unescape`](fn.unescape.html). /// /// # Example /// /// This example shows how to convert a byte string that contains a `\n` and /// invalid UTF-8 bytes into a `String`. /// /// Pay special attention to the use of raw strings. That is, `r"\n"` is /// equivalent to `"\\n"`. /// /// ``` /// use grep_cli::escape; /// /// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz")); /// ``` pub fn escape(bytes: &[u8]) -> String { let mut escaped = String::new(); for (s, e, ch) in bytes.char_indices() { if ch == '\u{FFFD}' { for b in bytes[s..e].bytes() { escape_byte(b, &mut escaped); } } else { escape_char(ch, &mut escaped); } } escaped } /// Escapes an OS string into a human readable string. /// /// This is like [`escape`](fn.escape.html), but accepts an OS string. pub fn escape_os(string: &OsStr) -> String { escape(Vec::from_os_str_lossy(string).as_bytes()) } /// Unescapes a string. 
/// /// It supports a limited set of escape sequences: /// /// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes. /// * `\\` is mapped to a single `\` byte. /// * `\xZZ` hexadecimal escapes are mapped to their byte. /// /// Everything else is left as is, including non-hexadecimal escapes like /// `\xGG`. /// /// This is useful when it is desirable for a command line argument to be /// capable of specifying arbitrary bytes or otherwise make it easier to /// specify non-printable characters. /// /// The dual of this routine is [`escape`](fn.escape.html). /// /// # Example /// /// This example shows how to convert an escaped string (which is valid UTF-8) /// into a corresponding sequence of bytes. Each escape sequence is mapped to /// its bytes, which may include invalid UTF-8. /// /// Pay special attention to the use of raw strings. That is, `r"\n"` is /// equivalent to `"\\n"`. /// /// ``` /// use grep_cli::unescape; /// /// assert_eq!(&b"foo\nbar\xFFbaz"[..], &*unescape(r"foo\nbar\xFFbaz")); /// ``` pub fn unescape(s: &str) -> Vec<u8> { use self::State::*; let mut bytes = vec![]; let mut state = Literal; for c in s.chars() { match state { Escape => match c { '\\' => { bytes.push(b'\\'); state = Literal; } 'n' => { bytes.push(b'\n'); state = Literal; } 'r' => { bytes.push(b'\r'); state = Literal; } 't' => { bytes.push(b'\t'); state = Literal; } 'x' => { state = HexFirst; } c => { bytes.extend(format!(r"\{}", c).into_bytes()); state = Literal; } }, HexFirst => match c { '0'..='9' | 'A'..='F' | 'a'..='f' => { state = HexSecond(c); } c => { bytes.extend(format!(r"\x{}", c).into_bytes()); state = Literal; } }, HexSecond(first) => match c { '0'..='9' | 'A'..='F' | 'a'..='f' => { let ordinal = format!("{}{}", first, c); let byte = u8::from_str_radix(&ordinal, 16).unwrap(); bytes.push(byte); state = Literal; } c => { let original = format!(r"\x{}{}", first, c); bytes.extend(original.into_bytes()); state = Literal; } }, Literal => match c { '\\' => { state = Escape; } c => { 
bytes.extend(c.to_string().as_bytes()); } }, } } match state { Escape => bytes.push(b'\\'), HexFirst => bytes.extend(b"\\x"), HexSecond(c) => bytes.extend(format!("\\x{}", c).into_bytes()), Literal => {} } bytes } /// Unescapes an OS string. /// /// This is like [`unescape`](fn.unescape.html), but accepts an OS string. /// /// Note that this first lossily decodes the given OS string as UTF-8. That /// is, an escaped string (the thing given) should be valid UTF-8. pub fn unescape_os(string: &OsStr) -> Vec<u8> { unescape(&string.to_string_lossy()) } /// Adds the given codepoint to the given string, escaping it if necessary. /// /// ASCII codepoints are handled by `escape_byte`; every other codepoint is /// pushed unchanged. fn escape_char(cp: char, into: &mut String) { if cp.is_ascii() { escape_byte(cp as u8, into); } else { into.push(cp); } } /// Adds the given byte to the given string, escaping it if necessary. /// /// Graphic ASCII (`0x21..=0x7E`, which includes `~`) other than `\` is /// pushed unchanged. `\n`, `\r`, `\t` and `\` are written as two-character /// escapes. Every other byte, including the space character, is written as /// an uppercase `\xHH` escape. fn escape_byte(byte: u8, into: &mut String) { match byte { 0x21..=0x5B | 0x5D..=0x7E => into.push(byte as char), b'\n' => into.push_str(r"\n"), b'\r' => into.push_str(r"\r"), b'\t' => into.push_str(r"\t"), b'\\' => into.push_str(r"\\"), _ => into.push_str(&format!(r"\x{:02X}", byte)), } } #[cfg(test)] mod tests { use super::{escape, unescape}; fn b(bytes: &'static [u8]) -> Vec<u8> { bytes.to_vec() } #[test] fn empty() { assert_eq!(b(b""), unescape(r"")); assert_eq!(r"", escape(b"")); } #[test] fn backslash() { assert_eq!(b(b"\\"), unescape(r"\\")); assert_eq!(r"\\", escape(b"\\")); } #[test] fn nul() { assert_eq!(b(b"\x00"), unescape(r"\x00")); assert_eq!(r"\x00", escape(b"\x00")); } #[test] fn nl() { assert_eq!(b(b"\n"), unescape(r"\n")); assert_eq!(r"\n", escape(b"\n")); } #[test] fn tab() { assert_eq!(b(b"\t"), unescape(r"\t")); assert_eq!(r"\t", escape(b"\t")); } #[test] fn carriage() { assert_eq!(b(b"\r"), unescape(r"\r")); assert_eq!(r"\r", escape(b"\r")); } #[test] fn nothing_simple() { assert_eq!(b(b"\\a"), unescape(r"\a")); assert_eq!(b(b"\\a"), unescape(r"\\a")); assert_eq!(r"\\a", escape(b"\\a")); } #[test] fn nothing_hex0() { assert_eq!(b(b"\\x"), 
unescape(r"\x")); assert_eq!(b(b"\\x"), unescape(r"\\x")); assert_eq!(r"\\x", escape(b"\\x")); } #[test] fn nothing_hex1() { assert_eq!(b(b"\\xz"), unescape(r"\xz")); assert_eq!(b(b"\\xz"), unescape(r"\\xz")); assert_eq!(r"\\xz", escape(b"\\xz")); } #[test] fn nothing_hex2() { assert_eq!(b(b"\\xzz"), unescape(r"\xzz")); assert_eq!(b(b"\\xzz"), unescape(r"\\xzz")); assert_eq!(r"\\xzz", escape(b"\\xzz")); } #[test] fn invalid_utf8() { assert_eq!(r"\xFF", escape(b"\xFF")); assert_eq!(r"a\xFFb", escape(b"a\xFFb")); } } grep-cli-0.1.6/src/human.rs000064400000000000000000000107670072674642500136420ustar 00000000000000use std::error; use std::fmt; use std::io; use std::num::ParseIntError; use regex::Regex; /// An error that occurs when parsing a human readable size description. /// /// This error provides an end user friendly message describing why the /// description couldn't be parsed and what the expected format is. #[derive(Clone, Debug, Eq, PartialEq)] pub struct ParseSizeError { original: String, kind: ParseSizeErrorKind, } #[derive(Clone, Debug, Eq, PartialEq)] enum ParseSizeErrorKind { InvalidFormat, InvalidInt(ParseIntError), Overflow, } impl ParseSizeError { fn format(original: &str) -> ParseSizeError { ParseSizeError { original: original.to_string(), kind: ParseSizeErrorKind::InvalidFormat, } } fn int(original: &str, err: ParseIntError) -> ParseSizeError { ParseSizeError { original: original.to_string(), kind: ParseSizeErrorKind::InvalidInt(err), } } fn overflow(original: &str) -> ParseSizeError { ParseSizeError { original: original.to_string(), kind: ParseSizeErrorKind::Overflow, } } } impl error::Error for ParseSizeError { fn description(&self) -> &str { "invalid size" } } impl fmt::Display for ParseSizeError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use self::ParseSizeErrorKind::*; match self.kind { InvalidFormat => write!( f, "invalid format for size '{}', which should be a sequence \ of digits followed by an optional 'K', 'M' or 'G' \ 
suffix", self.original ), InvalidInt(ref err) => write!( f, "invalid integer found in size '{}': {}", self.original, err ), Overflow => write!(f, "size too big in '{}'", self.original), } } } impl From for io::Error { fn from(size_err: ParseSizeError) -> io::Error { io::Error::new(io::ErrorKind::Other, size_err) } } /// Parse a human readable size like `2M` into a corresponding number of bytes. /// /// Supported size suffixes are `K` (for kilobyte), `M` (for megabyte) and `G` /// (for gigabyte). If a size suffix is missing, then the size is interpreted /// as bytes. If the size is too big to fit into a `u64`, then this returns an /// error. /// /// Additional suffixes may be added over time. pub fn parse_human_readable_size(size: &str) -> Result { lazy_static::lazy_static! { // Normally I'd just parse something this simple by hand to avoid the // regex dep, but we bring regex in any way for glob matching, so might // as well use it. static ref RE: Regex = Regex::new(r"^([0-9]+)([KMG])?$").unwrap(); } let caps = match RE.captures(size) { Some(caps) => caps, None => return Err(ParseSizeError::format(size)), }; let value: u64 = caps[1].parse().map_err(|err| ParseSizeError::int(size, err))?; let suffix = match caps.get(2) { None => return Ok(value), Some(cap) => cap.as_str(), }; let bytes = match suffix { "K" => value.checked_mul(1 << 10), "M" => value.checked_mul(1 << 20), "G" => value.checked_mul(1 << 30), // Because if the regex matches this group, it must be [KMG]. 
_ => unreachable!(), }; bytes.ok_or_else(|| ParseSizeError::overflow(size)) } #[cfg(test)] mod tests { use super::*; #[test] fn suffix_none() { let x = parse_human_readable_size("123").unwrap(); assert_eq!(123, x); } #[test] fn suffix_k() { let x = parse_human_readable_size("123K").unwrap(); assert_eq!(123 * (1 << 10), x); } #[test] fn suffix_m() { let x = parse_human_readable_size("123M").unwrap(); assert_eq!(123 * (1 << 20), x); } #[test] fn suffix_g() { let x = parse_human_readable_size("123G").unwrap(); assert_eq!(123 * (1 << 30), x); } #[test] fn invalid_empty() { assert!(parse_human_readable_size("").is_err()); } #[test] fn invalid_non_digit() { assert!(parse_human_readable_size("a").is_err()); } #[test] fn invalid_overflow() { assert!(parse_human_readable_size("9999999999999999G").is_err()); } #[test] fn invalid_suffix() { assert!(parse_human_readable_size("123T").is_err()); } } grep-cli-0.1.6/src/lib.rs000064400000000000000000000210730072674642500132700ustar 00000000000000/*! This crate provides common routines used in command line applications, with a focus on routines useful for search oriented applications. As a utility library, there is no central type or function. However, a key focus of this crate is to improve failure modes and provide user friendly error messages when things go wrong. To the best extent possible, everything in this crate works on Windows, macOS and Linux. # Standard I/O The [`is_readable_stdin`](fn.is_readable_stdin.html), [`is_tty_stderr`](fn.is_tty_stderr.html), [`is_tty_stdin`](fn.is_tty_stdin.html) and [`is_tty_stdout`](fn.is_tty_stdout.html) routines query aspects of standard I/O. `is_readable_stdin` determines whether stdin can be usefully read from, while the `tty` methods determine whether a tty is attached to stdin/stdout/stderr. `is_readable_stdin` is useful when writing an application that changes behavior based on whether the application was invoked with data on stdin. 
For example, `rg foo` might recursively search the current working directory
for occurrences of `foo`, but `rg foo < file` might only search the contents of
`file`.

The `tty` routines are useful for similar reasons. Namely, commands like `ls`
will change their output depending on whether they are printing to a terminal
or not. For example, `ls` shows a file on each line when stdout is redirected
to a file or a pipe, but condenses the output to show possibly many files on
each line when stdout is connected to a tty.


# Coloring and buffering

The
[`stdout`](fn.stdout.html),
[`stdout_buffered_block`](fn.stdout_buffered_block.html)
and
[`stdout_buffered_line`](fn.stdout_buffered_line.html)
routines are alternative constructors for
[`StandardStream`](struct.StandardStream.html).
A `StandardStream` implements `termcolor::WriteColor`, which provides a way
to emit colors to terminals. Its key use is the encapsulation of buffering
style. Namely, `stdout` will return a line buffered `StandardStream` if and
only if stdout is connected to a tty, and will otherwise return a block
buffered `StandardStream`. Line buffering is important for use with a tty
because it typically decreases the latency at which the end user sees output.
Block buffering is used otherwise because it is faster, and redirecting stdout
to a file typically doesn't benefit from the decreased latency that line
buffering provides.

The `stdout_buffered_block` and `stdout_buffered_line` routines can be used to
explicitly set the buffering strategy regardless of whether stdout is connected
to a tty or not.


# Escaping

The
[`escape`](fn.escape.html),
[`escape_os`](fn.escape_os.html),
[`unescape`](fn.unescape.html)
and
[`unescape_os`](fn.unescape_os.html)
routines provide a user friendly way of dealing with UTF-8 encoded strings that
can express arbitrary bytes. For example, you might want to accept a string
containing arbitrary bytes as a command line argument, but most interactive
shells make such strings difficult to type.
Instead, we can ask users to use escape sequences.

For example, `a\xFFz` is itself a valid UTF-8 string corresponding to the
following bytes:

```ignore
[b'a', b'\\', b'x', b'F', b'F', b'z']
```

However, we can
interpret `\xFF` as an escape sequence with the `unescape`/`unescape_os`
routines, which will yield

```ignore
[b'a', b'\xFF', b'z']
```

instead. For example:

```
use grep_cli::unescape;

// Note the use of a raw string!
assert_eq!(vec![b'a', b'\xFF', b'z'], unescape(r"a\xFFz"));
```

The `escape`/`escape_os` routines provide the reverse transformation, which
makes it easy to show user friendly error messages involving arbitrary bytes.


# Building patterns

Typically, regular expression patterns must be valid UTF-8. However, command
line arguments aren't guaranteed to be valid UTF-8. Unfortunately, the
standard library's UTF-8 conversion functions from `OsStr`s do not provide
good error messages. However, the
[`pattern_from_bytes`](fn.pattern_from_bytes.html)
and
[`pattern_from_os`](fn.pattern_from_os.html)
routines do, including reporting exactly where the first invalid UTF-8 byte is
seen.

Additionally, it can be useful to read patterns from a file while reporting
good error messages that include line numbers. The
[`patterns_from_path`](fn.patterns_from_path.html),
[`patterns_from_reader`](fn.patterns_from_reader.html)
and
[`patterns_from_stdin`](fn.patterns_from_stdin.html)
routines do just that. If any pattern is found that is invalid UTF-8, then the
error includes the file path (if available) along with the line number and the
byte offset at which the first invalid UTF-8 byte was observed.


# Read process output

Sometimes a command line application needs to execute other processes and read
their stdout in a streaming fashion. The
[`CommandReader`](struct.CommandReader.html)
provides this functionality with an explicit goal of improving failure modes.
In particular, if the process exits with an error code, then stderr is read
and converted into a normal Rust error to show to end users. This makes the
underlying failure modes explicit and gives more information to end users for
debugging the problem.

As a special case,
[`DecompressionReader`](struct.DecompressionReader.html)
provides a way to decompress arbitrary files by matching their file extensions
up with corresponding decompression programs (such as `gzip` and `xz`). This
is useful as a means of performing simplistic decompression in a portable
manner without binding to specific compression libraries. This does come with
some overhead though, so if you need to decompress lots of small files, this
may not be an appropriate convenience to use.

Each reader has a corresponding builder for additional configuration, such as
whether to read stderr asynchronously in order to avoid deadlock (see each
builder's `async_stderr` option for its default).


# Miscellaneous parsing

The
[`parse_human_readable_size`](fn.parse_human_readable_size.html)
routine parses strings like `2M` and converts them to the corresponding number
of bytes (`2 * 1<<20` in this case). If an invalid size is found, then a good
error message is crafted that typically tells the user how to fix the problem.
*/

#![deny(missing_docs)]

mod decompress;
mod escape;
mod human;
mod pattern;
mod process;
mod wtr;

pub use crate::decompress::{
    resolve_binary, DecompressionMatcher, DecompressionMatcherBuilder,
    DecompressionReader, DecompressionReaderBuilder,
};
pub use crate::escape::{escape, escape_os, unescape, unescape_os};
pub use crate::human::{parse_human_readable_size, ParseSizeError};
pub use crate::pattern::{
    pattern_from_bytes, pattern_from_os, patterns_from_path,
    patterns_from_reader, patterns_from_stdin, InvalidPatternError,
};
pub use crate::process::{CommandError, CommandReader, CommandReaderBuilder};
pub use crate::wtr::{
    stdout, stdout_buffered_block, stdout_buffered_line, StandardStream,
};

/// Returns true if and only if stdin is believed to be readable.
///
/// When stdin is readable, command line programs may choose to behave
/// differently than when stdin is not readable. For example, `command foo`
/// might search the current directory for occurrences of `foo` whereas
/// `command foo < some-file` or `cat some-file | command foo` might instead
/// only search stdin for occurrences of `foo`.
///
/// Stdin is never considered readable when it is attached to a tty. On
/// targets that are neither Unix nor Windows this always returns `false`.
pub fn is_readable_stdin() -> bool {
    // Unix: stdin is readable when it is a regular file, a FIFO or a socket.
    // Any failure to query the handle is treated as "not readable".
    #[cfg(unix)]
    fn imp() -> bool {
        use same_file::Handle;
        use std::os::unix::fs::FileTypeExt;

        let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) {
            Err(_) => return false,
            Ok(md) => md.file_type(),
        };
        ft.is_file() || ft.is_fifo() || ft.is_socket()
    }

    // Windows: stdin is readable when the handle is a disk file or a pipe.
    #[cfg(windows)]
    fn imp() -> bool {
        use winapi_util as winutil;

        winutil::file::typ(winutil::HandleRef::stdin())
            .map(|t| t.is_disk() || t.is_pipe())
            .unwrap_or(false)
    }

    // Everything else: there is no file type probe available here, so
    // conservatively report stdin as not readable instead of failing to
    // compile because `imp` is undefined.
    #[cfg(not(any(unix, windows)))]
    fn imp() -> bool {
        false
    }

    !is_tty_stdin() && imp()
}

/// Returns true if and only if stdin is believed to be connected to a tty
/// or a console.
pub fn is_tty_stdin() -> bool {
    atty::is(atty::Stream::Stdin)
}

/// Returns true if and only if stdout is believed to be connected to a tty
/// or a console.
///
/// This is useful for when you want your command line program to produce
/// different output depending on whether it's printing directly to a user's
/// terminal or whether it's being redirected somewhere else. For example,
/// implementations of `ls` will often show one item per line when stdout is
/// redirected, but will condense output when printing to a tty.
pub fn is_tty_stdout() -> bool {
    atty::is(atty::Stream::Stdout)
}

/// Returns true if and only if stderr is believed to be connected to a tty
/// or a console.
pub fn is_tty_stderr() -> bool {
    atty::is(atty::Stream::Stderr)
}
// grep-cli-0.1.6/src/pattern.rs000064400000000000000000000134100072674642500141730ustar 00000000000000
use std::error;
use std::ffi::OsStr;
use std::fmt;
use std::fs::File;
use std::io;
use std::path::Path;
use std::str;

use bstr::io::BufReadExt;

use crate::escape::{escape, escape_os};

/// An error that occurs when a pattern could not be converted to valid UTF-8.
///
/// The purpose of this error is to give a more targeted failure mode for
/// patterns written by end users that are not valid UTF-8.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InvalidPatternError {
    /// The offending pattern, escaped so that it can be printed.
    original: String,
    /// Byte offset up to which the pattern was verified as valid UTF-8.
    valid_up_to: usize,
}

impl InvalidPatternError {
    /// Returns the index in the given string up to which valid UTF-8 was
    /// verified.
    pub fn valid_up_to(&self) -> usize {
        self.valid_up_to
    }
}

impl error::Error for InvalidPatternError {
    fn description(&self) -> &str {
        "invalid pattern"
    }
}

impl fmt::Display for InvalidPatternError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "found invalid UTF-8 in pattern at byte offset {}: {} \
             (disable Unicode mode and use hex escape sequences to match \
             arbitrary bytes in a pattern, e.g., '(?-u)\\xFF')",
            self.valid_up_to, self.original,
        )
    }
}

/// Wraps the pattern error in an `io::Error` of kind `Other`, so that it can
/// be propagated with `?` from functions returning `io::Result`.
impl From<InvalidPatternError> for io::Error {
    fn from(paterr: InvalidPatternError) -> io::Error {
        io::Error::new(io::ErrorKind::Other, paterr)
    }
}

/// Convert an OS string into a regular expression pattern.
/// /// This conversion fails if the given pattern is not valid UTF-8, in which /// case, a targeted error with more information about where the invalid UTF-8 /// occurs is given. The error also suggests the use of hex escape sequences, /// which are supported by many regex engines. pub fn pattern_from_os(pattern: &OsStr) -> Result<&str, InvalidPatternError> { pattern.to_str().ok_or_else(|| { let valid_up_to = pattern .to_string_lossy() .find('\u{FFFD}') .expect("a Unicode replacement codepoint for invalid UTF-8"); InvalidPatternError { original: escape_os(pattern), valid_up_to } }) } /// Convert arbitrary bytes into a regular expression pattern. /// /// This conversion fails if the given pattern is not valid UTF-8, in which /// case, a targeted error with more information about where the invalid UTF-8 /// occurs is given. The error also suggests the use of hex escape sequences, /// which are supported by many regex engines. pub fn pattern_from_bytes( pattern: &[u8], ) -> Result<&str, InvalidPatternError> { str::from_utf8(pattern).map_err(|err| InvalidPatternError { original: escape(pattern), valid_up_to: err.valid_up_to(), }) } /// Read patterns from a file path, one per line. /// /// If there was a problem reading or if any of the patterns contain invalid /// UTF-8, then an error is returned. If there was a problem with a specific /// pattern, then the error message will include the line number and the file /// path. pub fn patterns_from_path>(path: P) -> io::Result> { let path = path.as_ref(); let file = File::open(path).map_err(|err| { io::Error::new( io::ErrorKind::Other, format!("{}: {}", path.display(), err), ) })?; patterns_from_reader(file).map_err(|err| { io::Error::new( io::ErrorKind::Other, format!("{}:{}", path.display(), err), ) }) } /// Read patterns from stdin, one per line. /// /// If there was a problem reading or if any of the patterns contain invalid /// UTF-8, then an error is returned. 
If there was a problem with a specific /// pattern, then the error message will include the line number and the fact /// that it came from stdin. pub fn patterns_from_stdin() -> io::Result> { let stdin = io::stdin(); let locked = stdin.lock(); patterns_from_reader(locked).map_err(|err| { io::Error::new(io::ErrorKind::Other, format!(":{}", err)) }) } /// Read patterns from any reader, one per line. /// /// If there was a problem reading or if any of the patterns contain invalid /// UTF-8, then an error is returned. If there was a problem with a specific /// pattern, then the error message will include the line number. /// /// Note that this routine uses its own internal buffer, so the caller should /// not provide their own buffered reader if possible. /// /// # Example /// /// This shows how to parse patterns, one per line. /// /// ``` /// use grep_cli::patterns_from_reader; /// /// # fn example() -> Result<(), Box<::std::error::Error>> { /// let patterns = "\ /// foo /// bar\\s+foo /// [a-z]{3} /// "; /// /// assert_eq!(patterns_from_reader(patterns.as_bytes())?, vec![ /// r"foo", /// r"bar\s+foo", /// r"[a-z]{3}", /// ]); /// # Ok(()) } /// ``` pub fn patterns_from_reader(rdr: R) -> io::Result> { let mut patterns = vec![]; let mut line_number = 0; io::BufReader::new(rdr).for_byte_line(|line| { line_number += 1; match pattern_from_bytes(line) { Ok(pattern) => { patterns.push(pattern.to_string()); Ok(true) } Err(err) => Err(io::Error::new( io::ErrorKind::Other, format!("{}: {}", line_number, err), )), } })?; Ok(patterns) } #[cfg(test)] mod tests { use super::*; #[test] fn bytes() { let pat = b"abc\xFFxyz"; let err = pattern_from_bytes(pat).unwrap_err(); assert_eq!(3, err.valid_up_to()); } #[test] #[cfg(unix)] fn os() { use std::ffi::OsStr; use std::os::unix::ffi::OsStrExt; let pat = OsStr::from_bytes(b"abc\xFFxyz"); let err = pattern_from_os(pat).unwrap_err(); assert_eq!(3, err.valid_up_to()); } } 
grep-cli-0.1.6/src/process.rs000064400000000000000000000261400072674642500142000ustar 00000000000000use std::error; use std::fmt; use std::io::{self, Read}; use std::iter; use std::process; use std::thread::{self, JoinHandle}; /// An error that can occur while running a command and reading its output. /// /// This error can be seamlessly converted to an `io::Error` via a `From` /// implementation. #[derive(Debug)] pub struct CommandError { kind: CommandErrorKind, } #[derive(Debug)] enum CommandErrorKind { Io(io::Error), Stderr(Vec), } impl CommandError { /// Create an error from an I/O error. pub(crate) fn io(ioerr: io::Error) -> CommandError { CommandError { kind: CommandErrorKind::Io(ioerr) } } /// Create an error from the contents of stderr (which may be empty). pub(crate) fn stderr(bytes: Vec) -> CommandError { CommandError { kind: CommandErrorKind::Stderr(bytes) } } /// Returns true if and only if this error has empty data from stderr. pub(crate) fn is_empty(&self) -> bool { match self.kind { CommandErrorKind::Stderr(ref bytes) => bytes.is_empty(), _ => false, } } } impl error::Error for CommandError { fn description(&self) -> &str { "command error" } } impl fmt::Display for CommandError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.kind { CommandErrorKind::Io(ref e) => e.fmt(f), CommandErrorKind::Stderr(ref bytes) => { let msg = String::from_utf8_lossy(bytes); if msg.trim().is_empty() { write!(f, "") } else { let div = iter::repeat('-').take(79).collect::(); write!( f, "\n{div}\n{msg}\n{div}", div = div, msg = msg.trim() ) } } } } } impl From for CommandError { fn from(ioerr: io::Error) -> CommandError { CommandError { kind: CommandErrorKind::Io(ioerr) } } } impl From for io::Error { fn from(cmderr: CommandError) -> io::Error { match cmderr.kind { CommandErrorKind::Io(ioerr) => ioerr, CommandErrorKind::Stderr(_) => { io::Error::new(io::ErrorKind::Other, cmderr) } } } } /// Configures and builds a streaming reader for process output. 
#[derive(Clone, Debug, Default)] pub struct CommandReaderBuilder { async_stderr: bool, } impl CommandReaderBuilder { /// Create a new builder with the default configuration. pub fn new() -> CommandReaderBuilder { CommandReaderBuilder::default() } /// Build a new streaming reader for the given command's output. /// /// The caller should set everything that's required on the given command /// before building a reader, such as its arguments, environment and /// current working directory. Settings such as the stdout and stderr (but /// not stdin) pipes will be overridden so that they can be controlled by /// the reader. /// /// If there was a problem spawning the given command, then its error is /// returned. pub fn build( &self, command: &mut process::Command, ) -> Result { let mut child = command .stdout(process::Stdio::piped()) .stderr(process::Stdio::piped()) .spawn()?; let stderr = if self.async_stderr { StderrReader::r#async(child.stderr.take().unwrap()) } else { StderrReader::sync(child.stderr.take().unwrap()) }; Ok(CommandReader { child, stderr, eof: false }) } /// When enabled, the reader will asynchronously read the contents of the /// command's stderr output. When disabled, stderr is only read after the /// stdout stream has been exhausted (or if the process quits with an error /// code). /// /// Note that when enabled, this may require launching an additional /// thread in order to read stderr. This is done so that the process being /// executed is never blocked from writing to stdout or stderr. If this is /// disabled, then it is possible for the process to fill up the stderr /// buffer and deadlock. /// /// This is enabled by default. pub fn async_stderr(&mut self, yes: bool) -> &mut CommandReaderBuilder { self.async_stderr = yes; self } } /// A streaming reader for a command's output. 
/// /// The purpose of this reader is to provide an easy way to execute processes /// whose stdout is read in a streaming way while also making the processes' /// stderr available when the process fails with an exit code. This makes it /// possible to execute processes while surfacing the underlying failure mode /// in the case of an error. /// /// Moreover, by default, this reader will asynchronously read the processes' /// stderr. This prevents subtle deadlocking bugs for noisy processes that /// write a lot to stderr. Currently, the entire contents of stderr is read /// on to the heap. /// /// # Example /// /// This example shows how to invoke `gzip` to decompress the contents of a /// file. If the `gzip` command reports a failing exit status, then its stderr /// is returned as an error. /// /// ```no_run /// use std::io::Read; /// use std::process::Command; /// use grep_cli::CommandReader; /// /// # fn example() -> Result<(), Box<::std::error::Error>> { /// let mut cmd = Command::new("gzip"); /// cmd.arg("-d").arg("-c").arg("/usr/share/man/man1/ls.1.gz"); /// /// let mut rdr = CommandReader::new(&mut cmd)?; /// let mut contents = vec![]; /// rdr.read_to_end(&mut contents)?; /// # Ok(()) } /// ``` #[derive(Debug)] pub struct CommandReader { child: process::Child, stderr: StderrReader, /// This is set to true once 'read' returns zero bytes. When this isn't /// set and we close the reader, then we anticipate a pipe error when /// reaping the child process and silence it. eof: bool, } impl CommandReader { /// Create a new streaming reader for the given command using the default /// configuration. /// /// The caller should set everything that's required on the given command /// before building a reader, such as its arguments, environment and /// current working directory. Settings such as the stdout and stderr (but /// not stdin) pipes will be overridden so that they can be controlled by /// the reader. 
/// /// If there was a problem spawning the given command, then its error is /// returned. /// /// If the caller requires additional configuration for the reader /// returned, then use /// [`CommandReaderBuilder`](struct.CommandReaderBuilder.html). pub fn new( cmd: &mut process::Command, ) -> Result { CommandReaderBuilder::new().build(cmd) } /// Closes the CommandReader, freeing any resources used by its underlying /// child process. If the child process exits with a nonzero exit code, the /// returned Err value will include its stderr. /// /// `close` is idempotent, meaning it can be safely called multiple times. /// The first call closes the CommandReader and any subsequent calls do /// nothing. /// /// This method should be called after partially reading a file to prevent /// resource leakage. However there is no need to call `close` explicitly /// if your code always calls `read` to EOF, as `read` takes care of /// calling `close` in this case. /// /// `close` is also called in `drop` as a last line of defense against /// resource leakage. Any error from the child process is then printed as a /// warning to stderr. This can be avoided by explictly calling `close` /// before the CommandReader is dropped. pub fn close(&mut self) -> io::Result<()> { // Dropping stdout closes the underlying file descriptor, which should // cause a well-behaved child process to exit. If child.stdout is None // we assume that close() has already been called and do nothing. let stdout = match self.child.stdout.take() { None => return Ok(()), Some(stdout) => stdout, }; drop(stdout); if self.child.wait()?.success() { Ok(()) } else { let err = self.stderr.read_to_end(); // In the specific case where we haven't consumed the full data // from the child process, then closing stdout above results in // a pipe signal being thrown in most cases. But I don't think // there is any reliable and portable way of detecting it. 
Instead, // if we know we haven't hit EOF (so we anticipate a broken pipe // error) and if stderr otherwise doesn't have anything on it, then // we assume total success. if !self.eof && err.is_empty() { return Ok(()); } Err(io::Error::from(err)) } } } impl Drop for CommandReader { fn drop(&mut self) { if let Err(error) = self.close() { log::warn!("{}", error); } } } impl io::Read for CommandReader { fn read(&mut self, buf: &mut [u8]) -> io::Result { let stdout = match self.child.stdout { None => return Ok(0), Some(ref mut stdout) => stdout, }; let nread = stdout.read(buf)?; if nread == 0 { self.eof = true; self.close().map(|_| 0) } else { Ok(nread) } } } /// A reader that encapsulates the asynchronous or synchronous reading of /// stderr. #[derive(Debug)] enum StderrReader { Async(Option>), Sync(process::ChildStderr), } impl StderrReader { /// Create a reader for stderr that reads contents asynchronously. fn r#async(mut stderr: process::ChildStderr) -> StderrReader { let handle = thread::spawn(move || stderr_to_command_error(&mut stderr)); StderrReader::Async(Some(handle)) } /// Create a reader for stderr that reads contents synchronously. fn sync(stderr: process::ChildStderr) -> StderrReader { StderrReader::Sync(stderr) } /// Consumes all of stderr on to the heap and returns it as an error. /// /// If there was a problem reading stderr itself, then this returns an I/O /// command error. 
fn read_to_end(&mut self) -> CommandError { match *self { StderrReader::Async(ref mut handle) => { let handle = handle .take() .expect("read_to_end cannot be called more than once"); handle.join().expect("stderr reading thread does not panic") } StderrReader::Sync(ref mut stderr) => { stderr_to_command_error(stderr) } } } } fn stderr_to_command_error(stderr: &mut process::ChildStderr) -> CommandError { let mut bytes = vec![]; match stderr.read_to_end(&mut bytes) { Ok(_) => CommandError::stderr(bytes), Err(err) => CommandError::io(err), } } grep-cli-0.1.6/src/wtr.rs000064400000000000000000000103210072674642500133300ustar 00000000000000use std::io; use termcolor; use crate::is_tty_stdout; /// A writer that supports coloring with either line or block buffering. pub struct StandardStream(StandardStreamKind); /// Returns a possibly buffered writer to stdout for the given color choice. /// /// The writer returned is either line buffered or block buffered. The decision /// between these two is made automatically based on whether a tty is attached /// to stdout or not. If a tty is attached, then line buffering is used. /// Otherwise, block buffering is used. In general, block buffering is more /// efficient, but may increase the time it takes for the end user to see the /// first bits of output. /// /// If you need more fine grained control over the buffering mode, then use one /// of `stdout_buffered_line` or `stdout_buffered_block`. /// /// The color choice given is passed along to the underlying writer. To /// completely disable colors in all cases, use `ColorChoice::Never`. pub fn stdout(color_choice: termcolor::ColorChoice) -> StandardStream { if is_tty_stdout() { stdout_buffered_line(color_choice) } else { stdout_buffered_block(color_choice) } } /// Returns a line buffered writer to stdout for the given color choice. /// /// This writer is useful when printing results directly to a tty such that /// users see output as soon as it's written. 
The downside of this approach /// is that it can be slower, especially when there is a lot of output. /// /// You might consider using /// [`stdout`](fn.stdout.html) /// instead, which chooses the buffering strategy automatically based on /// whether stdout is connected to a tty. pub fn stdout_buffered_line( color_choice: termcolor::ColorChoice, ) -> StandardStream { let out = termcolor::StandardStream::stdout(color_choice); StandardStream(StandardStreamKind::LineBuffered(out)) } /// Returns a block buffered writer to stdout for the given color choice. /// /// This writer is useful when printing results to a file since it amortizes /// the cost of writing data. The downside of this approach is that it can /// increase the latency of display output when writing to a tty. /// /// You might consider using /// [`stdout`](fn.stdout.html) /// instead, which chooses the buffering strategy automatically based on /// whether stdout is connected to a tty. pub fn stdout_buffered_block( color_choice: termcolor::ColorChoice, ) -> StandardStream { let out = termcolor::BufferedStandardStream::stdout(color_choice); StandardStream(StandardStreamKind::BlockBuffered(out)) } enum StandardStreamKind { LineBuffered(termcolor::StandardStream), BlockBuffered(termcolor::BufferedStandardStream), } impl io::Write for StandardStream { #[inline] fn write(&mut self, buf: &[u8]) -> io::Result { use self::StandardStreamKind::*; match self.0 { LineBuffered(ref mut w) => w.write(buf), BlockBuffered(ref mut w) => w.write(buf), } } #[inline] fn flush(&mut self) -> io::Result<()> { use self::StandardStreamKind::*; match self.0 { LineBuffered(ref mut w) => w.flush(), BlockBuffered(ref mut w) => w.flush(), } } } impl termcolor::WriteColor for StandardStream { #[inline] fn supports_color(&self) -> bool { use self::StandardStreamKind::*; match self.0 { LineBuffered(ref w) => w.supports_color(), BlockBuffered(ref w) => w.supports_color(), } } #[inline] fn set_color(&mut self, spec: &termcolor::ColorSpec) 
-> io::Result<()> { use self::StandardStreamKind::*; match self.0 { LineBuffered(ref mut w) => w.set_color(spec), BlockBuffered(ref mut w) => w.set_color(spec), } } #[inline] fn reset(&mut self) -> io::Result<()> { use self::StandardStreamKind::*; match self.0 { LineBuffered(ref mut w) => w.reset(), BlockBuffered(ref mut w) => w.reset(), } } #[inline] fn is_synchronous(&self) -> bool { use self::StandardStreamKind::*; match self.0 { LineBuffered(ref w) => w.is_synchronous(), BlockBuffered(ref w) => w.is_synchronous(), } } }