grep-cli-0.1.11/.cargo_vcs_info.json0000644000000001500000000000100126120ustar { "git": { "sha1": "7943bdfe82182de1bf6d05cbd9cd87d2c32a3130" }, "path_in_vcs": "crates/cli" }grep-cli-0.1.11/Cargo.toml0000644000000026230000000000100106170ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "grep-cli" version = "0.1.11" authors = ["Andrew Gallant "] build = false autobins = false autoexamples = false autotests = false autobenches = false description = """ Utilities for search oriented command line applications. """ homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli" documentation = "https://docs.rs/grep-cli" readme = "README.md" keywords = [ "regex", "grep", "cli", "utility", "util", ] license = "Unlicense OR MIT" repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli" [lib] name = "grep_cli" path = "src/lib.rs" [dependencies.bstr] version = "1.6.2" features = ["std"] [dependencies.globset] version = "0.4.15" [dependencies.log] version = "0.4.20" [dependencies.termcolor] version = "1.3.0" [target."cfg(unix)".dependencies.libc] version = "0.2.148" [target."cfg(windows)".dependencies.winapi-util] version = "0.1.6" grep-cli-0.1.11/Cargo.toml.orig000064400000000000000000000014171046102023000143000ustar 00000000000000[package] name = "grep-cli" version = "0.1.11" #:version authors = ["Andrew Gallant "] description = """ Utilities for search oriented command line applications. 
""" documentation = "https://docs.rs/grep-cli" homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli" repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli" readme = "README.md" keywords = ["regex", "grep", "cli", "utility", "util"] license = "Unlicense OR MIT" edition = "2021" [dependencies] bstr = { version = "1.6.2", features = ["std"] } globset = { version = "0.4.15", path = "../globset" } log = "0.4.20" termcolor = "1.3.0" [target.'cfg(windows)'.dependencies.winapi-util] version = "0.1.6" [target.'cfg(unix)'.dependencies.libc] version = "0.2.148" grep-cli-0.1.11/LICENSE-MIT000064400000000000000000000020711046102023000130420ustar 00000000000000The MIT License (MIT) Copyright (c) 2015 Andrew Gallant Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. grep-cli-0.1.11/README.md000064400000000000000000000016301046102023000126650ustar 00000000000000grep-cli -------- A utility library that provides common routines desired in search oriented command line applications. 
This includes, but is not limited to, parsing hex escapes, detecting whether stdin is readable and more. To the extent possible, this crate strives for compatibility across Windows, macOS and Linux. [![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions) [![](https://img.shields.io/crates/v/grep-cli.svg)](https://crates.io/crates/grep-cli) Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/). ### Documentation [https://docs.rs/grep-cli](https://docs.rs/grep-cli) **NOTE:** You probably don't want to use this crate directly. Instead, you should prefer the facade defined in the [`grep`](https://docs.rs/grep) crate. ### Usage Add this to your `Cargo.toml`: ```toml [dependencies] grep-cli = "0.1" ``` grep-cli-0.1.11/UNLICENSE000064400000000000000000000022731046102023000126620ustar 00000000000000This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to grep-cli-0.1.11/src/decompress.rs000064400000000000000000000472311046102023000147160ustar 00000000000000use std::{ ffi::{OsStr, OsString}, fs::File, io, path::{Path, PathBuf}, process::Command, }; use globset::{Glob, GlobSet, GlobSetBuilder}; use crate::process::{CommandError, CommandReader, CommandReaderBuilder}; /// A builder for a matcher that determines which files get decompressed. #[derive(Clone, Debug)] pub struct DecompressionMatcherBuilder { /// The commands for each matching glob. commands: Vec, /// Whether to include the default matching rules. defaults: bool, } /// A representation of a single command for decompressing data /// out-of-process. #[derive(Clone, Debug)] struct DecompressionCommand { /// The glob that matches this command. glob: String, /// The command or binary name. bin: PathBuf, /// The arguments to invoke with the command. args: Vec, } impl Default for DecompressionMatcherBuilder { fn default() -> DecompressionMatcherBuilder { DecompressionMatcherBuilder::new() } } impl DecompressionMatcherBuilder { /// Create a new builder for configuring a decompression matcher. pub fn new() -> DecompressionMatcherBuilder { DecompressionMatcherBuilder { commands: vec![], defaults: true } } /// Build a matcher for determining how to decompress files. /// /// If there was a problem compiling the matcher, then an error is /// returned. 
pub fn build(&self) -> Result { let defaults = if !self.defaults { vec![] } else { default_decompression_commands() }; let mut glob_builder = GlobSetBuilder::new(); let mut commands = vec![]; for decomp_cmd in defaults.iter().chain(&self.commands) { let glob = Glob::new(&decomp_cmd.glob).map_err(|err| { CommandError::io(io::Error::new(io::ErrorKind::Other, err)) })?; glob_builder.add(glob); commands.push(decomp_cmd.clone()); } let globs = glob_builder.build().map_err(|err| { CommandError::io(io::Error::new(io::ErrorKind::Other, err)) })?; Ok(DecompressionMatcher { globs, commands }) } /// When enabled, the default matching rules will be compiled into this /// matcher before any other associations. When disabled, only the /// rules explicitly given to this builder will be used. /// /// This is enabled by default. pub fn defaults(&mut self, yes: bool) -> &mut DecompressionMatcherBuilder { self.defaults = yes; self } /// Associates a glob with a command to decompress files matching the glob. /// /// If multiple globs match the same file, then the most recently added /// glob takes precedence. /// /// The syntax for the glob is documented in the /// [`globset` crate](https://docs.rs/globset/#syntax). /// /// The `program` given is resolved with respect to `PATH` and turned /// into an absolute path internally before being executed by the current /// platform. Notably, on Windows, this avoids a security problem where /// passing a relative path to `CreateProcess` will automatically search /// the current directory for a matching program. If the program could /// not be resolved, then it is silently ignored and the association is /// dropped. For this reason, callers should prefer `try_associate`. pub fn associate( &mut self, glob: &str, program: P, args: I, ) -> &mut DecompressionMatcherBuilder where P: AsRef, I: IntoIterator, A: AsRef, { let _ = self.try_associate(glob, program, args); self } /// Associates a glob with a command to decompress files matching the glob. 
/// /// If multiple globs match the same file, then the most recently added /// glob takes precedence. /// /// The syntax for the glob is documented in the /// [`globset` crate](https://docs.rs/globset/#syntax). /// /// The `program` given is resolved with respect to `PATH` and turned /// into an absolute path internally before being executed by the current /// platform. Notably, on Windows, this avoids a security problem where /// passing a relative path to `CreateProcess` will automatically search /// the current directory for a matching program. If the program could not /// be resolved, then an error is returned. pub fn try_associate( &mut self, glob: &str, program: P, args: I, ) -> Result<&mut DecompressionMatcherBuilder, CommandError> where P: AsRef, I: IntoIterator, A: AsRef, { let glob = glob.to_string(); let bin = try_resolve_binary(Path::new(program.as_ref()))?; let args = args.into_iter().map(|a| a.as_ref().to_os_string()).collect(); self.commands.push(DecompressionCommand { glob, bin, args }); Ok(self) } } /// A matcher for determining how to decompress files. #[derive(Clone, Debug)] pub struct DecompressionMatcher { /// The set of globs to match. Each glob has a corresponding entry in /// `commands`. When a glob matches, the corresponding command should be /// used to perform out-of-process decompression. globs: GlobSet, /// The commands for each matching glob. commands: Vec, } impl Default for DecompressionMatcher { fn default() -> DecompressionMatcher { DecompressionMatcher::new() } } impl DecompressionMatcher { /// Create a new matcher with default rules. /// /// To add more matching rules, build a matcher with /// [`DecompressionMatcherBuilder`]. pub fn new() -> DecompressionMatcher { DecompressionMatcherBuilder::new() .build() .expect("built-in matching rules should always compile") } /// Return a pre-built command based on the given file path that can /// decompress its contents. If no such decompressor is known, then this /// returns `None`. 
/// /// If there are multiple possible commands matching the given path, then /// the command added last takes precedence. pub fn command>(&self, path: P) -> Option { for i in self.globs.matches(path).into_iter().rev() { let decomp_cmd = &self.commands[i]; let mut cmd = Command::new(&decomp_cmd.bin); cmd.args(&decomp_cmd.args); return Some(cmd); } None } /// Returns true if and only if the given file path has at least one /// matching command to perform decompression on. pub fn has_command>(&self, path: P) -> bool { self.globs.is_match(path) } } /// Configures and builds a streaming reader for decompressing data. #[derive(Clone, Debug, Default)] pub struct DecompressionReaderBuilder { matcher: DecompressionMatcher, command_builder: CommandReaderBuilder, } impl DecompressionReaderBuilder { /// Create a new builder with the default configuration. pub fn new() -> DecompressionReaderBuilder { DecompressionReaderBuilder::default() } /// Build a new streaming reader for decompressing data. /// /// If decompression is done out-of-process and if there was a problem /// spawning the process, then its error is logged at the debug level and a /// passthru reader is returned that does no decompression. This behavior /// typically occurs when the given file path matches a decompression /// command, but is executing in an environment where the decompression /// command is not available. /// /// If the given file path could not be matched with a decompression /// strategy, then a passthru reader is returned that does no /// decompression. 
pub fn build>( &self, path: P, ) -> Result { let path = path.as_ref(); let Some(mut cmd) = self.matcher.command(path) else { return DecompressionReader::new_passthru(path); }; cmd.arg(path); match self.command_builder.build(&mut cmd) { Ok(cmd_reader) => Ok(DecompressionReader { rdr: Ok(cmd_reader) }), Err(err) => { log::debug!( "{}: error spawning command '{:?}': {} \ (falling back to uncompressed reader)", path.display(), cmd, err, ); DecompressionReader::new_passthru(path) } } } /// Set the matcher to use to look up the decompression command for each /// file path. /// /// A set of sensible rules is enabled by default. Setting this will /// completely replace the current rules. pub fn matcher( &mut self, matcher: DecompressionMatcher, ) -> &mut DecompressionReaderBuilder { self.matcher = matcher; self } /// Get the underlying matcher currently used by this builder. pub fn get_matcher(&self) -> &DecompressionMatcher { &self.matcher } /// When enabled, the reader will asynchronously read the contents of the /// command's stderr output. When disabled, stderr is only read after the /// stdout stream has been exhausted (or if the process quits with an error /// code). /// /// Note that when enabled, this may require launching an additional /// thread in order to read stderr. This is done so that the process being /// executed is never blocked from writing to stdout or stderr. If this is /// disabled, then it is possible for the process to fill up the stderr /// buffer and deadlock. /// /// This is enabled by default. pub fn async_stderr( &mut self, yes: bool, ) -> &mut DecompressionReaderBuilder { self.command_builder.async_stderr(yes); self } } /// A streaming reader for decompressing the contents of a file. /// /// The purpose of this reader is to provide a seamless way to decompress the /// contents of file using existing tools in the current environment. 
This is /// meant to be an alternative to using decompression libraries in favor of the /// simplicity and portability of using external commands such as `gzip` and /// `xz`. This does impose the overhead of spawning a process, so other means /// for performing decompression should be sought if this overhead isn't /// acceptable. /// /// A decompression reader comes with a default set of matching rules that are /// meant to associate file paths with the corresponding command to use to /// decompress them. For example, a glob like `*.gz` matches gzip compressed /// files with the command `gzip -d -c`. If a file path does not match any /// existing rules, or if it matches a rule whose command does not exist in the /// current environment, then the decompression reader passes through the /// contents of the underlying file without doing any decompression. /// /// The default matching rules are probably good enough for most cases, and if /// they require revision, pull requests are welcome. In cases where they must /// be changed or extended, they can be customized through the use of /// [`DecompressionMatcherBuilder`] and [`DecompressionReaderBuilder`]. /// /// By default, this reader will asynchronously read the processes' stderr. /// This prevents subtle deadlocking bugs for noisy processes that write a lot /// to stderr. Currently, the entire contents of stderr is read on to the heap. /// /// # Example /// /// This example shows how to read the decompressed contents of a file without /// needing to explicitly choose the decompression command to run. /// /// Note that if you need to decompress multiple files, it is better to use /// `DecompressionReaderBuilder`, which will amortize the cost of compiling the /// matcher. 
/// /// ```no_run /// use std::{io::Read, process::Command}; /// /// use grep_cli::DecompressionReader; /// /// let mut rdr = DecompressionReader::new("/usr/share/man/man1/ls.1.gz")?; /// let mut contents = vec![]; /// rdr.read_to_end(&mut contents)?; /// # Ok::<(), Box>(()) /// ``` #[derive(Debug)] pub struct DecompressionReader { rdr: Result, } impl DecompressionReader { /// Build a new streaming reader for decompressing data. /// /// If decompression is done out-of-process and if there was a problem /// spawning the process, then its error is returned. /// /// If the given file path could not be matched with a decompression /// strategy, then a passthru reader is returned that does no /// decompression. /// /// This uses the default matching rules for determining how to decompress /// the given file. To change those matching rules, use /// [`DecompressionReaderBuilder`] and [`DecompressionMatcherBuilder`]. /// /// When creating readers for many paths. it is better to use the builder /// since it will amortize the cost of constructing the matcher. pub fn new>( path: P, ) -> Result { DecompressionReaderBuilder::new().build(path) } /// Creates a new "passthru" decompression reader that reads from the file /// corresponding to the given path without doing decompression and without /// executing another process. fn new_passthru(path: &Path) -> Result { let file = File::open(path)?; Ok(DecompressionReader { rdr: Err(file) }) } /// Closes this reader, freeing any resources used by its underlying child /// process, if one was used. If the child process exits with a nonzero /// exit code, the returned Err value will include its stderr. /// /// `close` is idempotent, meaning it can be safely called multiple times. /// The first call closes the CommandReader and any subsequent calls do /// nothing. /// /// This method should be called after partially reading a file to prevent /// resource leakage. 
However there is no need to call `close` explicitly /// if your code always calls `read` to EOF, as `read` takes care of /// calling `close` in this case. /// /// `close` is also called in `drop` as a last line of defense against /// resource leakage. Any error from the child process is then printed as a /// warning to stderr. This can be avoided by explicitly calling `close` /// before the CommandReader is dropped. pub fn close(&mut self) -> io::Result<()> { match self.rdr { Ok(ref mut rdr) => rdr.close(), Err(_) => Ok(()), } } } impl io::Read for DecompressionReader { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self.rdr { Ok(ref mut rdr) => rdr.read(buf), Err(ref mut rdr) => rdr.read(buf), } } } /// Resolves a path to a program to a path by searching for the program in /// `PATH`. /// /// If the program could not be resolved, then an error is returned. /// /// The purpose of doing this instead of passing the path to the program /// directly to Command::new is that Command::new will hand relative paths /// to CreateProcess on Windows, which will implicitly search the current /// working directory for the executable. This could be undesirable for /// security reasons. e.g., running ripgrep with the -z/--search-zip flag on an /// untrusted directory tree could result in arbitrary programs executing on /// Windows. /// /// Note that this could still return a relative path if PATH contains a /// relative path. We permit this since it is assumed that the user has set /// this explicitly, and thus, desires this behavior. /// /// On non-Windows, this is a no-op. pub fn resolve_binary>( prog: P, ) -> Result { if !cfg!(windows) { return Ok(prog.as_ref().to_path_buf()); } try_resolve_binary(prog) } /// Resolves a path to a program to a path by searching for the program in /// `PATH`. /// /// If the program could not be resolved, then an error is returned. 
/// /// The purpose of doing this instead of passing the path to the program /// directly to Command::new is that Command::new will hand relative paths /// to CreateProcess on Windows, which will implicitly search the current /// working directory for the executable. This could be undesirable for /// security reasons. e.g., running ripgrep with the -z/--search-zip flag on an /// untrusted directory tree could result in arbitrary programs executing on /// Windows. /// /// Note that this could still return a relative path if PATH contains a /// relative path. We permit this since it is assumed that the user has set /// this explicitly, and thus, desires this behavior. /// /// If `check_exists` is false or the path is already an absolute path this /// will return immediately. fn try_resolve_binary>( prog: P, ) -> Result { use std::env; fn is_exe(path: &Path) -> bool { let Ok(md) = path.metadata() else { return false }; !md.is_dir() } let prog = prog.as_ref(); if prog.is_absolute() { return Ok(prog.to_path_buf()); } let Some(syspaths) = env::var_os("PATH") else { let msg = "system PATH environment variable not found"; return Err(CommandError::io(io::Error::new( io::ErrorKind::Other, msg, ))); }; for syspath in env::split_paths(&syspaths) { if syspath.as_os_str().is_empty() { continue; } let abs_prog = syspath.join(prog); if is_exe(&abs_prog) { return Ok(abs_prog.to_path_buf()); } if abs_prog.extension().is_none() { for extension in ["com", "exe"] { let abs_prog = abs_prog.with_extension(extension); if is_exe(&abs_prog) { return Ok(abs_prog.to_path_buf()); } } } } let msg = format!("{}: could not find executable in PATH", prog.display()); return Err(CommandError::io(io::Error::new(io::ErrorKind::Other, msg))); } fn default_decompression_commands() -> Vec { const ARGS_GZIP: &[&str] = &["gzip", "-d", "-c"]; const ARGS_BZIP: &[&str] = &["bzip2", "-d", "-c"]; const ARGS_XZ: &[&str] = &["xz", "-d", "-c"]; const ARGS_LZ4: &[&str] = &["lz4", "-d", "-c"]; const ARGS_LZMA: 
&[&str] = &["xz", "--format=lzma", "-d", "-c"]; const ARGS_BROTLI: &[&str] = &["brotli", "-d", "-c"]; const ARGS_ZSTD: &[&str] = &["zstd", "-q", "-d", "-c"]; const ARGS_UNCOMPRESS: &[&str] = &["uncompress", "-c"]; fn add(glob: &str, args: &[&str], cmds: &mut Vec) { let bin = match resolve_binary(Path::new(args[0])) { Ok(bin) => bin, Err(err) => { log::debug!("{}", err); return; } }; cmds.push(DecompressionCommand { glob: glob.to_string(), bin, args: args .iter() .skip(1) .map(|s| OsStr::new(s).to_os_string()) .collect(), }); } let mut cmds = vec![]; add("*.gz", ARGS_GZIP, &mut cmds); add("*.tgz", ARGS_GZIP, &mut cmds); add("*.bz2", ARGS_BZIP, &mut cmds); add("*.tbz2", ARGS_BZIP, &mut cmds); add("*.xz", ARGS_XZ, &mut cmds); add("*.txz", ARGS_XZ, &mut cmds); add("*.lz4", ARGS_LZ4, &mut cmds); add("*.lzma", ARGS_LZMA, &mut cmds); add("*.br", ARGS_BROTLI, &mut cmds); add("*.zst", ARGS_ZSTD, &mut cmds); add("*.zstd", ARGS_ZSTD, &mut cmds); add("*.Z", ARGS_UNCOMPRESS, &mut cmds); cmds } grep-cli-0.1.11/src/escape.rs000064400000000000000000000102501046102023000140010ustar 00000000000000use std::ffi::OsStr; use bstr::{ByteSlice, ByteVec}; /// Escapes arbitrary bytes into a human readable string. /// /// This converts `\t`, `\r` and `\n` into their escaped forms. It also /// converts the non-printable subset of ASCII in addition to invalid UTF-8 /// bytes to hexadecimal escape sequences. Everything else is left as is. /// /// The dual of this routine is [`unescape`]. /// /// # Example /// /// This example shows how to convert a byte string that contains a `\n` and /// invalid UTF-8 bytes into a `String`. /// /// Pay special attention to the use of raw strings. That is, `r"\n"` is /// equivalent to `"\\n"`. /// /// ``` /// use grep_cli::escape; /// /// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz")); /// ``` pub fn escape(bytes: &[u8]) -> String { bytes.escape_bytes().to_string() } /// Escapes an OS string into a human readable string. 
///
/// This is like [`escape`], but accepts an OS string.
pub fn escape_os(string: &OsStr) -> String {
    escape(Vec::from_os_str_lossy(string).as_bytes())
}

/// Unescapes a string.
///
/// It supports a limited set of escape sequences:
///
/// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes.
/// * `\0` is mapped to the NUL byte.
/// * `\\` is mapped to a single backslash.
/// * `\xZZ` hexadecimal escapes are mapped to their byte.
///
/// Everything else is left as is, including non-hexadecimal escapes like
/// `\xGG`.
///
/// This is useful when it is desirable for a command line argument to be
/// capable of specifying arbitrary bytes or otherwise make it easier to
/// specify non-printable characters.
///
/// The dual of this routine is [`escape`].
///
/// # Example
///
/// This example shows how to convert an escaped string (which is valid UTF-8)
/// into a corresponding sequence of bytes. Each escape sequence is mapped to
/// its bytes, which may include invalid UTF-8.
///
/// Pay special attention to the use of raw strings. That is, `r"\n"` is
/// equivalent to `"\\n"`.
///
/// ```
/// use grep_cli::unescape;
///
/// assert_eq!(&b"foo\nbar\xFFbaz"[..], &*unescape(r"foo\nbar\xFFbaz"));
/// ```
pub fn unescape(s: &str) -> Vec<u8> {
    Vec::unescape_bytes(s)
}

/// Unescapes an OS string.
///
/// This is like [`unescape`], but accepts an OS string.
///
/// Note that this first lossily decodes the given OS string as UTF-8. That
/// is, an escaped string (the thing given) should be valid UTF-8.
pub fn unescape_os(string: &OsStr) -> Vec<u8> {
    unescape(&string.to_string_lossy())
}

#[cfg(test)]
mod tests {
    use super::{escape, unescape};

    fn b(bytes: &'static [u8]) -> Vec<u8> {
        bytes.to_vec()
    }

    #[test]
    fn empty() {
        assert_eq!(b(b""), unescape(r""));
        assert_eq!(r"", escape(b""));
    }

    #[test]
    fn backslash() {
        assert_eq!(b(b"\\"), unescape(r"\\"));
        assert_eq!(r"\\", escape(b"\\"));
    }

    #[test]
    fn nul() {
        assert_eq!(b(b"\x00"), unescape(r"\x00"));
        assert_eq!(b(b"\x00"), unescape(r"\0"));
        assert_eq!(r"\0", escape(b"\x00"));
    }

    #[test]
    fn nl() {
        assert_eq!(b(b"\n"), unescape(r"\n"));
        assert_eq!(r"\n", escape(b"\n"));
    }

    #[test]
    fn tab() {
        assert_eq!(b(b"\t"), unescape(r"\t"));
        assert_eq!(r"\t", escape(b"\t"));
    }

    #[test]
    fn carriage() {
        assert_eq!(b(b"\r"), unescape(r"\r"));
        assert_eq!(r"\r", escape(b"\r"));
    }

    #[test]
    fn nothing_simple() {
        assert_eq!(b(b"\\a"), unescape(r"\a"));
        assert_eq!(b(b"\\a"), unescape(r"\\a"));
        assert_eq!(r"\\a", escape(b"\\a"));
    }

    #[test]
    fn nothing_hex0() {
        assert_eq!(b(b"\\x"), unescape(r"\x"));
        assert_eq!(b(b"\\x"), unescape(r"\\x"));
        assert_eq!(r"\\x", escape(b"\\x"));
    }

    #[test]
    fn nothing_hex1() {
        assert_eq!(b(b"\\xz"), unescape(r"\xz"));
        assert_eq!(b(b"\\xz"), unescape(r"\\xz"));
        assert_eq!(r"\\xz", escape(b"\\xz"));
    }

    #[test]
    fn nothing_hex2() {
        assert_eq!(b(b"\\xzz"), unescape(r"\xzz"));
        assert_eq!(b(b"\\xzz"), unescape(r"\\xzz"));
        assert_eq!(r"\\xzz", escape(b"\\xzz"));
    }

    #[test]
    fn invalid_utf8() {
        assert_eq!(r"\xFF", escape(b"\xFF"));
        assert_eq!(r"a\xFFb", escape(b"a\xFFb"));
    }
}
grep-cli-0.1.11/src/hostname.rs000064400000000000000000000056531046102023000143720ustar 00000000000000
use std::{ffi::OsString, io};

/// Returns the hostname of the current system.
///
/// It is unusual, although technically possible, for this routine to return
/// an error. It is difficult to list out the error conditions, but one such
/// possibility is platform support.
/// /// # Platform specific behavior /// /// On Windows, this currently uses the "physical DNS hostname" computer name. /// This may change in the future. /// /// On Unix, this returns the result of the `gethostname` function from the /// `libc` linked into the program. pub fn hostname() -> io::Result { #[cfg(windows)] { use winapi_util::sysinfo::{get_computer_name, ComputerNameKind}; get_computer_name(ComputerNameKind::PhysicalDnsHostname) } #[cfg(unix)] { gethostname() } #[cfg(not(any(windows, unix)))] { Err(io::Error::new( io::ErrorKind::Other, "hostname could not be found on unsupported platform", )) } } #[cfg(unix)] fn gethostname() -> io::Result { use std::os::unix::ffi::OsStringExt; // SAFETY: There don't appear to be any safety requirements for calling // sysconf. let limit = unsafe { libc::sysconf(libc::_SC_HOST_NAME_MAX) }; if limit == -1 { // It is in theory possible for sysconf to return -1 for a limit but // *not* set errno, in which case, io::Error::last_os_error is // indeterminate. But untangling that is super annoying because std // doesn't expose any unix-specific APIs for inspecting the errno. (We // could do it ourselves, but it just doesn't seem worth doing?) return Err(io::Error::last_os_error()); } let Ok(maxlen) = usize::try_from(limit) else { let msg = format!("host name max limit ({}) overflowed usize", limit); return Err(io::Error::new(io::ErrorKind::Other, msg)); }; // maxlen here includes the NUL terminator. let mut buf = vec![0; maxlen]; // SAFETY: The pointer we give is valid as it is derived directly from a // Vec. Similarly, `maxlen` is the length of our Vec, and is thus valid // to write to. let rc = unsafe { libc::gethostname(buf.as_mut_ptr().cast::(), maxlen) }; if rc == -1 { return Err(io::Error::last_os_error()); } // POSIX says that if the hostname is bigger than `maxlen`, then it may // write a truncate name back that is not necessarily NUL terminated (wtf, // lol). So if we can't find a NUL terminator, then just give up. 
let Some(zeropos) = buf.iter().position(|&b| b == 0) else { let msg = "could not find NUL terminator in hostname"; return Err(io::Error::new(io::ErrorKind::Other, msg)); }; buf.truncate(zeropos); buf.shrink_to_fit(); Ok(OsString::from_vec(buf)) } #[cfg(test)] mod tests { use super::*; #[test] fn print_hostname() { println!("{:?}", hostname().unwrap()); } } grep-cli-0.1.11/src/human.rs000064400000000000000000000102021046102023000136460ustar 00000000000000/// An error that occurs when parsing a human readable size description. /// /// This error provides an end user friendly message describing why the /// description couldn't be parsed and what the expected format is. #[derive(Clone, Debug, Eq, PartialEq)] pub struct ParseSizeError { original: String, kind: ParseSizeErrorKind, } #[derive(Clone, Debug, Eq, PartialEq)] enum ParseSizeErrorKind { InvalidFormat, InvalidInt(std::num::ParseIntError), Overflow, } impl ParseSizeError { fn format(original: &str) -> ParseSizeError { ParseSizeError { original: original.to_string(), kind: ParseSizeErrorKind::InvalidFormat, } } fn int(original: &str, err: std::num::ParseIntError) -> ParseSizeError { ParseSizeError { original: original.to_string(), kind: ParseSizeErrorKind::InvalidInt(err), } } fn overflow(original: &str) -> ParseSizeError { ParseSizeError { original: original.to_string(), kind: ParseSizeErrorKind::Overflow, } } } impl std::error::Error for ParseSizeError {} impl std::fmt::Display for ParseSizeError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use self::ParseSizeErrorKind::*; match self.kind { InvalidFormat => write!( f, "invalid format for size '{}', which should be a non-empty \ sequence of digits followed by an optional 'K', 'M' or 'G' \ suffix", self.original ), InvalidInt(ref err) => write!( f, "invalid integer found in size '{}': {}", self.original, err ), Overflow => write!(f, "size too big in '{}'", self.original), } } } impl From for std::io::Error { fn from(size_err: 
ParseSizeError) -> std::io::Error { std::io::Error::new(std::io::ErrorKind::Other, size_err) } } /// Parse a human readable size like `2M` into a corresponding number of bytes. /// /// Supported size suffixes are `K` (for kilobyte), `M` (for megabyte) and `G` /// (for gigabyte). If a size suffix is missing, then the size is interpreted /// as bytes. If the size is too big to fit into a `u64`, then this returns an /// error. /// /// Additional suffixes may be added over time. pub fn parse_human_readable_size(size: &str) -> Result { let digits_end = size.as_bytes().iter().take_while(|&b| b.is_ascii_digit()).count(); let digits = &size[..digits_end]; if digits.is_empty() { return Err(ParseSizeError::format(size)); } let value = digits.parse::().map_err(|e| ParseSizeError::int(size, e))?; let suffix = &size[digits_end..]; if suffix.is_empty() { return Ok(value); } let bytes = match suffix { "K" => value.checked_mul(1 << 10), "M" => value.checked_mul(1 << 20), "G" => value.checked_mul(1 << 30), _ => return Err(ParseSizeError::format(size)), }; bytes.ok_or_else(|| ParseSizeError::overflow(size)) } #[cfg(test)] mod tests { use super::*; #[test] fn suffix_none() { let x = parse_human_readable_size("123").unwrap(); assert_eq!(123, x); } #[test] fn suffix_k() { let x = parse_human_readable_size("123K").unwrap(); assert_eq!(123 * (1 << 10), x); } #[test] fn suffix_m() { let x = parse_human_readable_size("123M").unwrap(); assert_eq!(123 * (1 << 20), x); } #[test] fn suffix_g() { let x = parse_human_readable_size("123G").unwrap(); assert_eq!(123 * (1 << 30), x); } #[test] fn invalid_empty() { assert!(parse_human_readable_size("").is_err()); } #[test] fn invalid_non_digit() { assert!(parse_human_readable_size("a").is_err()); } #[test] fn invalid_overflow() { assert!(parse_human_readable_size("9999999999999999G").is_err()); } #[test] fn invalid_suffix() { assert!(parse_human_readable_size("123T").is_err()); } } 
grep-cli-0.1.11/src/lib.rs000064400000000000000000000266621046102023000133250ustar 00000000000000/*! This crate provides common routines used in command line applications, with a focus on routines useful for search oriented applications. As a utility library, there is no central type or function. However, a key focus of this crate is to improve failure modes and provide user friendly error messages when things go wrong. To the best extent possible, everything in this crate works on Windows, macOS and Linux. # Standard I/O [`is_readable_stdin`] determines whether stdin can be usefully read from. It is useful when writing an application that changes behavior based on whether the application was invoked with data on stdin. For example, `rg foo` might recursively search the current working directory for occurrences of `foo`, but `rg foo < file` might only search the contents of `file`. # Coloring and buffering The [`stdout`], [`stdout_buffered_block`] and [`stdout_buffered_line`] routines are alternative constructors for [`StandardStream`]. A `StandardStream` implements `termcolor::WriteColor`, which provides a way to emit colors to terminals. Its key use is the encapsulation of buffering style. Namely, `stdout` will return a line buffered `StandardStream` if and only if stdout is connected to a tty, and will otherwise return a block buffered `StandardStream`. Line buffering is important for use with a tty because it typically decreases the latency at which the end user sees output. Block buffering is used otherwise because it is faster, and redirecting stdout to a file typically doesn't benefit from the decreased latency that line buffering provides. The `stdout_buffered_block` and `stdout_buffered_line` can be used to explicitly set the buffering strategy regardless of whether stdout is connected to a tty or not. 
# Escaping The [`escape`](crate::escape()), [`escape_os`], [`unescape`] and [`unescape_os`] routines provide a user friendly way of dealing with UTF-8 encoded strings that can express arbitrary bytes. For example, you might want to accept a string containing arbitrary bytes as a command line argument, but most interactive shells make such strings difficult to type. Instead, we can ask users to use escape sequences. For example, `a\xFFz` is itself a valid UTF-8 string corresponding to the following bytes: ```ignore [b'a', b'\\', b'x', b'F', b'F', b'z'] ``` However, we can interpret `\xFF` as an escape sequence with the `unescape`/`unescape_os` routines, which will yield ```ignore [b'a', b'\xFF', b'z'] ``` instead. For example: ``` use grep_cli::unescape; // Note the use of a raw string! assert_eq!(vec![b'a', b'\xFF', b'z'], unescape(r"a\xFFz")); ``` The `escape`/`escape_os` routines provide the reverse transformation, which makes it easy to show user friendly error messages involving arbitrary bytes. # Building patterns Typically, regular expression patterns must be valid UTF-8. However, command line arguments aren't guaranteed to be valid UTF-8. Unfortunately, the standard library's UTF-8 conversion functions from `OsStr`s do not provide good error messages. However, the [`pattern_from_bytes`] and [`pattern_from_os`] do, including reporting exactly where the first invalid UTF-8 byte is seen. Additionally, it can be useful to read patterns from a file while reporting good error messages that include line numbers. The [`patterns_from_path`], [`patterns_from_reader`] and [`patterns_from_stdin`] routines do just that. If any pattern is found that is invalid UTF-8, then the error includes the file path (if available) along with the line number and the byte offset at which the first invalid UTF-8 byte was observed. # Read process output Sometimes a command line application needs to execute other processes and read its stdout in a streaming fashion. 
The [`CommandReader`] provides this functionality with an explicit goal of improving failure modes. In particular, if the process exits with an error code, then stderr is read and converted into a normal Rust error to show to end users. This makes the underlying failure modes explicit and gives more information to end users for debugging the problem. As a special case, [`DecompressionReader`] provides a way to decompress arbitrary files by matching their file extensions up with corresponding decompression programs (such as `gzip` and `xz`). This is useful as a means of performing simplistic decompression in a portable manner without binding to specific compression libraries. This does come with some overhead though, so if you need to decompress lots of small files, this may not be an appropriate convenience to use. Each reader has a corresponding builder for additional configuration, such as whether to read stderr asynchronously in order to avoid deadlock (which is enabled by default). # Miscellaneous parsing The [`parse_human_readable_size`] routine parses strings like `2M` and converts them to the corresponding number of bytes (`2 * 1<<20` in this case). If an invalid size is found, then a good error message is crafted that typically tells the user how to fix the problem. 
*/ #![deny(missing_docs)] mod decompress; mod escape; mod hostname; mod human; mod pattern; mod process; mod wtr; pub use crate::{ decompress::{ resolve_binary, DecompressionMatcher, DecompressionMatcherBuilder, DecompressionReader, DecompressionReaderBuilder, }, escape::{escape, escape_os, unescape, unescape_os}, hostname::hostname, human::{parse_human_readable_size, ParseSizeError}, pattern::{ pattern_from_bytes, pattern_from_os, patterns_from_path, patterns_from_reader, patterns_from_stdin, InvalidPatternError, }, process::{CommandError, CommandReader, CommandReaderBuilder}, wtr::{ stdout, stdout_buffered_block, stdout_buffered_line, StandardStream, }, }; /// Returns true if and only if stdin is believed to be readable. /// /// When stdin is readable, command line programs may choose to behave /// differently than when stdin is not readable. For example, `command foo` /// might search the current directory for occurrences of `foo` where as /// `command foo < some-file` or `cat some-file | command foo` might instead /// only search stdin for occurrences of `foo`. /// /// Note that this isn't perfect and essentially corresponds to a heuristic. /// When things are unclear (such as if an error occurs during introspection to /// determine whether stdin is readable), this prefers to return `false`. That /// means it's possible for an end user to pipe something into your program and /// have this return `false` and thus potentially lead to ignoring the user's /// stdin data. While not ideal, this is perhaps better than falsely assuming /// stdin is readable, which would result in blocking forever on reading stdin. /// Regardless, commands should always provide explicit fallbacks to override /// behavior. For example, `rg foo -` will explicitly search stdin and `rg foo /// ./` will explicitly search the current working directory. 
pub fn is_readable_stdin() -> bool { use std::io::IsTerminal; #[cfg(unix)] fn imp() -> bool { use std::{ fs::File, os::{fd::AsFd, unix::fs::FileTypeExt}, }; let stdin = std::io::stdin(); let fd = match stdin.as_fd().try_clone_to_owned() { Ok(fd) => fd, Err(err) => { log::debug!( "for heuristic stdin detection on Unix, \ could not clone stdin file descriptor \ (thus assuming stdin is not readable): {err}", ); return false; } }; let file = File::from(fd); let md = match file.metadata() { Ok(md) => md, Err(err) => { log::debug!( "for heuristic stdin detection on Unix, \ could not get file metadata for stdin \ (thus assuming stdin is not readable): {err}", ); return false; } }; let ft = md.file_type(); let is_file = ft.is_file(); let is_fifo = ft.is_fifo(); let is_socket = ft.is_socket(); let is_readable = is_file || is_fifo || is_socket; log::debug!( "for heuristic stdin detection on Unix, \ found that \ is_file={is_file}, is_fifo={is_fifo} and is_socket={is_socket}, \ and thus concluded that is_stdin_readable={is_readable}", ); is_readable } #[cfg(windows)] fn imp() -> bool { let stdin = winapi_util::HandleRef::stdin(); let typ = match winapi_util::file::typ(stdin) { Ok(typ) => typ, Err(err) => { log::debug!( "for heuristic stdin detection on Windows, \ could not get file type of stdin \ (thus assuming stdin is not readable): {err}", ); return false; } }; let is_disk = typ.is_disk(); let is_pipe = typ.is_pipe(); let is_readable = is_disk || is_pipe; log::debug!( "for heuristic stdin detection on Windows, \ found that is_disk={is_disk} and is_pipe={is_pipe}, \ and thus concluded that is_stdin_readable={is_readable}", ); is_readable } #[cfg(not(any(unix, windows)))] fn imp() -> bool { log::debug!("on non-{{Unix,Windows}}, assuming stdin is not readable"); false } !std::io::stdin().is_terminal() && imp() } /// Returns true if and only if stdin is believed to be connected to a tty /// or a console. 
///
/// Note that this is now just a wrapper around
/// [`std::io::IsTerminal`](https://doc.rust-lang.org/std/io/trait.IsTerminal.html).
/// Callers should prefer using the `IsTerminal` trait directly. This routine
/// is deprecated and will be removed in the next semver incompatible release.
#[deprecated(since = "0.1.10", note = "use std::io::IsTerminal instead")]
pub fn is_tty_stdin() -> bool {
    std::io::IsTerminal::is_terminal(&std::io::stdin())
}

/// Returns true if and only if stdout is believed to be connected to a tty
/// or a console.
///
/// This is useful for when you want your command line program to produce
/// different output depending on whether it's printing directly to a user's
/// terminal or whether it's being redirected somewhere else. For example,
/// implementations of `ls` will often show one item per line when stdout is
/// redirected, but will condense output when printing to a tty.
///
/// Note that this is now just a wrapper around
/// [`std::io::IsTerminal`](https://doc.rust-lang.org/std/io/trait.IsTerminal.html).
/// Callers should prefer using the `IsTerminal` trait directly. This routine
/// is deprecated and will be removed in the next semver incompatible release.
#[deprecated(since = "0.1.10", note = "use std::io::IsTerminal instead")]
pub fn is_tty_stdout() -> bool {
    std::io::IsTerminal::is_terminal(&std::io::stdout())
}

/// Returns true if and only if stderr is believed to be connected to a tty
/// or a console.
///
/// Note that this is now just a wrapper around
/// [`std::io::IsTerminal`](https://doc.rust-lang.org/std/io/trait.IsTerminal.html).
/// Callers should prefer using the `IsTerminal` trait directly. This routine
/// is deprecated and will be removed in the next semver incompatible release.
#[deprecated(since = "0.1.10", note = "use std::io::IsTerminal instead")]
pub fn is_tty_stderr() -> bool {
    use std::io::IsTerminal;
    std::io::stderr().is_terminal()
}
grep-cli-0.1.11/src/pattern.rs000064400000000000000000000131711046102023000142230ustar 00000000000000
use std::{ffi::OsStr, io, path::Path};

use bstr::io::BufReadExt;

use crate::escape::{escape, escape_os};

/// An error that occurs when a pattern could not be converted to valid UTF-8.
///
/// The purpose of this error is to give a more targeted failure mode for
/// patterns written by end users that are not valid UTF-8.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InvalidPatternError {
    /// The offending pattern, already escaped for display.
    original: String,
    /// Offset of the first invalid UTF-8 byte in the pattern.
    valid_up_to: usize,
}

impl InvalidPatternError {
    /// Returns the index in the given string up to which valid UTF-8 was
    /// verified.
    pub fn valid_up_to(&self) -> usize {
        self.valid_up_to
    }
}

impl std::error::Error for InvalidPatternError {}

impl std::fmt::Display for InvalidPatternError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "found invalid UTF-8 in pattern at byte offset {}: {} \
             (disable Unicode mode and use hex escape sequences to match \
             arbitrary bytes in a pattern, e.g., '(?-u)\\xFF')",
            self.valid_up_to, self.original,
        )
    }
}

impl From<InvalidPatternError> for io::Error {
    fn from(paterr: InvalidPatternError) -> io::Error {
        io::Error::new(io::ErrorKind::Other, paterr)
    }
}

/// Convert an OS string into a regular expression pattern.
///
/// This conversion fails if the given pattern is not valid UTF-8, in which
/// case, a targeted error with more information about where the invalid UTF-8
/// occurs is given. The error also suggests the use of hex escape sequences,
/// which are supported by many regex engines.
pub fn pattern_from_os(pattern: &OsStr) -> Result<&str, InvalidPatternError> { pattern.to_str().ok_or_else(|| { let valid_up_to = pattern .to_string_lossy() .find('\u{FFFD}') .expect("a Unicode replacement codepoint for invalid UTF-8"); InvalidPatternError { original: escape_os(pattern), valid_up_to } }) } /// Convert arbitrary bytes into a regular expression pattern. /// /// This conversion fails if the given pattern is not valid UTF-8, in which /// case, a targeted error with more information about where the invalid UTF-8 /// occurs is given. The error also suggests the use of hex escape sequences, /// which are supported by many regex engines. pub fn pattern_from_bytes( pattern: &[u8], ) -> Result<&str, InvalidPatternError> { std::str::from_utf8(pattern).map_err(|err| InvalidPatternError { original: escape(pattern), valid_up_to: err.valid_up_to(), }) } /// Read patterns from a file path, one per line. /// /// If there was a problem reading or if any of the patterns contain invalid /// UTF-8, then an error is returned. If there was a problem with a specific /// pattern, then the error message will include the line number and the file /// path. pub fn patterns_from_path>(path: P) -> io::Result> { let path = path.as_ref(); let file = std::fs::File::open(path).map_err(|err| { io::Error::new( io::ErrorKind::Other, format!("{}: {}", path.display(), err), ) })?; patterns_from_reader(file).map_err(|err| { io::Error::new( io::ErrorKind::Other, format!("{}:{}", path.display(), err), ) }) } /// Read patterns from stdin, one per line. /// /// If there was a problem reading or if any of the patterns contain invalid /// UTF-8, then an error is returned. If there was a problem with a specific /// pattern, then the error message will include the line number and the fact /// that it came from stdin. 
pub fn patterns_from_stdin() -> io::Result> { let stdin = io::stdin(); let locked = stdin.lock(); patterns_from_reader(locked).map_err(|err| { io::Error::new(io::ErrorKind::Other, format!(":{}", err)) }) } /// Read patterns from any reader, one per line. /// /// If there was a problem reading or if any of the patterns contain invalid /// UTF-8, then an error is returned. If there was a problem with a specific /// pattern, then the error message will include the line number. /// /// Note that this routine uses its own internal buffer, so the caller should /// not provide their own buffered reader if possible. /// /// # Example /// /// This shows how to parse patterns, one per line. /// /// ``` /// use grep_cli::patterns_from_reader; /// /// let patterns = "\ /// foo /// bar\\s+foo /// [a-z]{3} /// "; /// /// assert_eq!(patterns_from_reader(patterns.as_bytes())?, vec![ /// r"foo", /// r"bar\s+foo", /// r"[a-z]{3}", /// ]); /// # Ok::<(), Box>(()) /// ``` pub fn patterns_from_reader(rdr: R) -> io::Result> { let mut patterns = vec![]; let mut line_number = 0; io::BufReader::new(rdr).for_byte_line(|line| { line_number += 1; match pattern_from_bytes(line) { Ok(pattern) => { patterns.push(pattern.to_string()); Ok(true) } Err(err) => Err(io::Error::new( io::ErrorKind::Other, format!("{}: {}", line_number, err), )), } })?; Ok(patterns) } #[cfg(test)] mod tests { use super::*; #[test] fn bytes() { let pat = b"abc\xFFxyz"; let err = pattern_from_bytes(pat).unwrap_err(); assert_eq!(3, err.valid_up_to()); } #[test] #[cfg(unix)] fn os() { use std::ffi::OsStr; use std::os::unix::ffi::OsStrExt; let pat = OsStr::from_bytes(b"abc\xFFxyz"); let err = pattern_from_os(pat).unwrap_err(); assert_eq!(3, err.valid_up_to()); } } grep-cli-0.1.11/src/process.rs000064400000000000000000000256101046102023000142250ustar 00000000000000use std::{ io::{self, Read}, process, }; /// An error that can occur while running a command and reading its output. 
///
/// This error can be seamlessly converted to an `io::Error` via a `From`
/// implementation.
#[derive(Debug)]
pub struct CommandError {
    kind: CommandErrorKind,
}

/// The source of a [`CommandError`]: either an I/O failure or the bytes the
/// command wrote to stderr.
#[derive(Debug)]
enum CommandErrorKind {
    Io(io::Error),
    Stderr(Vec<u8>),
}

impl CommandError {
    /// Create an error from an I/O error.
    pub(crate) fn io(ioerr: io::Error) -> CommandError {
        CommandError { kind: CommandErrorKind::Io(ioerr) }
    }

    /// Create an error from the contents of stderr (which may be empty).
    pub(crate) fn stderr(bytes: Vec<u8>) -> CommandError {
        CommandError { kind: CommandErrorKind::Stderr(bytes) }
    }

    /// Returns true if and only if this error has empty data from stderr.
    pub(crate) fn is_empty(&self) -> bool {
        match self.kind {
            CommandErrorKind::Stderr(ref bytes) => bytes.is_empty(),
            _ => false,
        }
    }
}

impl std::error::Error for CommandError {}

impl std::fmt::Display for CommandError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.kind {
            CommandErrorKind::Io(ref e) => e.fmt(f),
            CommandErrorKind::Stderr(ref bytes) => {
                let msg = String::from_utf8_lossy(bytes);
                if msg.trim().is_empty() {
                    // Emit a placeholder so the error never renders as an
                    // empty string.
                    write!(f, "<stderr is empty>")
                } else {
                    // Frame the command's own output between two rules so it
                    // stands apart from surrounding messages.
                    let div = "-".repeat(79);
                    write!(
                        f,
                        "\n{div}\n{msg}\n{div}",
                        div = div,
                        msg = msg.trim()
                    )
                }
            }
        }
    }
}

impl From<io::Error> for CommandError {
    fn from(ioerr: io::Error) -> CommandError {
        CommandError { kind: CommandErrorKind::Io(ioerr) }
    }
}

impl From<CommandError> for io::Error {
    fn from(cmderr: CommandError) -> io::Error {
        match cmderr.kind {
            // An underlying I/O error is passed through unchanged.
            CommandErrorKind::Io(ioerr) => ioerr,
            CommandErrorKind::Stderr(_) => {
                io::Error::new(io::ErrorKind::Other, cmderr)
            }
        }
    }
}

/// Configures and builds a streaming reader for process output.
#[derive(Clone, Debug)]
pub struct CommandReaderBuilder {
    async_stderr: bool,
}

impl Default for CommandReaderBuilder {
    /// Returns the default configuration, in which stderr is read
    /// asynchronously.
    ///
    /// This is implemented by hand because a derived `Default` would leave
    /// `async_stderr` as `false`, contradicting the documented default.
    fn default() -> CommandReaderBuilder {
        CommandReaderBuilder { async_stderr: true }
    }
}

impl CommandReaderBuilder {
    /// Create a new builder with the default configuration.
    pub fn new() -> CommandReaderBuilder {
        CommandReaderBuilder::default()
    }

    /// Build a new streaming reader for the given command's output.
    ///
    /// The caller should set everything that's required on the given command
    /// before building a reader, such as its arguments, environment and
    /// current working directory. Settings such as the stdout and stderr (but
    /// not stdin) pipes will be overridden so that they can be controlled by
    /// the reader.
    ///
    /// If there was a problem spawning the given command, then its error is
    /// returned.
    pub fn build(
        &self,
        command: &mut process::Command,
    ) -> Result<CommandReader, CommandError> {
        let mut child = command
            .stdout(process::Stdio::piped())
            .stderr(process::Stdio::piped())
            .spawn()?;
        // stderr was configured as piped just above, so the handle is
        // present on the spawned child.
        let stderr = if self.async_stderr {
            StderrReader::r#async(child.stderr.take().unwrap())
        } else {
            StderrReader::sync(child.stderr.take().unwrap())
        };
        Ok(CommandReader { child, stderr, eof: false })
    }

    /// When enabled, the reader will asynchronously read the contents of the
    /// command's stderr output. When disabled, stderr is only read after the
    /// stdout stream has been exhausted (or if the process quits with an error
    /// code).
    ///
    /// Note that when enabled, this may require launching an additional
    /// thread in order to read stderr. This is done so that the process being
    /// executed is never blocked from writing to stdout or stderr. If this is
    /// disabled, then it is possible for the process to fill up the stderr
    /// buffer and deadlock.
    ///
    /// This is enabled by default.
    pub fn async_stderr(&mut self, yes: bool) -> &mut CommandReaderBuilder {
        self.async_stderr = yes;
        self
    }
}

/// A streaming reader for a command's output.
///
/// The purpose of this reader is to provide an easy way to execute processes
/// whose stdout is read in a streaming way while also making the processes'
/// stderr available when the process fails with an exit code. This makes it
/// possible to execute processes while surfacing the underlying failure mode
/// in the case of an error.
///
/// Moreover, by default, this reader will asynchronously read the processes'
/// stderr.
This prevents subtle deadlocking bugs for noisy processes that /// write a lot to stderr. Currently, the entire contents of stderr is read /// on to the heap. /// /// # Example /// /// This example shows how to invoke `gzip` to decompress the contents of a /// file. If the `gzip` command reports a failing exit status, then its stderr /// is returned as an error. /// /// ```no_run /// use std::{io::Read, process::Command}; /// /// use grep_cli::CommandReader; /// /// let mut cmd = Command::new("gzip"); /// cmd.arg("-d").arg("-c").arg("/usr/share/man/man1/ls.1.gz"); /// /// let mut rdr = CommandReader::new(&mut cmd)?; /// let mut contents = vec![]; /// rdr.read_to_end(&mut contents)?; /// # Ok::<(), Box>(()) /// ``` #[derive(Debug)] pub struct CommandReader { child: process::Child, stderr: StderrReader, /// This is set to true once 'read' returns zero bytes. When this isn't /// set and we close the reader, then we anticipate a pipe error when /// reaping the child process and silence it. eof: bool, } impl CommandReader { /// Create a new streaming reader for the given command using the default /// configuration. /// /// The caller should set everything that's required on the given command /// before building a reader, such as its arguments, environment and /// current working directory. Settings such as the stdout and stderr (but /// not stdin) pipes will be overridden so that they can be controlled by /// the reader. /// /// If there was a problem spawning the given command, then its error is /// returned. /// /// If the caller requires additional configuration for the reader /// returned, then use [`CommandReaderBuilder`]. pub fn new( cmd: &mut process::Command, ) -> Result { CommandReaderBuilder::new().build(cmd) } /// Closes the CommandReader, freeing any resources used by its underlying /// child process. If the child process exits with a nonzero exit code, the /// returned Err value will include its stderr. 
/// /// `close` is idempotent, meaning it can be safely called multiple times. /// The first call closes the CommandReader and any subsequent calls do /// nothing. /// /// This method should be called after partially reading a file to prevent /// resource leakage. However there is no need to call `close` explicitly /// if your code always calls `read` to EOF, as `read` takes care of /// calling `close` in this case. /// /// `close` is also called in `drop` as a last line of defense against /// resource leakage. Any error from the child process is then printed as a /// warning to stderr. This can be avoided by explicitly calling `close` /// before the CommandReader is dropped. pub fn close(&mut self) -> io::Result<()> { // Dropping stdout closes the underlying file descriptor, which should // cause a well-behaved child process to exit. If child.stdout is None // we assume that close() has already been called and do nothing. let stdout = match self.child.stdout.take() { None => return Ok(()), Some(stdout) => stdout, }; drop(stdout); if self.child.wait()?.success() { Ok(()) } else { let err = self.stderr.read_to_end(); // In the specific case where we haven't consumed the full data // from the child process, then closing stdout above results in // a pipe signal being thrown in most cases. But I don't think // there is any reliable and portable way of detecting it. Instead, // if we know we haven't hit EOF (so we anticipate a broken pipe // error) and if stderr otherwise doesn't have anything on it, then // we assume total success. 
if !self.eof && err.is_empty() { return Ok(()); } Err(io::Error::from(err)) } } } impl Drop for CommandReader { fn drop(&mut self) { if let Err(error) = self.close() { log::warn!("{}", error); } } } impl io::Read for CommandReader { fn read(&mut self, buf: &mut [u8]) -> io::Result { let stdout = match self.child.stdout { None => return Ok(0), Some(ref mut stdout) => stdout, }; let nread = stdout.read(buf)?; if nread == 0 { self.eof = true; self.close().map(|_| 0) } else { Ok(nread) } } } /// A reader that encapsulates the asynchronous or synchronous reading of /// stderr. #[derive(Debug)] enum StderrReader { Async(Option>), Sync(process::ChildStderr), } impl StderrReader { /// Create a reader for stderr that reads contents asynchronously. fn r#async(mut stderr: process::ChildStderr) -> StderrReader { let handle = std::thread::spawn(move || stderr_to_command_error(&mut stderr)); StderrReader::Async(Some(handle)) } /// Create a reader for stderr that reads contents synchronously. fn sync(stderr: process::ChildStderr) -> StderrReader { StderrReader::Sync(stderr) } /// Consumes all of stderr on to the heap and returns it as an error. /// /// If there was a problem reading stderr itself, then this returns an I/O /// command error. 
fn read_to_end(&mut self) -> CommandError { match *self { StderrReader::Async(ref mut handle) => { let handle = handle .take() .expect("read_to_end cannot be called more than once"); handle.join().expect("stderr reading thread does not panic") } StderrReader::Sync(ref mut stderr) => { stderr_to_command_error(stderr) } } } } fn stderr_to_command_error(stderr: &mut process::ChildStderr) -> CommandError { let mut bytes = vec![]; match stderr.read_to_end(&mut bytes) { Ok(_) => CommandError::stderr(bytes), Err(err) => CommandError::io(err), } } grep-cli-0.1.11/src/wtr.rs000064400000000000000000000114071046102023000133620ustar 00000000000000use std::io::{self, IsTerminal}; use termcolor::HyperlinkSpec; /// A writer that supports coloring with either line or block buffering. #[derive(Debug)] pub struct StandardStream(StandardStreamKind); /// Returns a possibly buffered writer to stdout for the given color choice. /// /// The writer returned is either line buffered or block buffered. The decision /// between these two is made automatically based on whether a tty is attached /// to stdout or not. If a tty is attached, then line buffering is used. /// Otherwise, block buffering is used. In general, block buffering is more /// efficient, but may increase the time it takes for the end user to see the /// first bits of output. /// /// If you need more fine grained control over the buffering mode, then use one /// of `stdout_buffered_line` or `stdout_buffered_block`. /// /// The color choice given is passed along to the underlying writer. To /// completely disable colors in all cases, use `ColorChoice::Never`. pub fn stdout(color_choice: termcolor::ColorChoice) -> StandardStream { if std::io::stdout().is_terminal() { stdout_buffered_line(color_choice) } else { stdout_buffered_block(color_choice) } } /// Returns a line buffered writer to stdout for the given color choice. 
///
/// Line buffering suits output that goes directly to a tty: users see each
/// result as soon as it is written. The trade-off is speed, since this mode
/// can be slower, especially when there is a lot of output.
///
/// Consider [`stdout`] instead if you would rather have the buffering
/// strategy chosen automatically based on whether stdout is connected to a
/// tty.
pub fn stdout_buffered_line(
    color_choice: termcolor::ColorChoice,
) -> StandardStream {
    StandardStream(StandardStreamKind::LineBuffered(
        termcolor::StandardStream::stdout(color_choice),
    ))
}

/// Returns a block buffered writer to stdout for the given color choice.
///
/// This writer is useful when printing results to a file since it amortizes
/// the cost of writing data. The downside of this approach is that it can
/// increase the latency of display output when writing to a tty.
///
/// You might consider using [`stdout`] instead, which chooses the buffering
/// strategy automatically based on whether stdout is connected to a tty.
pub fn stdout_buffered_block(
    color_choice: termcolor::ColorChoice,
) -> StandardStream {
    let out = termcolor::BufferedStandardStream::stdout(color_choice);
    StandardStream(StandardStreamKind::BlockBuffered(out))
}

/// The concrete termcolor writer backing a `StandardStream`.
#[derive(Debug)]
enum StandardStreamKind {
    /// Wraps `termcolor::StandardStream`; built by `stdout_buffered_line`.
    LineBuffered(termcolor::StandardStream),
    /// Wraps `termcolor::BufferedStandardStream`; built by
    /// `stdout_buffered_block`.
    BlockBuffered(termcolor::BufferedStandardStream),
}

// Every method below forwards to whichever termcolor writer is wrapped, so
// `StandardStream` behaves exactly like the writer it was constructed with.
impl io::Write for StandardStream {
    /// Writes `buf` to the wrapped writer and returns the number of bytes
    /// that writer reports as written.
    #[inline]
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        use self::StandardStreamKind::*;

        match self.0 {
            LineBuffered(ref mut w) => w.write(buf),
            BlockBuffered(ref mut w) => w.write(buf),
        }
    }

    /// Flushes the wrapped writer.
    #[inline]
    fn flush(&mut self) -> io::Result<()> {
        use self::StandardStreamKind::*;

        match self.0 {
            LineBuffered(ref mut w) => w.flush(),
            BlockBuffered(ref mut w) => w.flush(),
        }
    }
}

// As with `io::Write`, each method delegates to the wrapped termcolor writer
// and returns its answer unchanged.
impl termcolor::WriteColor for StandardStream {
    /// Reports whether the wrapped writer supports color.
    #[inline]
    fn supports_color(&self) -> bool {
        use self::StandardStreamKind::*;

        match self.0 {
            LineBuffered(ref w) => w.supports_color(),
            BlockBuffered(ref w) => w.supports_color(),
        }
    }

    /// Reports whether the wrapped writer supports hyperlinks.
    #[inline]
    fn supports_hyperlinks(&self) -> bool {
        use self::StandardStreamKind::*;

        match self.0 {
            LineBuffered(ref w) => w.supports_hyperlinks(),
            BlockBuffered(ref w) => w.supports_hyperlinks(),
        }
    }

    /// Applies the color settings in `spec` to the wrapped writer.
    #[inline]
    fn set_color(&mut self, spec: &termcolor::ColorSpec) -> io::Result<()> {
        use self::StandardStreamKind::*;

        match self.0 {
            LineBuffered(ref mut w) => w.set_color(spec),
            BlockBuffered(ref mut w) => w.set_color(spec),
        }
    }

    /// Applies the hyperlink settings in `link` to the wrapped writer.
    #[inline]
    fn set_hyperlink(&mut self, link: &HyperlinkSpec) -> io::Result<()> {
        use self::StandardStreamKind::*;

        match self.0 {
            LineBuffered(ref mut w) => w.set_hyperlink(link),
            BlockBuffered(ref mut w) => w.set_hyperlink(link),
        }
    }

    /// Resets the wrapped writer's color settings.
    #[inline]
    fn reset(&mut self) -> io::Result<()> {
        use self::StandardStreamKind::*;

        match self.0 {
            LineBuffered(ref mut w) => w.reset(),
            BlockBuffered(ref mut w) => w.reset(),
        }
    }

    /// Reports the wrapped writer's answer to `is_synchronous`.
    #[inline]
    fn is_synchronous(&self) -> bool {
        use self::StandardStreamKind::*;

        match self.0 {
            LineBuffered(ref w) => w.is_synchronous(),
            BlockBuffered(ref w) => w.is_synchronous(),
        }
    }
}