uucore-0.0.23/.cargo_vcs_info.json0000644000000001500000000000100124140ustar { "git": { "sha1": "a3af2230a64d3b60832ce9944df5388a5ba9128a" }, "path_in_vcs": "src/uucore" }uucore-0.0.23/Cargo.toml0000644000000074610000000000100104260ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "uucore" version = "0.0.23" authors = ["uutils developers"] description = "uutils ~ 'core' uutils code library (cross-platform)" homepage = "https://github.com/uutils/coreutils" keywords = [ "coreutils", "uutils", "cross-platform", "cli", "utility", ] categories = ["command-line-utilities"] license = "MIT" repository = "https://github.com/uutils/coreutils/tree/main/src/uucore" [lib] path = "src/lib/lib.rs" [dependencies.blake2b_simd] version = "1.0.2" optional = true [dependencies.blake3] version = "1.5.0" optional = true [dependencies.clap] version = "4.4" features = [ "wrap_help", "cargo", ] [dependencies.data-encoding] version = "2.4" optional = true [dependencies.data-encoding-macro] version = "0.1.13" optional = true [dependencies.digest] version = "0.10.7" optional = true [dependencies.dns-lookup] version = "2.0.4" optional = true [dependencies.dunce] version = "1.0.4" optional = true [dependencies.glob] version = "0.3.1" [dependencies.hex] version = "0.4.3" optional = true [dependencies.itertools] version = "0.11.0" optional = true [dependencies.libc] version = "0.2.150" optional = true [dependencies.md-5] version = "0.10.6" optional = true [dependencies.memchr] version = "2" optional = true [dependencies.once_cell] version = "1.18.0" 
[dependencies.os_display] version = "0.1.3" [dependencies.sha1] version = "0.10.6" optional = true [dependencies.sha2] version = "0.10.8" optional = true [dependencies.sha3] version = "0.10.8" optional = true [dependencies.sm3] version = "0.4.2" optional = true [dependencies.thiserror] version = "1.0" optional = true [dependencies.time] version = "0.3" features = [ "formatting", "local-offset", "macros", ] optional = true [dependencies.uucore_procs] version = ">=0.0.19" package = "uucore_procs" [dependencies.wild] version = "2.2" [dependencies.z85] version = "3.0.5" optional = true [dev-dependencies.clap] version = "4.4" features = [ "wrap_help", "cargo", ] [dev-dependencies.once_cell] version = "1.18.0" [dev-dependencies.tempfile] version = "3.8.1" [features] backup-control = [] default = [] encoding = [ "data-encoding", "data-encoding-macro", "z85", "thiserror", ] entries = ["libc"] fs = [ "dunce", "libc", "winapi-util", "windows-sys", ] fsext = [ "libc", "time", "windows-sys", ] lines = [] memo = ["itertools"] mode = ["libc"] perms = [ "libc", "walkdir", ] pipes = [] process = ["libc"] quoting-style = [] ranges = [] ringbuffer = [] signals = [] sum = [ "digest", "hex", "memchr", "md-5", "sha1", "sha2", "sha3", "blake2b_simd", "blake3", "sm3", ] update-control = [] utf8 = [] utmpx = [ "time", "time/macros", "libc", "dns-lookup", ] version-cmp = [] wide = [] [target."cfg(target_os = \"windows\")".dependencies.winapi-util] version = "0.1.6" optional = true [target."cfg(target_os = \"windows\")".dependencies.windows-sys] version = "0.48.0" features = [ "Win32_Storage_FileSystem", "Win32_Foundation", "Win32_System_WindowsProgramming", ] optional = true default-features = false [target."cfg(unix)".dependencies.nix] version = "0.27" features = [ "fs", "uio", "zerocopy", "signal", ] default-features = false [target."cfg(unix)".dependencies.walkdir] version = "2.4" optional = true uucore-0.0.23/Cargo.toml.orig000064400000000000000000000055471046102023000141120ustar 
00000000000000# spell-checker:ignore (features) zerocopy [package] name = "uucore" version = "0.0.23" authors = ["uutils developers"] license = "MIT" description = "uutils ~ 'core' uutils code library (cross-platform)" homepage = "https://github.com/uutils/coreutils" repository = "https://github.com/uutils/coreutils/tree/main/src/uucore" # readme = "README.md" keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"] categories = ["command-line-utilities"] edition = "2021" [lib] path = "src/lib/lib.rs" [dependencies] clap = { workspace = true } uucore_procs = { workspace = true } dns-lookup = { version = "2.0.4", optional = true } dunce = { version = "1.0.4", optional = true } wild = "2.2" glob = { workspace = true } # * optional itertools = { workspace = true, optional = true } thiserror = { workspace = true, optional = true } time = { workspace = true, optional = true, features = [ "formatting", "local-offset", "macros", ] } # * "problem" dependencies (pinned) data-encoding = { version = "2.4", optional = true } data-encoding-macro = { version = "0.1.13", optional = true } z85 = { version = "3.0.5", optional = true } libc = { workspace = true, optional = true } once_cell = { workspace = true } os_display = "0.1.3" digest = { workspace = true, optional = true } hex = { workspace = true, optional = true } memchr = { workspace = true, optional = true } md-5 = { workspace = true, optional = true } sha1 = { workspace = true, optional = true } sha2 = { workspace = true, optional = true } sha3 = { workspace = true, optional = true } blake2b_simd = { workspace = true, optional = true } blake3 = { workspace = true, optional = true } sm3 = { workspace = true, optional = true } [target.'cfg(unix)'.dependencies] walkdir = { workspace = true, optional = true } nix = { workspace = true, features = ["fs", "uio", "zerocopy", "signal"] } [dev-dependencies] clap = { workspace = true } once_cell = { workspace = true } tempfile = { workspace = true } 
[target.'cfg(target_os = "windows")'.dependencies] winapi-util = { workspace = true, optional = true } windows-sys = { workspace = true, optional = true, default-features = false, features = [ "Win32_Storage_FileSystem", "Win32_Foundation", "Win32_System_WindowsProgramming", ] } [features] default = [] # * non-default features backup-control = [] encoding = ["data-encoding", "data-encoding-macro", "z85", "thiserror"] entries = ["libc"] fs = ["dunce", "libc", "winapi-util", "windows-sys"] fsext = ["libc", "time", "windows-sys"] lines = [] memo = ["itertools"] mode = ["libc"] perms = ["libc", "walkdir"] pipes = [] process = ["libc"] quoting-style = [] ranges = [] ringbuffer = [] signals = [] sum = [ "digest", "hex", "memchr", "md-5", "sha1", "sha2", "sha3", "blake2b_simd", "blake3", "sm3", ] update-control = [] utf8 = [] utmpx = ["time", "time/macros", "libc", "dns-lookup"] version-cmp = [] wide = [] uucore-0.0.23/LICENSE000064400000000000000000000020551046102023000122170ustar 00000000000000Copyright (c) Jordi Boggiano and many others Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. uucore-0.0.23/src/lib/features/backup_control.rs000064400000000000000000000555771046102023000177610ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Implement GNU-style backup functionality. //! //! This module implements the backup functionality as described in the [GNU //! manual][1]. It provides //! //! - pre-defined [`clap`-Arguments][2] for inclusion in utilities that //! implement backups //! - determination of the [backup mode][3] //! - determination of the [backup suffix][4] //! - [backup target path construction][5] //! - [Error types][6] for backup-related errors //! - GNU-compliant [help texts][7] for backup-related errors //! //! Backup-functionality is implemented by the following utilities: //! //! - `cp` //! - `install` //! - `ln` //! - `mv` //! //! //! [1]: https://www.gnu.org/software/coreutils/manual/html_node/Backup-options.html //! [2]: arguments //! [3]: `determine_backup_mode()` //! [4]: `determine_backup_suffix()` //! [5]: `get_backup_path()` //! [6]: `BackupError` //! [7]: `BACKUP_CONTROL_LONG_HELP` //! //! //! # Usage example //! //! ``` //! #[macro_use] //! extern crate uucore; //! //! use clap::{Command, Arg, ArgMatches}; //! use std::path::{Path, PathBuf}; //! use uucore::backup_control::{self, BackupMode}; //! use uucore::error::{UError, UResult}; //! //! fn main() { //! let usage = String::from("command [OPTION]... ARG"); //! let long_usage = String::from("And here's a detailed explanation"); //! //! let matches = Command::new("command") //! .arg(backup_control::arguments::backup()) //! 
.arg(backup_control::arguments::backup_no_args()) //! .arg(backup_control::arguments::suffix()) //! .override_usage(usage) //! .after_help(format!( //! "{}\n{}", //! long_usage, //! backup_control::BACKUP_CONTROL_LONG_HELP //! )) //! .get_matches_from(vec![ //! "command", "--backup=t", "--suffix=bak~" //! ]); //! //! let backup_mode = match backup_control::determine_backup_mode(&matches) { //! Err(e) => { //! show!(e); //! return; //! }, //! Ok(mode) => mode, //! }; //! let backup_suffix = backup_control::determine_backup_suffix(&matches); //! let target_path = Path::new("/tmp/example"); //! //! let backup_path = backup_control::get_backup_path( //! backup_mode, target_path, &backup_suffix //! ); //! //! // Perform your backups here. //! //! } //! ``` // spell-checker:ignore backupopt use crate::{ display::Quotable, error::{UError, UResult}, }; use clap::ArgMatches; use std::{ env, error::Error, fmt::{Debug, Display}, path::{Path, PathBuf}, }; pub static BACKUP_CONTROL_VALUES: &[&str] = &[ "simple", "never", "numbered", "t", "existing", "nil", "none", "off", ]; pub const BACKUP_CONTROL_LONG_HELP: &str = "The backup suffix is '~', unless set with --suffix or SIMPLE_BACKUP_SUFFIX. The version control method may be selected via the --backup option or through the VERSION_CONTROL environment variable. Here are the values: none, off never make backups (even if --backup is given) numbered, t make numbered backups existing, nil numbered if numbered backups exist, simple otherwise simple, never always make simple backups"; static VALID_ARGS_HELP: &str = "Valid arguments are: - 'none', 'off' - 'simple', 'never' - 'existing', 'nil' - 'numbered', 't'"; /// Available backup modes. /// /// The mapping of the backup modes to the CLI arguments is annotated on the /// enum variants. 
#[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum BackupMode { /// Argument 'none', 'off' NoBackup, /// Argument 'simple', 'never' SimpleBackup, /// Argument 'numbered', 't' NumberedBackup, /// Argument 'existing', 'nil' ExistingBackup, } /// Backup error types. /// /// Errors are currently raised by [`determine_backup_mode`] only. All errors /// are implemented as [`UError`] for uniform handling across utilities. #[derive(Debug, Eq, PartialEq)] pub enum BackupError { /// An invalid argument (e.g. 'foo') was given as backup type. First /// parameter is the argument, second is the arguments origin (CLI or /// ENV-var) InvalidArgument(String, String), /// An ambiguous argument (e.g. 'n') was given as backup type. First /// parameter is the argument, second is the arguments origin (CLI or /// ENV-var) AmbiguousArgument(String, String), /// Currently unused BackupImpossible(), // BackupFailed(PathBuf, PathBuf, std::io::Error), } impl UError for BackupError { fn code(&self) -> i32 { match self { Self::BackupImpossible() => 2, _ => 1, } } fn usage(&self) -> bool { // Suggested by clippy. matches!( self, Self::InvalidArgument(_, _) | Self::AmbiguousArgument(_, _) ) } } impl Error for BackupError {} impl Display for BackupError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::InvalidArgument(arg, origin) => write!( f, "invalid argument {} for '{}'\n{}", arg.quote(), origin, VALID_ARGS_HELP ), Self::AmbiguousArgument(arg, origin) => write!( f, "ambiguous argument {} for '{}'\n{}", arg.quote(), origin, VALID_ARGS_HELP ), Self::BackupImpossible() => write!(f, "cannot create backup"), // Placeholder for later // Self::BackupFailed(from, to, e) => Display::fmt( // &uio_error!(e, "failed to backup {} to {}", from.quote(), to.quote()), // f // ), } } } /// Arguments for backup-related functionality. 
/// /// Rather than implementing the `clap`-Arguments for every utility, it is /// recommended to include the `clap` arguments via the functions provided here. /// This way the backup-specific arguments are handled uniformly across /// utilities and can be maintained in one central place. pub mod arguments { use clap::ArgAction; pub static OPT_BACKUP: &str = "backupopt_backup"; pub static OPT_BACKUP_NO_ARG: &str = "backupopt_b"; pub static OPT_SUFFIX: &str = "backupopt_suffix"; /// '--backup' argument pub fn backup() -> clap::Arg { clap::Arg::new(OPT_BACKUP) .long("backup") .help("make a backup of each existing destination file") .action(clap::ArgAction::Set) .require_equals(true) .num_args(0..=1) .value_name("CONTROL") } /// '-b' argument pub fn backup_no_args() -> clap::Arg { clap::Arg::new(OPT_BACKUP_NO_ARG) .short('b') .help("like --backup but does not accept an argument") .action(ArgAction::SetTrue) } /// '-S, --suffix' argument pub fn suffix() -> clap::Arg { clap::Arg::new(OPT_SUFFIX) .short('S') .long("suffix") .help("override the usual backup suffix") .action(clap::ArgAction::Set) .value_name("SUFFIX") .allow_hyphen_values(true) } } /// Obtain the suffix to use for a backup. /// /// In order of precedence, this function obtains the backup suffix /// /// 1. From the '-S' or '--suffix' CLI argument, if present /// 2. From the "SIMPLE_BACKUP_SUFFIX" environment variable, if present /// 3. By using the default '~' if none of the others apply /// /// This function directly takes [`clap::ArgMatches`] as argument and looks for /// the '-S' and '--suffix' arguments itself. pub fn determine_backup_suffix(matches: &ArgMatches) -> String { let supplied_suffix = matches.get_one::(arguments::OPT_SUFFIX); if let Some(suffix) = supplied_suffix { String::from(suffix) } else { env::var("SIMPLE_BACKUP_SUFFIX").unwrap_or_else(|_| "~".to_owned()) } } /// Determine the "mode" for the backup operation to perform, if any. 
/// /// Parses the backup options according to the [GNU manual][1], and converts /// them to an instance of `BackupMode` for further processing. /// /// Takes [`clap::ArgMatches`] as argument which **must** contain the options /// from [`arguments::backup()`] and [`arguments::backup_no_args()`]. Otherwise /// the `NoBackup` mode is returned unconditionally. /// /// It is recommended for anyone who would like to implement the /// backup-functionality to use the arguments prepared in the `arguments` /// submodule (see examples). /// /// [1]: https://www.gnu.org/software/coreutils/manual/html_node/Backup-options.html /// /// /// # Errors /// /// If an argument supplied directly to the long `backup` option, or read in /// through the `VERSION_CONTROL` env var is invalid or ambiguous (i.e. may /// resolve to multiple backup modes), an [`InvalidArgument`][10] or /// [`AmbiguousArgument`][11] error is returned, respectively. /// /// [10]: BackupError::InvalidArgument /// [11]: BackupError::AmbiguousArgument /// /// /// # Examples /// /// Here's how one would integrate the backup mode determination into an /// application. /// /// ``` /// #[macro_use] /// extern crate uucore; /// use uucore::backup_control::{self, BackupMode}; /// use clap::{Command, Arg, ArgMatches}; /// /// fn main() { /// let matches = Command::new("command") /// .arg(backup_control::arguments::backup()) /// .arg(backup_control::arguments::backup_no_args()) /// .get_matches_from(vec![ /// "command", "-b", "--backup=t" /// ]); /// /// let backup_mode = backup_control::determine_backup_mode(&matches).unwrap(); /// assert_eq!(backup_mode, BackupMode::NumberedBackup) /// } /// ``` /// /// This example shows an ambiguous input, as 'n' may resolve to 4 different /// backup modes. 
/// /// /// ``` /// #[macro_use] /// extern crate uucore; /// use uucore::backup_control::{self, BackupMode, BackupError}; /// use clap::{Command, Arg, ArgMatches}; /// /// fn main() { /// let matches = Command::new("command") /// .arg(backup_control::arguments::backup()) /// .arg(backup_control::arguments::backup_no_args()) /// .get_matches_from(vec![ /// "command", "-b", "--backup=n" /// ]); /// /// let backup_mode = backup_control::determine_backup_mode(&matches); /// /// assert!(backup_mode.is_err()); /// let err = backup_mode.unwrap_err(); /// // assert_eq!(err, BackupError::AmbiguousArgument); /// // Use uucore functionality to show the error to the user /// show!(err); /// } /// ``` pub fn determine_backup_mode(matches: &ArgMatches) -> UResult { if matches.contains_id(arguments::OPT_BACKUP) { // Use method to determine the type of backups to make. When this option // is used but method is not specified, then the value of the // VERSION_CONTROL environment variable is used. And if VERSION_CONTROL // is not set, the default backup type is 'existing'. if let Some(method) = matches.get_one::(arguments::OPT_BACKUP) { // Second argument is for the error string that is returned. match_method(method, "backup type") } else if let Ok(method) = env::var("VERSION_CONTROL") { // Second argument is for the error string that is returned. match_method(&method, "$VERSION_CONTROL") } else { // Default if no argument is provided to '--backup' Ok(BackupMode::ExistingBackup) } } else if matches.get_flag(arguments::OPT_BACKUP_NO_ARG) { // the short form of this option, -b does not accept any argument. // Using -b is equivalent to using --backup=existing. Ok(BackupMode::ExistingBackup) } else { // No option was present at all Ok(BackupMode::NoBackup) } } /// Match a backup option string to a `BackupMode`. /// /// The GNU manual specifies that abbreviations to options are valid as long as /// they aren't ambiguous. 
/// Build the path of a simple backup by appending `suffix` to `path`.
///
/// The suffix is appended to the path's underlying `OsStr` rather than to a
/// lossily converted `String`. A file name that is not valid UTF-8 therefore
/// keeps its original bytes; a lossy conversion would substitute U+FFFD and
/// make the backup path refer to a different file name.
///
/// An empty `suffix` yields a path equal to `path`.
fn simple_backup_path(path: &Path, suffix: &str) -> PathBuf {
    let mut backup = path.as_os_str().to_os_string();
    backup.push(suffix);
    PathBuf::from(backup)
}
/// Returns true if the source file is likely to be the simple backup file for the target file.
///
/// The check is purely textual: it is true when `source` equals `target` with
/// `suffix` appended. The comparison is done on the raw `OsStr` values, so two
/// different file names that are not valid UTF-8 are never considered equal
/// merely because a lossy conversion would map both to U+FFFD.
///
/// # Arguments
///
/// * `source` - A Path reference that holds the source (backup) file path.
/// * `target` - A Path reference that holds the target file path.
/// * `suffix` - Str that holds the backup suffix.
///
/// # Examples
///
/// ```
/// use std::path::Path;
/// use uucore::backup_control::source_is_target_backup;
/// let source = Path::new("data.txt~");
/// let target = Path::new("data.txt");
/// let suffix = String::from("~");
///
/// assert_eq!(source_is_target_backup(&source, &target, &suffix), true);
/// ```
///
pub fn source_is_target_backup(source: &Path, target: &Path, suffix: &str) -> bool {
    let mut target_backup = target.as_os_str().to_os_string();
    target_backup.push(suffix);
    source.as_os_str() == target_backup.as_os_str()
}
static TEST_MUTEX: Lazy> = Lazy::new(|| Mutex::new(())); // Environment variable for "VERSION_CONTROL" static ENV_VERSION_CONTROL: &str = "VERSION_CONTROL"; fn make_app() -> clap::Command { Command::new("command") .arg(arguments::backup()) .arg(arguments::backup_no_args()) .arg(arguments::suffix()) } // Defaults to --backup=existing #[test] fn test_backup_mode_short_only() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "-b"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::ExistingBackup); } // --backup takes precedence over -b #[test] fn test_backup_mode_long_preferred_over_short() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "-b", "--backup=none"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::NoBackup); } // --backup can be passed without an argument #[test] fn test_backup_mode_long_without_args_no_env() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "--backup"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::ExistingBackup); } // --backup can be passed with an argument only #[test] fn test_backup_mode_long_with_args() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "--backup=simple"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::SimpleBackup); } // --backup errors on invalid argument #[test] fn test_backup_mode_long_with_args_invalid() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "--backup=foobar"]); let result = determine_backup_mode(&matches); assert!(result.is_err()); let text = format!("{}", result.unwrap_err()); assert!(text.contains("invalid argument 'foobar' for 'backup type'")); } // --backup errors on ambiguous argument 
#[test] fn test_backup_mode_long_with_args_ambiguous() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "--backup=n"]); let result = determine_backup_mode(&matches); assert!(result.is_err()); let text = format!("{}", result.unwrap_err()); assert!(text.contains("ambiguous argument 'n' for 'backup type'")); } // --backup accepts shortened arguments (si for simple) #[test] fn test_backup_mode_long_with_arg_shortened() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "--backup=si"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::SimpleBackup); } // -b ignores the "VERSION_CONTROL" environment variable #[test] fn test_backup_mode_short_only_ignore_env() { let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "none"); let matches = make_app().get_matches_from(vec!["command", "-b"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::ExistingBackup); env::remove_var(ENV_VERSION_CONTROL); } // --backup can be passed without an argument, but reads env var if existent #[test] fn test_backup_mode_long_without_args_with_env() { let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "none"); let matches = make_app().get_matches_from(vec!["command", "--backup"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::NoBackup); env::remove_var(ENV_VERSION_CONTROL); } // --backup errors on invalid VERSION_CONTROL env var #[test] fn test_backup_mode_long_with_env_var_invalid() { let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "foobar"); let matches = make_app().get_matches_from(vec!["command", "--backup"]); let result = determine_backup_mode(&matches); assert!(result.is_err()); let text = format!("{}", result.unwrap_err()); assert!(text.contains("invalid argument 'foobar' for '$VERSION_CONTROL'")); 
env::remove_var(ENV_VERSION_CONTROL); } // --backup errors on ambiguous VERSION_CONTROL env var #[test] fn test_backup_mode_long_with_env_var_ambiguous() { let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "n"); let matches = make_app().get_matches_from(vec!["command", "--backup"]); let result = determine_backup_mode(&matches); assert!(result.is_err()); let text = format!("{}", result.unwrap_err()); assert!(text.contains("ambiguous argument 'n' for '$VERSION_CONTROL'")); env::remove_var(ENV_VERSION_CONTROL); } // --backup accepts shortened env vars (si for simple) #[test] fn test_backup_mode_long_with_env_var_shortened() { let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "si"); let matches = make_app().get_matches_from(vec!["command", "--backup"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::SimpleBackup); env::remove_var(ENV_VERSION_CONTROL); } #[test] fn test_suffix_takes_hyphen_value() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "-b", "--suffix", "-v"]); let result = determine_backup_suffix(&matches); assert_eq!(result, "-v"); } #[test] fn test_source_is_target_backup() { let source = Path::new("data.txt.bak"); let target = Path::new("data.txt"); let suffix = String::from(".bak"); assert!(source_is_target_backup(&source, &target, &suffix)); } #[test] fn test_source_is_not_target_backup() { let source = Path::new("data.txt"); let target = Path::new("backup.txt"); let suffix = String::from(".bak"); assert!(!source_is_target_backup(&source, &target, &suffix)); } #[test] fn test_source_is_target_backup_with_tilde_suffix() { let source = Path::new("example~"); let target = Path::new("example"); let suffix = String::from("~"); assert!(source_is_target_backup(&source, &target, &suffix)); } } uucore-0.0.23/src/lib/features/encoding.rs000064400000000000000000000122251046102023000165210ustar 00000000000000// This file is 
part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (strings) ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUV // spell-checker:ignore (encodings) lsbf msbf hexupper use data_encoding::{self, BASE32, BASE64}; use std::io::{self, Read, Write}; use data_encoding::{Encoding, BASE32HEX, BASE64URL, HEXUPPER}; use data_encoding_macro::new_encoding; #[cfg(feature = "thiserror")] use thiserror::Error; #[derive(Debug, Error)] pub enum DecodeError { #[error("{}", _0)] Decode(#[from] data_encoding::DecodeError), #[error("{}", _0)] DecodeZ85(#[from] z85::DecodeError), #[error("{}", _0)] Io(#[from] io::Error), } pub enum EncodeError { Z85InputLenNotMultipleOf4, } pub type DecodeResult = Result, DecodeError>; #[derive(Clone, Copy)] pub enum Format { Base64, Base64Url, Base32, Base32Hex, Base16, Base2Lsbf, Base2Msbf, Z85, } use self::Format::*; const BASE2LSBF: Encoding = new_encoding! { symbols: "01", bit_order: LeastSignificantFirst, }; const BASE2MSBF: Encoding = new_encoding! { symbols: "01", bit_order: MostSignificantFirst, }; pub fn encode(f: Format, input: &[u8]) -> Result { Ok(match f { Base32 => BASE32.encode(input), Base64 => BASE64.encode(input), Base64Url => BASE64URL.encode(input), Base32Hex => BASE32HEX.encode(input), Base16 => HEXUPPER.encode(input), Base2Lsbf => BASE2LSBF.encode(input), Base2Msbf => BASE2MSBF.encode(input), Z85 => { // According to the spec we should not accept inputs whose len is not a multiple of 4. // However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them. 
if input.len() % 4 == 0 { z85::encode(input) } else { return Err(EncodeError::Z85InputLenNotMultipleOf4); } } }) } pub fn decode(f: Format, input: &[u8]) -> DecodeResult { Ok(match f { Base32 => BASE32.decode(input)?, Base64 => BASE64.decode(input)?, Base64Url => BASE64URL.decode(input)?, Base32Hex => BASE32HEX.decode(input)?, Base16 => HEXUPPER.decode(input)?, Base2Lsbf => BASE2LSBF.decode(input)?, Base2Msbf => BASE2MSBF.decode(input)?, Z85 => { // The z85 crate implements a padded encoding by using a leading '#' which is otherwise not allowed. // We manually check for a leading '#' and return an error ourselves. if input.starts_with(&[b'#']) { return Err(z85::DecodeError::InvalidByte(0, b'#').into()); } else { z85::decode(input)? } } }) } pub struct Data { line_wrap: usize, ignore_garbage: bool, input: R, format: Format, alphabet: &'static [u8], } impl Data { pub fn new(input: R, format: Format) -> Self { Self { line_wrap: 76, ignore_garbage: false, input, format, alphabet: match format { Base32 => b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=", Base64 => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/", Base64Url => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-", Base32Hex => b"0123456789ABCDEFGHIJKLMNOPQRSTUV=", Base16 => b"0123456789ABCDEF", Base2Lsbf => b"01", Base2Msbf => b"01", Z85 => b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#", }, } } #[must_use] pub fn line_wrap(mut self, wrap: usize) -> Self { self.line_wrap = wrap; self } #[must_use] pub fn ignore_garbage(mut self, ignore: bool) -> Self { self.ignore_garbage = ignore; self } pub fn decode(&mut self) -> DecodeResult { let mut buf = vec![]; self.input.read_to_end(&mut buf)?; if self.ignore_garbage { buf.retain(|c| self.alphabet.contains(c)); } else { buf.retain(|&c| c != b'\r' && c != b'\n'); }; decode(self.format, &buf) } pub fn encode(&mut self) -> Result { let mut buf: Vec = vec![]; self.input.read_to_end(&mut buf).unwrap(); 
encode(self.format, buf.as_slice()) } } // NOTE: this will likely be phased out at some point pub fn wrap_print(data: &Data, res: &str) { let stdout = io::stdout(); wrap_write(stdout.lock(), data.line_wrap, res).unwrap(); } pub fn wrap_write(mut writer: W, line_wrap: usize, res: &str) -> io::Result<()> { use std::cmp::min; if line_wrap == 0 { return write!(writer, "{res}"); } let mut start = 0; while start < res.len() { let end = min(start + line_wrap, res.len()); writeln!(writer, "{}", &res[start..end])?; start = end; } Ok(()) } uucore-0.0.23/src/lib/features/entries.rs000064400000000000000000000330731046102023000164100ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) Passwd cstr fnam gecos ngroups egid //! Get password/group file entry //! //! # Examples: //! //! ``` //! use uucore::entries::{self, Locate}; //! //! let root_group = if cfg!(any(target_os = "linux", target_os = "android")) { //! "root" //! } else { //! "wheel" //! }; //! //! assert_eq!("root", entries::uid2usr(0).unwrap()); //! assert_eq!(0, entries::usr2uid("root").unwrap()); //! assert!(entries::gid2grp(0).is_ok()); //! assert!(entries::grp2gid(root_group).is_ok()); //! //! assert!(entries::Passwd::locate(0).is_ok()); //! assert!(entries::Passwd::locate("0").is_ok()); //! assert!(entries::Passwd::locate("root").is_ok()); //! //! assert!(entries::Group::locate(0).is_ok()); //! assert!(entries::Group::locate("0").is_ok()); //! assert!(entries::Group::locate(root_group).is_ok()); //! 
``` #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] use libc::time_t; use libc::{c_char, c_int, gid_t, uid_t}; #[cfg(not(target_os = "redox"))] use libc::{getgrgid, getgrnam, getgroups}; use libc::{getpwnam, getpwuid, group, passwd}; use std::ffi::{CStr, CString}; use std::io::Error as IOError; use std::io::ErrorKind; use std::io::Result as IOResult; use std::ptr; use std::sync::Mutex; use once_cell::sync::Lazy; extern "C" { /// From: `` /// > The getgrouplist() function scans the group database to obtain /// > the list of groups that user belongs to. fn getgrouplist( name: *const c_char, gid: gid_t, groups: *mut gid_t, ngroups: *mut c_int, ) -> c_int; } /// From: `` /// > getgroups() returns the supplementary group IDs of the calling /// > process in list. /// > If size is zero, list is not modified, but the total number of /// > supplementary group IDs for the process is returned. This allows /// > the caller to determine the size of a dynamically allocated list /// > to be used in a further call to getgroups(). #[cfg(not(target_os = "redox"))] pub fn get_groups() -> IOResult> { let mut groups = Vec::new(); loop { let ngroups = match unsafe { getgroups(0, ptr::null_mut()) } { -1 => return Err(IOError::last_os_error()), // Not just optimization; 0 would mess up the next call 0 => return Ok(Vec::new()), n => n, }; // This is a small buffer, so we can afford to zero-initialize it and // use safe Vec operations groups.resize(ngroups.try_into().unwrap(), 0); let res = unsafe { getgroups(ngroups, groups.as_mut_ptr()) }; if res == -1 { let err = IOError::last_os_error(); if err.raw_os_error() == Some(libc::EINVAL) { // Number of groups changed, retry continue; } else { return Err(err); } } else { groups.truncate(ngroups.try_into().unwrap()); return Ok(groups); } } } /// The list of group IDs returned from GNU's `groups` and GNU's `id --groups` /// starts with the effective group ID (egid). /// This is a wrapper for `get_groups()` to mimic this behavior. 
/// /// If `arg_id` is `None` (default), `get_groups_gnu` moves the effective /// group id (egid) to the first entry in the returned Vector. /// If `arg_id` is `Some(x)`, `get_groups_gnu` moves the id with value `x` /// to the first entry in the returned Vector. This might be necessary /// for `id --groups --real` if `gid` and `egid` are not equal. /// /// From: `` /// > As implied by the definition of supplementary groups, the /// > effective group ID may appear in the array returned by /// > getgroups() or it may be returned only by getegid(). Duplication /// > may exist, but the application needs to call getegid() to be sure /// > of getting all of the information. Various implementation /// > variations and administrative sequences cause the set of groups /// > appearing in the result of getgroups() to vary in order and as to /// > whether the effective group ID is included, even when the set of /// > groups is the same (in the mathematical sense of ``set''). (The /// > history of a process and its parents could affect the details of /// > the result.) 
#[cfg(all(unix, not(target_os = "redox"), feature = "process"))]
pub fn get_groups_gnu(arg_id: Option<gid_t>) -> IOResult<Vec<gid_t>> {
    let groups = get_groups()?;
    // Without an explicit id the effective group id of the process leads.
    let egid = arg_id.unwrap_or_else(crate::features::process::getegid);
    Ok(sort_groups(groups, egid))
}

/// Moves `egid` to the front of `groups`.
///
/// If `egid` is already present, the entries up to and including its first
/// occurrence are rotated so that it becomes the first element while the
/// relative order of the others is kept; otherwise it is inserted at index 0.
#[cfg(all(unix, feature = "process"))]
fn sort_groups(mut groups: Vec<gid_t>, egid: gid_t) -> Vec<gid_t> {
    if let Some(index) = groups.iter().position(|&x| x == egid) {
        groups[..=index].rotate_right(1);
    } else {
        groups.insert(0, egid);
    }
    groups
}

/// Owned copy of a password database record.
#[derive(Clone, Debug)]
pub struct Passwd {
    /// AKA passwd.pw_name
    pub name: String,
    /// AKA passwd.pw_uid
    pub uid: uid_t,
    /// AKA passwd.pw_gid
    pub gid: gid_t,
    /// AKA passwd.pw_gecos
    pub user_info: Option<String>,
    /// AKA passwd.pw_shell
    pub user_shell: Option<String>,
    /// AKA passwd.pw_dir
    pub user_dir: Option<String>,
    /// AKA passwd.pw_passwd
    pub user_passwd: Option<String>,
    /// AKA passwd.pw_class
    #[cfg(any(target_os = "freebsd", target_vendor = "apple"))]
    pub user_access_class: Option<String>,
    /// AKA passwd.pw_change
    #[cfg(any(target_os = "freebsd", target_vendor = "apple"))]
    pub passwd_change_time: time_t,
    /// AKA passwd.pw_expire
    #[cfg(any(target_os = "freebsd", target_vendor = "apple"))]
    pub expiration: time_t,
}

/// SAFETY: ptr must point to a valid C string.
/// Returns None if ptr is null.
///
/// Bytes that are not valid UTF-8 are replaced lossily; the result is an
/// owned copy, so it stays valid after the C string is overwritten.
unsafe fn cstr2string(ptr: *const c_char) -> Option<String> {
    if ptr.is_null() {
        None
    } else {
        Some(CStr::from_ptr(ptr).to_string_lossy().into_owned())
    }
}

impl Passwd {
    /// SAFETY: All the pointed-to strings must be valid and not change while
    /// the function runs. That means PW_LOCK must be held.
unsafe fn from_raw(raw: passwd) -> Self {
    // Every string is copied into an owned `String` here, so the returned
    // value no longer refers to the memory behind `raw`.
    Self {
        // A record whose name pointer is null is treated as a broken
        // invariant: this panics instead of returning an error.
        name: cstr2string(raw.pw_name).expect("passwd without name"),
        uid: raw.pw_uid,
        gid: raw.pw_gid,
        // `pw_gecos` is not read on 32-bit x86/arm Android; presumably the
        // field is missing from libc's `passwd` there -- TODO confirm.
        #[cfg(not(all(
            target_os = "android",
            any(target_arch = "x86", target_arch = "arm")
        )))]
        user_info: cstr2string(raw.pw_gecos),
        #[cfg(all(target_os = "android", any(target_arch = "x86", target_arch = "arm")))]
        user_info: None,
        // The remaining string fields become `None` when their pointer is null.
        user_shell: cstr2string(raw.pw_shell),
        user_dir: cstr2string(raw.pw_dir),
        user_passwd: cstr2string(raw.pw_passwd),
        // The three fields below are only read on FreeBSD and Apple targets.
        #[cfg(any(target_os = "freebsd", target_vendor = "apple"))]
        user_access_class: cstr2string(raw.pw_class),
        #[cfg(any(target_os = "freebsd", target_vendor = "apple"))]
        passwd_change_time: raw.pw_change,
        #[cfg(any(target_os = "freebsd", target_vendor = "apple"))]
        expiration: raw.pw_expire,
    }
}

/// This is a wrapper function for `libc::getgrouplist`.
///
/// From the `getgrouplist(3)` manual page:
/// > If the number of groups of which user is a member is less than or
/// > equal to *ngroups, then the value *ngroups is returned.
/// > If the user is a member of more than *ngroups groups, then
/// > getgrouplist() returns -1. In this case, the value returned in
/// > *ngroups can be used to resize the buffer passed to a further
/// > call getgrouplist().
///
/// However, on macOS/darwin (and maybe others?) `getgrouplist` does
/// not update `ngroups` if `ngroups` is too small. Therefore, if not
/// updated by `getgrouplist`, `ngroups` needs to be increased in a
/// loop until `getgrouplist` stops returning -1.
pub fn belongs_to(&self) -> Vec<gid_t> {
    let mut ngroups: c_int = 8;
    let mut ngroups_old: c_int;
    let mut groups = vec![0; ngroups.try_into().unwrap()];
    let name = CString::new(self.name.as_bytes()).unwrap();
    loop {
        ngroups_old = ngroups;
        // SAFETY: `name` is a valid C string and `groups` has room for
        // `ngroups` entries.
        if unsafe { getgrouplist(name.as_ptr(), self.gid, groups.as_mut_ptr(), &mut ngroups) }
            == -1
        {
            // The buffer was too small. If the call did not report the size
            // it needs, grow geometrically instead.
            if ngroups == ngroups_old {
                ngroups *= 2;
            }
            groups.resize(ngroups.try_into().unwrap(), 0);
        } else {
            break;
        }
    }
    // On success `ngroups` holds the number of entries actually written.
    let ngroups = ngroups.try_into().unwrap();
    assert!(ngroups <= groups.len());
    groups.truncate(ngroups);
    groups
}
} // end of `impl Passwd`

/// Owned copy of a group database record.
#[derive(Clone, Debug)]
pub struct Group {
    /// AKA group.gr_name
    pub name: String,
    /// AKA group.gr_gid
    pub gid: gid_t,
}

impl Group {
    /// SAFETY: gr_name must be valid and not change while
    /// the function runs. That means PW_LOCK must be held.
    unsafe fn from_raw(raw: group) -> Self {
        Self {
            name: cstr2string(raw.gr_name).expect("group without name"),
            gid: raw.gr_gid,
        }
    }
}

/// Fetch desired entry.
pub trait Locate<K> {
    /// Looks up the entry identified by `key`.
    fn locate(key: K) -> IOResult<Self>
    where
        Self: ::std::marker::Sized;
}

// These functions are not thread-safe:
// > The return value may point to a static area, and may be
// > overwritten by subsequent calls to getpwent(3), getpwnam(),
// > or getpwuid().
// This applies not just to the struct but also the strings it points
// to, so we must copy all the data we want before releasing the lock.
// (Technically we must also ensure that the raw functions aren't being called
// anywhere else in the program.)
static PW_LOCK: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));

// Generates the two `Locate` implementations (by numeric id and by string)
// for `$st`, using `$fid` for id lookups and `$fnam` for name lookups.
macro_rules! f {
    ($fnam:ident, $fid:ident, $t:ident, $st:ident) => {
        impl Locate<$t> for $st {
            fn locate(k: $t) -> IOResult<Self> {
                let _guard = PW_LOCK.lock();
                // SAFETY: We're holding PW_LOCK.
                unsafe {
                    let data = $fid(k);
                    if !data.is_null() {
                        Ok($st::from_raw(ptr::read(data as *const _)))
                    } else {
                        // FIXME: Resource limits, signals and I/O failure may
                        // cause this too. See getpwnam(3).
                        // errno must be set to zero before the call. We can
                        // use libc::__errno_location() on some platforms.
                        // The same applies for the two cases below.
                        Err(IOError::new(
                            ErrorKind::NotFound,
                            format!("No such id: {}", k),
                        ))
                    }
                }
            }
        }

        impl<'a> Locate<&'a str> for $st {
            /// A key that parses as a number is looked up as an id,
            /// anything else as a name.
            fn locate(k: &'a str) -> IOResult<Self> {
                let _guard = PW_LOCK.lock();
                if let Ok(id) = k.parse::<$t>() {
                    // SAFETY: We're holding PW_LOCK.
                    unsafe {
                        let data = $fid(id);
                        if !data.is_null() {
                            Ok($st::from_raw(ptr::read(data as *const _)))
                        } else {
                            Err(IOError::new(
                                ErrorKind::NotFound,
                                format!("No such id: {}", id),
                            ))
                        }
                    }
                } else {
                    // A name containing a NUL byte cannot be passed to the C
                    // API and cannot exist in the database either, so report
                    // it as not found instead of panicking.
                    let cstring = CString::new(k).map_err(|_| {
                        IOError::new(ErrorKind::NotFound, format!("Not found: {}", k))
                    })?;
                    // SAFETY: We're holding PW_LOCK.
                    unsafe {
                        let data = $fnam(cstring.as_ptr());
                        if !data.is_null() {
                            Ok($st::from_raw(ptr::read(data as *const _)))
                        } else {
                            Err(IOError::new(
                                ErrorKind::NotFound,
                                format!("Not found: {}", k),
                            ))
                        }
                    }
                }
            }
        }
    };
}

f!(getpwnam, getpwuid, uid_t, Passwd);
#[cfg(not(target_os = "redox"))]
f!(getgrnam, getgrgid, gid_t, Group);

/// Returns the user name belonging to `id`.
#[inline]
pub fn uid2usr(id: uid_t) -> IOResult<String> {
    Passwd::locate(id).map(|p| p.name)
}

/// Returns the group name belonging to `id`.
#[cfg(not(target_os = "redox"))]
#[inline]
pub fn gid2grp(id: gid_t) -> IOResult<String> {
    Group::locate(id).map(|p| p.name)
}

/// Returns the user id of `name` (a numeric string is looked up as an id).
#[inline]
pub fn usr2uid(name: &str) -> IOResult<uid_t> {
    Passwd::locate(name).map(|p| p.uid)
}

/// Returns the group id of `name` (a numeric string is looked up as an id).
#[cfg(not(target_os = "redox"))]
#[inline]
pub fn grp2gid(name: &str) -> IOResult<gid_t> {
    Group::locate(name).map(|p| p.gid)
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_sort_groups() {
        assert_eq!(sort_groups(vec![1, 2, 3], 4), vec![4, 1, 2, 3]);
        assert_eq!(sort_groups(vec![1, 2, 3], 3), vec![3, 1, 2]);
        assert_eq!(sort_groups(vec![1, 2, 3], 2), vec![2, 1, 3]);
        assert_eq!(sort_groups(vec![1, 2, 3], 1), vec![1, 2, 3]);
        assert_eq!(sort_groups(vec![1, 2, 3], 0), vec![0, 1, 2, 3]);
    }

    #[test]
    fn test_entries_get_groups_gnu() {
        if let Ok(mut groups) = get_groups() {
            if let Some(last) = groups.pop() {
                groups.insert(0, last);
                assert_eq!(get_groups_gnu(Some(last)).unwrap(), groups);
            }
        }
    }
}
uucore-0.0.23/src/lib/features/fs.rs000064400000000000000000000675651046102023000153640ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Set of functions to manage files and symlinks // spell-checker:ignore backport #[cfg(unix)] use libc::{ mode_t, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, S_IRGRP, S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, S_IXGRP, S_IXOTH, S_IXUSR, }; use std::borrow::Cow; use std::collections::HashSet; use std::collections::VecDeque; use std::env; use std::ffi::{OsStr, OsString}; use std::fs; use std::fs::read_dir; use std::hash::Hash; use std::io::{Error, ErrorKind, Result as IOResult}; #[cfg(unix)] use std::os::unix::{fs::MetadataExt, io::AsRawFd}; use std::path::{Component, Path, PathBuf, MAIN_SEPARATOR}; #[cfg(target_os = "windows")] use winapi_util::AsHandleRef; /// Used to check if the `mode` has its `perm` bit set. /// /// This macro expands to `mode & perm != 0`. #[cfg(unix)] #[macro_export] macro_rules! has { ($mode:expr, $perm:expr) => { $mode & $perm != 0 }; } /// Information to uniquely identify a file pub struct FileInformation( #[cfg(unix)] nix::sys::stat::FileStat, #[cfg(windows)] winapi_util::file::Information, ); impl FileInformation { /// Get information from a currently open file #[cfg(unix)] pub fn from_file(file: &impl AsRawFd) -> IOResult { let stat = nix::sys::stat::fstat(file.as_raw_fd())?; Ok(Self(stat)) } /// Get information from a currently open file #[cfg(target_os = "windows")] pub fn from_file(file: &impl AsHandleRef) -> IOResult { let info = winapi_util::file::information(file.as_handle_ref())?; Ok(Self(info)) } /// Get information for a given path. /// /// If `path` points to a symlink and `dereference` is true, information about /// the link's target will be returned. 
pub fn from_path(path: impl AsRef<Path>, dereference: bool) -> IOResult<Self> {
    #[cfg(unix)]
    {
        let stat = if dereference {
            nix::sys::stat::stat(path.as_ref())
        } else {
            nix::sys::stat::lstat(path.as_ref())
        };
        Ok(Self(stat?))
    }
    #[cfg(target_os = "windows")]
    {
        use std::fs::OpenOptions;
        use std::os::windows::prelude::*;
        let mut open_options = OpenOptions::new();
        let mut custom_flags = 0;
        if !dereference {
            custom_flags |=
                windows_sys::Win32::Storage::FileSystem::FILE_FLAG_OPEN_REPARSE_POINT;
        }
        custom_flags |= windows_sys::Win32::Storage::FileSystem::FILE_FLAG_BACKUP_SEMANTICS;
        open_options.custom_flags(custom_flags);
        let file = open_options.read(true).open(path.as_ref())?;
        Self::from_file(&file)
    }
}

/// Size of the file in bytes.
///
/// # Panics
///
/// On unix, panics if the recorded size is negative.
pub fn file_size(&self) -> u64 {
    #[cfg(unix)]
    {
        assert!(self.0.st_size >= 0, "File size is negative");
        self.0.st_size.try_into().unwrap()
    }
    #[cfg(target_os = "windows")]
    {
        self.0.file_size()
    }
}

/// File index as reported by the platform.
#[cfg(windows)]
pub fn file_index(&self) -> u64 {
    self.0.file_index()
}

/// Number of hard links to the file.
pub fn number_of_links(&self) -> u64 {
    // `st_nlink` is returned as-is on the targets of the first branch and
    // widened with `into()` on the others.
    #[cfg(all(
        unix,
        not(target_vendor = "apple"),
        not(target_os = "android"),
        not(target_os = "freebsd"),
        not(target_os = "netbsd"),
        not(target_os = "illumos"),
        not(target_os = "solaris"),
        not(target_arch = "aarch64"),
        not(target_arch = "riscv64"),
        target_pointer_width = "64"
    ))]
    return self.0.st_nlink;
    #[cfg(all(
        unix,
        any(
            target_vendor = "apple",
            target_os = "android",
            target_os = "freebsd",
            target_os = "netbsd",
            target_os = "illumos",
            target_os = "solaris",
            target_arch = "aarch64",
            target_arch = "riscv64",
            not(target_pointer_width = "64")
        )
    ))]
    return self.0.st_nlink.into();
    #[cfg(windows)]
    return self.0.number_of_links();
}

/// Inode number of the file.
#[cfg(unix)]
pub fn inode(&self) -> u64 {
    #[cfg(all(
        not(any(target_os = "freebsd", target_os = "netbsd")),
        target_pointer_width = "64"
    ))]
    return self.0.st_ino;
    #[cfg(any(
        target_os = "freebsd",
        target_os = "netbsd",
        not(target_pointer_width = "64")
    ))]
    return self.0.st_ino.into();
}
} // end of `impl FileInformation`

// Two records are equal when device and inode numbers agree.
#[cfg(unix)]
impl PartialEq for FileInformation {
    fn eq(&self, other: &Self) -> bool {
        self.0.st_dev == other.0.st_dev && self.0.st_ino == other.0.st_ino
    }
}

// Two records are equal when volume serial number and file index agree.
#[cfg(target_os = "windows")]
impl PartialEq for FileInformation {
    fn eq(&self, other: &Self) -> bool {
        self.0.volume_serial_number() == other.0.volume_serial_number()
            && self.0.file_index() == other.0.file_index()
    }
}

impl Eq for FileInformation {}

// Hashes exactly the fields compared by `PartialEq`.
impl Hash for FileInformation {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        #[cfg(unix)]
        {
            self.0.st_dev.hash(state);
            self.0.st_ino.hash(state);
        }
        #[cfg(target_os = "windows")]
        {
            self.0.volume_serial_number().hash(state);
            self.0.file_index().hash(state);
        }
    }
}

/// resolve a relative path
///
/// A path without any `..` component is returned unchanged (borrowed).
/// Otherwise the components are applied on top of the current directory
/// (or the root directory if the current directory cannot be determined);
/// before stepping up for a `..`, the intermediate result is replaced by
/// its link target if it is a symlink.
pub fn resolve_relative_path(path: &Path) -> Cow<Path> {
    if path.components().all(|e| e != Component::ParentDir) {
        return path.into();
    }
    let root = Component::RootDir.as_os_str();
    let mut result = env::current_dir().unwrap_or_else(|_| PathBuf::from(root));
    for comp in path.components() {
        match comp {
            Component::ParentDir => {
                if let Ok(p) = result.read_link() {
                    result = p;
                }
                result.pop();
            }
            Component::CurDir => (),
            Component::RootDir | Component::Normal(_) | Component::Prefix(_) => {
                result.push(comp.as_os_str());
            }
        }
    }
    result.into()
}

/// Controls how errors while resolving path components are handled when
/// canonicalizing a path (see [`canonicalize`]).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MissingHandling {
    /// Return an error if a non-final component cannot be resolved; an error
    /// on the final component is tolerated.
    Normal,

    /// Return an error if any component, including the final one, cannot be
    /// resolved.
    Existing,

    /// Ignore errors on all components.
    Missing,
}

/// Controls when symbolic links are resolved
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ResolveMode {
    /// Do not resolve any symbolic links.
    None,

    /// Resolve symlinks as encountered when processing the path
    Physical,

    /// Resolve '..' elements before symlinks
    Logical,
}

/// Normalize a path by removing relative information
/// For example, convert 'bar/../foo/bar.txt' => 'foo/bar.txt'
///
/// This works purely on the path's components and never touches the file
/// system. Copied from another MIT-licensed project (the reference links are
/// missing from this copy of the file); replace it once the standard
/// library offers an equivalent.
pub fn normalize_path(path: &Path) -> PathBuf {
    let mut components = path.components().peekable();
    let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
        components.next();
        PathBuf::from(c.as_os_str())
    } else {
        PathBuf::new()
    };

    for component in components {
        match component {
            Component::Prefix(..) => unreachable!(),
            Component::RootDir => {
                ret.push(component.as_os_str());
            }
            Component::CurDir => {}
            Component::ParentDir => {
                ret.pop();
            }
            Component::Normal(c) => {
                ret.push(c);
            }
        }
    }
    ret
}

/// Returns the link target if `path` itself is a symlink, `None` otherwise.
fn resolve_symlink<P: AsRef<Path>>(path: P) -> IOResult<Option<PathBuf>> {
    let result = if fs::symlink_metadata(&path)?.file_type().is_symlink() {
        Some(fs::read_link(&path)?)
    } else {
        None
    };
    Ok(result)
}

/// Owned counterpart of [`Component`], so that components of freshly read
/// link targets can be queued without borrowing from them.
enum OwningComponent {
    Prefix(OsString),
    RootDir,
    CurDir,
    ParentDir,
    Normal(OsString),
}

impl OwningComponent {
    fn as_os_str(&self) -> &OsStr {
        match self {
            Self::Prefix(s) => s.as_os_str(),
            Self::RootDir => Component::RootDir.as_os_str(),
            Self::CurDir => Component::CurDir.as_os_str(),
            Self::ParentDir => Component::ParentDir.as_os_str(),
            Self::Normal(s) => s.as_os_str(),
        }
    }
}

impl<'a> From<Component<'a>> for OwningComponent {
    fn from(comp: Component<'a>) -> Self {
        match comp {
            Component::Prefix(_) => Self::Prefix(comp.as_os_str().to_os_string()),
            Component::RootDir => Self::RootDir,
            Component::CurDir => Self::CurDir,
            Component::ParentDir => Self::ParentDir,
            Component::Normal(s) => Self::Normal(s.to_os_string()),
        }
    }
}

/// Return the canonical, absolute form of a path.
///
/// This function is a generalization of [`std::fs::canonicalize`] that
/// allows controlling how symbolic links are resolved and how to deal
/// with missing components. It returns the canonical, absolute form of
/// a path.
/// The `miss_mode` parameter controls how missing path elements are handled
///
/// * [`MissingHandling::Normal`] returns an error if a non-final component
///   cannot be resolved; if the resulting path does not exist, its parent
///   must be a readable directory.
/// * [`MissingHandling::Missing`] makes this function ignore components of
///   the path that could not be resolved.
/// * [`MissingHandling::Existing`] makes this function return an error
///   if any component of the path, including the final one, cannot be
///   resolved.
///
/// For `Normal` and `Existing`, a path that ends with a separator must
/// refer to a directory.
///
/// The `res_mode` parameter controls how symbolic links are
/// resolved:
///
/// * [`ResolveMode::None`] makes this function not try to resolve
///   any symbolic links.
/// * [`ResolveMode::Physical`] makes this function resolve symlinks as they
///   are encountered
/// * [`ResolveMode::Logical`] makes this function resolve '..' components
///   before symlinks
///
/// # Errors
///
/// Returns the underlying I/O error for the cases described above, and an
/// `InvalidInput` error ("Too many levels of symbolic links") when a symlink
/// loop is detected.
#[allow(clippy::cognitive_complexity)]
pub fn canonicalize<P: AsRef<Path>>(
    original: P,
    miss_mode: MissingHandling,
    res_mode: ResolveMode,
) -> IOResult<PathBuf> {
    // Loop detection only starts after this many symlinks have been followed.
    const SYMLINKS_TO_LOOK_FOR_LOOPS: i32 = 20;
    let original = original.as_ref();
    let has_to_be_directory =
        (miss_mode == MissingHandling::Normal || miss_mode == MissingHandling::Existing) && {
            let path_str = original.to_string_lossy();
            path_str.ends_with(MAIN_SEPARATOR) || path_str.ends_with('/')
        };
    let original = if original.is_absolute() {
        original.to_path_buf()
    } else {
        let current_dir = env::current_dir()?;
        dunce::canonicalize(current_dir)?.join(original)
    };
    let path = if res_mode == ResolveMode::Logical {
        normalize_path(&original)
    } else {
        original
    };
    // Components still to be processed; link targets are pushed to the front.
    let mut parts: VecDeque<OwningComponent> = path.components().map(|part| part.into()).collect();
    let mut result = PathBuf::new();
    let mut followed_symlinks = 0;
    let mut visited_files = HashSet::new();
    while let Some(part) = parts.pop_front() {
        match part {
            OwningComponent::Prefix(s) => {
                result.push(s);
                continue;
            }
            OwningComponent::RootDir | OwningComponent::Normal(..) => {
                result.push(part.as_os_str());
            }
            OwningComponent::CurDir => {}
            OwningComponent::ParentDir => {
                result.pop();
            }
        }
        if res_mode == ResolveMode::None {
            continue;
        }
        match resolve_symlink(&result) {
            Ok(Some(link_path)) => {
                for link_part in link_path.components().rev() {
                    parts.push_front(link_part.into());
                }
                if followed_symlinks < SYMLINKS_TO_LOOK_FOR_LOOPS {
                    followed_symlinks += 1;
                } else {
                    // Identify the state by the link's directory plus the
                    // path that remains to be followed; seeing the same
                    // state twice means we are going in circles.
                    let link_dir = result
                        .parent()
                        .expect("a symlink always has a parent directory");
                    // Propagate a failing stat instead of panicking.
                    let file_info = FileInformation::from_path(link_dir, false)?;
                    let mut path_to_follow = PathBuf::new();
                    for part in &parts {
                        path_to_follow.push(part.as_os_str());
                    }
                    if !visited_files.insert((file_info, path_to_follow)) {
                        return Err(Error::new(
                            ErrorKind::InvalidInput,
                            "Too many levels of symbolic links",
                        )); // TODO use ErrorKind::FilesystemLoop when stable
                    }
                }
                // Drop the link itself; its target components are queued.
                result.pop();
            }
            Err(e) => {
                if miss_mode == MissingHandling::Existing
                    || (miss_mode == MissingHandling::Normal && !parts.is_empty())
                {
                    return Err(e);
                }
            }
            _ => {}
        }
    }
    // raise Not a directory if required
    match miss_mode {
        MissingHandling::Existing => {
            if has_to_be_directory {
                read_dir(&result)?;
            }
        }
        MissingHandling::Normal => {
            if result.exists() {
                if has_to_be_directory {
                    read_dir(&result)?;
                }
            } else if let Some(parent) = result.parent() {
                read_dir(parent)?;
            }
        }
        MissingHandling::Missing => {}
    }
    Ok(result)
}

#[cfg(not(unix))]
/// Display the permissions of a file
///
/// Only the read-only flag is consulted: read and execute are always shown
/// as set and the write flag is the same for all three classes.
pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String {
    let write = if metadata.permissions().readonly() {
        '-'
    } else {
        'w'
    };

    if display_file_type {
        let file_type = if metadata.is_symlink() {
            'l'
        } else if metadata.is_dir() {
            'd'
        } else {
            '-'
        };

        format!("{file_type}r{write}xr{write}xr{write}x")
    } else {
        format!("r{write}xr{write}xr{write}x")
    }
}

#[cfg(unix)]
/// Display the permissions of a file
pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String {
    let mode: mode_t = metadata.mode() as mode_t;
    display_permissions_unix(mode, display_file_type)
}

/// Returns a character
representation of the file type based on its mode. /// This function is specific to Unix-like systems. /// /// - `mode`: The mode of the file, typically obtained from file metadata. /// /// # Returns /// - 'd' for directories /// - 'c' for character devices /// - 'b' for block devices /// - '-' for regular files /// - 'p' for FIFOs (named pipes) /// - 'l' for symbolic links /// - 's' for sockets /// - '?' for any other unrecognized file types #[cfg(unix)] fn get_file_display(mode: mode_t) -> char { match mode & S_IFMT { S_IFDIR => 'd', S_IFCHR => 'c', S_IFBLK => 'b', S_IFREG => '-', S_IFIFO => 'p', S_IFLNK => 'l', S_IFSOCK => 's', // TODO: Other file types _ => '?', } } // The logic below is more readable written this way. #[allow(clippy::if_not_else)] #[allow(clippy::cognitive_complexity)] #[cfg(unix)] /// Display the permissions of a file on a unix like system pub fn display_permissions_unix(mode: mode_t, display_file_type: bool) -> String { let mut result; if display_file_type { result = String::with_capacity(10); result.push(get_file_display(mode)); } else { result = String::with_capacity(9); } result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' }); result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' }); result.push(if has!(mode, S_ISUID as mode_t) { if has!(mode, S_IXUSR) { 's' } else { 'S' } } else if has!(mode, S_IXUSR) { 'x' } else { '-' }); result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' }); result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' }); result.push(if has!(mode, S_ISGID as mode_t) { if has!(mode, S_IXGRP) { 's' } else { 'S' } } else if has!(mode, S_IXGRP) { 'x' } else { '-' }); result.push(if has!(mode, S_IROTH) { 'r' } else { '-' }); result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' }); result.push(if has!(mode, S_ISVTX as mode_t) { if has!(mode, S_IXOTH) { 't' } else { 'T' } } else if has!(mode, S_IXOTH) { 'x' } else { '-' }); result } /// For some programs like install or mkdir, dir/. 
can be provided /// Special case to match GNU's behavior: /// install -d foo/. should work and just create foo/ /// std::fs::create_dir("foo/."); fails in pure Rust pub fn dir_strip_dot_for_creation(path: &Path) -> PathBuf { if path.to_string_lossy().ends_with("/.") { // Do a simple dance to strip the "/." Path::new(&path).components().collect::() } else { path.to_path_buf() } } /// Checks if `p1` and `p2` are the same file. /// If error happens when trying to get files' metadata, returns false pub fn paths_refer_to_same_file>(p1: P, p2: P, dereference: bool) -> bool { infos_refer_to_same_file( FileInformation::from_path(p1, dereference), FileInformation::from_path(p2, dereference), ) } /// Checks if `p1` and `p2` are the same file information. /// If error happens when trying to get files' metadata, returns false pub fn infos_refer_to_same_file( info1: IOResult, info2: IOResult, ) -> bool { if let Ok(info1) = info1 { if let Ok(info2) = info2 { return info1 == info2; } } false } /// Converts absolute `path` to be relative to absolute `to` path. pub fn make_path_relative_to, P2: AsRef>(path: P1, to: P2) -> PathBuf { let path = path.as_ref(); let to = to.as_ref(); let common_prefix_size = path .components() .zip(to.components()) .take_while(|(first, second)| first == second) .count(); let path_suffix = path .components() .skip(common_prefix_size) .map(|x| x.as_os_str()); let mut components: Vec<_> = to .components() .skip(common_prefix_size) .map(|_| Component::ParentDir.as_os_str()) .chain(path_suffix) .collect(); if components.is_empty() { components.push(Component::CurDir.as_os_str()); } components.iter().collect() } /// Checks if there is a symlink loop in the given path. /// /// A symlink loop is a chain of symlinks where the last symlink points back to one of the previous symlinks in the chain. /// /// # Arguments /// /// * `path` - A reference to a `Path` representing the starting path to check for symlink loops. 
///
/// # Returns
///
/// * `bool` - Returns `true` if a symlink loop is detected, `false` otherwise.
///
/// Relative link targets are resolved against the directory that contains the
/// symlink, which is how the operating system follows them. Visited links are
/// compared by their path text.
pub fn is_symlink_loop(path: &Path) -> bool {
    let mut visited_symlinks = HashSet::new();
    let mut current_path = path.to_path_buf();

    // The walk stops as soon as the current path cannot be inspected or is
    // not a symlink any more (`read_link` fails on non-links).
    while let (Ok(metadata), Ok(link)) = (
        current_path.symlink_metadata(),
        fs::read_link(&current_path),
    ) {
        if !metadata.file_type().is_symlink() {
            return false;
        }
        // Seeing the same link twice means the chain points back into itself.
        if !visited_symlinks.insert(current_path.clone()) {
            return true;
        }
        // `Path::join` keeps an absolute `link` as-is and anchors a relative
        // one at the symlink's own directory instead of the process CWD.
        let next_path = match current_path.parent() {
            Some(parent) => parent.join(link),
            None => link,
        };
        current_path = next_path;
    }
    false
}

/// Hard link comparison is not supported on non-Unix platforms, so this
/// always returns `false`.
#[cfg(not(unix))]
pub fn are_hardlinks_to_same_file(_source: &Path, _target: &Path) -> bool {
    false
}

/// Checks if two paths are hard links to the same file.
///
/// Neither path is dereferenced: a symlink is compared as the link itself.
///
/// # Arguments
///
/// * `source` - A reference to a `Path` representing the source path.
/// * `target` - A reference to a `Path` representing the target path.
///
/// # Returns
///
/// * `bool` - Returns `true` if the paths are hard links to the same file, and `false` otherwise.
///   Also returns `false` when the metadata of either path cannot be read.
#[cfg(unix)]
pub fn are_hardlinks_to_same_file(source: &Path, target: &Path) -> bool {
    match (fs::symlink_metadata(source), fs::symlink_metadata(target)) {
        // Same inode on the same device identifies one file.
        (Ok(source_metadata), Ok(target_metadata)) => {
            source_metadata.ino() == target_metadata.ino()
                && source_metadata.dev() == target_metadata.dev()
        }
        _ => false,
    }
}

/// Not supported on non-Unix platforms, so this always returns `false`.
#[cfg(not(unix))]
pub fn are_hardlinks_or_one_way_symlink_to_same_file(_source: &Path, _target: &Path) -> bool {
    false
}

/// Checks if either two paths are hard links to the same file or if the source path is a symbolic link which when fully resolved points to target path
///
/// # Arguments
///
/// * `source` - A reference to a `Path` representing the source path.
/// * `target` - A reference to a `Path` representing the target path.
///
/// # Returns
///
/// * `bool` - Returns `true` if either of above conditions are true, and `false` otherwise.
#[cfg(unix)] pub fn are_hardlinks_or_one_way_symlink_to_same_file(source: &Path, target: &Path) -> bool { let source_metadata = match fs::metadata(source) { Ok(metadata) => metadata, Err(_) => return false, }; let target_metadata = match fs::symlink_metadata(target) { Ok(metadata) => metadata, Err(_) => return false, }; source_metadata.ino() == target_metadata.ino() && source_metadata.dev() == target_metadata.dev() } #[cfg(test)] mod tests { // Note this useful idiom: importing names from outer (for mod tests) scope. use super::*; #[cfg(unix)] use std::io::Write; #[cfg(unix)] use std::os::unix; #[cfg(unix)] use tempfile::{tempdir, NamedTempFile}; struct NormalizePathTestCase<'a> { path: &'a str, test: &'a str, } const NORMALIZE_PATH_TESTS: [NormalizePathTestCase; 8] = [ NormalizePathTestCase { path: "./foo/bar.txt", test: "foo/bar.txt", }, NormalizePathTestCase { path: "bar/../foo/bar.txt", test: "foo/bar.txt", }, NormalizePathTestCase { path: "foo///bar.txt", test: "foo/bar.txt", }, NormalizePathTestCase { path: "foo///bar", test: "foo/bar", }, NormalizePathTestCase { path: "foo//./bar", test: "foo/bar", }, NormalizePathTestCase { path: "/foo//./bar", test: "/foo/bar", }, NormalizePathTestCase { path: r"C:/you/later/", test: "C:/you/later", }, NormalizePathTestCase { path: "\\networkShare/a//foo//./bar", test: "\\networkShare/a/foo/bar", }, ]; #[test] fn test_normalize_path() { for test in &NORMALIZE_PATH_TESTS { let path = Path::new(test.path); let normalized = normalize_path(path); assert_eq!( test.test .replace('/', std::path::MAIN_SEPARATOR.to_string().as_str()), normalized.to_str().expect("Path is not valid utf-8!") ); } } #[cfg(unix)] #[test] fn test_display_permissions() { // spell-checker:ignore (perms) brwsr drwxr rwxr assert_eq!( "drwxr-xr-x", display_permissions_unix(S_IFDIR | 0o755, true) ); assert_eq!( "rwxr-xr-x", display_permissions_unix(S_IFDIR | 0o755, false) ); assert_eq!( "-rw-r--r--", display_permissions_unix(S_IFREG | 0o644, true) ); 
assert_eq!( "srw-r-----", display_permissions_unix(S_IFSOCK | 0o640, true) ); assert_eq!( "lrw-r-xr-x", display_permissions_unix(S_IFLNK | 0o655, true) ); assert_eq!("?rw-r-xr-x", display_permissions_unix(0o655, true)); assert_eq!( "brwSr-xr-x", display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o655, true) ); assert_eq!( "brwsr-xr-x", display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o755, true) ); assert_eq!( "prw---sr--", display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o614, true) ); assert_eq!( "prw---Sr--", display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o604, true) ); assert_eq!( "c---r-xr-t", display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o055, true) ); assert_eq!( "c---r-xr-T", display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o054, true) ); } #[cfg(unix)] #[test] fn test_is_symlink_loop_no_loop() { let temp_dir = tempdir().unwrap(); let file_path = temp_dir.path().join("file.txt"); let symlink_path = temp_dir.path().join("symlink"); fs::write(&file_path, "test content").unwrap(); unix::fs::symlink(&file_path, &symlink_path).unwrap(); assert!(!is_symlink_loop(&symlink_path)); } #[cfg(unix)] #[test] fn test_is_symlink_loop_direct_loop() { let temp_dir = tempdir().unwrap(); let symlink_path = temp_dir.path().join("loop"); unix::fs::symlink(&symlink_path, &symlink_path).unwrap(); assert!(is_symlink_loop(&symlink_path)); } #[cfg(unix)] #[test] fn test_is_symlink_loop_indirect_loop() { let temp_dir = tempdir().unwrap(); let symlink1_path = temp_dir.path().join("symlink1"); let symlink2_path = temp_dir.path().join("symlink2"); unix::fs::symlink(&symlink1_path, &symlink2_path).unwrap(); unix::fs::symlink(&symlink2_path, &symlink1_path).unwrap(); assert!(is_symlink_loop(&symlink1_path)); } #[cfg(unix)] #[test] fn test_are_hardlinks_to_same_file_same_file() { let mut temp_file = NamedTempFile::new().unwrap(); writeln!(temp_file, "Test content").unwrap(); let path1 = temp_file.path(); let path2 = temp_file.path(); 
assert!(are_hardlinks_to_same_file(&path1, &path2)); } #[cfg(unix)] #[test] fn test_are_hardlinks_to_same_file_different_files() { let mut temp_file1 = NamedTempFile::new().unwrap(); writeln!(temp_file1, "Test content 1").unwrap(); let mut temp_file2 = NamedTempFile::new().unwrap(); writeln!(temp_file2, "Test content 2").unwrap(); let path1 = temp_file1.path(); let path2 = temp_file2.path(); assert!(!are_hardlinks_to_same_file(&path1, &path2)); } #[cfg(unix)] #[test] fn test_are_hardlinks_to_same_file_hard_link() { let mut temp_file = NamedTempFile::new().unwrap(); writeln!(temp_file, "Test content").unwrap(); let path1 = temp_file.path(); let path2 = temp_file.path().with_extension("hardlink"); fs::hard_link(&path1, &path2).unwrap(); assert!(are_hardlinks_to_same_file(&path1, &path2)); } #[cfg(unix)] #[test] fn test_get_file_display() { assert_eq!(get_file_display(S_IFDIR | 0o755), 'd'); assert_eq!(get_file_display(S_IFCHR | 0o644), 'c'); assert_eq!(get_file_display(S_IFBLK | 0o600), 'b'); assert_eq!(get_file_display(S_IFREG | 0o777), '-'); assert_eq!(get_file_display(S_IFIFO | 0o666), 'p'); assert_eq!(get_file_display(S_IFLNK | 0o777), 'l'); assert_eq!(get_file_display(S_IFSOCK | 0o600), 's'); assert_eq!(get_file_display(0o777), '?'); } } uucore-0.0.23/src/lib/features/fsext.rs000064400000000000000000001047531046102023000160740ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! 
Set of functions to manage file systems // spell-checker:ignore DATETIME getmntinfo subsecond (arch) bitrig ; (fs) cifs smbfs use time::macros::format_description; use time::UtcOffset; pub use crate::*; // import macros from `../../macros.rs` #[cfg(any(target_os = "linux", target_os = "android"))] const LINUX_MTAB: &str = "/etc/mtab"; #[cfg(any(target_os = "linux", target_os = "android"))] const LINUX_MOUNTINFO: &str = "/proc/self/mountinfo"; static MOUNT_OPT_BIND: &str = "bind"; #[cfg(windows)] const MAX_PATH: usize = 266; #[cfg(not(unix))] static EXIT_ERR: i32 = 1; #[cfg(windows)] use std::ffi::OsStr; #[cfg(windows)] use std::os::windows::ffi::OsStrExt; #[cfg(windows)] use windows_sys::Win32::Foundation::{ERROR_NO_MORE_FILES, INVALID_HANDLE_VALUE}; #[cfg(windows)] use windows_sys::Win32::Storage::FileSystem::{ FindFirstVolumeW, FindNextVolumeW, FindVolumeClose, GetDiskFreeSpaceW, GetDriveTypeW, GetVolumeInformationW, GetVolumePathNamesForVolumeNameW, QueryDosDeviceW, }; #[cfg(windows)] use windows_sys::Win32::System::WindowsProgramming::DRIVE_REMOTE; // Warning: the pointer has to be used *immediately* or the Vec // it points to will be dropped! #[cfg(windows)] macro_rules! 
String2LPWSTR { ($str: expr) => { OsStr::new(&$str) .encode_wide() .chain(Some(0)) .collect::>() .as_ptr() }; } #[cfg(windows)] #[allow(non_snake_case)] fn LPWSTR2String(buf: &[u16]) -> String { let len = buf.iter().position(|&n| n == 0).unwrap(); String::from_utf16(&buf[..len]).unwrap() } #[cfg(unix)] use libc::{ mode_t, strerror, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, }; use std::borrow::Cow; use std::convert::{AsRef, From}; #[cfg(any( target_vendor = "apple", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd", target_os = "linux", target_os = "android", target_os = "illumos", target_os = "solaris", ))] use std::ffi::CStr; #[cfg(not(windows))] use std::ffi::CString; use std::io::Error as IOError; #[cfg(unix)] use std::mem; #[cfg(not(unix))] use std::path::Path; use std::time::UNIX_EPOCH; #[cfg(any( target_os = "linux", target_os = "android", target_vendor = "apple", target_os = "freebsd", target_os = "openbsd" ))] pub use libc::statfs as StatFs; #[cfg(any( target_os = "netbsd", target_os = "bitrig", target_os = "dragonfly", target_os = "illumos", target_os = "solaris", target_os = "redox" ))] pub use libc::statvfs as StatFs; #[cfg(any( target_os = "linux", target_os = "android", target_vendor = "apple", target_os = "freebsd", target_os = "openbsd", target_os = "redox" ))] pub use libc::statfs as statfs_fn; #[cfg(any( target_os = "netbsd", target_os = "bitrig", target_os = "illumos", target_os = "solaris", target_os = "dragonfly" ))] pub use libc::statvfs as statfs_fn; pub trait BirthTime { fn pretty_birth(&self) -> String; fn birth(&self) -> u64; } use std::fs::Metadata; impl BirthTime for Metadata { fn pretty_birth(&self) -> String { self.created() .ok() .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) .map(|e| pretty_time(e.as_secs() as i64, i64::from(e.subsec_nanos()))) .unwrap_or_else(|| "-".to_owned()) } fn birth(&self) -> u64 { self.created() .ok() .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) .map(|e| 
e.as_secs()) .unwrap_or_default() } } #[derive(Debug, Clone)] pub struct MountInfo { // it stores `volume_name` in windows platform and `dev_id` in unix platform pub dev_id: String, pub dev_name: String, pub fs_type: String, pub mount_dir: String, pub mount_option: String, // we only care "bind" option pub mount_root: String, pub remote: bool, pub dummy: bool, } impl MountInfo { fn set_missing_fields(&mut self) { #[cfg(unix)] { use std::os::unix::fs::MetadataExt; // We want to keep the dev_id on Windows // but set dev_id if let Ok(stat) = std::fs::metadata(&self.mount_dir) { // Why do we cast this to i32? self.dev_id = (stat.dev() as i32).to_string(); } else { self.dev_id = String::new(); } } // set MountInfo::dummy // spell-checker:disable match self.fs_type.as_ref() { "autofs" | "proc" | "subfs" /* for Linux 2.6/3.x */ | "debugfs" | "devpts" | "fusectl" | "mqueue" | "rpc_pipefs" | "sysfs" /* FreeBSD, Linux 2.4 */ | "devfs" /* for NetBSD 3.0 */ | "kernfs" /* for Irix 6.5 */ | "ignore" => self.dummy = true, _ => self.dummy = self.fs_type == "none" && !self.mount_option.contains(MOUNT_OPT_BIND) } // spell-checker:enable // set MountInfo::remote #[cfg(windows)] { self.remote = DRIVE_REMOTE == unsafe { GetDriveTypeW(String2LPWSTR!(self.mount_root)) }; } #[cfg(unix)] { self.remote = self.dev_name.find(':').is_some() || (self.dev_name.starts_with("//") && self.fs_type == "smbfs" || self.fs_type == "cifs") || self.dev_name == "-hosts"; } } #[cfg(any(target_os = "linux", target_os = "android"))] fn new(file_name: &str, raw: &[&str]) -> Option { match file_name { // spell-checker:ignore (word) noatime // Format: 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue // "man proc" for more details LINUX_MOUNTINFO => { const FIELDS_OFFSET: usize = 6; let after_fields = raw[FIELDS_OFFSET..].iter().position(|c| *c == "-").unwrap() + FIELDS_OFFSET + 1; let mut m = Self { dev_id: String::new(), dev_name: raw[after_fields + 1].to_string(), fs_type: 
raw[after_fields].to_string(), mount_root: raw[3].to_string(), mount_dir: raw[4].to_string(), mount_option: raw[5].to_string(), remote: false, dummy: false, }; m.set_missing_fields(); Some(m) } LINUX_MTAB => { let mut m = Self { dev_id: String::new(), dev_name: raw[0].to_string(), fs_type: raw[2].to_string(), mount_root: String::new(), mount_dir: raw[1].to_string(), mount_option: raw[3].to_string(), remote: false, dummy: false, }; m.set_missing_fields(); Some(m) } _ => None, } } #[cfg(windows)] fn new(mut volume_name: String) -> Option { let mut dev_name_buf = [0u16; MAX_PATH]; volume_name.pop(); unsafe { QueryDosDeviceW( OsStr::new(&volume_name) .encode_wide() .chain(Some(0)) .skip(4) .collect::>() .as_ptr(), dev_name_buf.as_mut_ptr(), dev_name_buf.len() as u32, ) }; volume_name.push('\\'); let dev_name = LPWSTR2String(&dev_name_buf); let mut mount_root_buf = [0u16; MAX_PATH]; let success = unsafe { GetVolumePathNamesForVolumeNameW( String2LPWSTR!(volume_name), mount_root_buf.as_mut_ptr(), mount_root_buf.len() as u32, ptr::null_mut(), ) }; if 0 == success { // TODO: support the case when `GetLastError()` returns `ERROR_MORE_DATA` return None; } let mount_root = LPWSTR2String(&mount_root_buf); let mut fs_type_buf = [0u16; MAX_PATH]; let success = unsafe { GetVolumeInformationW( String2LPWSTR!(mount_root), ptr::null_mut(), 0, ptr::null_mut(), ptr::null_mut(), ptr::null_mut(), fs_type_buf.as_mut_ptr(), fs_type_buf.len() as u32, ) }; let fs_type = if 0 == success { None } else { Some(LPWSTR2String(&fs_type_buf)) }; let mut mn_info = Self { dev_id: volume_name, dev_name, fs_type: fs_type.unwrap_or_default(), mount_root, mount_dir: String::new(), mount_option: String::new(), remote: false, dummy: false, }; mn_info.set_missing_fields(); Some(mn_info) } } #[cfg(any( target_os = "freebsd", target_vendor = "apple", target_os = "netbsd", target_os = "openbsd", ))] impl From for MountInfo { fn from(statfs: StatFs) -> Self { let mut info = Self { dev_id: String::new(), 
dev_name: unsafe { // spell-checker:disable-next-line CStr::from_ptr(&statfs.f_mntfromname[0]) .to_string_lossy() .into_owned() }, fs_type: unsafe { // spell-checker:disable-next-line CStr::from_ptr(&statfs.f_fstypename[0]) .to_string_lossy() .into_owned() }, mount_dir: unsafe { // spell-checker:disable-next-line CStr::from_ptr(&statfs.f_mntonname[0]) .to_string_lossy() .into_owned() }, mount_root: String::new(), mount_option: String::new(), remote: false, dummy: false, }; info.set_missing_fields(); info } } #[cfg(any( target_os = "freebsd", target_vendor = "apple", target_os = "netbsd", target_os = "openbsd" ))] use libc::c_int; #[cfg(any( target_os = "freebsd", target_vendor = "apple", target_os = "netbsd", target_os = "openbsd" ))] extern "C" { #[cfg(all(target_vendor = "apple", target_arch = "x86_64"))] #[link_name = "getmntinfo$INODE64"] // spell-checker:disable-line fn get_mount_info(mount_buffer_p: *mut *mut StatFs, flags: c_int) -> c_int; #[cfg(any( target_os = "netbsd", target_os = "openbsd", all(target_vendor = "apple", target_arch = "aarch64") ))] #[link_name = "getmntinfo"] // spell-checker:disable-line fn get_mount_info(mount_buffer_p: *mut *mut StatFs, flags: c_int) -> c_int; // Rust on FreeBSD uses 11.x ABI for filesystem metadata syscalls. // Call the right version of the symbol for getmntinfo() result to // match libc StatFS layout. 
#[cfg(target_os = "freebsd")] #[link_name = "getmntinfo@FBSD_1.0"] // spell-checker:disable-line fn get_mount_info(mount_buffer_p: *mut *mut StatFs, flags: c_int) -> c_int; } #[cfg(any(target_os = "linux", target_os = "android"))] use std::fs::File; #[cfg(any(target_os = "linux", target_os = "android"))] use std::io::{BufRead, BufReader}; #[cfg(any( target_vendor = "apple", target_os = "freebsd", target_os = "windows", target_os = "netbsd", target_os = "openbsd" ))] use std::ptr; #[cfg(any( target_vendor = "apple", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd" ))] use std::slice; /// Read file system list. pub fn read_fs_list() -> Result, std::io::Error> { #[cfg(any(target_os = "linux", target_os = "android"))] { let (file_name, f) = File::open(LINUX_MOUNTINFO) .map(|f| (LINUX_MOUNTINFO, f)) .or_else(|_| File::open(LINUX_MTAB).map(|f| (LINUX_MTAB, f)))?; let reader = BufReader::new(f); Ok(reader .lines() .map_while(Result::ok) .filter_map(|line| { let raw_data = line.split_whitespace().collect::>(); MountInfo::new(file_name, &raw_data) }) .collect::>()) } #[cfg(any( target_os = "freebsd", target_vendor = "apple", target_os = "netbsd", target_os = "openbsd" ))] { let mut mount_buffer_ptr: *mut StatFs = ptr::null_mut(); let len = unsafe { get_mount_info(&mut mount_buffer_ptr, 1_i32) }; if len < 0 { crash!(1, "get_mount_info() failed"); } let mounts = unsafe { slice::from_raw_parts(mount_buffer_ptr, len as usize) }; Ok(mounts .iter() .map(|m| MountInfo::from(*m)) .collect::>()) } #[cfg(windows)] { let mut volume_name_buf = [0u16; MAX_PATH]; // As recommended in the MS documentation, retrieve the first volume before the others let find_handle = unsafe { FindFirstVolumeW(volume_name_buf.as_mut_ptr(), volume_name_buf.len() as u32) }; if INVALID_HANDLE_VALUE == find_handle { crash!( EXIT_ERR, "FindFirstVolumeW failed: {}", IOError::last_os_error() ); } let mut mounts = Vec::::new(); loop { let volume_name = LPWSTR2String(&volume_name_buf); if 
!volume_name.starts_with("\\\\?\\") || !volume_name.ends_with('\\') { show_warning!("A bad path was skipped: {}", volume_name); continue; } if let Some(m) = MountInfo::new(volume_name) { mounts.push(m); } if 0 == unsafe { FindNextVolumeW( find_handle, volume_name_buf.as_mut_ptr(), volume_name_buf.len() as u32, ) } { let err = IOError::last_os_error(); if err.raw_os_error() != Some(ERROR_NO_MORE_FILES as i32) { crash!(EXIT_ERR, "FindNextVolumeW failed: {}", err); } break; } } unsafe { FindVolumeClose(find_handle); } Ok(mounts) } #[cfg(any(target_os = "redox", target_os = "illumos", target_os = "solaris"))] { // No method to read mounts, yet Ok(Vec::new()) } } #[derive(Debug, Clone)] pub struct FsUsage { pub blocksize: u64, pub blocks: u64, pub bfree: u64, pub bavail: u64, pub bavail_top_bit_set: bool, pub files: u64, pub ffree: u64, } impl FsUsage { #[cfg(unix)] pub fn new(statvfs: StatFs) -> Self { { #[cfg(all(not(target_os = "freebsd"), target_pointer_width = "64"))] return Self { blocksize: statvfs.f_bsize as u64, // or `statvfs.f_frsize` ? blocks: statvfs.f_blocks, bfree: statvfs.f_bfree, bavail: statvfs.f_bavail, bavail_top_bit_set: ((statvfs.f_bavail) & (1u64.rotate_right(1))) != 0, files: statvfs.f_files, ffree: statvfs.f_ffree, }; #[cfg(all(not(target_os = "freebsd"), not(target_pointer_width = "64")))] return Self { blocksize: statvfs.f_bsize as u64, // or `statvfs.f_frsize` ? blocks: statvfs.f_blocks.into(), bfree: statvfs.f_bfree.into(), bavail: statvfs.f_bavail.into(), bavail_top_bit_set: ((statvfs.f_bavail as u64) & (1u64.rotate_right(1))) != 0, files: statvfs.f_files.into(), ffree: statvfs.f_ffree.into(), }; #[cfg(target_os = "freebsd")] return Self { blocksize: statvfs.f_bsize, // or `statvfs.f_frsize` ? 
blocks: statvfs.f_blocks, bfree: statvfs.f_bfree, bavail: statvfs.f_bavail.try_into().unwrap(), bavail_top_bit_set: ((std::convert::TryInto::::try_into(statvfs.f_bavail) .unwrap()) & (1u64.rotate_right(1))) != 0, files: statvfs.f_files, ffree: statvfs.f_ffree.try_into().unwrap(), }; } } #[cfg(not(unix))] pub fn new(path: &Path) -> Self { let mut root_path = [0u16; MAX_PATH]; let success = unsafe { GetVolumePathNamesForVolumeNameW( //path_utf8.as_ptr(), String2LPWSTR!(path.as_os_str()), root_path.as_mut_ptr(), root_path.len() as u32, ptr::null_mut(), ) }; if 0 == success { crash!( EXIT_ERR, "GetVolumePathNamesForVolumeNameW failed: {}", IOError::last_os_error() ); } let mut sectors_per_cluster = 0; let mut bytes_per_sector = 0; let mut number_of_free_clusters = 0; let mut total_number_of_clusters = 0; let success = unsafe { GetDiskFreeSpaceW( String2LPWSTR!(path.as_os_str()), &mut sectors_per_cluster, &mut bytes_per_sector, &mut number_of_free_clusters, &mut total_number_of_clusters, ) }; if 0 == success { // Fails in case of CD for example // crash!( // EXIT_ERR, // "GetDiskFreeSpaceW failed: {}", // IOError::last_os_error() // ); } let bytes_per_cluster = sectors_per_cluster as u64 * bytes_per_sector as u64; Self { // f_bsize File system block size. blocksize: bytes_per_cluster, // f_blocks - Total number of blocks on the file system, in units of f_frsize. // frsize = Fundamental file system block size (fragment size). blocks: total_number_of_clusters as u64, // Total number of free blocks. bfree: number_of_free_clusters as u64, // Total number of free blocks available to non-privileged processes. bavail: 0, bavail_top_bit_set: ((bytes_per_sector as u64) & (1u64.rotate_right(1))) != 0, // Total number of file nodes (inodes) on the file system. files: 0, // Not available on windows // Total number of free file nodes (inodes). 
ffree: 0, // Meaningless on Windows } } } #[cfg(unix)] pub trait FsMeta { fn fs_type(&self) -> i64; fn io_size(&self) -> u64; fn block_size(&self) -> i64; fn total_blocks(&self) -> u64; fn free_blocks(&self) -> u64; fn avail_blocks(&self) -> u64; fn total_file_nodes(&self) -> u64; fn free_file_nodes(&self) -> u64; fn fsid(&self) -> u64; fn namelen(&self) -> u64; } #[cfg(unix)] impl FsMeta for StatFs { fn block_size(&self) -> i64 { #[cfg(all( not(target_env = "musl"), not(target_vendor = "apple"), not(target_os = "android"), not(target_os = "freebsd"), not(target_os = "illumos"), not(target_os = "solaris"), not(target_arch = "s390x"), target_pointer_width = "64" ))] return self.f_bsize; #[cfg(all( not(target_env = "musl"), not(target_os = "freebsd"), any( target_arch = "s390x", target_vendor = "apple", target_os = "android", not(target_pointer_width = "64") ) ))] return self.f_bsize.into(); #[cfg(any( target_env = "musl", target_os = "freebsd", target_os = "illumos", target_os = "solaris" ))] return self.f_bsize.try_into().unwrap(); } fn total_blocks(&self) -> u64 { #[cfg(target_pointer_width = "64")] return self.f_blocks; #[cfg(not(target_pointer_width = "64"))] return self.f_blocks.into(); } fn free_blocks(&self) -> u64 { #[cfg(target_pointer_width = "64")] return self.f_bfree; #[cfg(not(target_pointer_width = "64"))] return self.f_bfree.into(); } fn avail_blocks(&self) -> u64 { #[cfg(all(not(target_os = "freebsd"), target_pointer_width = "64"))] return self.f_bavail; #[cfg(all(not(target_os = "freebsd"), not(target_pointer_width = "64")))] return self.f_bavail.into(); #[cfg(target_os = "freebsd")] return self.f_bavail.try_into().unwrap(); } fn total_file_nodes(&self) -> u64 { #[cfg(target_pointer_width = "64")] return self.f_files; #[cfg(not(target_pointer_width = "64"))] return self.f_files.into(); } fn free_file_nodes(&self) -> u64 { #[cfg(all(not(target_os = "freebsd"), target_pointer_width = "64"))] return self.f_ffree; #[cfg(all(not(target_os = "freebsd"), 
not(target_pointer_width = "64")))] return self.f_ffree.into(); #[cfg(target_os = "freebsd")] return self.f_ffree.try_into().unwrap(); } #[cfg(any( target_os = "linux", target_os = "android", target_vendor = "apple", target_os = "freebsd" ))] fn fs_type(&self) -> i64 { #[cfg(all( not(target_env = "musl"), not(target_vendor = "apple"), not(target_os = "android"), not(target_os = "freebsd"), not(target_arch = "s390x"), target_pointer_width = "64" ))] return self.f_type; #[cfg(all( not(target_env = "musl"), any( target_vendor = "apple", target_os = "android", target_os = "freebsd", target_arch = "s390x", not(target_pointer_width = "64") ) ))] return self.f_type.into(); #[cfg(target_env = "musl")] return self.f_type.try_into().unwrap(); } #[cfg(not(any( target_os = "linux", target_os = "android", target_vendor = "apple", target_os = "freebsd" )))] fn fs_type(&self) -> i64 { // FIXME: statvfs doesn't have an equivalent, so we need to do something else unimplemented!() } #[cfg(any(target_os = "linux", target_os = "android"))] fn io_size(&self) -> u64 { self.f_frsize as u64 } #[cfg(any(target_vendor = "apple", target_os = "freebsd", target_os = "netbsd"))] fn io_size(&self) -> u64 { #[cfg(target_os = "freebsd")] return self.f_iosize; #[cfg(not(target_os = "freebsd"))] return self.f_iosize as u64; } // XXX: dunno if this is right #[cfg(not(any( target_vendor = "apple", target_os = "freebsd", target_os = "linux", target_os = "android", target_os = "netbsd" )))] fn io_size(&self) -> u64 { self.f_bsize as u64 } // Linux, SunOS, HP-UX, 4.4BSD, FreeBSD have a system call statfs() that returns // a struct statfs, containing a fsid_t f_fsid, where fsid_t is defined // as struct { int val[2]; } // // Solaris, Irix and POSIX have a system call statvfs(2) that returns a // struct statvfs, containing an unsigned long f_fsid #[cfg(any( target_vendor = "apple", target_os = "freebsd", target_os = "linux", target_os = "android", target_os = "openbsd" ))] fn fsid(&self) -> u64 { let 
f_fsid: &[u32; 2] = unsafe { &*(&self.f_fsid as *const nix::sys::statfs::fsid_t as *const [u32; 2]) }; (u64::from(f_fsid[0])) << 32 | u64::from(f_fsid[1]) } #[cfg(not(any( target_vendor = "apple", target_os = "freebsd", target_os = "linux", target_os = "android", target_os = "openbsd" )))] fn fsid(&self) -> u64 { self.f_fsid as u64 } #[cfg(any(target_os = "linux", target_os = "android"))] fn namelen(&self) -> u64 { self.f_namelen as u64 } #[cfg(target_vendor = "apple")] fn namelen(&self) -> u64 { 1024 } #[cfg(any(target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))] fn namelen(&self) -> u64 { self.f_namemax as u64 // spell-checker:disable-line } // XXX: should everything just use statvfs? #[cfg(not(any( target_vendor = "apple", target_os = "freebsd", target_os = "linux", target_os = "android", target_os = "netbsd", target_os = "openbsd" )))] fn namelen(&self) -> u64 { self.f_namemax as u64 // spell-checker:disable-line } } #[cfg(unix)] pub fn statfs

(path: P) -> Result where P: Into>, { match CString::new(path) { Ok(p) => { let mut buffer: StatFs = unsafe { mem::zeroed() }; unsafe { match statfs_fn(p.as_ptr(), &mut buffer) { 0 => Ok(buffer), _ => { let errno = IOError::last_os_error().raw_os_error().unwrap_or(0); Err(CStr::from_ptr(strerror(errno)) .to_str() .map_err(|_| "Error message contains invalid UTF-8".to_owned())? .to_owned()) } } } } Err(e) => Err(e.to_string()), } } // match strftime "%Y-%m-%d %H:%M:%S.%f %z" const PRETTY_DATETIME_FORMAT: &[time::format_description::FormatItem] = format_description!( "\ [year]-[month]-[day padding:zero] \ [hour]:[minute]:[second].[subsecond digits:9] \ [offset_hour sign:mandatory][offset_minute]" ); pub fn pretty_time(sec: i64, nsec: i64) -> String { // sec == seconds since UNIX_EPOCH // nsec == nanoseconds since (UNIX_EPOCH + sec) let ts_nanos: i128 = (sec * 1_000_000_000 + nsec).into(); // Return the date in UTC let tm = match time::OffsetDateTime::from_unix_timestamp_nanos(ts_nanos) { Ok(tm) => tm, Err(e) => { panic!("error: {e}"); } }; // Get the offset to convert to local time // Because of DST (daylight saving), we get the local time back when // the date was set let local_offset = match UtcOffset::local_offset_at(tm) { Ok(lo) => lo, Err(e) => { panic!("error: {e}"); } }; // Include the conversion to local time let res = tm .to_offset(local_offset) .format(&PRETTY_DATETIME_FORMAT) .unwrap(); if res.ends_with(" -0000") { res.replace(" -0000", " +0000") } else { res } } #[cfg(unix)] pub fn pretty_filetype<'a>(mode: mode_t, size: u64) -> &'a str { match mode & S_IFMT { S_IFREG => { if size == 0 { "regular empty file" } else { "regular file" } } S_IFDIR => "directory", S_IFLNK => "symbolic link", S_IFCHR => "character special file", S_IFBLK => "block special file", S_IFIFO => "fifo", S_IFSOCK => "socket", // TODO: Other file types // See coreutils/gnulib/lib/file-type.c // spell-checker:disable-line _ => "weird file", } } pub fn pretty_fstype<'a>(fstype: i64) -> 
Cow<'a, str> { // spell-checker:disable match fstype { 0x6163_6673 => "acfs".into(), 0xADF5 => "adfs".into(), 0xADFF => "affs".into(), 0x5346_414F => "afs".into(), 0x0904_1934 => "anon-inode FS".into(), 0x6175_6673 => "aufs".into(), 0x0187 => "autofs".into(), 0x4246_5331 => "befs".into(), 0x6264_6576 => "bdevfs".into(), 0x1BAD_FACE => "bfs".into(), 0xCAFE_4A11 => "bpf_fs".into(), 0x4249_4E4D => "binfmt_misc".into(), 0x9123_683E => "btrfs".into(), 0x7372_7279 => "btrfs_test".into(), 0x00C3_6400 => "ceph".into(), 0x0027_E0EB => "cgroupfs".into(), 0xFF53_4D42 => "cifs".into(), 0x7375_7245 => "coda".into(), 0x012F_F7B7 => "coh".into(), 0x6265_6570 => "configfs".into(), 0x28CD_3D45 => "cramfs".into(), 0x453D_CD28 => "cramfs-wend".into(), 0x6462_6720 => "debugfs".into(), 0x1373 => "devfs".into(), 0x1CD1 => "devpts".into(), 0xF15F => "ecryptfs".into(), 0xDE5E_81E4 => "efivarfs".into(), 0x0041_4A53 => "efs".into(), 0x5DF5 => "exofs".into(), 0x137D => "ext".into(), 0xEF53 => "ext2/ext3".into(), 0xEF51 => "ext2".into(), 0xF2F5_2010 => "f2fs".into(), 0x4006 => "fat".into(), 0x1983_0326 => "fhgfs".into(), 0x6573_5546 => "fuseblk".into(), 0x6573_5543 => "fusectl".into(), 0x0BAD_1DEA => "futexfs".into(), 0x0116_1970 => "gfs/gfs2".into(), 0x4750_4653 => "gpfs".into(), 0x4244 => "hfs".into(), 0x482B => "hfs+".into(), 0x4858 => "hfsx".into(), 0x00C0_FFEE => "hostfs".into(), 0xF995_E849 => "hpfs".into(), 0x9584_58F6 => "hugetlbfs".into(), 0x1130_7854 => "inodefs".into(), 0x0131_11A8 => "ibrix".into(), 0x2BAD_1DEA => "inotifyfs".into(), 0x9660 => "isofs".into(), 0x4004 => "isofs".into(), 0x4000 => "isofs".into(), 0x07C0 => "jffs".into(), 0x72B6 => "jffs2".into(), 0x3153_464A => "jfs".into(), 0x6B41_4653 => "k-afs".into(), 0xC97E_8168 => "logfs".into(), 0x0BD0_0BD0 => "lustre".into(), 0x5346_314D => "m1fs".into(), 0x137F => "minix".into(), 0x138F => "minix (30 char.)".into(), 0x2468 => "minix v2".into(), 0x2478 => "minix v2 (30 char.)".into(), 0x4D5A => "minix3".into(), 0x1980_0202 => 
#[cfg(test)]
mod tests {
    use super::*;

    /// `pretty_filetype` maps the file-type bits (and, for regular files, the
    /// size) to a human readable description.
    #[test]
    #[cfg(unix)]
    fn test_file_type() {
        assert_eq!("block special file", pretty_filetype(S_IFBLK, 0));
        assert_eq!("character special file", pretty_filetype(S_IFCHR, 0));
        assert_eq!("regular file", pretty_filetype(S_IFREG, 1));
        assert_eq!("regular empty file", pretty_filetype(S_IFREG, 0));
        assert_eq!("weird file", pretty_filetype(0, 0));
    }

    /// `pretty_fstype` maps known magic numbers to names and renders unknown
    /// ones as `UNKNOWN (0x…)`.
    #[test]
    fn test_fs_type() {
        // spell-checker:disable
        assert_eq!("ext2/ext3", pretty_fstype(0xEF53));
        assert_eq!("tmpfs", pretty_fstype(0x01021994));
        assert_eq!("nfs", pretty_fstype(0x6969));
        assert_eq!("btrfs", pretty_fstype(0x9123683e));
        assert_eq!("xfs", pretty_fstype(0x58465342));
        assert_eq!("zfs", pretty_fstype(0x2FC12FC1));
        assert_eq!("ntfs", pretty_fstype(0x5346544e));
        assert_eq!("fat", pretty_fstype(0x4006));
        assert_eq!("UNKNOWN (0x1234)", pretty_fstype(0x1234));
        // spell-checker:enable
    }

    /// Split one mountinfo line on ASCII whitespace and parse it.
    #[cfg(any(target_os = "linux", target_os = "android"))]
    fn parse_mountinfo_line(line: &str) -> MountInfo {
        let fields = line.split_ascii_whitespace().collect::<Vec<_>>();
        MountInfo::new(LINUX_MOUNTINFO, &fields).unwrap()
    }

    #[test]
    #[cfg(any(target_os = "linux", target_os = "android"))]
    fn test_mountinfo() {
        // spell-checker:ignore (word) relatime
        let info = parse_mountinfo_line("106 109 253:6 / /mnt rw,relatime - xfs /dev/fs0 rw");

        assert_eq!(info.mount_root, "/");
        assert_eq!(info.mount_dir, "/mnt");
        assert_eq!(info.mount_option, "rw,relatime");
        assert_eq!(info.fs_type, "xfs");
        assert_eq!(info.dev_name, "/dev/fs0");

        // Test parsing with different amounts of optional fields.
        let info =
            parse_mountinfo_line("106 109 253:6 / /mnt rw,relatime master:1 - xfs /dev/fs0 rw");

        assert_eq!(info.fs_type, "xfs");
        assert_eq!(info.dev_name, "/dev/fs0");

        let info = parse_mountinfo_line(
            "106 109 253:6 / /mnt rw,relatime master:1 shared:2 - xfs /dev/fs0 rw",
        );

        assert_eq!(info.fs_type, "xfs");
        assert_eq!(info.dev_name, "/dev/fs0");
    }
}
/// Returns an iterator over the lines, including line ending characters.
///
/// This function is just like [`BufRead::lines`], but each yielded item is
/// a [`Vec<u8>`] (not a [`String`]) that still contains the line ending
/// character if the input data has it. Set the `sep` parameter to the line
/// ending character; for Unix line endings, use `b'\n'`.
///
/// # Examples
///
/// Use `sep` to specify an alternate character for line endings. For
/// example, if lines are terminated by the null character `b'\0'`:
///
/// ```rust,ignore
/// use std::io::BufRead;
/// use std::io::Cursor;
///
/// let cursor = Cursor::new(b"x\0y\0z\0");
/// let mut it = lines(cursor, b'\0').map(|l| l.unwrap());
///
/// assert_eq!(it.next(), Some(Vec::from("x\0")));
/// assert_eq!(it.next(), Some(Vec::from("y\0")));
/// assert_eq!(it.next(), Some(Vec::from("z\0")));
/// assert_eq!(it.next(), None);
/// ```
///
/// If the input data does not end with a newline character (`'\n'`),
/// then the last item yielded by this iterator also does not
/// end with a newline:
///
/// ```rust,ignore
/// let cursor = Cursor::new(b"x\ny\nz");
/// let mut it = lines(cursor, b'\n').map(|l| l.unwrap());
///
/// assert_eq!(it.next(), Some(Vec::from("x\n")));
/// assert_eq!(it.next(), Some(Vec::from("y\n")));
/// assert_eq!(it.next(), Some(Vec::from("z")));
/// assert_eq!(it.next(), None);
/// ```
pub fn lines<B>(reader: B, sep: u8) -> Lines<B>
where
    B: BufRead,
{
    Lines { buf: reader, sep }
}

/// An iterator over the lines of an instance of `BufRead`.
///
/// This struct is generally created by calling [`lines`] on a `BufRead`.
/// Please see the documentation of [`lines`] for more details.
pub struct Lines<B> {
    /// The underlying reader.
    buf: B,
    /// The byte that terminates a line.
    sep: u8,
}

impl<B: BufRead> Iterator for Lines<B> {
    type Item = std::io::Result<Vec<u8>>;

    /// Reads up to and including the next `sep` byte.
    ///
    /// Yields `None` once the reader returns zero bytes, and forwards any
    /// read error as `Some(Err(_))`.
    fn next(&mut self) -> Option<Self::Item> {
        let mut line = Vec::new();
        match self.buf.read_until(self.sep, &mut line) {
            Ok(0) => None,
            Ok(_) => Some(Ok(line)),
            Err(e) => Some(Err(e)),
        }
    }
}
use crate::display::Quotable; use crate::error::{UResult, USimpleError}; use crate::features::tokenize::sub::SubParser; use crate::features::tokenize::token::Token; use crate::features::tokenize::unescaped_text::UnescapedText; use crate::show_warning; use itertools::put_back_n; use std::io::{stdout, Cursor, Write}; use std::iter::Peekable; use std::slice::Iter; /// Memo runner of printf /// Takes a format string and arguments /// 1. tokenize format string into tokens, consuming /// any subst. arguments along the way. /// 2. feeds remaining arguments into function /// that prints tokens. struct Memo { tokens: Vec, } fn warn_excess_args(first_arg: &str) { show_warning!( "ignoring excess arguments, starting with {}", first_arg.quote() ); } impl Memo { fn new( writer: &mut W, pf_string: &str, pf_args_it: &mut Peekable>, ) -> UResult where W: Write, { let mut pm = Self { tokens: Vec::new() }; let mut it = put_back_n(pf_string.chars()); let mut has_sub = false; loop { if let Some(x) = UnescapedText::from_it_core(writer, &mut it, false) { pm.tokens.push(x); } if let Some(x) = SubParser::from_it(writer, &mut it, pf_args_it)? { if !has_sub { has_sub = true; } pm.tokens.push(x); } if let Some(x) = it.next() { it.put_back(x); } else { break; } } if !has_sub { let mut drain = false; if let Some(first_arg) = pf_args_it.peek() { warn_excess_args(first_arg); drain = true; } if drain { loop { // drain remaining args; if pf_args_it.next().is_none() { break; } } } } Ok(pm) } fn apply(&self, writer: &mut W, pf_args_it: &mut Peekable>) where W: Write, { for tkn in &self.tokens { tkn.write(writer, pf_args_it); } } fn run_all(writer: &mut W, pf_string: &str, pf_args: &[String]) -> UResult<()> where W: Write, { let mut arg_it = pf_args.iter().peekable(); let pm = Self::new(writer, pf_string, &mut arg_it)?; loop { if arg_it.peek().is_none() { return Ok(()); } pm.apply(writer, &mut arg_it); } } } /// Write a formatted string to stdout. 
/// /// `format_string` contains the template and `args` contains the /// arguments to render into the template. /// /// See also [`sprintf`], which creates a new formatted [`String`]. /// /// # Examples /// /// ```rust /// use uucore::memo::printf; /// /// printf("hello %s", &["world".to_string()]).unwrap(); /// // prints "hello world" /// ``` pub fn printf(format_string: &str, args: &[String]) -> UResult<()> { let mut writer = stdout(); Memo::run_all(&mut writer, format_string, args) } /// Create a new formatted string. /// /// `format_string` contains the template and `args` contains the /// arguments to render into the template. /// /// See also [`printf`], which prints to stdout. /// /// # Examples /// /// ```rust /// use uucore::memo::sprintf; /// /// let s = sprintf("hello %s", &["world".to_string()]).unwrap(); /// assert_eq!(s, "hello world".to_string()); /// ``` pub fn sprintf(format_string: &str, args: &[String]) -> UResult { let mut writer = Cursor::new(vec![]); Memo::run_all(&mut writer, format_string, args)?; let buf = writer.into_inner(); match String::from_utf8(buf) { Ok(s) => Ok(s), Err(e) => Err(USimpleError::new( 1, format!("failed to parse formatted string as UTF-8: {e}"), )), } } #[cfg(test)] mod tests { use crate::memo::sprintf; #[test] fn test_sprintf_smoke() { assert_eq!(sprintf("", &[]).unwrap(), "".to_string()); } #[test] fn test_sprintf_no_args() { assert_eq!( sprintf("hello world", &[]).unwrap(), "hello world".to_string() ); } #[test] fn test_sprintf_string() { assert_eq!( sprintf("hello %s", &["world".to_string()]).unwrap(), "hello world".to_string() ); } } uucore-0.0.23/src/lib/features/mode.rs000064400000000000000000000156271046102023000156700ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! 
/// Apply a numeric (octal) mode string to the permission bits `fperm`.
///
/// `mode` may start with one of the operators `+`, `-` or `=`; the rest is
/// parsed as an octal number (an empty remainder counts as `0`).
///
/// * `+` ORs the value into `fperm`, `-` clears those bits.
/// * Without an operator and with `considering_dir` set, the setuid/setgid
///   bits of `fperm` are kept unless the mode has 5 or more digits.
/// * Otherwise the value replaces `fperm`.
///
/// # Errors
///
/// Returns a message if the digits are not valid octal or the value exceeds
/// `0o7777`.
pub fn parse_numeric(fperm: u32, mut mode: &str, considering_dir: bool) -> Result<u32, String> {
    let (op, pos) = parse_op(mode).map_or_else(|_| (None, 0), |(op, pos)| (Some(op), pos));
    mode = mode[pos..].trim();
    let change = if mode.is_empty() {
        0
    } else {
        u32::from_str_radix(mode, 8).map_err(|e| e.to_string())?
    };
    if change > 0o7777 {
        // Print the offending value in octal so it is comparable to the limit.
        Err(format!("mode is too large ({change:o} > 7777)"))
    } else {
        Ok(match op {
            Some('+') => fperm | change,
            Some('-') => fperm & !change,
            // If this is a directory, we keep the setgid and setuid bits,
            // unless the mode contains 5 or more octal digits or the mode is "="
            None if considering_dir && mode.len() < 5 => change | (fperm & (0o4000 | 0o2000)),
            None | Some('=') => change,
            Some(_) => unreachable!(),
        })
    }
}

/// Apply a symbolic mode string (e.g. `u+x`, `a-w`, `o=g`) to `fperm`.
///
/// The leading `ugoa` letters select which bits may change. When none are
/// given, all bits are selected and the bits set in `umask` are removed from
/// every change. Any number of `<op><perms>` pairs may follow.
///
/// # Errors
///
/// Returns a message if the mode consists only of level letters (or is
/// empty), or if an operator is missing or invalid.
pub fn parse_symbolic(
    mut fperm: u32,
    mut mode: &str,
    umask: u32,
    considering_dir: bool,
) -> Result<u32, String> {
    let (mask, pos) = parse_levels(mode);
    if pos == mode.len() {
        return Err(format!("invalid mode ({mode})"));
    }
    let respect_umask = pos == 0;
    mode = &mode[pos..];
    while !mode.is_empty() {
        let (op, pos) = parse_op(mode)?;
        mode = &mode[pos..];
        let (mut srwx, pos) = parse_change(mode, fperm, considering_dir);
        if respect_umask {
            srwx &= !umask;
        }
        mode = &mode[pos..];
        match op {
            '+' => fperm |= srwx & mask,
            '-' => fperm &= !(srwx & mask),
            '=' => {
                if considering_dir {
                    // keep the setgid and setuid bits for directories
                    srwx |= fperm & (0o4000 | 0o2000);
                }
                fperm = (fperm & !mask) | (srwx & mask);
            }
            _ => unreachable!(),
        }
    }
    Ok(fperm)
}

/// Parse the leading `ugoa` letters of a symbolic mode.
///
/// Returns the bit mask they select and the number of letters consumed.
/// With no letters the mask defaults to all bits (`a`).
fn parse_levels(mode: &str) -> (u32, usize) {
    let mut mask = 0;
    let mut pos = 0;
    for ch in mode.chars() {
        mask |= match ch {
            'u' => 0o4700,
            'g' => 0o2070,
            'o' => 0o1007,
            'a' => 0o7777,
            _ => break,
        };
        pos += 1;
    }
    if pos == 0 {
        mask = 0o7777; // default to 'a'
    }
    (mask, pos)
}

/// Parse the operator (`+`, `-` or `=`) at the start of `mode`.
///
/// Returns the operator together with the number of bytes it occupies.
fn parse_op(mode: &str) -> Result<(char, usize), String> {
    let ch = mode
        .chars()
        .next()
        .ok_or_else(|| "unexpected end of mode".to_owned())?;
    match ch {
        '+' | '-' | '=' => Ok((ch, 1)),
        _ => Err(format!(
            "invalid operator (expected +, -, or =, but found {ch})"
        )),
    }
}

/// Parse the permission part of a symbolic mode (`rwxXst` or one of `ugo`).
///
/// Returns the permission bits, replicated for user, group and other, and
/// the number of characters consumed. `X` only contributes execute bits when
/// `considering_dir` is set or `fperm` already has an execute bit.
fn parse_change(mode: &str, fperm: u32, considering_dir: bool) -> (u32, usize) {
    let mut srwx = 0;
    let mut pos = 0;
    for ch in mode.chars() {
        match ch {
            'r' => srwx |= 0o444,
            'w' => srwx |= 0o222,
            'x' => srwx |= 0o111,
            'X' => {
                if considering_dir || (fperm & 0o0111) != 0 {
                    srwx |= 0o111;
                }
            }
            's' => srwx |= 0o4000 | 0o2000,
            't' => srwx |= 0o1000,
            // Copy the bits of one class into all three classes.
            'u' => srwx = (fperm & 0o700) | ((fperm >> 3) & 0o070) | ((fperm >> 6) & 0o007),
            'g' => srwx = ((fperm << 3) & 0o700) | (fperm & 0o070) | ((fperm >> 3) & 0o007),
            'o' => srwx = ((fperm << 6) & 0o700) | ((fperm << 3) & 0o070) | (fperm & 0o007),
            _ => break,
        };
        if ch == 'u' || ch == 'g' || ch == 'o' {
            // symbolic modes only allows perms to be a single letter of 'ugo'
            // therefore this must either be the first char or it is unexpected
            if pos != 0 {
                break;
            }
            pos = 1;
            break;
        }
        pos += 1;
    }
    if pos == 0 {
        srwx = 0;
    }
    (srwx, pos)
}
/// Iterate `args` and delete the first occurrence of a prefix `-` if it is
/// associated with MODE,
/// e.g. "chmod -v -xw -R FILE" -> "chmod -v xw -R FILE".
///
/// An argument counts as a mode when the character right after the `-` is
/// one of `rwxXstugo` or an octal digit. Scanning stops at `--`. Returns
/// `true` if an argument was rewritten.
pub fn strip_minus_from_mode(args: &mut Vec<String>) -> bool {
    for arg in args.iter_mut() {
        if arg == "--" {
            break;
        }
        if let Some(stripped) = arg.strip_prefix('-') {
            let looks_like_mode = matches!(
                stripped.chars().next(),
                Some('r' | 'w' | 'x' | 'X' | 's' | 't' | 'u' | 'g' | 'o' | '0'..='7')
            );
            if looks_like_mode {
                *arg = stripped.to_string();
                return true;
            }
        }
    }
    false
}
Common functions to manage permissions use crate::display::Quotable; use crate::error::strip_errno; use crate::error::UResult; use crate::error::USimpleError; pub use crate::features::entries; use crate::fs::resolve_relative_path; use crate::show_error; use clap::Arg; use clap::ArgMatches; use clap::Command; use libc::{self, gid_t, uid_t}; use walkdir::WalkDir; use std::io::Error as IOError; use std::io::Result as IOResult; use std::ffi::CString; use std::fs::Metadata; use std::os::unix::fs::MetadataExt; use std::os::unix::ffi::OsStrExt; use std::path::Path; /// The various level of verbosity #[derive(PartialEq, Eq, Clone, Debug)] pub enum VerbosityLevel { Silent, Changes, Verbose, Normal, } #[derive(PartialEq, Eq, Clone, Debug)] pub struct Verbosity { pub groups_only: bool, pub level: VerbosityLevel, } /// Actually perform the change of owner on a path fn chown>(path: P, uid: uid_t, gid: gid_t, follow: bool) -> IOResult<()> { let path = path.as_ref(); let s = CString::new(path.as_os_str().as_bytes()).unwrap(); let ret = unsafe { if follow { libc::chown(s.as_ptr(), uid, gid) } else { libc::lchown(s.as_ptr(), uid, gid) } }; if ret == 0 { Ok(()) } else { Err(IOError::last_os_error()) } } /// Perform the change of owner on a path /// with the various options /// and error messages management pub fn wrap_chown>( path: P, meta: &Metadata, dest_uid: Option, dest_gid: Option, follow: bool, verbosity: Verbosity, ) -> Result { let dest_uid = dest_uid.unwrap_or_else(|| meta.uid()); let dest_gid = dest_gid.unwrap_or_else(|| meta.gid()); let path = path.as_ref(); let mut out: String = String::new(); if let Err(e) = chown(path, dest_uid, dest_gid, follow) { match verbosity.level { VerbosityLevel::Silent => (), level => { out = format!( "changing {} of {}: {}", if verbosity.groups_only { "group" } else { "ownership" }, path.quote(), e ); if level == VerbosityLevel::Verbose { out = if verbosity.groups_only { let gid = meta.gid(); format!( "{}\nfailed to change group of {} from {} 
to {}", out, path.quote(), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()), entries::gid2grp(dest_gid).unwrap_or_else(|_| dest_gid.to_string()) ) } else { let uid = meta.uid(); let gid = meta.gid(); format!( "{}\nfailed to change ownership of {} from {}:{} to {}:{}", out, path.quote(), entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string()), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()), entries::uid2usr(dest_uid).unwrap_or_else(|_| dest_uid.to_string()), entries::gid2grp(dest_gid).unwrap_or_else(|_| dest_gid.to_string()) ) }; }; } } return Err(out); } else { let changed = dest_uid != meta.uid() || dest_gid != meta.gid(); if changed { match verbosity.level { VerbosityLevel::Changes | VerbosityLevel::Verbose => { let gid = meta.gid(); out = if verbosity.groups_only { format!( "changed group of {} from {} to {}", path.quote(), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()), entries::gid2grp(dest_gid).unwrap_or_else(|_| dest_gid.to_string()) ) } else { let gid = meta.gid(); let uid = meta.uid(); format!( "changed ownership of {} from {}:{} to {}:{}", path.quote(), entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string()), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()), entries::uid2usr(dest_uid).unwrap_or_else(|_| dest_uid.to_string()), entries::gid2grp(dest_gid).unwrap_or_else(|_| dest_gid.to_string()) ) }; } _ => (), }; } else if verbosity.level == VerbosityLevel::Verbose { out = if verbosity.groups_only { format!( "group of {} retained as {}", path.quote(), entries::gid2grp(dest_gid).unwrap_or_default() ) } else { format!( "ownership of {} retained as {}:{}", path.quote(), entries::uid2usr(dest_uid).unwrap_or_else(|_| dest_uid.to_string()), entries::gid2grp(dest_gid).unwrap_or_else(|_| dest_gid.to_string()) ) }; } } Ok(out) } pub enum IfFrom { All, User(u32), Group(u32), UserGroup(u32, u32), } #[derive(PartialEq, Eq)] pub enum TraverseSymlinks { None, First, All, } pub struct ChownExecutor { pub dest_uid: Option, pub 
dest_gid: Option, pub raw_owner: String, // The owner of the file as input by the user in the command line. pub traverse_symlinks: TraverseSymlinks, pub verbosity: Verbosity, pub filter: IfFrom, pub files: Vec, pub recursive: bool, pub preserve_root: bool, pub dereference: bool, } impl ChownExecutor { pub fn exec(&self) -> UResult<()> { let mut ret = 0; for f in &self.files { ret |= self.traverse(f); } if ret != 0 { return Err(ret.into()); } Ok(()) } #[allow(clippy::cognitive_complexity)] fn traverse>(&self, root: P) -> i32 { let path = root.as_ref(); let meta = match self.obtain_meta(path, self.dereference) { Some(m) => m, _ => { if self.verbosity.level == VerbosityLevel::Verbose { println!( "failed to change ownership of {} to {}", path.quote(), self.raw_owner ); } return 1; } }; // Prohibit only if: // (--preserve-root and -R present) && // ( // (argument is not symlink && resolved to be '/') || // (argument is symlink && should follow argument && resolved to be '/') // ) if self.recursive && self.preserve_root { let may_exist = if self.dereference { path.canonicalize().ok() } else { let real = resolve_relative_path(path); if real.is_dir() { Some(real.canonicalize().expect("failed to get real path")) } else { Some(real.into_owned()) } }; if let Some(p) = may_exist { if p.parent().is_none() { show_error!("it is dangerous to operate recursively on '/'"); show_error!("use --no-preserve-root to override this failsafe"); return 1; } } } let ret = if self.matched(meta.uid(), meta.gid()) { match wrap_chown( path, &meta, self.dest_uid, self.dest_gid, self.dereference, self.verbosity.clone(), ) { Ok(n) => { if !n.is_empty() { show_error!("{}", n); } 0 } Err(e) => { if self.verbosity.level != VerbosityLevel::Silent { show_error!("{}", e); } 1 } } } else { self.print_verbose_ownership_retained_as( path, meta.uid(), self.dest_gid.map(|_| meta.gid()), ); 0 }; if self.recursive { ret | self.dive_into(&root) } else { ret } } #[allow(clippy::cognitive_complexity)] fn 
dive_into>(&self, root: P) -> i32 { let root = root.as_ref(); // walkdir always dereferences the root directory, so we have to check it ourselves if self.traverse_symlinks == TraverseSymlinks::None && root.is_symlink() { return 0; } let mut ret = 0; let mut iterator = WalkDir::new(root) .follow_links(self.traverse_symlinks == TraverseSymlinks::All) .min_depth(1) .into_iter(); // We can't use a for loop because we need to manipulate the iterator inside the loop. while let Some(entry) = iterator.next() { let entry = match entry { Err(e) => { ret = 1; if let Some(path) = e.path() { show_error!( "cannot access '{}': {}", path.display(), if let Some(error) = e.io_error() { strip_errno(error) } else { "Too many levels of symbolic links".into() } ); } else { show_error!("{}", e); } continue; } Ok(entry) => entry, }; let path = entry.path(); let meta = match self.obtain_meta(path, self.dereference) { Some(m) => m, _ => { ret = 1; if entry.file_type().is_dir() { // Instruct walkdir to skip this directory to avoid getting another error // when walkdir tries to query the children of this directory. 
iterator.skip_current_dir(); } continue; } }; if !self.matched(meta.uid(), meta.gid()) { self.print_verbose_ownership_retained_as( path, meta.uid(), self.dest_gid.map(|_| meta.gid()), ); continue; } ret = match wrap_chown( path, &meta, self.dest_uid, self.dest_gid, self.dereference, self.verbosity.clone(), ) { Ok(n) => { if !n.is_empty() { show_error!("{}", n); } 0 } Err(e) => { if self.verbosity.level != VerbosityLevel::Silent { show_error!("{}", e); } 1 } } } ret } fn obtain_meta>(&self, path: P, follow: bool) -> Option { let path = path.as_ref(); let meta = if follow { path.metadata() } else { path.symlink_metadata() }; match meta { Err(e) => { match self.verbosity.level { VerbosityLevel::Silent => (), _ => show_error!( "cannot {} {}: {}", if follow { "dereference" } else { "access" }, path.quote(), strip_errno(&e) ), } None } Ok(meta) => Some(meta), } } #[inline] fn matched(&self, uid: uid_t, gid: gid_t) -> bool { match self.filter { IfFrom::All => true, IfFrom::User(u) => u == uid, IfFrom::Group(g) => g == gid, IfFrom::UserGroup(u, g) => u == uid && g == gid, } } fn print_verbose_ownership_retained_as(&self, path: &Path, uid: u32, gid: Option) { if self.verbosity.level == VerbosityLevel::Verbose { match (self.dest_uid, self.dest_gid, gid) { (Some(_), Some(_), Some(gid)) => { println!( "ownership of {} retained as {}:{}", path.quote(), entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string()), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()), ); } (None, Some(_), Some(gid)) => { println!( "ownership of {} retained as {}", path.quote(), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()), ); } (_, _, _) => { println!( "ownership of {} retained as {}", path.quote(), entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string()), ); } } } } } pub mod options { pub const HELP: &str = "help"; pub mod verbosity { pub const CHANGES: &str = "changes"; pub const QUIET: &str = "quiet"; pub const SILENT: &str = "silent"; pub const VERBOSE: &str = "verbose"; } 
pub mod preserve_root { pub const PRESERVE: &str = "preserve-root"; pub const NO_PRESERVE: &str = "no-preserve-root"; } pub mod dereference { pub const DEREFERENCE: &str = "dereference"; pub const NO_DEREFERENCE: &str = "no-dereference"; } pub const FROM: &str = "from"; pub const RECURSIVE: &str = "recursive"; pub mod traverse { pub const TRAVERSE: &str = "H"; pub const NO_TRAVERSE: &str = "P"; pub const EVERY: &str = "L"; } pub const REFERENCE: &str = "reference"; pub const ARG_OWNER: &str = "OWNER"; pub const ARG_GROUP: &str = "GROUP"; pub const ARG_FILES: &str = "FILE"; } pub struct GidUidOwnerFilter { pub dest_gid: Option, pub dest_uid: Option, pub raw_owner: String, pub filter: IfFrom, } type GidUidFilterOwnerParser = fn(&ArgMatches) -> UResult; /// Base implementation for `chgrp` and `chown`. /// /// An argument called `add_arg_if_not_reference` will be added to `command` if /// `args` does not contain the `--reference` option. /// `parse_gid_uid_and_filter` will be called to obtain the target gid and uid, and the filter, /// from `ArgMatches`. /// `groups_only` determines whether verbose output will only mention the group. 
#[allow(clippy::cognitive_complexity)] pub fn chown_base( mut command: Command, args: impl crate::Args, add_arg_if_not_reference: &'static str, parse_gid_uid_and_filter: GidUidFilterOwnerParser, groups_only: bool, ) -> UResult<()> { let args: Vec<_> = args.collect(); let mut reference = false; let mut help = false; // stop processing options on -- for arg in args.iter().take_while(|s| *s != "--") { if arg.to_string_lossy().starts_with("--reference=") || arg == "--reference" { reference = true; } else if arg == "--help" { // we stop processing once we see --help, // as it doesn't matter if we've seen reference or not help = true; break; } } if help || !reference { // add both positional arguments // arg_group is only required if command = command.arg( Arg::new(add_arg_if_not_reference) .value_name(add_arg_if_not_reference) .required(true), ); } command = command.arg( Arg::new(options::ARG_FILES) .value_name(options::ARG_FILES) .value_hint(clap::ValueHint::FilePath) .action(clap::ArgAction::Append) .required(true) .num_args(1..), ); let matches = command.try_get_matches_from(args)?; let files: Vec = matches .get_many::(options::ARG_FILES) .map(|v| v.map(ToString::to_string).collect()) .unwrap_or_default(); let preserve_root = matches.get_flag(options::preserve_root::PRESERVE); let mut dereference = if matches.get_flag(options::dereference::DEREFERENCE) { Some(true) } else if matches.get_flag(options::dereference::NO_DEREFERENCE) { Some(false) } else { None }; let mut traverse_symlinks = if matches.get_flag(options::traverse::TRAVERSE) { TraverseSymlinks::First } else if matches.get_flag(options::traverse::EVERY) { TraverseSymlinks::All } else { TraverseSymlinks::None }; let recursive = matches.get_flag(options::RECURSIVE); if recursive { if traverse_symlinks == TraverseSymlinks::None { if dereference == Some(true) { return Err(USimpleError::new(1, "-R --dereference requires -H or -L")); } dereference = Some(false); } } else { traverse_symlinks = 
TraverseSymlinks::None; } let verbosity_level = if matches.get_flag(options::verbosity::CHANGES) { VerbosityLevel::Changes } else if matches.get_flag(options::verbosity::SILENT) || matches.get_flag(options::verbosity::QUIET) { VerbosityLevel::Silent } else if matches.get_flag(options::verbosity::VERBOSE) { VerbosityLevel::Verbose } else { VerbosityLevel::Normal }; let GidUidOwnerFilter { dest_gid, dest_uid, raw_owner, filter, } = parse_gid_uid_and_filter(&matches)?; let executor = ChownExecutor { traverse_symlinks, dest_gid, dest_uid, raw_owner, verbosity: Verbosity { groups_only, level: verbosity_level, }, recursive, dereference: dereference.unwrap_or(true), preserve_root, files, filter, }; executor.exec() } uucore-0.0.23/src/lib/features/pipes.rs000064400000000000000000000051411046102023000160520ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. /// Thin pipe-related wrappers around functions from the `nix` crate. use std::fs::File; #[cfg(any(target_os = "linux", target_os = "android"))] use std::io::IoSlice; #[cfg(any(target_os = "linux", target_os = "android"))] use std::os::unix::io::AsRawFd; use std::os::unix::io::FromRawFd; #[cfg(any(target_os = "linux", target_os = "android"))] use nix::fcntl::SpliceFFlags; pub use nix::{Error, Result}; /// A wrapper around [`nix::unistd::pipe`] that ensures the pipe is cleaned up. /// /// Returns two `File` objects: everything written to the second can be read /// from the first. pub fn pipe() -> Result<(File, File)> { let (read, write) = nix::unistd::pipe()?; // SAFETY: The file descriptors do not have other owners. unsafe { Ok((File::from_raw_fd(read), File::from_raw_fd(write))) } } /// Less noisy wrapper around [`nix::fcntl::splice`]. /// /// Up to `len` bytes are moved from `source` to `target`. Returns the number /// of successfully moved bytes. 
/// /// At least one of `source` and `target` must be some sort of pipe. /// To get around this requirement, consider splicing from your source into /// a [`pipe`] and then from the pipe into your target (with `splice_exact`): /// this is still very efficient. #[cfg(any(target_os = "linux", target_os = "android"))] pub fn splice(source: &impl AsRawFd, target: &impl AsRawFd, len: usize) -> Result { nix::fcntl::splice( source.as_raw_fd(), None, target.as_raw_fd(), None, len, SpliceFFlags::empty(), ) } /// Splice wrapper which fully finishes the write. /// /// Exactly `len` bytes are moved from `source` into `target`. /// /// Panics if `source` runs out of data before `len` bytes have been moved. #[cfg(any(target_os = "linux", target_os = "android"))] pub fn splice_exact(source: &impl AsRawFd, target: &impl AsRawFd, len: usize) -> Result<()> { let mut left = len; while left != 0 { let written = splice(source, target, left)?; assert_ne!(written, 0, "unexpected end of data"); left -= written; } Ok(()) } /// Copy data from `bytes` into `target`, which must be a pipe. /// /// Returns the number of successfully copied bytes. #[cfg(any(target_os = "linux", target_os = "android"))] pub fn vmsplice(target: &impl AsRawFd, bytes: &[u8]) -> Result { nix::fcntl::vmsplice( target.as_raw_fd(), &[IoSlice::new(bytes)], SpliceFFlags::empty(), ) } uucore-0.0.23/src/lib/features/process.rs000064400000000000000000000062551046102023000164170ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) cvar exitstatus // spell-checker:ignore (sys/unix) WIFSIGNALED //! Set of functions to manage IDs use libc::{gid_t, pid_t, uid_t}; use std::io; use std::process::Child; use std::process::ExitStatus; use std::thread; use std::time::{Duration, Instant}; // SAFETY: These functions always succeed and return simple integers. 
/// `geteuid()` returns the effective user ID of the calling process. pub fn geteuid() -> uid_t { unsafe { libc::geteuid() } } /// `getegid()` returns the effective group ID of the calling process. pub fn getegid() -> gid_t { unsafe { libc::getegid() } } /// `getgid()` returns the real group ID of the calling process. pub fn getgid() -> gid_t { unsafe { libc::getgid() } } /// `getuid()` returns the real user ID of the calling process. pub fn getuid() -> uid_t { unsafe { libc::getuid() } } /// Missing methods for Child objects pub trait ChildExt { /// Send a signal to a Child process. /// /// Caller beware: if the process already exited then you may accidentally /// send the signal to an unrelated process that recycled the PID. fn send_signal(&mut self, signal: usize) -> io::Result<()>; /// Send a signal to a process group. fn send_signal_group(&mut self, signal: usize) -> io::Result<()>; /// Wait for a process to finish or return after the specified duration. /// A `timeout` of zero disables the timeout. fn wait_or_timeout(&mut self, timeout: Duration) -> io::Result>; } impl ChildExt for Child { fn send_signal(&mut self, signal: usize) -> io::Result<()> { if unsafe { libc::kill(self.id() as pid_t, signal as i32) } == 0 { Ok(()) } else { Err(io::Error::last_os_error()) } } fn send_signal_group(&mut self, signal: usize) -> io::Result<()> { // Ignore the signal, so we don't go into a signal loop. if unsafe { libc::signal(signal as i32, libc::SIG_IGN) } != 0 { return Err(io::Error::last_os_error()); } if unsafe { libc::kill(0, signal as i32) } == 0 { Ok(()) } else { Err(io::Error::last_os_error()) } } fn wait_or_timeout(&mut self, timeout: Duration) -> io::Result> { if timeout == Duration::from_micros(0) { return self.wait().map(Some); } // .try_wait() doesn't drop stdin, so we do it manually drop(self.stdin.take()); let start = Instant::now(); loop { if let Some(status) = self.try_wait()? 
{ return Ok(Some(status)); } if start.elapsed() >= timeout { break; } // XXX: this is kinda gross, but it's cleaner than starting a thread just to wait // (which was the previous solution). We might want to use a different duration // here as well thread::sleep(Duration::from_millis(100)); } Ok(None) } } uucore-0.0.23/src/lib/features/quoting_style.rs000064400000000000000000000627471046102023000176570ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. use std::char::from_digit; use std::ffi::OsStr; use std::fmt; // These are characters with special meaning in the shell (e.g. bash). // The first const contains characters that only have a special meaning when they appear at the beginning of a name. const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#']; const SPECIAL_SHELL_CHARS: &str = "`$&*()|[]{};\\'\"<>?! "; #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum QuotingStyle { Shell { escape: bool, always_quote: bool, show_control: bool, }, C { quotes: Quotes, }, Literal { show_control: bool, }, } #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Quotes { None, Single, Double, // TODO: Locale } // This implementation is heavily inspired by the std::char::EscapeDefault implementation // in the Rust standard library. This custom implementation is needed because the // characters \a, \b, \e, \f & \v are not recognized by Rust. 
struct EscapedChar { state: EscapeState, } enum EscapeState { Done, Char(char), Backslash(char), ForceQuote(char), Octal(EscapeOctal), } struct EscapeOctal { c: char, state: EscapeOctalState, idx: usize, } enum EscapeOctalState { Done, Backslash, Value, } impl Iterator for EscapeOctal { type Item = char; fn next(&mut self) -> Option { match self.state { EscapeOctalState::Done => None, EscapeOctalState::Backslash => { self.state = EscapeOctalState::Value; Some('\\') } EscapeOctalState::Value => { let octal_digit = ((self.c as u32) >> (self.idx * 3)) & 0o7; if self.idx == 0 { self.state = EscapeOctalState::Done; } else { self.idx -= 1; } Some(from_digit(octal_digit, 8).unwrap()) } } } } impl EscapeOctal { fn from(c: char) -> Self { Self { c, idx: 2, state: EscapeOctalState::Backslash, } } } impl EscapedChar { fn new_literal(c: char) -> Self { Self { state: EscapeState::Char(c), } } fn new_c(c: char, quotes: Quotes) -> Self { use EscapeState::*; let init_state = match c { '\x07' => Backslash('a'), '\x08' => Backslash('b'), '\t' => Backslash('t'), '\n' => Backslash('n'), '\x0B' => Backslash('v'), '\x0C' => Backslash('f'), '\r' => Backslash('r'), '\\' => Backslash('\\'), '\'' => match quotes { Quotes::Single => Backslash('\''), _ => Char('\''), }, '"' => match quotes { Quotes::Double => Backslash('"'), _ => Char('"'), }, ' ' => match quotes { Quotes::None => Backslash(' '), _ => Char(' '), }, _ if c.is_ascii_control() => Octal(EscapeOctal::from(c)), _ => Char(c), }; Self { state: init_state } } fn new_shell(c: char, escape: bool, quotes: Quotes) -> Self { use EscapeState::*; let init_state = match c { _ if !escape && c.is_control() => Char(c), '\x07' => Backslash('a'), '\x08' => Backslash('b'), '\t' => Backslash('t'), '\n' => Backslash('n'), '\x0B' => Backslash('v'), '\x0C' => Backslash('f'), '\r' => Backslash('r'), '\x00'..='\x1F' | '\x7F' => Octal(EscapeOctal::from(c)), '\'' => match quotes { Quotes::Single => Backslash('\''), _ => Char('\''), }, _ if 
SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c), _ => Char(c), }; Self { state: init_state } } fn hide_control(self) -> Self { match self.state { EscapeState::Char(c) if c.is_control() => Self { state: EscapeState::Char('?'), }, _ => self, } } } impl Iterator for EscapedChar { type Item = char; fn next(&mut self) -> Option { match self.state { EscapeState::Backslash(c) => { self.state = EscapeState::Char(c); Some('\\') } EscapeState::Char(c) | EscapeState::ForceQuote(c) => { self.state = EscapeState::Done; Some(c) } EscapeState::Done => None, EscapeState::Octal(ref mut iter) => iter.next(), } } } fn shell_without_escape(name: &str, quotes: Quotes, show_control_chars: bool) -> (String, bool) { let mut must_quote = false; let mut escaped_str = String::with_capacity(name.len()); for c in name.chars() { let escaped = { let ec = EscapedChar::new_shell(c, false, quotes); if show_control_chars { ec } else { ec.hide_control() } }; match escaped.state { EscapeState::Backslash('\'') => escaped_str.push_str("'\\''"), EscapeState::ForceQuote(x) => { must_quote = true; escaped_str.push(x); } _ => { for char in escaped { escaped_str.push(char); } } } } must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START); (escaped_str, must_quote) } fn shell_with_escape(name: &str, quotes: Quotes) -> (String, bool) { // We need to keep track of whether we are in a dollar expression // because e.g. 
\b\n is escaped as $'\b\n' and not like $'b'$'n' let mut in_dollar = false; let mut must_quote = false; let mut escaped_str = String::with_capacity(name.len()); for c in name.chars() { let escaped = EscapedChar::new_shell(c, true, quotes); match escaped.state { EscapeState::Char(x) => { if in_dollar { escaped_str.push_str("''"); in_dollar = false; } escaped_str.push(x); } EscapeState::ForceQuote(x) => { if in_dollar { escaped_str.push_str("''"); in_dollar = false; } must_quote = true; escaped_str.push(x); } // Single quotes are not put in dollar expressions, but are escaped // if the string also contains double quotes. In that case, they must // be handled separately. EscapeState::Backslash('\'') => { must_quote = true; in_dollar = false; escaped_str.push_str("'\\''"); } _ => { if !in_dollar { escaped_str.push_str("'$'"); in_dollar = true; } must_quote = true; for char in escaped { escaped_str.push(char); } } } } must_quote = must_quote || name.starts_with(SPECIAL_SHELL_CHARS_START); (escaped_str, must_quote) } pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> String { match style { QuotingStyle::Literal { show_control } => { if *show_control { name.to_string_lossy().into_owned() } else { name.to_string_lossy() .chars() .flat_map(|c| EscapedChar::new_literal(c).hide_control()) .collect() } } QuotingStyle::C { quotes } => { let escaped_str: String = name .to_string_lossy() .chars() .flat_map(|c| EscapedChar::new_c(c, *quotes)) .collect(); match quotes { Quotes::Single => format!("'{escaped_str}'"), Quotes::Double => format!("\"{escaped_str}\""), Quotes::None => escaped_str, } } QuotingStyle::Shell { escape, always_quote, show_control, } => { let name = name.to_string_lossy(); let (quotes, must_quote) = if name.contains(&['"', '`', '$', '\\'][..]) { (Quotes::Single, true) } else if name.contains('\'') { (Quotes::Double, true) } else if *always_quote { (Quotes::Single, true) } else { (Quotes::Single, false) }; let (escaped_str, contains_quote_chars) = if 
*escape { shell_with_escape(&name, quotes) } else { shell_without_escape(&name, quotes, *show_control) }; match (must_quote | contains_quote_chars, quotes) { (true, Quotes::Single) => format!("'{escaped_str}'"), (true, Quotes::Double) => format!("\"{escaped_str}\""), _ => escaped_str, } } } } impl fmt::Display for QuotingStyle { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Self::Shell { escape, always_quote, show_control, } => { let mut style = "shell".to_string(); if escape { style.push_str("-escape"); } if always_quote { style.push_str("-always-quote"); } if show_control { style.push_str("-show-control"); } f.write_str(&style) } Self::C { .. } => f.write_str("C"), Self::Literal { .. } => f.write_str("literal"), } } } impl fmt::Display for Quotes { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Self::None => f.write_str("None"), Self::Single => f.write_str("Single"), Self::Double => f.write_str("Double"), } } } #[cfg(test)] mod tests { use crate::quoting_style::{escape_name, Quotes, QuotingStyle}; // spell-checker:ignore (tests/words) one\'two one'two fn get_style(s: &str) -> QuotingStyle { match s { "literal" => QuotingStyle::Literal { show_control: false, }, "literal-show" => QuotingStyle::Literal { show_control: true }, "escape" => QuotingStyle::C { quotes: Quotes::None, }, "c" => QuotingStyle::C { quotes: Quotes::Double, }, "shell" => QuotingStyle::Shell { escape: false, always_quote: false, show_control: false, }, "shell-show" => QuotingStyle::Shell { escape: false, always_quote: false, show_control: true, }, "shell-always" => QuotingStyle::Shell { escape: false, always_quote: true, show_control: false, }, "shell-always-show" => QuotingStyle::Shell { escape: false, always_quote: true, show_control: true, }, "shell-escape" => QuotingStyle::Shell { escape: true, always_quote: false, show_control: false, }, "shell-escape-always" => QuotingStyle::Shell { escape: true, always_quote: true, show_control: false, 
}, _ => panic!("Invalid name!"), } } fn check_names(name: &str, map: &[(&str, &str)]) { assert_eq!( map.iter() .map(|(_, style)| escape_name(name.as_ref(), &get_style(style))) .collect::>(), map.iter() .map(|(correct, _)| correct.to_string()) .collect::>() ); } #[test] fn test_simple_names() { check_names( "one_two", &[ ("one_two", "literal"), ("one_two", "literal-show"), ("one_two", "escape"), ("\"one_two\"", "c"), ("one_two", "shell"), ("one_two", "shell-show"), ("\'one_two\'", "shell-always"), ("\'one_two\'", "shell-always-show"), ("one_two", "shell-escape"), ("\'one_two\'", "shell-escape-always"), ], ); } #[test] fn test_spaces() { check_names( "one two", &[ ("one two", "literal"), ("one two", "literal-show"), ("one\\ two", "escape"), ("\"one two\"", "c"), ("\'one two\'", "shell"), ("\'one two\'", "shell-show"), ("\'one two\'", "shell-always"), ("\'one two\'", "shell-always-show"), ("\'one two\'", "shell-escape"), ("\'one two\'", "shell-escape-always"), ], ); check_names( " one", &[ (" one", "literal"), (" one", "literal-show"), ("\\ one", "escape"), ("\" one\"", "c"), ("' one'", "shell"), ("' one'", "shell-show"), ("' one'", "shell-always"), ("' one'", "shell-always-show"), ("' one'", "shell-escape"), ("' one'", "shell-escape-always"), ], ); } #[test] fn test_quotes() { // One double quote check_names( "one\"two", &[ ("one\"two", "literal"), ("one\"two", "literal-show"), ("one\"two", "escape"), ("\"one\\\"two\"", "c"), ("'one\"two'", "shell"), ("'one\"two'", "shell-show"), ("'one\"two'", "shell-always"), ("'one\"two'", "shell-always-show"), ("'one\"two'", "shell-escape"), ("'one\"two'", "shell-escape-always"), ], ); // One single quote check_names( "one\'two", &[ ("one'two", "literal"), ("one'two", "literal-show"), ("one'two", "escape"), ("\"one'two\"", "c"), ("\"one'two\"", "shell"), ("\"one'two\"", "shell-show"), ("\"one'two\"", "shell-always"), ("\"one'two\"", "shell-always-show"), ("\"one'two\"", "shell-escape"), ("\"one'two\"", "shell-escape-always"), ], 
); // One single quote and one double quote check_names( "one'two\"three", &[ ("one'two\"three", "literal"), ("one'two\"three", "literal-show"), ("one'two\"three", "escape"), ("\"one'two\\\"three\"", "c"), ("'one'\\''two\"three'", "shell"), ("'one'\\''two\"three'", "shell-show"), ("'one'\\''two\"three'", "shell-always"), ("'one'\\''two\"three'", "shell-always-show"), ("'one'\\''two\"three'", "shell-escape"), ("'one'\\''two\"three'", "shell-escape-always"), ], ); // Consecutive quotes check_names( "one''two\"\"three", &[ ("one''two\"\"three", "literal"), ("one''two\"\"three", "literal-show"), ("one''two\"\"three", "escape"), ("\"one''two\\\"\\\"three\"", "c"), ("'one'\\'''\\''two\"\"three'", "shell"), ("'one'\\'''\\''two\"\"three'", "shell-show"), ("'one'\\'''\\''two\"\"three'", "shell-always"), ("'one'\\'''\\''two\"\"three'", "shell-always-show"), ("'one'\\'''\\''two\"\"three'", "shell-escape"), ("'one'\\'''\\''two\"\"three'", "shell-escape-always"), ], ); } #[test] fn test_control_chars() { // A simple newline check_names( "one\ntwo", &[ ("one?two", "literal"), ("one\ntwo", "literal-show"), ("one\\ntwo", "escape"), ("\"one\\ntwo\"", "c"), ("one?two", "shell"), ("one\ntwo", "shell-show"), ("'one?two'", "shell-always"), ("'one\ntwo'", "shell-always-show"), ("'one'$'\\n''two'", "shell-escape"), ("'one'$'\\n''two'", "shell-escape-always"), ], ); // A control character followed by a special shell character check_names( "one\n&two", &[ ("one?&two", "literal"), ("one\n&two", "literal-show"), ("one\\n&two", "escape"), ("\"one\\n&two\"", "c"), ("'one?&two'", "shell"), ("'one\n&two'", "shell-show"), ("'one?&two'", "shell-always"), ("'one\n&two'", "shell-always-show"), ("'one'$'\\n''&two'", "shell-escape"), ("'one'$'\\n''&two'", "shell-escape-always"), ], ); // The first 16 control characters. NUL is also included, even though it is of // no importance for file names. 
check_names( "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", &[ ("????????????????", "literal"), ( "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", "literal-show", ), ( "\\000\\001\\002\\003\\004\\005\\006\\a\\b\\t\\n\\v\\f\\r\\016\\017", "escape", ), ( "\"\\000\\001\\002\\003\\004\\005\\006\\a\\b\\t\\n\\v\\f\\r\\016\\017\"", "c", ), ("????????????????", "shell"), ( "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", "shell-show", ), ("'????????????????'", "shell-always"), ( "'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F'", "shell-always-show", ), ( "''$'\\000\\001\\002\\003\\004\\005\\006\\a\\b\\t\\n\\v\\f\\r\\016\\017'", "shell-escape", ), ( "''$'\\000\\001\\002\\003\\004\\005\\006\\a\\b\\t\\n\\v\\f\\r\\016\\017'", "shell-escape-always", ), ], ); // The last 16 control characters. check_names( "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", &[ ("????????????????", "literal"), ( "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", "literal-show", ), ( "\\020\\021\\022\\023\\024\\025\\026\\027\\030\\031\\032\\033\\034\\035\\036\\037", "escape", ), ( "\"\\020\\021\\022\\023\\024\\025\\026\\027\\030\\031\\032\\033\\034\\035\\036\\037\"", "c", ), ("????????????????", "shell"), ( "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", "shell-show", ), ("'????????????????'", "shell-always"), ( "'\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'", "shell-always-show", ), ( "''$'\\020\\021\\022\\023\\024\\025\\026\\027\\030\\031\\032\\033\\034\\035\\036\\037'", "shell-escape", ), ( "''$'\\020\\021\\022\\023\\024\\025\\026\\027\\030\\031\\032\\033\\034\\035\\036\\037'", "shell-escape-always", ), ], ); // DEL check_names( "\x7F", &[ ("?", "literal"), ("\x7F", "literal-show"), ("\\177", "escape"), ("\"\\177\"", "c"), ("?", "shell"), ("\x7F", "shell-show"), ("'?'", "shell-always"), ("'\x7F'", "shell-always-show"), ("''$'\\177'", 
"shell-escape"), ("''$'\\177'", "shell-escape-always"), ], ); } #[test] fn test_question_mark() { // A question mark must force quotes in shell and shell-always, unless // it is in place of a control character (that case is already covered // in other tests) check_names( "one?two", &[ ("one?two", "literal"), ("one?two", "literal-show"), ("one?two", "escape"), ("\"one?two\"", "c"), ("'one?two'", "shell"), ("'one?two'", "shell-show"), ("'one?two'", "shell-always"), ("'one?two'", "shell-always-show"), ("'one?two'", "shell-escape"), ("'one?two'", "shell-escape-always"), ], ); } #[test] fn test_backslash() { // Escaped in C-style, but not in Shell-style escaping check_names( "one\\two", &[ ("one\\two", "literal"), ("one\\two", "literal-show"), ("one\\\\two", "escape"), ("\"one\\\\two\"", "c"), ("'one\\two'", "shell"), ("\'one\\two\'", "shell-always"), ("'one\\two'", "shell-escape"), ("'one\\two'", "shell-escape-always"), ], ); } #[test] fn test_tilde_and_hash() { check_names("~", &[("'~'", "shell"), ("'~'", "shell-escape")]); check_names( "~name", &[("'~name'", "shell"), ("'~name'", "shell-escape")], ); check_names( "some~name", &[("some~name", "shell"), ("some~name", "shell-escape")], ); check_names("name~", &[("name~", "shell"), ("name~", "shell-escape")]); check_names("#", &[("'#'", "shell"), ("'#'", "shell-escape")]); check_names( "#name", &[("'#name'", "shell"), ("'#name'", "shell-escape")], ); check_names( "some#name", &[("some#name", "shell"), ("some#name", "shell-escape")], ); check_names("name#", &[("name#", "shell"), ("name#", "shell-escape")]); } #[test] fn test_special_chars_in_double_quotes() { check_names( "can'$t", &[ ("'can'\\''$t'", "shell"), ("'can'\\''$t'", "shell-always"), ("'can'\\''$t'", "shell-escape"), ("'can'\\''$t'", "shell-escape-always"), ], ); check_names( "can'`t", &[ ("'can'\\''`t'", "shell"), ("'can'\\''`t'", "shell-always"), ("'can'\\''`t'", "shell-escape"), ("'can'\\''`t'", "shell-escape-always"), ], ); check_names( "can'\\t", &[ 
("'can'\\''\\t'", "shell"), ("'can'\\''\\t'", "shell-always"), ("'can'\\''\\t'", "shell-escape"), ("'can'\\''\\t'", "shell-escape-always"), ], ); } #[test] fn test_quoting_style_display() { let style = QuotingStyle::Shell { escape: true, always_quote: false, show_control: false, }; assert_eq!(format!("{}", style), "shell-escape"); let style = QuotingStyle::Shell { escape: false, always_quote: true, show_control: false, }; assert_eq!(format!("{}", style), "shell-always-quote"); let style = QuotingStyle::Shell { escape: false, always_quote: false, show_control: true, }; assert_eq!(format!("{}", style), "shell-show-control"); let style = QuotingStyle::C { quotes: Quotes::Double, }; assert_eq!(format!("{}", style), "C"); let style = QuotingStyle::Literal { show_control: false, }; assert_eq!(format!("{}", style), "literal"); } #[test] fn test_quotes_display() { assert_eq!(format!("{}", Quotes::None), "None"); assert_eq!(format!("{}", Quotes::Single), "Single"); assert_eq!(format!("{}", Quotes::Double), "Double"); } } uucore-0.0.23/src/lib/features/ranges.rs000064400000000000000000000171221046102023000162130ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
// spell-checker:ignore (ToDO) inval use std::cmp::max; use std::str::FromStr; use crate::display::Quotable; #[derive(PartialEq, Eq, PartialOrd, Ord, Debug)] pub struct Range { pub low: usize, pub high: usize, } impl FromStr for Range { type Err = &'static str; /// Parse a string of the form `a-b` into a `Range` /// /// ``` /// use std::str::FromStr; /// use uucore::ranges::Range; /// assert_eq!(Range::from_str("5"), Ok(Range { low: 5, high: 5 })); /// assert_eq!(Range::from_str("4-"), Ok(Range { low: 4, high: usize::MAX - 1 })); /// assert_eq!(Range::from_str("-4"), Ok(Range { low: 1, high: 4 })); /// assert_eq!(Range::from_str("2-4"), Ok(Range { low: 2, high: 4 })); /// assert!(Range::from_str("0-4").is_err()); /// assert!(Range::from_str("4-2").is_err()); /// assert!(Range::from_str("-").is_err()); /// assert!(Range::from_str("a").is_err()); /// assert!(Range::from_str("a-b").is_err()); /// ``` fn from_str(s: &str) -> Result { fn parse(s: &str) -> Result { match s.parse::() { Ok(0) => Err("fields and positions are numbered from 1"), // GNU fails when we are at the limit. 
Match their behavior Ok(n) if n == usize::MAX => Err("byte/character offset is too large"), Ok(n) => Ok(n), Err(_) => Err("failed to parse range"), } } Ok(match s.split_once('-') { None => { let n = parse(s)?; Self { low: n, high: n } } Some(("", "")) => return Err("invalid range with no endpoint"), Some((low, "")) => Self { low: parse(low)?, high: usize::MAX - 1, }, Some(("", high)) => Self { low: 1, high: parse(high)?, }, Some((low, high)) => { let (low, high) = (parse(low)?, parse(high)?); if low <= high { Self { low, high } } else { return Err("high end of range less than low end"); } } }) } } impl Range { pub fn from_list(list: &str) -> Result, String> { let mut ranges = Vec::new(); for item in list.split(&[',', ' ']) { let range_item = FromStr::from_str(item) .map_err(|e| format!("range {} was invalid: {}", item.quote(), e))?; ranges.push(range_item); } Ok(Self::merge(ranges)) } /// Merge any overlapping ranges /// /// Is guaranteed to return only disjoint ranges in a sorted order. fn merge(mut ranges: Vec) -> Vec { ranges.sort(); // merge overlapping ranges for i in 0..ranges.len() { let j = i + 1; // The +1 is a small optimization, because we can merge adjacent Ranges. // For example (1,3) and (4,6), because in the integers, there are no // possible values between 3 and 4, this is equivalent to (1,6). while j < ranges.len() && ranges[j].low <= ranges[i].high + 1 { let j_high = ranges.remove(j).high; ranges[i].high = max(ranges[i].high, j_high); } } ranges } } pub fn complement(ranges: &[Range]) -> Vec { let mut prev_high = 0; let mut complements = Vec::with_capacity(ranges.len() + 1); for range in ranges { if range.low > prev_high + 1 { complements.push(Range { low: prev_high + 1, high: range.low - 1, }); } prev_high = range.high; } if prev_high < usize::MAX - 1 { complements.push(Range { low: prev_high + 1, high: usize::MAX - 1, }); } complements } /// Test if at least one of the given Ranges contain the supplied value. 
/// /// Examples: /// /// ``` /// let ranges = uucore::ranges::Range::from_list("11,2,6-8").unwrap(); /// /// assert!(!uucore::ranges::contain(&ranges, 0)); /// assert!(!uucore::ranges::contain(&ranges, 1)); /// assert!(!uucore::ranges::contain(&ranges, 5)); /// assert!(!uucore::ranges::contain(&ranges, 10)); /// /// assert!(uucore::ranges::contain(&ranges, 2)); /// assert!(uucore::ranges::contain(&ranges, 6)); /// assert!(uucore::ranges::contain(&ranges, 7)); /// assert!(uucore::ranges::contain(&ranges, 8)); /// assert!(uucore::ranges::contain(&ranges, 11)); /// ``` pub fn contain(ranges: &[Range], n: usize) -> bool { for range in ranges { if n >= range.low && n <= range.high { return true; } } false } #[cfg(test)] mod test { use super::{complement, Range}; use std::str::FromStr; fn m(a: Vec, b: &[Range]) { assert_eq!(Range::merge(a), b); } fn r(low: usize, high: usize) -> Range { Range { low, high } } #[test] fn merging() { // Single element m(vec![r(1, 2)], &[r(1, 2)]); // Disjoint in wrong order m(vec![r(4, 5), r(1, 2)], &[r(1, 2), r(4, 5)]); // Two elements must be merged m(vec![r(1, 3), r(2, 4), r(6, 7)], &[r(1, 4), r(6, 7)]); // Two merges and a duplicate m( vec![r(1, 3), r(6, 7), r(2, 4), r(6, 7)], &[r(1, 4), r(6, 7)], ); // One giant m( vec![ r(110, 120), r(10, 20), r(100, 200), r(130, 140), r(150, 160), ], &[r(10, 20), r(100, 200)], ); // Last one joins the previous two m(vec![r(10, 20), r(30, 40), r(20, 30)], &[r(10, 40)]); m( vec![r(10, 20), r(30, 40), r(50, 60), r(20, 30)], &[r(10, 40), r(50, 60)], ); // Merge adjacent ranges m(vec![r(1, 3), r(4, 6)], &[r(1, 6)]); } #[test] fn complementing() { // Simple assert_eq!(complement(&[r(3, 4)]), vec![r(1, 2), r(5, usize::MAX - 1)]); // With start assert_eq!( complement(&[r(1, 3), r(6, 10)]), vec![r(4, 5), r(11, usize::MAX - 1)] ); // With end assert_eq!( complement(&[r(2, 4), r(6, usize::MAX - 1)]), vec![r(1, 1), r(5, 5)] ); // With start and end assert_eq!(complement(&[r(1, 4), r(6, usize::MAX - 1)]), 
/// A fixed-size ring buffer backed by a `VecDeque`.
///
/// While the buffer holds fewer than `size` elements, [`push_back`] simply
/// appends, as in a [`VecDeque`]. Once it is full, [`push_back`] first removes
/// the element at the front (first-in, first-out) and then appends the new
/// element to the back.
///
/// Use [`from_iter`] to take the last `size` elements from an iterator.
///
/// # Examples
///
/// After exceeding the size limit, the oldest elements are dropped in
/// favor of the newest element:
///
/// ```rust,ignore
/// let mut buffer: RingBuffer<u8> = RingBuffer::new(2);
/// buffer.push_back(0);
/// buffer.push_back(1);
/// buffer.push_back(2);
/// assert_eq!(vec![1, 2], buffer.data);
/// ```
///
/// Take the last `n` elements from an iterator:
///
/// ```rust,ignore
/// let iter = [0, 1, 2].iter();
/// let actual = RingBuffer::from_iter(iter, 2).data;
/// let expected = VecDeque::from_iter([1, 2].iter());
/// assert_eq!(expected, actual);
/// ```
///
/// [`push_back`]: struct.RingBuffer.html#method.push_back
/// [`from_iter`]: struct.RingBuffer.html#method.from_iter
pub struct RingBuffer<T> {
    /// The buffered elements, oldest first.
    pub data: VecDeque<T>,
    /// Maximum number of elements kept in `data`.
    size: usize,
}

impl<T> RingBuffer<T> {
    /// Creates an empty buffer that keeps at most `size` elements.
    pub fn new(size: usize) -> Self {
        let data = VecDeque::new();
        Self { data, size }
    }

    /// Builds a buffer holding the last `size` elements produced by `iter`.
    pub fn from_iter(iter: impl Iterator<Item = T>, size: usize) -> Self {
        iter.fold(Self::new(size), |mut buffer, value| {
            buffer.push_back(value);
            buffer
        })
    }

    /// Append a value to the end of the ring buffer.
    ///
    /// Returns [`None`] while the ring buffer is not full. Once it is full,
    /// appending evicts the oldest element, and that element is returned.
    ///
    /// In the special case where the size limit is zero, each call to
    /// this method with input `value` returns `Some(value)`, because
    /// the input is immediately evicted.
    ///
    /// # Examples
    ///
    /// Appending an element when the buffer is full returns the oldest
    /// element:
    ///
    /// ```rust,ignore
    /// let mut buf = RingBuffer::new(3);
    /// assert_eq!(None, buf.push_back(0));
    /// assert_eq!(None, buf.push_back(1));
    /// assert_eq!(None, buf.push_back(2));
    /// assert_eq!(Some(0), buf.push_back(3));
    /// ```
    ///
    /// If the size limit is zero, then this method always returns the
    /// input value:
    ///
    /// ```rust,ignore
    /// let mut buf = RingBuffer::new(0);
    /// assert_eq!(Some(0), buf.push_back(0));
    /// assert_eq!(Some(1), buf.push_back(1));
    /// assert_eq!(Some(2), buf.push_back(2));
    /// ```
    pub fn push_back(&mut self, value: T) -> Option<T> {
        if self.size == 0 {
            return Some(value);
        }

        // Make room first, so the buffer never holds more than `size` items.
        let is_full = self.data.len() >= self.size;
        let evicted = if is_full { self.data.pop_front() } else { None };
        self.data.push_back(value);
        evicted
    }
}
// spell-checker:ignore (vars/api) fcntl setrlimit setitimer rubout pollable occured sysconf // spell-checker:ignore (vars/signals) ABRT ALRM CHLD SEGV SIGABRT SIGALRM SIGBUS SIGCHLD SIGCONT SIGEMT SIGFPE SIGHUP SIGILL SIGINFO SIGINT SIGIO SIGIOT SIGKILL SIGPIPE SIGPROF SIGPWR SIGQUIT SIGSEGV SIGSTOP SIGSYS SIGTERM SIGTRAP SIGTSTP SIGTHR SIGTTIN SIGTTOU SIGURG SIGUSR SIGVTALRM SIGWINCH SIGXCPU SIGXFSZ STKFLT PWR THR TSTP TTIN TTOU VTALRM XCPU XFSZ SIGCLD SIGPOLL SIGWAITING SIGAIOCANCEL SIGLWP SIGFREEZE SIGTHAW SIGCANCEL SIGLOST SIGXRES SIGJVM SIGRTMIN SIGRT SIGRTMAX AIOCANCEL XRES RTMIN RTMAX #[cfg(unix)] use nix::errno::Errno; #[cfg(unix)] use nix::sys::signal::{ signal, SigHandler::SigDfl, SigHandler::SigIgn, Signal::SIGINT, Signal::SIGPIPE, }; pub static DEFAULT_SIGNAL: usize = 15; /* Linux Programmer's Manual 1 HUP 2 INT 3 QUIT 4 ILL 5 TRAP 6 ABRT 7 BUS 8 FPE 9 KILL 10 USR1 11 SEGV 12 USR2 13 PIPE 14 ALRM 15 TERM 16 STKFLT 17 CHLD 18 CONT 19 STOP 20 TSTP 21 TTIN 22 TTOU 23 URG 24 XCPU 25 XFSZ 26 VTALRM 27 PROF 28 WINCH 29 POLL 30 PWR 31 SYS */ #[cfg(any(target_os = "linux", target_os = "android"))] pub static ALL_SIGNALS: [&str; 32] = [ "EXIT", "HUP", "INT", "QUIT", "ILL", "TRAP", "ABRT", "BUS", "FPE", "KILL", "USR1", "SEGV", "USR2", "PIPE", "ALRM", "TERM", "STKFLT", "CHLD", "CONT", "STOP", "TSTP", "TTIN", "TTOU", "URG", "XCPU", "XFSZ", "VTALRM", "PROF", "WINCH", "POLL", "PWR", "SYS", ]; /* https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man3/signal.3.html No Name Default Action Description 1 SIGHUP terminate process terminal line hangup 2 SIGINT terminate process interrupt program 3 SIGQUIT create core image quit program 4 SIGILL create core image illegal instruction 5 SIGTRAP create core image trace trap 6 SIGABRT create core image abort program (formerly SIGIOT) 7 SIGEMT create core image emulate instruction executed 8 SIGFPE create core image floating-point exception 9 SIGKILL terminate process kill program 10 SIGBUS create 
core image bus error 11 SIGSEGV create core image segmentation violation 12 SIGSYS create core image non-existent system call invoked 13 SIGPIPE terminate process write on a pipe with no reader 14 SIGALRM terminate process real-time timer expired 15 SIGTERM terminate process software termination signal 16 SIGURG discard signal urgent condition present on socket 17 SIGSTOP stop process stop (cannot be caught or ignored) 18 SIGTSTP stop process stop signal generated from keyboard 19 SIGCONT discard signal continue after stop 20 SIGCHLD discard signal child status has changed 21 SIGTTIN stop process background read attempted from control terminal 22 SIGTTOU stop process background write attempted to control terminal 23 SIGIO discard signal I/O is possible on a descriptor (see fcntl(2)) 24 SIGXCPU terminate process cpu time limit exceeded (see setrlimit(2)) 25 SIGXFSZ terminate process file size limit exceeded (see setrlimit(2)) 26 SIGVTALRM terminate process virtual time alarm (see setitimer(2)) 27 SIGPROF terminate process profiling timer alarm (see setitimer(2)) 28 SIGWINCH discard signal Window size change 29 SIGINFO discard signal status request from keyboard 30 SIGUSR1 terminate process User defined signal 1 31 SIGUSR2 terminate process User defined signal 2 */ #[cfg(any(target_vendor = "apple", target_os = "freebsd"))] pub static ALL_SIGNALS: [&str; 32] = [ "EXIT", "HUP", "INT", "QUIT", "ILL", "TRAP", "ABRT", "EMT", "FPE", "KILL", "BUS", "SEGV", "SYS", "PIPE", "ALRM", "TERM", "URG", "STOP", "TSTP", "CONT", "CHLD", "TTIN", "TTOU", "IO", "XCPU", "XFSZ", "VTALRM", "PROF", "WINCH", "INFO", "USR1", "USR2", ]; /* The following signals are defined in NetBSD: SIGHUP 1 Hangup SIGINT 2 Interrupt SIGQUIT 3 Quit SIGILL 4 Illegal instruction SIGTRAP 5 Trace/BPT trap SIGABRT 6 Abort trap SIGEMT 7 EMT trap SIGFPE 8 Floating point exception SIGKILL 9 Killed SIGBUS 10 Bus error SIGSEGV 11 Segmentation fault SIGSYS 12 Bad system call SIGPIPE 13 Broken pipe SIGALRM 14 Alarm clock 
SIGTERM 15 Terminated SIGURG 16 Urgent I/O condition SIGSTOP 17 Suspended (signal) SIGTSTP 18 Suspended SIGCONT 19 Continued SIGCHLD 20 Child exited, stopped or continued SIGTTIN 21 Stopped (tty input) SIGTTOU 22 Stopped (tty output) SIGIO 23 I/O possible SIGXCPU 24 CPU time limit exceeded SIGXFSZ 25 File size limit exceeded SIGVTALRM 26 Virtual timer expired SIGPROF 27 Profiling timer expired SIGWINCH 28 Window size changed SIGINFO 29 Information request SIGUSR1 30 User defined signal 1 SIGUSR2 31 User defined signal 2 SIGPWR 32 Power fail/restart */ #[cfg(target_os = "netbsd")] pub static ALL_SIGNALS: [&str; 33] = [ "EXIT", "HUP", "INT", "QUIT", "ILL", "TRAP", "ABRT", "EMT", "FPE", "KILL", "BUS", "SEGV", "SYS", "PIPE", "ALRM", "TERM", "URG", "STOP", "TSTP", "CONT", "CHLD", "TTIN", "TTOU", "IO", "XCPU", "XFSZ", "VTALRM", "PROF", "WINCH", "INFO", "USR1", "USR2", "PWR", ]; /* The following signals are defined in OpenBSD: SIGHUP terminate process terminal line hangup SIGINT terminate process interrupt program SIGQUIT create core image quit program SIGILL create core image illegal instruction SIGTRAP create core image trace trap SIGABRT create core image abort(3) call (formerly SIGIOT) SIGEMT create core image emulate instruction executed SIGFPE create core image floating-point exception SIGKILL terminate process kill program (cannot be caught or ignored) SIGBUS create core image bus error SIGSEGV create core image segmentation violation SIGSYS create core image system call given invalid argument SIGPIPE terminate process write on a pipe with no reader SIGALRM terminate process real-time timer expired SIGTERM terminate process software termination signal SIGURG discard signal urgent condition present on socket SIGSTOP stop process stop (cannot be caught or ignored) SIGTSTP stop process stop signal generated from keyboard SIGCONT discard signal continue after stop SIGCHLD discard signal child status has changed SIGTTIN stop process background read attempted from 
control terminal SIGTTOU stop process background write attempted to control terminal SIGIO discard signal I/O is possible on a descriptor (see fcntl(2)) SIGXCPU terminate process CPU time limit exceeded (see setrlimit(2)) SIGXFSZ terminate process file size limit exceeded (see setrlimit(2)) SIGVTALRM terminate process virtual time alarm (see setitimer(2)) SIGPROF terminate process profiling timer alarm (see setitimer(2)) SIGWINCH discard signal window size change SIGINFO discard signal status request from keyboard SIGUSR1 terminate process user-defined signal 1 SIGUSR2 terminate process user-defined signal 2 SIGTHR discard signal thread AST */ #[cfg(target_os = "openbsd")] pub static ALL_SIGNALS: [&str; 33] = [ "EXIT", "HUP", "INT", "QUIT", "ILL", "TRAP", "ABRT", "EMT", "FPE", "KILL", "BUS", "SEGV", "SYS", "PIPE", "ALRM", "TERM", "URG", "STOP", "TSTP", "CONT", "CHLD", "TTIN", "TTOU", "IO", "XCPU", "XFSZ", "VTALRM", "PROF", "WINCH", "INFO", "USR1", "USR2", "THR", ]; /* The following signals are defined in Solaris and illumos; (the signals for illumos are the same as Solaris, but illumos still has SIGLWP as well as the alias for SIGLWP (SIGAIOCANCEL)): SIGHUP 1 hangup SIGINT 2 interrupt (rubout) SIGQUIT 3 quit (ASCII FS) SIGILL 4 illegal instruction (not reset when caught) SIGTRAP 5 trace trap (not reset when caught) SIGIOT 6 IOT instruction SIGABRT 6 used by abort, replace SIGIOT in the future SIGEMT 7 EMT instruction SIGFPE 8 floating point exception SIGKILL 9 kill (cannot be caught or ignored) SIGBUS 10 bus error SIGSEGV 11 segmentation violation SIGSYS 12 bad argument to system call SIGPIPE 13 write on a pipe with no one to read it SIGALRM 14 alarm clock SIGTERM 15 software termination signal from kill SIGUSR1 16 user defined signal 1 SIGUSR2 17 user defined signal 2 SIGCLD 18 child status change SIGCHLD 18 child status change alias (POSIX) SIGPWR 19 power-fail restart SIGWINCH 20 window size change SIGURG 21 urgent socket condition SIGPOLL 22 pollable event 
occured SIGIO SIGPOLL socket I/O possible (SIGPOLL alias) SIGSTOP 23 stop (cannot be caught or ignored) SIGTSTP 24 user stop requested from tty SIGCONT 25 stopped process has been continued SIGTTIN 26 background tty read attempted SIGTTOU 27 background tty write attempted SIGVTALRM 28 virtual timer expired SIGPROF 29 profiling timer expired SIGXCPU 30 exceeded cpu limit SIGXFSZ 31 exceeded file size limit SIGWAITING 32 reserved signal no longer used by threading code SIGAIOCANCEL 33 reserved signal no longer used by threading code (formerly SIGLWP) SIGFREEZE 34 special signal used by CPR SIGTHAW 35 special signal used by CPR SIGCANCEL 36 reserved signal for thread cancellation SIGLOST 37 resource lost (eg, record-lock lost) SIGXRES 38 resource control exceeded SIGJVM1 39 reserved signal for Java Virtual Machine SIGJVM2 40 reserved signal for Java Virtual Machine SIGINFO 41 information request SIGRTMIN ((int)_sysconf(_SC_SIGRT_MIN)) first realtime signal SIGRTMAX ((int)_sysconf(_SC_SIGRT_MAX)) last realtime signal */ #[cfg(target_os = "solaris")] const SIGNALS_SIZE: usize = 46; #[cfg(target_os = "illumos")] const SIGNALS_SIZE: usize = 47; #[cfg(any(target_os = "solaris", target_os = "illumos"))] static ALL_SIGNALS: [&str; SIGNALS_SIZE] = [ "HUP", "INT", "QUIT", "ILL", "TRAP", "IOT", "ABRT", "EMT", "FPE", "KILL", "BUS", "SEGV", "SYS", "PIPE", "ALRM", "TERM", "USR1", "USR2", "CLD", "CHLD", "PWR", "WINCH", "URG", "POLL", "IO", "STOP", "TSTP", "CONT", "TTIN", "TTOU", "VTALRM", "PROF", "XCPU", "XFSZ", "WAITING", "AIOCANCEL", #[cfg(target_os = "illumos")] "LWP", "FREEZE", "THAW", "CANCEL", "LOST", "XRES", "JVM1", "JVM2", "INFO", "RTMIN", "RTMAX", ]; pub fn signal_by_name_or_value(signal_name_or_value: &str) -> Option { if let Ok(value) = signal_name_or_value.parse() { if is_signal(value) { return Some(value); } else { return None; } } let signal_name = signal_name_or_value.trim_start_matches("SIG"); ALL_SIGNALS.iter().position(|&s| s == signal_name) } pub fn 
is_signal(num: usize) -> bool { num < ALL_SIGNALS.len() } pub fn signal_name_by_value(signal_value: usize) -> Option<&'static str> { ALL_SIGNALS.get(signal_value).copied() } #[cfg(unix)] pub fn enable_pipe_errors() -> Result<(), Errno> { // We pass the error as is, the return value would just be Ok(SigDfl), so we can safely ignore it. // SAFETY: this function is safe as long as we do not use a custom SigHandler -- we use the default one. unsafe { signal(SIGPIPE, SigDfl) }.map(|_| ()) } #[cfg(unix)] pub fn ignore_interrupts() -> Result<(), Errno> { // We pass the error as is, the return value would just be Ok(SigIgn), so we can safely ignore it. // SAFETY: this function is safe as long as we do not use a custom SigHandler -- we use the default one. unsafe { signal(SIGINT, SigIgn) }.map(|_| ()) } #[test] fn signal_by_value() { assert_eq!(signal_by_name_or_value("0"), Some(0)); for (value, _signal) in ALL_SIGNALS.iter().enumerate() { assert_eq!(signal_by_name_or_value(&value.to_string()), Some(value)); } } #[test] fn signal_by_short_name() { for (value, signal) in ALL_SIGNALS.iter().enumerate() { assert_eq!(signal_by_name_or_value(signal), Some(value)); } } #[test] fn signal_by_long_name() { for (value, signal) in ALL_SIGNALS.iter().enumerate() { assert_eq!( signal_by_name_or_value(&format!("SIG{signal}")), Some(value) ); } } #[test] fn name() { for (value, signal) in ALL_SIGNALS.iter().enumerate() { assert_eq!(signal_name_by_value(value), Some(*signal)); } } uucore-0.0.23/src/lib/features/sum.rs000064400000000000000000000340371046102023000155440ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore memmem algo //! Implementations of digest functions, like md5 and sha1. //! //! The [`Digest`] trait represents the interface for providing inputs //! 
to these digest functions and accessing the resulting hash. The //! [`DigestWriter`] struct provides a wrapper around [`Digest`] that //! implements the [`Write`] trait, for use in situations where calling //! [`write`] would be useful. use std::io::Write; use hex::encode; #[cfg(windows)] use memchr::memmem; pub trait Digest { fn new() -> Self where Self: Sized; fn hash_update(&mut self, input: &[u8]); fn hash_finalize(&mut self, out: &mut [u8]); fn reset(&mut self); fn output_bits(&self) -> usize; fn output_bytes(&self) -> usize { (self.output_bits() + 7) / 8 } fn result_str(&mut self) -> String { let mut buf: Vec = vec![0; self.output_bytes()]; self.hash_finalize(&mut buf); encode(buf) } } /// first element of the tuple is the blake2b state /// second is the number of output bits pub struct Blake2b(blake2b_simd::State, usize); impl Blake2b { /// Return a new Blake2b instance with a custom output bytes length pub fn with_output_bytes(output_bytes: usize) -> Self { let mut params = blake2b_simd::Params::new(); params.hash_length(output_bytes); let state = params.to_state(); Self(state, output_bytes * 8) } } impl Digest for Blake2b { fn new() -> Self { // by default, Blake2b output is 512 bits long (= 64B) Self::with_output_bytes(64) } fn hash_update(&mut self, input: &[u8]) { self.0.update(input); } fn hash_finalize(&mut self, out: &mut [u8]) { let hash_result = &self.0.finalize(); out.copy_from_slice(hash_result.as_bytes()); } fn reset(&mut self) { *self = Self::with_output_bytes(self.output_bytes()); } fn output_bits(&self) -> usize { self.1 } } pub struct Blake3(blake3::Hasher); impl Digest for Blake3 { fn new() -> Self { Self(blake3::Hasher::new()) } fn hash_update(&mut self, input: &[u8]) { self.0.update(input); } fn hash_finalize(&mut self, out: &mut [u8]) { let hash_result = &self.0.finalize(); out.copy_from_slice(hash_result.as_bytes()); } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 256 } } pub struct Sm3(sm3::Sm3); impl 
Digest for Sm3 { fn new() -> Self { Self(::new()) } fn hash_update(&mut self, input: &[u8]) { ::update(&mut self.0, input); } fn hash_finalize(&mut self, out: &mut [u8]) { out.copy_from_slice(&::finalize(self.0.clone())); } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 256 } } // NOTE: CRC_TABLE_LEN *must* be <= 256 as we cast 0..CRC_TABLE_LEN to u8 const CRC_TABLE_LEN: usize = 256; pub struct CRC { state: u32, size: usize, crc_table: [u32; CRC_TABLE_LEN], } impl CRC { fn generate_crc_table() -> [u32; CRC_TABLE_LEN] { let mut table = [0; CRC_TABLE_LEN]; for (i, elt) in table.iter_mut().enumerate().take(CRC_TABLE_LEN) { *elt = Self::crc_entry(i as u8); } table } fn crc_entry(input: u8) -> u32 { let mut crc = (input as u32) << 24; let mut i = 0; while i < 8 { let if_condition = crc & 0x8000_0000; let if_body = (crc << 1) ^ 0x04c1_1db7; let else_body = crc << 1; // NOTE: i feel like this is easier to understand than emulating an if statement in bitwise // ops let condition_table = [else_body, if_body]; crc = condition_table[(if_condition != 0) as usize]; i += 1; } crc } fn update(&mut self, input: u8) { self.state = (self.state << 8) ^ self.crc_table[((self.state >> 24) as usize ^ input as usize) & 0xFF]; } } impl Digest for CRC { fn new() -> Self { Self { state: 0, size: 0, crc_table: Self::generate_crc_table(), } } fn hash_update(&mut self, input: &[u8]) { for &elt in input { self.update(elt); } self.size += input.len(); } fn hash_finalize(&mut self, out: &mut [u8]) { let mut sz = self.size; while sz != 0 { self.update(sz as u8); sz >>= 8; } self.state = !self.state; out.copy_from_slice(&self.state.to_ne_bytes()); } fn result_str(&mut self) -> String { let mut _out: Vec = vec![0; 4]; self.hash_finalize(&mut _out); format!("{}", self.state) } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 256 } } // This can be replaced with usize::div_ceil once it is stabilized. 
// This implementation approach is optimized for when `b` is a constant, // particularly a power of two. pub fn div_ceil(a: usize, b: usize) -> usize { (a + b - 1) / b } pub struct BSD { state: u16, } impl Digest for BSD { fn new() -> Self { Self { state: 0 } } fn hash_update(&mut self, input: &[u8]) { for &byte in input { self.state = (self.state >> 1) + ((self.state & 1) << 15); self.state = self.state.wrapping_add(u16::from(byte)); } } fn hash_finalize(&mut self, out: &mut [u8]) { out.copy_from_slice(&self.state.to_ne_bytes()); } fn result_str(&mut self) -> String { let mut _out: Vec = vec![0; 2]; self.hash_finalize(&mut _out); format!("{}", self.state) } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 128 } } pub struct SYSV { state: u32, } impl Digest for SYSV { fn new() -> Self { Self { state: 0 } } fn hash_update(&mut self, input: &[u8]) { for &byte in input { self.state = self.state.wrapping_add(u32::from(byte)); } } fn hash_finalize(&mut self, out: &mut [u8]) { self.state = (self.state & 0xffff) + (self.state >> 16); self.state = (self.state & 0xffff) + (self.state >> 16); out.copy_from_slice(&(self.state as u16).to_ne_bytes()); } fn result_str(&mut self) -> String { let mut _out: Vec = vec![0; 2]; self.hash_finalize(&mut _out); format!("{}", self.state) } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 512 } } // Implements the Digest trait for sha2 / sha3 algorithms with fixed output macro_rules! 
impl_digest_common { ($algo_type: ty, $size: expr) => { impl Digest for $algo_type { fn new() -> Self { Self(Default::default()) } fn hash_update(&mut self, input: &[u8]) { digest::Digest::update(&mut self.0, input); } fn hash_finalize(&mut self, out: &mut [u8]) { digest::Digest::finalize_into_reset(&mut self.0, out.into()); } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { $size } } }; } // Implements the Digest trait for sha2 / sha3 algorithms with variable output macro_rules! impl_digest_shake { ($algo_type: ty) => { impl Digest for $algo_type { fn new() -> Self { Self(Default::default()) } fn hash_update(&mut self, input: &[u8]) { digest::Update::update(&mut self.0, input); } fn hash_finalize(&mut self, out: &mut [u8]) { digest::ExtendableOutputReset::finalize_xof_reset_into(&mut self.0, out); } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 0 } } }; } pub struct Md5(md5::Md5); pub struct Sha1(sha1::Sha1); pub struct Sha224(sha2::Sha224); pub struct Sha256(sha2::Sha256); pub struct Sha384(sha2::Sha384); pub struct Sha512(sha2::Sha512); impl_digest_common!(Md5, 128); impl_digest_common!(Sha1, 160); impl_digest_common!(Sha224, 224); impl_digest_common!(Sha256, 256); impl_digest_common!(Sha384, 384); impl_digest_common!(Sha512, 512); pub struct Sha3_224(sha3::Sha3_224); pub struct Sha3_256(sha3::Sha3_256); pub struct Sha3_384(sha3::Sha3_384); pub struct Sha3_512(sha3::Sha3_512); impl_digest_common!(Sha3_224, 224); impl_digest_common!(Sha3_256, 256); impl_digest_common!(Sha3_384, 384); impl_digest_common!(Sha3_512, 512); pub struct Shake128(sha3::Shake128); pub struct Shake256(sha3::Shake256); impl_digest_shake!(Shake128); impl_digest_shake!(Shake256); /// A struct that writes to a digest. /// /// This struct wraps a [`Digest`] and provides a [`Write`] /// implementation that passes input bytes directly to the /// [`Digest::hash_update`]. 
/// /// On Windows, if `binary` is `false`, then the [`write`] /// implementation replaces instances of "\r\n" with "\n" before passing /// the input bytes to the [`digest`]. pub struct DigestWriter<'a> { digest: &'a mut Box, /// Whether to write to the digest in binary mode or text mode on Windows. /// /// If this is `false`, then instances of "\r\n" are replaced with /// "\n" before passing input bytes to the [`digest`]. #[allow(dead_code)] binary: bool, /// Whether the previous #[allow(dead_code)] was_last_character_carriage_return: bool, // TODO These are dead code only on non-Windows operating systems. // It might be better to use a `#[cfg(windows)]` guard here. } impl<'a> DigestWriter<'a> { pub fn new(digest: &'a mut Box, binary: bool) -> DigestWriter { let was_last_character_carriage_return = false; DigestWriter { digest, binary, was_last_character_carriage_return, } } pub fn finalize(&mut self) -> bool { if self.was_last_character_carriage_return { self.digest.hash_update(&[b'\r']); true } else { false } } } impl<'a> Write for DigestWriter<'a> { #[cfg(not(windows))] fn write(&mut self, buf: &[u8]) -> std::io::Result { self.digest.hash_update(buf); Ok(buf.len()) } #[cfg(windows)] fn write(&mut self, buf: &[u8]) -> std::io::Result { if self.binary { self.digest.hash_update(buf); return Ok(buf.len()); } // The remaining code handles Windows text mode, where we must // replace each occurrence of "\r\n" with "\n". // // First, if the last character written was "\r" and the first // character in the current buffer to write is not "\n", then we // need to write the "\r" that we buffered from the previous // call to `write()`. let n = buf.len(); if self.was_last_character_carriage_return && n > 0 && buf[0] != b'\n' { self.digest.hash_update(&[b'\r']); } // Next, find all occurrences of "\r\n", inputting the slice // just before the "\n" in the previous instance of "\r\n" and // the beginning of this "\r\n". 
let mut i_prev = 0; for i in memmem::find_iter(buf, b"\r\n") { self.digest.hash_update(&buf[i_prev..i]); i_prev = i + 1; } // Finally, check whether the last character is "\r". If so, // buffer it until we know that the next character is not "\n", // which can only be known on the next call to `write()`. // // This all assumes that `write()` will be called on adjacent // blocks of the input. if n > 0 && buf[n - 1] == b'\r' { self.was_last_character_carriage_return = true; self.digest.hash_update(&buf[i_prev..n - 1]); } else { self.was_last_character_carriage_return = false; self.digest.hash_update(&buf[i_prev..n]); } // Even though we dropped a "\r" for each "\r\n" we found, we // still report the number of bytes written as `n`. This is // because the meaning of the returned number is supposed to be // the number of bytes consumed by the writer, so that if the // calling code were calling `write()` in a loop, it would know // where the next contiguous slice of the buffer starts. Ok(n) } fn flush(&mut self) -> std::io::Result<()> { Ok(()) } } #[cfg(test)] mod tests { /// Test for replacing a "\r\n" sequence with "\n" when the "\r" is /// at the end of one block and the "\n" is at the beginning of the /// next block, when reading in blocks. #[cfg(windows)] #[test] fn test_crlf_across_blocks() { use std::io::Write; use crate::digest::Digest; use crate::digest::DigestWriter; // Writing "\r" in one call to `write()`, and then "\n" in another. let mut digest = Box::new(md5::Md5::new()) as Box; let mut writer_crlf = DigestWriter::new(&mut digest, false); writer_crlf.write_all(&[b'\r']).unwrap(); writer_crlf.write_all(&[b'\n']).unwrap(); writer_crlf.hash_finalize(); let result_crlf = digest.result_str(); // We expect "\r\n" to be replaced with "\n" in text mode on Windows. 
let mut digest = Box::new(md5::Md5::new()) as Box; let mut writer_lf = DigestWriter::new(&mut digest, false); writer_lf.write_all(&[b'\n']).unwrap(); writer_lf.hash_finalize(); let result_lf = digest.result_str(); assert_eq!(result_crlf, result_lf); } } uucore-0.0.23/src/lib/features/tokenize/mod.rs000064400000000000000000000004371046102023000173440ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. #[allow(clippy::module_inception)] mod num_format; pub mod sub; pub mod token; pub mod unescaped_text; uucore-0.0.23/src/lib/features/tokenize/num_format/format_field.rs000064400000000000000000000014261046102023000233660ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety //! Primitives used by Sub Tokenizer //! and num_format modules #[derive(Clone)] pub enum FieldType { Strf, Floatf, CninetyNineHexFloatf, Scif, Decf, Intf, Charf, } // a Sub Tokens' fields are stored // as a single object so they can be more simply // passed by ref to num_format in a Sub method #[derive(Clone)] pub struct FormatField<'a> { pub min_width: Option, pub second_field: Option, pub field_char: &'a char, pub field_type: &'a FieldType, pub orig: &'a String, } uucore-0.0.23/src/lib/features/tokenize/num_format/formatter.rs000064400000000000000000000036111046102023000227340ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Primitives used by num_format and sub_modules. //! never dealt with above (e.g. 
Sub Tokenizer never uses these) use crate::{display::Quotable, show_error}; use itertools::{put_back_n, PutBackN}; use std::str::Chars; use super::format_field::FormatField; // contains the rough ingredients to final // output for a number, organized together // to allow for easy generalization of output manipulation // (e.g. max number of digits after decimal) #[derive(Default)] pub struct FormatPrimitive { pub prefix: Option, pub pre_decimal: Option, pub post_decimal: Option, pub suffix: Option, } #[derive(Clone, PartialEq, Eq)] pub enum Base { Ten = 10, Hex = 16, Octal = 8, } // information from the beginning of a numeric argument // the precedes the beginning of a numeric value pub struct InitialPrefix { pub radix_in: Base, pub sign: i8, pub offset: usize, } pub trait Formatter { // return a FormatPrimitive for // particular field char(s), given the argument // string and prefix information (sign, radix) fn get_primitive( &self, field: &FormatField, in_prefix: &InitialPrefix, str_in: &str, ) -> Option; // return a string from a FormatPrimitive, // given information about the field fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String; } pub fn get_it_at(offset: usize, str_in: &str) -> PutBackN { put_back_n(str_in[offset..].chars()) } // TODO: put this somewhere better pub fn warn_incomplete_conv(pf_arg: &str) { // important: keep println here not print show_error!("{}: value not completely converted", pf_arg.maybe_quote()); } uucore-0.0.23/src/lib/features/tokenize/num_format/formatters/base_conv/mod.rs000064400000000000000000000167221046102023000256440ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
// spell-checker:ignore (ToDO) arrnum arr_num mult basenum bufferval refd vals arrfloat conv intermed addl pub fn arrnum_int_mult(arr_num: &[u8], basenum: u8, base_ten_int_fact: u8) -> Vec { let mut carry: u16 = 0; let mut rem: u16; let mut new_amount: u16; let fact: u16 = u16::from(base_ten_int_fact); let base: u16 = u16::from(basenum); let mut ret_rev: Vec = Vec::new(); let mut it = arr_num.iter().rev(); loop { let i = it.next(); match i { Some(u) => { new_amount = (u16::from(*u) * fact) + carry; rem = new_amount % base; carry = (new_amount - rem) / base; ret_rev.push(rem as u8); } None => { while carry != 0 { rem = carry % base; carry = (carry - rem) / base; ret_rev.push(rem as u8); } break; } } } let ret: Vec = ret_rev.into_iter().rev().collect(); ret } #[allow(dead_code)] pub struct Remainder<'a> { pub position: usize, pub replace: Vec, pub arr_num: &'a Vec, } #[allow(dead_code)] pub struct DivOut<'a> { pub quotient: u8, pub remainder: Remainder<'a>, } #[allow(dead_code)] pub fn arrnum_int_div_step<'a>( rem_in: &'a Remainder, radix_in: u8, base_ten_int_divisor: u8, after_decimal: bool, ) -> DivOut<'a> { let mut rem_out = Remainder { position: rem_in.position, replace: Vec::new(), arr_num: rem_in.arr_num, }; let mut bufferval: u16 = 0; let base: u16 = u16::from(radix_in); let divisor: u16 = u16::from(base_ten_int_divisor); let mut traversed = 0; let mut quotient = 0; let refd_vals = &rem_in.arr_num[rem_in.position + rem_in.replace.len()..]; let mut it_replace = rem_in.replace.iter(); let mut it_f = refd_vals.iter(); loop { let u = match it_replace.next() { Some(u_rep) => u16::from(*u_rep), None => match it_f.next() { Some(u_orig) => u16::from(*u_orig), None => { if !after_decimal { break; } 0 } }, }; traversed += 1; bufferval += u; if bufferval > divisor { while bufferval >= divisor { quotient += 1; bufferval -= divisor; } rem_out.replace = if bufferval == 0 { Vec::new() } else { let remainder_as_arrnum = unsigned_to_arrnum(bufferval); 
base_conv_vec(&remainder_as_arrnum, 10, radix_in) }; rem_out.position += 1 + (traversed - rem_out.replace.len()); break; } else { bufferval *= base; } } DivOut { quotient, remainder: rem_out, } } pub fn arrnum_int_add(arrnum: &[u8], basenum: u8, base_ten_int_term: u8) -> Vec { let mut carry: u16 = u16::from(base_ten_int_term); let mut rem: u16; let mut new_amount: u16; let base: u16 = u16::from(basenum); let mut ret_rev: Vec = Vec::new(); let mut it = arrnum.iter().rev(); loop { let i = it.next(); match i { Some(u) => { new_amount = u16::from(*u) + carry; rem = new_amount % base; carry = (new_amount - rem) / base; ret_rev.push(rem as u8); } None => { while carry != 0 { rem = carry % base; carry = (carry - rem) / base; ret_rev.push(rem as u8); } break; } } } let ret: Vec = ret_rev.into_iter().rev().collect(); ret } pub fn base_conv_vec(src: &[u8], radix_src: u8, radix_dest: u8) -> Vec { let mut result = vec![0]; for i in src { result = arrnum_int_mult(&result, radix_dest, radix_src); result = arrnum_int_add(&result, radix_dest, *i); } result } #[allow(dead_code)] pub fn unsigned_to_arrnum(src: u16) -> Vec { let mut result: Vec = Vec::new(); let mut src_tmp: u16 = src; while src_tmp > 0 { result.push((src_tmp % 10) as u8); src_tmp /= 10; } result.reverse(); result } // temporary needs-improvement-function pub fn base_conv_float(src: &[u8], radix_src: u8, _radix_dest: u8) -> f64 { // it would require a lot of addl code // to implement this for arbitrary string input. // until then, the below operates as an outline // of how it would work. 
let mut factor: f64 = 1_f64; let radix_src_float: f64 = f64::from(radix_src); let mut r: f64 = 0_f64; for (i, u) in src.iter().enumerate() { if i > 15 { break; } factor /= radix_src_float; r += factor * f64::from(*u); } r } pub fn str_to_arrnum(src: &str, radix_def_src: &dyn RadixDef) -> Vec { let mut intermed_in: Vec = Vec::new(); for c in src.chars() { #[allow(clippy::single_match)] match radix_def_src.parse_char(c) { Some(u) => { intermed_in.push(u); } None => {} //todo err msg on incorrect } } intermed_in } pub fn arrnum_to_str(src: &[u8], radix_def_dest: &dyn RadixDef) -> String { let mut str_out = String::new(); for u in src { #[allow(clippy::single_match)] match radix_def_dest.format_u8(*u) { Some(c) => { str_out.push(c); } None => {} //todo } } str_out } pub fn base_conv_str( src: &str, radix_def_src: &dyn RadixDef, radix_def_dest: &dyn RadixDef, ) -> String { let intermed_in: Vec = str_to_arrnum(src, radix_def_src); let intermed_out = base_conv_vec( &intermed_in, radix_def_src.get_max(), radix_def_dest.get_max(), ); arrnum_to_str(&intermed_out, radix_def_dest) } pub trait RadixDef { fn get_max(&self) -> u8; fn parse_char(&self, x: char) -> Option; fn format_u8(&self, x: u8) -> Option; } pub struct RadixTen; const ZERO_ASC: u8 = b'0'; const UPPER_A_ASC: u8 = b'A'; const LOWER_A_ASC: u8 = b'a'; impl RadixDef for RadixTen { fn get_max(&self) -> u8 { 10 } fn parse_char(&self, c: char) -> Option { match c { '0'..='9' => Some(c as u8 - ZERO_ASC), _ => None, } } fn format_u8(&self, u: u8) -> Option { match u { 0..=9 => Some((ZERO_ASC + u) as char), _ => None, } } } pub struct RadixHex; impl RadixDef for RadixHex { fn get_max(&self) -> u8 { 16 } fn parse_char(&self, c: char) -> Option { match c { '0'..='9' => Some(c as u8 - ZERO_ASC), 'A'..='F' => Some(c as u8 + 10 - UPPER_A_ASC), 'a'..='f' => Some(c as u8 + 10 - LOWER_A_ASC), _ => None, } } fn format_u8(&self, u: u8) -> Option { match u { 0..=9 => Some((ZERO_ASC + u) as char), 10..=15 => Some((UPPER_A_ASC + (u - 
10)) as char), _ => None, } } } mod tests; uucore-0.0.23/src/lib/features/tokenize/num_format/formatters/base_conv/tests.rs000064400000000000000000000034711046102023000262240ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (ToDO) arrnum mult #[cfg(test)] use super::*; #[test] fn test_arrnum_int_mult() { // (in base 10) 12 * 4 = 48 let factor: Vec = vec![1, 2]; let base_num = 10; let base_ten_int_fact: u8 = 4; let should_output: Vec = vec![4, 8]; let product = arrnum_int_mult(&factor, base_num, base_ten_int_fact); assert!(product == should_output); } #[test] fn test_arrnum_int_non_base_10() { // (in base 3) // 5 * 4 = 20 let factor: Vec = vec![1, 2]; let base_num = 3; let base_ten_int_fact: u8 = 4; let should_output: Vec = vec![2, 0, 2]; let product = arrnum_int_mult(&factor, base_num, base_ten_int_fact); assert!(product == should_output); } #[test] fn test_arrnum_int_div_short_circuit() { // ( let arrnum: Vec = vec![5, 5, 5, 5, 0]; let base_num = 10; let base_ten_int_divisor: u8 = 41; let remainder_passed_in = Remainder { position: 1, replace: vec![1, 3], arr_num: &arrnum, }; // the "replace" should mean the number being divided // is 1350, the first time you can get 41 to go into // 1350, its at 135, where you can get a quotient of // 3 and a remainder of 12; let quotient_should_be: u8 = 3; let remainder_position_should_be: usize = 3; let remainder_replace_should_be = vec![1, 2]; let result = arrnum_int_div_step(&remainder_passed_in, base_num, base_ten_int_divisor, false); assert!(quotient_should_be == result.quotient); assert!(remainder_position_should_be == result.remainder.position); assert!(remainder_replace_should_be == result.remainder.replace); } 
uucore-0.0.23/src/lib/features/tokenize/num_format/formatters/cninetyninehexfloatf.rs000064400000000000000000000101461046102023000273440ustar 00000000000000
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// spell-checker:ignore (ToDO) arrnum

//! formatter for %a %A C99 Hex-floating-point subs
use super::super::format_field::FormatField;
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
use super::base_conv;
use super::base_conv::RadixDef;
use super::float_common::{primitive_to_str_common, FloatAnalysis};

/// Formatter for C99 hexadecimal floating-point substitutions.
#[derive(Default)]
pub struct CninetyNineHexFloatf {
    #[allow(dead_code)]
    as_num: f64,
}
impl CninetyNineHexFloatf {
    pub fn new() -> Self {
        Self::default()
    }
}

impl Formatter for CninetyNineHexFloatf {
    /// Analyzes `str_in` and builds the hex-float primitive for it.
    ///
    /// Always returns `Some`. The output is capitalized when the field
    /// character is `'A'`.
    fn get_primitive(
        &self,
        field: &FormatField,
        initial_prefix: &InitialPrefix,
        str_in: &str,
    ) -> Option<FormatPrimitive> {
        // one digit more than the requested precision (default 6)
        let second_field = field.second_field.unwrap_or(6) + 1;
        let analysis = FloatAnalysis::analyze(
            str_in,
            initial_prefix,
            Some(second_field as usize),
            None,
            true,
        );
        let f = get_primitive_hex(
            initial_prefix,
            &str_in[initial_prefix.offset..],
            &analysis,
            second_field as usize,
            *field.field_char == 'A',
        );
        Some(f)
    }
    fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
        primitive_to_str_common(prim, &field)
    }
}

// c99 hex has unique requirements of all floating point subs in pretty much every part of building a primitive, from prefix and suffix to need for base conversion (in all other cases if you don't have decimal you must have decimal, here it's the other way around)

// on the todo list is to have a trait for get_primitive that is implemented by each float formatter and can override a default.
when that happens we can take the parts of get_primitive_dec specific to dec and spin them out to their own functions that can be overridden. fn get_primitive_hex( initial_prefix: &InitialPrefix, _str_in: &str, _analysis: &FloatAnalysis, _last_dec_place: usize, capitalized: bool, ) -> FormatPrimitive { let prefix = Some(String::from(if initial_prefix.sign == -1 { "-0x" } else { "0x" })); // TODO actual conversion, make sure to get back mantissa. // for hex to hex, it's really just a matter of moving the // decimal point and calculating the mantissa by its initial // position and its moves, with every position counting for // the addition or subtraction of 4 (2**4, because 4 bits in a hex digit) // to the exponent. // decimal's going to be a little more complicated. correct simulation // of glibc will require after-decimal division to a specified precision. // the difficult part of this (arrnum_int_div_step) is already implemented. // the hex float name may be a bit misleading in terms of how to go about the // conversion. The best way to do it is to just convert the float number // directly to base 2 and then at the end translate back to hex. 
let mantissa = 0; let suffix = Some({ let ind = if capitalized { "P" } else { "p" }; if mantissa >= 0 { format!("{ind}+{mantissa}") } else { format!("{ind}{mantissa}") } }); FormatPrimitive { prefix, suffix, ..Default::default() } } #[allow(dead_code)] fn to_hex(src: &str, before_decimal: bool) -> String { let radix_ten = base_conv::RadixTen; let radix_hex = base_conv::RadixHex; if before_decimal { base_conv::base_conv_str(src, &radix_ten, &radix_hex) } else { let as_arrnum_ten = base_conv::str_to_arrnum(src, &radix_ten); let s = format!( "{}", base_conv::base_conv_float(&as_arrnum_ten, radix_ten.get_max(), radix_hex.get_max()) ); if s.len() > 2 { String::from(&s[2..]) } else { // zero s } } } uucore-0.0.23/src/lib/features/tokenize/num_format/formatters/decf.rs000064400000000000000000000164661046102023000240340ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety //! formatter for %g %G decimal subs use super::super::format_field::FormatField; use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis}; const SIGNIFICANT_FIGURES: usize = 6; // Parse a numeric string as the nearest integer with a given significance. // This is a helper function for round(). // Examples: // round_to_significance("456", 1) == 500 // round_to_significance("456", 2) == 460 // round_to_significance("456", 9) == 456 fn round_to_significance(input: &str, significant_figures: usize) -> u32 { if significant_figures < input.len() { // If the input has too many digits, use a float intermediary // to round it before converting to an integer. Otherwise, // converting straight to integer will truncate. // There might be a cleaner way to do this... 
let digits = &input[..significant_figures + 1]; let float_representation = digits.parse::().unwrap(); (float_representation / 10.0).round() as u32 } else { input.parse::().unwrap_or(0) } } // Removing trailing zeroes, expressing the result as an integer where // possible. This is a helper function for round(). fn truncate(mut format: FormatPrimitive) -> FormatPrimitive { if let Some(ref post_dec) = format.post_decimal { let trimmed = post_dec.trim_end_matches('0'); if trimmed.is_empty() { // If there are no nonzero digits after the decimal point, // use integer formatting by clearing post_decimal and suffix. format.post_decimal = Some(String::new()); if format.suffix == Some("e+00".into()) { format.suffix = Some(String::new()); } } else if trimmed.len() != post_dec.len() { // Otherwise, update the format to remove only the trailing // zeroes (e.g. "4.50" becomes "4.5", not "4"). If there were // no trailing zeroes, do nothing. format.post_decimal = Some(trimmed.to_owned()); } } format } // Round a format to six significant figures and remove trailing zeroes. fn round(mut format: FormatPrimitive) -> FormatPrimitive { let mut significant_digits_remaining = SIGNIFICANT_FIGURES; // First, take as many significant digits as possible from pre_decimal, if format.pre_decimal.is_some() { let input = format.pre_decimal.as_ref().unwrap(); let rounded = round_to_significance(input, significant_digits_remaining); let mut rounded_str = rounded.to_string(); significant_digits_remaining -= rounded_str.len(); // If the pre_decimal has exactly enough significant digits, // round the input to the nearest integer. If the first // post_decimal digit is 5 or higher, round up by incrementing // the pre_decimal number. Otherwise, use the pre_decimal as-is. 
if significant_digits_remaining == 0 { if let Some(digits) = &format.post_decimal { if digits.chars().next().unwrap_or('0') >= '5' { let rounded = rounded + 1; rounded_str = rounded.to_string(); } } } format.pre_decimal = Some(rounded_str); } // If no significant digits remain, or there's no post_decimal to // round, return the rounded pre_decimal value with no post_decimal. // Otherwise, round the post_decimal to the remaining significance. if significant_digits_remaining == 0 { format.post_decimal = Some(String::new()); } else if let Some(input) = format.post_decimal { let leading_zeroes = input.len() - input.trim_start_matches('0').len(); let digits = &input[leading_zeroes..]; // In the post_decimal, leading zeroes are significant. "01.0010" // has one significant digit in pre_decimal, and 3 from post_decimal. let mut post_decimal_str = String::with_capacity(significant_digits_remaining); for _ in 0..leading_zeroes { post_decimal_str.push('0'); } if leading_zeroes < significant_digits_remaining { // After significant leading zeroes, round the remaining digits // to any remaining significance. let rounded = round_to_significance(digits, significant_digits_remaining); post_decimal_str.push_str(&rounded.to_string()); } else if leading_zeroes == significant_digits_remaining && digits.chars().next().unwrap_or('0') >= '5' { // If necessary, round up the post_decimal ("1.000009" should // round to 1.00001, instead of truncating after the last // significant leading zero). post_decimal_str.pop(); post_decimal_str.push('1'); } else { // If the rounded post_decimal is entirely zeroes, discard // it and use integer formatting instead. post_decimal_str = String::new(); } format.post_decimal = Some(post_decimal_str); } truncate(format) } // Given an exponent used in scientific notation, return whether the // number is small enough to be expressed as a decimal instead. "Small // enough" is based only on the number's magnitude, not the length of // any string representation. 
fn should_represent_as_decimal(suffix: &Option) -> bool { match suffix { Some(exponent) => { if exponent.chars().nth(1) == Some('-') { exponent < &"e-05".into() } else { exponent < &"e+06".into() } } None => true, } } pub struct Decf; impl Decf { pub fn new() -> Self { Self } } impl Formatter for Decf { fn get_primitive( &self, field: &FormatField, initial_prefix: &InitialPrefix, str_in: &str, ) -> Option { let second_field = field.second_field.unwrap_or(6) + 1; // default to scif interpretation so as to not truncate input vals // (that would be displayed in scif) based on relation to decimal place let analysis = FloatAnalysis::analyze( str_in, initial_prefix, Some(second_field as usize + 1), None, false, ); let mut f_dec = get_primitive_dec( initial_prefix, &str_in[initial_prefix.offset..], &analysis, second_field as usize, Some(*field.field_char == 'G'), ); if should_represent_as_decimal(&f_dec.suffix) { // Use decimal formatting instead of scientific notation // if the input's magnitude is small. f_dec = get_primitive_dec( initial_prefix, &str_in[initial_prefix.offset..], &analysis, second_field as usize, None, ); } Some(round(f_dec)) } fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { primitive_to_str_common(prim, &field) } } uucore-0.0.23/src/lib/features/tokenize/num_format/formatters/float_common.rs000064400000000000000000000317721046102023000256050ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety // spell-checker:ignore (ToDO) arrnum use super::super::format_field::FormatField; use super::super::formatter::{ get_it_at, warn_incomplete_conv, Base, FormatPrimitive, InitialPrefix, }; use super::base_conv; use super::base_conv::RadixDef; // if the memory, copy, and comparison cost of chars // becomes an issue, we can always operate in vec here // rather than just at de_hex pub struct FloatAnalysis { pub len_important: usize, // none means no decimal point. pub decimal_pos: Option, pub follow: Option, } fn has_enough_digits( hex_input: bool, hex_output: bool, string_position: usize, starting_position: usize, limit: usize, ) -> bool { // -1s are for rounding if hex_output { if hex_input { (string_position - 1) - starting_position >= limit } else { false //undecidable without converting } } else if hex_input { (((string_position - 1) - starting_position) * 9) / 8 >= limit } else { (string_position - 1) - starting_position >= limit } } impl FloatAnalysis { #[allow(clippy::cognitive_complexity)] pub fn analyze( str_in: &str, initial_prefix: &InitialPrefix, max_sd_opt: Option, max_after_dec_opt: Option, hex_output: bool, ) -> Self { // this fn assumes // the input string // has no leading spaces or 0s let str_it = get_it_at(initial_prefix.offset, str_in); let mut ret = Self { len_important: 0, decimal_pos: None, follow: None, }; let hex_input = match initial_prefix.radix_in { Base::Hex => true, Base::Ten => false, Base::Octal => { panic!("this should never happen: floats should never receive octal input"); } }; let mut i = 0; let mut pos_before_first_nonzero_after_decimal: Option = None; for c in str_it { match c { e @ ('0'..='9' | 'A'..='F' | 'a'..='f') => { if !hex_input { match e { '0'..='9' => {} _ => { warn_incomplete_conv(str_in); break; } } } if ret.decimal_pos.is_some() && pos_before_first_nonzero_after_decimal.is_none() && e != '0' { pos_before_first_nonzero_after_decimal = Some(i - 1); 
} if let Some(max_sd) = max_sd_opt { if i == max_sd { // follow is used in cases of %g // where the character right after the last // sd is considered is rounded affecting // the previous digit in 1/2 of instances ret.follow = Some(e); } else if ret.decimal_pos.is_some() && i > max_sd { break; } } if let Some(max_after_dec) = max_after_dec_opt { if let Some(p) = ret.decimal_pos { if has_enough_digits(hex_input, hex_output, i, p, max_after_dec) { break; } } } else if let Some(max_sd) = max_sd_opt { if let Some(p) = pos_before_first_nonzero_after_decimal { if has_enough_digits(hex_input, hex_output, i, p, max_sd) { break; } } } } '.' => { if ret.decimal_pos.is_none() { ret.decimal_pos = Some(i); } else { warn_incomplete_conv(str_in); break; } } _ => { warn_incomplete_conv(str_in); break; } }; i += 1; } ret.len_important = i; ret } } fn de_hex(src: &str, before_decimal: bool) -> String { let radix_ten = base_conv::RadixTen; let radix_hex = base_conv::RadixHex; if before_decimal { base_conv::base_conv_str(src, &radix_hex, &radix_ten) } else { let as_arrnum_hex = base_conv::str_to_arrnum(src, &radix_hex); let s = format!( "{}", base_conv::base_conv_float(&as_arrnum_hex, radix_hex.get_max(), radix_ten.get_max()) ); if s.len() > 2 { String::from(&s[2..]) } else { // zero s } } } // takes a string in, // truncates to a position, // bumps the last digit up one, // and if the digit was nine // propagate to the next, etc. // If before the decimal and the most // significant digit is a 9, it becomes a 1 fn _round_str_from(in_str: &str, position: usize, before_dec: bool) -> (String, bool) { let mut it = in_str[0..position].chars(); let mut rev = String::new(); let mut i = position; let mut finished_in_dec = false; while let Some(c) = it.next_back() { i -= 1; match c { '9' => { // If we're before the decimal // and on the most significant digit, // round 9 to 1, else to 0. 
if before_dec && i == 0 { rev.push('1'); } else { rev.push('0'); } } e => { rev.push(((e as u8) + 1) as char); finished_in_dec = true; break; } } } let mut fwd = String::from(&in_str[0..i]); for ch in rev.chars().rev() { fwd.push(ch); } (fwd, finished_in_dec) } fn round_terminal_digit( before_dec: String, after_dec: String, position: usize, ) -> (String, String, bool) { if position < after_dec.len() { let digit_at_pos: char; { digit_at_pos = after_dec[position..=position].chars().next().expect(""); } if let '5'..='9' = digit_at_pos { let (new_after_dec, finished_in_dec) = _round_str_from(&after_dec, position, false); if finished_in_dec { return (before_dec, new_after_dec, false); } else { let (new_before_dec, _) = _round_str_from(&before_dec, before_dec.len(), true); let mut dec_place_chg = false; let mut before_dec_chars = new_before_dec.chars(); if before_dec_chars.next() == Some('1') && before_dec_chars.all(|c| c == '0') { // If the first digit is a one and remaining are zeros, we have // rounded to a new decimal place, so the decimal place must be updated. // Only update decimal place if the before decimal != 0 dec_place_chg = before_dec != "0"; } return (new_before_dec, new_after_dec, dec_place_chg); } // TODO } } (before_dec, after_dec, false) } #[allow(clippy::cognitive_complexity)] pub fn get_primitive_dec( initial_prefix: &InitialPrefix, str_in: &str, analysis: &FloatAnalysis, last_dec_place: usize, sci_mode: Option, ) -> FormatPrimitive { let mut f = FormatPrimitive::default(); // add negative sign section if initial_prefix.sign == -1 { f.prefix = Some(String::from("-")); } // assign the digits before and after the decimal points // to separate slices. 
If no digits after decimal point, // assign 0 let (mut first_segment_raw, second_segment_raw) = match analysis.decimal_pos { Some(pos) => (&str_in[..pos], &str_in[pos + 1..]), None => (str_in, "0"), }; if first_segment_raw.is_empty() { first_segment_raw = "0"; } // convert to string, de_hexifying if input is in hex // spell-checker:disable-line let (first_segment, second_segment) = match initial_prefix.radix_in { Base::Hex => ( de_hex(first_segment_raw, true), de_hex(second_segment_raw, false), ), _ => ( String::from(first_segment_raw), String::from(second_segment_raw), ), }; let (pre_dec_unrounded, post_dec_unrounded, mut mantissa) = if sci_mode.is_some() { if first_segment.len() > 1 { let mut post_dec = String::from(&first_segment[1..]); post_dec.push_str(&second_segment); ( String::from(&first_segment[0..1]), post_dec, first_segment.len() as isize - 1, ) } else { match first_segment .chars() .next() .expect("float_common: no chars in first segment.") { '0' => { let it = second_segment.chars().enumerate(); let mut m: isize = 0; let mut pre = String::from("0"); let mut post = String::from("0"); for (i, c) in it { match c { '0' => {} _ => { m = -((i as isize) + 1); pre = String::from(&second_segment[i..=i]); post = String::from(&second_segment[i + 1..]); break; } } } (pre, post, m) } _ => (first_segment, second_segment, 0), } } } else { (first_segment, second_segment, 0) }; let (pre_dec_draft, post_dec_draft, dec_place_chg) = round_terminal_digit(pre_dec_unrounded, post_dec_unrounded, last_dec_place - 1); f.post_decimal = Some(post_dec_draft); if let Some(capitalized) = sci_mode { let si_ind = if capitalized { 'E' } else { 'e' }; // Increase the mantissa if we're adding a decimal place if dec_place_chg { mantissa += 1; } f.suffix = Some(if mantissa >= 0 { format!("{si_ind}+{mantissa:02}") } else { // negative sign is considered in format!s // leading zeroes format!("{si_ind}{mantissa:03}") }); f.pre_decimal = Some(pre_dec_draft); } else if dec_place_chg { // We've 
rounded up to a new decimal place so append 0 f.pre_decimal = Some(pre_dec_draft + "0"); } else { f.pre_decimal = Some(pre_dec_draft); } f } pub fn primitive_to_str_common(prim: &FormatPrimitive, field: &FormatField) -> String { let mut final_str = String::new(); if let Some(ref prefix) = prim.prefix { final_str.push_str(prefix); } match prim.pre_decimal { Some(ref pre_decimal) => { final_str.push_str(pre_decimal); } None => { panic!( "error, format primitives provided to int, will, incidentally under correct \ behavior, always have a pre_dec value." ); } } let decimal_places = field.second_field.unwrap_or(6); match prim.post_decimal { Some(ref post_decimal) => { if !post_decimal.is_empty() && decimal_places > 0 { final_str.push('.'); let len_avail = post_decimal.len() as u32; if decimal_places >= len_avail { // println!("dec {}, len avail {}", decimal_places, len_avail); final_str.push_str(post_decimal); if *field.field_char != 'g' && *field.field_char != 'G' { let diff = decimal_places - len_avail; for _ in 0..diff { final_str.push('0'); } } } else { // println!("printing to only {}", decimal_places); final_str.push_str(&post_decimal[0..decimal_places as usize]); } } } None => { panic!( "error, format primitives provided to int, will, incidentally under correct \ behavior, always have a pre_dec value." ); } } if let Some(ref suffix) = prim.suffix { final_str.push_str(suffix); } final_str } uucore-0.0.23/src/lib/features/tokenize/num_format/formatters/floatf.rs000064400000000000000000000026631046102023000244000ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety // spell-checker:ignore (ToDO) arrnum //! 
formatter for %f %F common-notation floating-point subs use super::super::format_field::FormatField; use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis}; #[derive(Default)] pub struct Floatf; impl Floatf { pub fn new() -> Self { Self } } impl Formatter for Floatf { fn get_primitive( &self, field: &FormatField, initial_prefix: &InitialPrefix, str_in: &str, ) -> Option { let second_field = field.second_field.unwrap_or(6) + 1; let analysis = FloatAnalysis::analyze( str_in, initial_prefix, None, Some(second_field as usize), false, ); let f = get_primitive_dec( initial_prefix, &str_in[initial_prefix.offset..], &analysis, second_field as usize, None, ); Some(f) } fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { primitive_to_str_common(prim, &field) } } uucore-0.0.23/src/lib/features/tokenize/num_format/formatters/intf.rs000064400000000000000000000261711046102023000240650ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety // spell-checker:ignore (ToDO) arrnum //! formatter for unsigned and signed int subs //! unsigned int: %X %x (hex u64) %o (octal u64) %u (base ten u64) //! 
signed int: %i %d (both base ten i64) use super::super::format_field::FormatField; use super::super::formatter::{ get_it_at, warn_incomplete_conv, Base, FormatPrimitive, Formatter, InitialPrefix, }; use std::i64; use std::u64; #[derive(Default)] pub struct Intf { _a: u32, } // see the Intf::analyze() function below struct IntAnalysis { check_past_max: bool, past_max: bool, is_zero: bool, len_digits: u8, } impl Intf { pub fn new() -> Self { Self::default() } // take a ref to argument string, and basic information // about prefix (offset, radix, sign), and analyze string // to gain the IntAnalysis information above // check_past_max: true if the number *may* be above max, // but we don't know either way. One of several reasons // we may have to parse as int. // past_max: true if the object is past max, false if not // in the future we should probably combine these into an // Option // is_zero: true if number is zero, false otherwise // len_digits: length of digits used to create the int // important, for example, if we run into a non-valid character #[allow(clippy::cognitive_complexity)] fn analyze(str_in: &str, signed_out: bool, initial_prefix: &InitialPrefix) -> IntAnalysis { // the maximum number of digits we could conceivably // have before the decimal point without exceeding the // max let mut str_it = get_it_at(initial_prefix.offset, str_in); let max_sd_in = if signed_out { match initial_prefix.radix_in { Base::Ten => 19, Base::Octal => 21, Base::Hex => 16, } } else { match initial_prefix.radix_in { Base::Ten => 20, Base::Octal => 22, Base::Hex => 16, } }; let mut ret = IntAnalysis { check_past_max: false, past_max: false, is_zero: false, len_digits: 0, }; // todo turn this to a while let now that we know // no special behavior on EOI break loop { let c_opt = str_it.next(); if let Some(c) = c_opt { match c { '0'..='9' | 'a'..='f' | 'A'..='F' => { if ret.len_digits == 0 && c == '0' { ret.is_zero = true; } else if ret.is_zero { ret.is_zero = false; } 
ret.len_digits += 1; if ret.len_digits == max_sd_in { if let Some(next_ch) = str_it.next() { match next_ch { '0'..='9' => { ret.past_max = true; } _ => { // force conversion // to check if its above max. // todo: spin out convert // into fn, call it here to try // read val, on Ok() // save val for reuse later // that way on same-base in and out // we don't needlessly convert int // to str, we can just copy it over. ret.check_past_max = true; str_it.put_back(next_ch); } } if ret.past_max { break; } } else { ret.check_past_max = true; } } } _ => { warn_incomplete_conv(str_in); break; } } } else { // breaks on EOL break; } } ret } // get a FormatPrimitive of the maximum value for the field char // and given sign fn get_max(field_char: char, sign: i8) -> FormatPrimitive { let mut fmt_primitive = FormatPrimitive::default(); fmt_primitive.pre_decimal = Some(String::from(match field_char { 'd' | 'i' => match sign { 1 => "9223372036854775807", _ => { fmt_primitive.prefix = Some(String::from("-")); "9223372036854775808" } }, 'x' | 'X' => "ffffffffffffffff", 'o' => "1777777777777777777777", /* 'u' | */ _ => "18446744073709551615", })); fmt_primitive } // conv_from_segment contract: // 1. takes // - a string that begins with a non-zero digit, and proceeds // with zero or more following digits until the end of the string // - a radix to interpret those digits as // - a char that communicates: // whether to interpret+output the string as an i64 or u64 // what radix to write the parsed number as. // 2. parses it as a rust integral type // 3. 
outputs FormatPrimitive with: // - if the string falls within bounds: // number parsed and written in the correct radix // - if the string falls outside bounds: // for i64 output, the int minimum or int max (depending on sign) // for u64 output, the u64 max in the output radix fn conv_from_segment( segment: &str, radix_in: Base, field_char: char, sign: i8, ) -> FormatPrimitive { match field_char { 'i' | 'd' => match i64::from_str_radix(segment, radix_in as u32) { Ok(i) => { let mut fmt_prim = FormatPrimitive::default(); if sign == -1 { fmt_prim.prefix = Some(String::from("-")); } fmt_prim.pre_decimal = Some(format!("{i}")); fmt_prim } Err(_) => Self::get_max(field_char, sign), }, _ => match u64::from_str_radix(segment, radix_in as u32) { Ok(u) => { let mut fmt_prim = FormatPrimitive::default(); let u_f = if sign == -1 { u64::MAX - (u - 1) } else { u }; fmt_prim.pre_decimal = Some(match field_char { 'X' => format!("{u_f:X}"), 'x' => format!("{u_f:x}"), 'o' => format!("{u_f:o}"), _ => format!("{u_f}"), }); fmt_prim } Err(_) => Self::get_max(field_char, sign), }, } } } impl Formatter for Intf { fn get_primitive( &self, field: &FormatField, initial_prefix: &InitialPrefix, str_in: &str, ) -> Option { let begin = initial_prefix.offset; // get information about the string. see Intf::Analyze // def above. 
let convert_hints = Self::analyze( str_in, *field.field_char == 'i' || *field.field_char == 'd', initial_prefix, ); // We always will have a format primitive to return Some(if convert_hints.len_digits == 0 || convert_hints.is_zero { // if non-digit or end is reached before a non-zero digit FormatPrimitive { pre_decimal: Some(String::from("0")), ..Default::default() } } else if !convert_hints.past_max { // if the number is or may be below the bounds limit let radix_out = match *field.field_char { 'd' | 'i' | 'u' => Base::Ten, 'x' | 'X' => Base::Hex, /* 'o' | */ _ => Base::Octal, }; let radix_mismatch = !radix_out.eq(&initial_prefix.radix_in); let decrease_from_max: bool = initial_prefix.sign == -1 && *field.field_char != 'i'; let end = begin + convert_hints.len_digits as usize; // convert to int if any one of these is true: // - number of digits in int indicates it may be past max // - we're subtracting from the max // - we're converting the base if convert_hints.check_past_max || decrease_from_max || radix_mismatch { // radix of in and out is the same. let segment = String::from(&str_in[begin..end]); Self::conv_from_segment( &segment, initial_prefix.radix_in.clone(), *field.field_char, initial_prefix.sign, ) } else { // otherwise just do a straight string copy. 
let mut fmt_prim = FormatPrimitive::default(); // this is here and not earlier because // zero doesn't get a sign, and conv_from_segment // creates its format primitive separately if initial_prefix.sign == -1 && *field.field_char == 'i' { fmt_prim.prefix = Some(String::from("-")); } fmt_prim.pre_decimal = Some(String::from(&str_in[begin..end])); fmt_prim } } else { Self::get_max(*field.field_char, initial_prefix.sign) }) } fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { let mut final_str: String = String::new(); if let Some(ref prefix) = prim.prefix { final_str.push_str(prefix); } // integral second fields is zero-padded minimum-width // which gets handled before general minimum-width match prim.pre_decimal { Some(ref pre_decimal) => { if let Some(min) = field.second_field { let mut i = min; let len = pre_decimal.len() as u32; while i > len { final_str.push('0'); i -= 1; } } final_str.push_str(pre_decimal); } None => { panic!( "error, format primitives provided to int, will, incidentally under \ correct behavior, always have a pre_dec value." ); } } final_str } } uucore-0.0.23/src/lib/features/tokenize/num_format/formatters/mod.rs000064400000000000000000000006171046102023000237010ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety mod base_conv; pub mod cninetyninehexfloatf; pub mod decf; mod float_common; pub mod floatf; pub mod intf; pub mod scif; uucore-0.0.23/src/lib/features/tokenize/num_format/formatters/scif.rs000064400000000000000000000026601046102023000240460ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety //! formatter for %e %E scientific notation subs use super::super::format_field::FormatField; use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis}; #[derive(Default)] pub struct Scif; impl Scif { pub fn new() -> Self { Self } } impl Formatter for Scif { fn get_primitive( &self, field: &FormatField, initial_prefix: &InitialPrefix, str_in: &str, ) -> Option { let second_field = field.second_field.unwrap_or(6) + 1; let analysis = FloatAnalysis::analyze( str_in, initial_prefix, Some(second_field as usize + 1), None, false, ); let f = get_primitive_dec( initial_prefix, &str_in[initial_prefix.offset..], &analysis, second_field as usize, Some(*field.field_char == 'E'), ); Some(f) } fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { primitive_to_str_common(prim, &field) } } uucore-0.0.23/src/lib/features/tokenize/num_format/mod.rs000064400000000000000000000004011046102023000215020ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. pub mod format_field; mod formatter; mod formatters; pub mod num_format; uucore-0.0.23/src/lib/features/tokenize/num_format/num_format.rs000064400000000000000000000243041046102023000231020ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety //! 
handles creating printed output for numeric substitutions // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety use std::env; use std::vec::Vec; use crate::display::Quotable; use crate::{show_error, show_warning}; use super::format_field::{FieldType, FormatField}; use super::formatter::{Base, FormatPrimitive, Formatter, InitialPrefix}; use super::formatters::cninetyninehexfloatf::CninetyNineHexFloatf; use super::formatters::decf::Decf; use super::formatters::floatf::Floatf; use super::formatters::intf::Intf; use super::formatters::scif::Scif; pub fn warn_expected_numeric(pf_arg: &str) { // important: keep println here not print show_error!("{}: expected a numeric value", pf_arg.maybe_quote()); } // when character constant arguments have excess characters // issue a warning when POSIXLY_CORRECT is not set fn warn_char_constant_ign(remaining_bytes: &[u8]) { match env::var("POSIXLY_CORRECT") { Ok(_) => {} Err(e) => { if let env::VarError::NotPresent = e { show_warning!( "{:?}: character(s) following character \ constant have been ignored", remaining_bytes ); } } } } // this function looks at the first few // characters of an argument and returns a value if we can learn // a value from that (e.g. no argument? return 0, char constant? 
ret value) fn get_provided(str_in_opt: Option<&String>) -> Option { const C_S_QUOTE: u8 = 39; const C_D_QUOTE: u8 = 34; match str_in_opt { Some(str_in) => { let mut byte_it = str_in.bytes(); if let Some(ch) = byte_it.next() { match ch { C_S_QUOTE | C_D_QUOTE => { Some(match byte_it.next() { Some(second_byte) => { let mut ignored: Vec = Vec::new(); for cont in byte_it { ignored.push(cont); } if !ignored.is_empty() { warn_char_constant_ign(&ignored); } second_byte } // no byte after quote None => { let so_far = (ch as char).to_string(); warn_expected_numeric(&so_far); 0_u8 } }) } // first byte is not quote _ => None, // no first byte } } else { Some(0_u8) } } None => Some(0), } } // takes a string and returns // a sign, // a base, // and an offset for index after all // initial spacing, sign, base prefix, and leading zeroes #[allow(clippy::cognitive_complexity)] fn get_initial_prefix(str_in: &str, field_type: &FieldType) -> InitialPrefix { let mut str_it = str_in.chars(); let mut ret = InitialPrefix { radix_in: Base::Ten, sign: 1, offset: 0, }; let mut top_char = str_it.next(); // skip spaces and ensure top_char is the first non-space char // (or None if none exists) while let Some(' ') = top_char { ret.offset += 1; top_char = str_it.next(); } // parse sign match top_char { Some('+') => { ret.offset += 1; top_char = str_it.next(); } Some('-') => { ret.sign = -1; ret.offset += 1; top_char = str_it.next(); } _ => {} } // we want to exit with offset being // the index of the first non-zero // digit before the decimal point or // if there is none, the zero before the // decimal point, or, if there is none, // the decimal point. // while we are determining the offset // we will ensure as a convention // the offset is always on the first character // that we are yet unsure if it is the // final offset. If the zero could be before // a decimal point we don't move past the zero. 
let mut is_hex = false; if Some('0') == top_char { if let Some(base) = str_it.next() { // lead zeroes can only exist in // octal and hex base let mut do_clean_lead_zeroes = false; match base { 'x' | 'X' => { is_hex = true; ret.offset += 2; ret.radix_in = Base::Hex; do_clean_lead_zeroes = true; } e @ '0'..='9' => { ret.offset += 1; if let FieldType::Intf = *field_type { ret.radix_in = Base::Octal; } if e == '0' { do_clean_lead_zeroes = true; } } _ => {} } if do_clean_lead_zeroes { let mut first = true; for ch_zero in str_it { // see notes on offset above: // this is why the offset for octal and decimal numbers // that reach this branch is 1 even though // they have already eaten the characters '00' // this is also why when hex encounters its // first zero it does not move its offset // forward because it does not know for sure // that it's current offset (of that zero) // is not the final offset, // whereas at that point octal knows its // current offset is not the final offset. match ch_zero { '0' => { if !(is_hex && first) { ret.offset += 1; } } // if decimal, keep last zero if one exists // (it's possible for last zero to // not exist at this branch if we're in hex input) '.' => break, // other digit, etc. 
_ => { if !(is_hex && first) { ret.offset += 1; } break; } } if first { first = false; } } } } } ret } // this is the function a Sub's print will delegate to // if it is a numeric field, passing the field details // and an iterator to the argument pub fn num_format(field: &FormatField, in_str_opt: Option<&String>) -> Option { let field_char = field.field_char; // num format mainly operates by further delegating to one of // several Formatter structs depending on the field // see formatter.rs for more details // to do switch to static dispatch let formatter: Box = match *field.field_type { FieldType::Intf => Box::new(Intf::new()), FieldType::Floatf => Box::new(Floatf::new()), FieldType::CninetyNineHexFloatf => Box::new(CninetyNineHexFloatf::new()), FieldType::Scif => Box::new(Scif::new()), FieldType::Decf => Box::new(Decf::new()), _ => { panic!("asked to do num format with non-num field type"); } }; let prim_opt= // if we can get an assumed value from looking at the first // few characters, use that value to create the FormatPrimitive if let Some(provided_num) = get_provided(in_str_opt) { let mut tmp = FormatPrimitive::default(); match field_char { 'u' | 'i' | 'd' => { tmp.pre_decimal = Some( format!("{provided_num}")); }, 'x' | 'X' => { tmp.pre_decimal = Some( format!("{provided_num:x}")); }, 'o' => { tmp.pre_decimal = Some( format!("{provided_num:o}")); }, 'e' | 'E' | 'g' | 'G' => { let as_str = format!("{provided_num}"); let initial_prefix = get_initial_prefix( &as_str, field.field_type ); tmp=formatter.get_primitive(field, &initial_prefix, &as_str) .expect("err during default provided num"); }, _ => { tmp.pre_decimal = Some( format!("{provided_num}")); tmp.post_decimal = Some(String::from("0")); } } Some(tmp) } else { // otherwise we'll interpret the argument as a number // using the appropriate Formatter let in_str = in_str_opt.expect( "please send the devs this message: \n get_provided is failing to ret as Some(0) on no str "); // first get information about 
the beginning of the // numeric argument that would be useful for // any formatter (int or float) let initial_prefix = get_initial_prefix( in_str, field.field_type ); // then get the FormatPrimitive from the Formatter formatter.get_primitive(field, &initial_prefix, in_str) }; // if we have a formatPrimitive, print its results // according to the field-char appropriate Formatter prim_opt.map(|prim| formatter.primitive_to_str(&prim, field.clone())) } uucore-0.0.23/src/lib/features/tokenize/sub.rs000064400000000000000000000413271046102023000173610ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety //! Sub is a token that represents a //! segment of the format string that is a substitution //! it is created by Sub's implementation of the Tokenizer trait //! Subs which have numeric field chars make use of the num_format //! 
submodule use crate::error::{UError, UResult}; use crate::quoting_style::{escape_name, QuotingStyle}; use itertools::{put_back_n, PutBackN}; use std::error::Error; use std::fmt::Display; use std::io::Write; use std::iter::Peekable; use std::process::exit; use std::slice::Iter; use std::str::Chars; use super::num_format::format_field::{FieldType, FormatField}; use super::num_format::num_format; use super::token; use super::unescaped_text::UnescapedText; const EXIT_ERR: i32 = 1; #[derive(Debug)] pub enum SubError { InvalidSpec(String), } impl Display for SubError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { match self { Self::InvalidSpec(s) => write!(f, "%{s}: invalid conversion specification"), } } } impl Error for SubError {} impl UError for SubError {} fn convert_asterisk_arg_int(asterisk_arg: &str) -> isize { // this is a costly way to parse the // args used for asterisk values into integers // from various bases. Actually doing it correctly // (going through the pipeline to intf, but returning // the integer instead of writing it to string and then // back) is on the refactoring TODO let field_type = FieldType::Intf; let field_char = 'i'; let field_info = FormatField { min_width: Some(0), second_field: Some(0), orig: &asterisk_arg.to_string(), field_type: &field_type, field_char: &field_char, }; num_format::num_format(&field_info, Some(&asterisk_arg.to_string())) .unwrap() .parse::() .unwrap() } pub enum CanAsterisk { Fixed(T), Asterisk, } // Sub is a tokenizer which creates tokens // for substitution segments of a format string pub struct Sub { min_width: CanAsterisk>, second_field: CanAsterisk>, field_char: char, field_type: FieldType, orig: String, prefix_char: char, } impl Sub { pub fn new( min_width: CanAsterisk>, second_field: CanAsterisk>, field_char: char, orig: String, prefix_char: char, ) -> Self { // for more dry printing, field characters are grouped // in initialization of token. 
let field_type = match field_char { 's' | 'b' | 'q' => FieldType::Strf, 'd' | 'i' | 'u' | 'o' | 'x' | 'X' => FieldType::Intf, 'f' | 'F' => FieldType::Floatf, 'a' | 'A' => FieldType::CninetyNineHexFloatf, 'e' | 'E' => FieldType::Scif, 'g' | 'G' => FieldType::Decf, 'c' => FieldType::Charf, _ => { // should be unreachable. println!("Invalid field type"); exit(EXIT_ERR); } }; Self { min_width, second_field, field_char, field_type, orig, prefix_char, } } } #[derive(Default)] pub(crate) struct SubParser { min_width_tmp: Option, min_width_is_asterisk: bool, past_decimal: bool, second_field_tmp: Option, second_field_is_asterisk: bool, specifiers_found: bool, field_char: Option, text_so_far: String, } impl SubParser { fn new() -> Self { Self::default() } pub(crate) fn from_it( writer: &mut W, it: &mut PutBackN, args: &mut Peekable>, ) -> UResult> where W: Write, { let mut parser = Self::new(); if parser.sub_vals_retrieved(it)? { let t = Self::build_token(parser); t.write(writer, args); Ok(Some(t)) } else { Ok(None) } } fn build_token(parser: Self) -> token::Token { // not a self method so as to allow move of sub-parser vals. // return new Sub struct as token let prefix_char = match &parser.min_width_tmp { Some(width) if width.starts_with('0') => '0', _ => ' ', }; token::Token::Sub(Sub::new( if parser.min_width_is_asterisk { CanAsterisk::Asterisk } else { CanAsterisk::Fixed( parser .min_width_tmp .map(|x| x.parse::().unwrap_or(1)), ) }, if parser.second_field_is_asterisk { CanAsterisk::Asterisk } else { CanAsterisk::Fixed(parser.second_field_tmp.map(|x| x.parse::().unwrap())) }, parser.field_char.unwrap(), parser.text_so_far, prefix_char, )) } #[allow(clippy::cognitive_complexity)] fn sub_vals_retrieved(&mut self, it: &mut PutBackN) -> UResult { if !Self::successfully_eat_prefix(it, &mut self.text_so_far)? { return Ok(false); } // this fn in particular is much longer than it needs to be // .could get a lot // of code savings just by cleaning it up. 
shouldn't use a regex // though, as we want to mimic the original behavior of printing // the field as interpreted up until the error in the field. let mut legal_fields = [ // 'a', 'A', //c99 hex float implementation not yet complete 'b', 'c', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'i', 'o', 'q', 's', 'u', 'x', 'X', ]; let mut specifiers = ['h', 'j', 'l', 'L', 't', 'z']; legal_fields.sort_unstable(); specifiers.sort_unstable(); // divide substitution from %([0-9]+)?(.[0-9+])?([a-zA-Z]) // into min_width, second_field, field_char for ch in it { self.text_so_far.push(ch); match ch { '-' | '*' | '0'..='9' => { if self.past_decimal { // second field should never have a // negative value if self.second_field_is_asterisk || ch == '-' || self.specifiers_found { return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); } if self.second_field_tmp.is_none() { self.second_field_tmp = Some(String::new()); } match self.second_field_tmp.as_mut() { Some(x) => { if ch == '*' && !x.is_empty() { return Err( SubError::InvalidSpec(self.text_so_far.clone()).into() ); } if ch == '*' { self.second_field_is_asterisk = true; } x.push(ch); } None => { panic!("should be unreachable"); } } } else { if self.min_width_is_asterisk || self.specifiers_found { return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); } if self.min_width_tmp.is_none() { self.min_width_tmp = Some(String::new()); } match self.min_width_tmp.as_mut() { Some(x) => { if (ch == '-' || ch == '*') && !x.is_empty() { return Err( SubError::InvalidSpec(self.text_so_far.clone()).into() ); } if ch == '*' { self.min_width_is_asterisk = true; } x.push(ch); } None => { panic!("should be unreachable"); } } } } '.' 
=> { if self.past_decimal { return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); } else { self.past_decimal = true; } } x if legal_fields.binary_search(&x).is_ok() => { self.field_char = Some(ch); break; } x if specifiers.binary_search(&x).is_ok() => { if !self.past_decimal { self.past_decimal = true; } if !self.specifiers_found { self.specifiers_found = true; } } _ => { return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); } } } if self.field_char.is_none() { return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); } let field_char_retrieved = self.field_char.unwrap(); if self.past_decimal && self.second_field_tmp.is_none() { self.second_field_tmp = Some(String::from("0")); } self.validate_field_params(field_char_retrieved)?; // if the dot is provided without a second field // printf interprets it as 0. if let Some(x) = self.second_field_tmp.as_mut() { if x.is_empty() { self.min_width_tmp = Some(String::from("0")); } } Ok(true) } fn successfully_eat_prefix( it: &mut PutBackN, text_so_far: &mut String, ) -> UResult { // get next two chars, // if they're '%%' we're not tokenizing it // else put chars back let preface = it.next(); let n_ch = it.next(); if preface == Some('%') && n_ch != Some('%') { match n_ch { Some(x) => { it.put_back(x); Ok(true) } None => { text_so_far.push('%'); Err(SubError::InvalidSpec(text_so_far.clone()).into()) } } } else { if let Some(x) = n_ch { it.put_back(x); }; if let Some(x) = preface { it.put_back(x); }; Ok(false) } } fn validate_field_params(&self, field_char: char) -> UResult<()> { // check for illegal combinations here when possible vs // on each application so we check less per application // to do: move these checks to Sub::new if (field_char == 's' && self.min_width_tmp == Some(String::from("0"))) || (field_char == 'c' && (self.min_width_tmp == Some(String::from("0")) || self.past_decimal)) || ((field_char == 'b' || field_char == 'q') && (self.min_width_tmp.is_some() || self.past_decimal 
|| self.second_field_tmp.is_some())) { // invalid string substitution // to do: include information about an invalid // string substitution return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); } Ok(()) } } impl Sub { #[allow(clippy::cognitive_complexity)] pub(crate) fn write(&self, writer: &mut W, pf_args_it: &mut Peekable>) where W: Write, { let field = FormatField { min_width: match self.min_width { CanAsterisk::Fixed(x) => x, CanAsterisk::Asterisk => { match pf_args_it.next() { // temporary, use intf.rs instead Some(x) => Some(convert_asterisk_arg_int(x)), None => Some(0), } } }, second_field: match self.second_field { CanAsterisk::Fixed(x) => x, CanAsterisk::Asterisk => { match pf_args_it.next() { // temporary, use intf.rs instead Some(x) => { let result = convert_asterisk_arg_int(x); if result < 0 { None } else { Some(result as u32) } } None => Some(0), } } }, field_char: &self.field_char, field_type: &self.field_type, orig: &self.orig, }; let pf_arg = pf_args_it.next(); // minimum width is handled independently of actual // field char let pre_min_width_opt: Option = match *field.field_type { // if %s just return arg // if %b use UnescapedText module's unescape-fn // if %c return first char of arg // if %q return arg which non-printable characters are escaped FieldType::Strf | FieldType::Charf => { match pf_arg { Some(arg_string) => { match *field.field_char { 's' => Some(match field.second_field { Some(max) => String::from(&arg_string[..max as usize]), None => arg_string.clone(), }), 'b' => { let mut a_it = put_back_n(arg_string.chars()); UnescapedText::from_it_core(writer, &mut a_it, true); None } 'q' => Some(escape_name( arg_string.as_ref(), &QuotingStyle::Shell { escape: true, always_quote: false, show_control: false, }, )), // get opt of first val // and map it to opt 'c' => arg_string.chars().next().map(|x| x.to_string()), _ => unreachable!(), } } None => None, } } _ => { // non string/char fields are delegated to num_format 
num_format::num_format(&field, pf_arg) } }; if let Some(pre_min_width) = pre_min_width_opt { // if have a string, print it, ensuring minimum width is met. write!( writer, "{}", match field.min_width { Some(min_width) => { let diff: isize = min_width.abs() - pre_min_width.len() as isize; if diff > 0 { let mut final_str = String::new(); // definitely more efficient ways // to do this. let pad_before = min_width > 0; if !pad_before { final_str.push_str(&pre_min_width); } for _ in 0..diff { final_str.push(self.prefix_char); } if pad_before { final_str.push_str(&pre_min_width); } final_str } else { pre_min_width } } None => pre_min_width, } ) .ok(); } } } uucore-0.0.23/src/lib/features/tokenize/token.rs000064400000000000000000000030121046102023000176750ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Traits and enums dealing with Tokenization of printf Format String use std::io::Write; use std::iter::Peekable; use std::slice::Iter; use crate::features::tokenize::sub::Sub; use crate::features::tokenize::unescaped_text::UnescapedText; // A token object is an object that can print the expected output // of a contiguous segment of the format string, and // requires at most 1 argument pub enum Token { Sub(Sub), UnescapedText(UnescapedText), } impl Token { pub(crate) fn write(&self, writer: &mut W, args: &mut Peekable>) where W: Write, { match self { Self::Sub(sub) => sub.write(writer, args), Self::UnescapedText(unescaped_text) => unescaped_text.write(writer), } } } // A tokenizer object is an object that takes an iterator // at a position in a format string, and sees whether // it can return a token of a type it knows how to produce // if so, return the token, move the iterator past the // format string text the token represents, and if an // argument is used move the argument iter forward one // creating token of a 
format string segment should also cause // printing of that token's value. Essentially tokenizing // a whole format string will print the format string and consume // a number of arguments equal to the number of argument-using tokens uucore-0.0.23/src/lib/features/tokenize/unescaped_text.rs000064400000000000000000000226241046102023000216020ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! UnescapedText is a tokenizer impl //! for tokenizing character literals, //! and escaped character literals (of allowed escapes), //! into an unescaped text byte array // spell-checker:ignore (ToDO) retval hexchars octals printf's bvec vals coreutil addchar eval bytecode bslice use itertools::PutBackN; use std::char::from_u32; use std::io::Write; use std::process::exit; use std::str::Chars; use super::token; const EXIT_OK: i32 = 0; const EXIT_ERR: i32 = 1; // by default stdout only flushes // to console when a newline is passed. macro_rules! 
write_and_flush { ($writer:expr, $($args:tt)+) => ({ write!($writer, "{}", $($args)+).ok(); $writer.flush().ok(); }) } fn flush_bytes(writer: &mut W, bslice: &[u8]) where W: Write, { writer.write_all(bslice).ok(); writer.flush().ok(); } #[derive(Default)] pub struct UnescapedText(Vec); impl UnescapedText { fn new() -> Self { Self::default() } // take an iterator to the format string // consume between min and max chars // and return it as a base-X number fn base_to_u32(min_chars: u8, max_chars: u8, base: u32, it: &mut PutBackN) -> u32 { let mut retval: u32 = 0; let mut found = 0; while found < max_chars { // if end of input break let nc = it.next(); match nc { Some(digit) => { // if end of hexchars break match digit.to_digit(base) { Some(d) => { found += 1; retval *= base; retval += d; } None => { it.put_back(digit); break; } } } None => { break; } } } if found < min_chars { // only ever expected for hex println!("missing hexadecimal number in escape"); //todo stderr exit(EXIT_ERR); } retval } // validates against valid // IEC 10646 vals - these values // are pinned against the more popular // printf so as to not disrupt when // dropped-in as a replacement. 
fn validate_iec(val: u32, eight_word: bool) { let mut preface = 'u'; let leading_zeros = if eight_word { preface = 'U'; 8 } else { 4 }; let err_msg = format!("invalid universal character name {preface}{val:0leading_zeros$x}"); if (val < 159 && (val != 36 && val != 64 && val != 96)) || (val > 55296 && val < 57343) { println!("{err_msg}"); //todo stderr exit(EXIT_ERR); } } // pass an iterator that succeeds an '/', // and process the remaining character // adding the unescaped bytes // to the passed byte_vec // in subs_mode change octal behavior fn handle_escaped( writer: &mut W, byte_vec: &mut Vec, it: &mut PutBackN, subs_mode: bool, ) where W: Write, { let ch = it.next().unwrap_or('\\'); match ch { '0'..='9' | 'x' => { let min_len = 1; let mut max_len = 2; let mut base = 16; let ignore = false; match ch { 'x' => {} e @ '0'..='9' => { max_len = 3; base = 8; // in practice, gnu coreutils printf // interprets octals without a // leading zero in %b // but it only skips leading zeros // in %b mode. // if we ever want to match gnu coreutil // printf's docs instead of its behavior // we'd set this to true. // if subs_mode && e != '0' // { ignore = true; } if !subs_mode || e != '0' { it.put_back(ch); } } _ => {} } if ignore { byte_vec.push(ch as u8); } else { let val = (Self::base_to_u32(min_len, max_len, base, it) % 256) as u8; byte_vec.push(val); let bvec = [val]; flush_bytes(writer, &bvec); } } e => { // only for hex and octal // is byte encoding specified. // otherwise, why not leave the door open // for other encodings unless it turns out // a bottleneck. 
let mut s = String::new(); let ch = match e { '\\' => '\\', '"' => '"', 'n' => '\n', 'r' => '\r', 't' => '\t', // bell 'a' => '\x07', // backspace 'b' => '\x08', // vertical tab 'v' => '\x0B', // form feed 'f' => '\x0C', // escape character 'e' => '\x1B', 'c' => exit(EXIT_OK), 'u' | 'U' => { let len = match e { 'u' => 4, /* 'U' | */ _ => 8, }; let val = Self::base_to_u32(len, len, 16, it); Self::validate_iec(val, false); if let Some(c) = from_u32(val) { c } else { '-' } } _ => { s.push('\\'); ch } }; s.push(ch); write_and_flush!(writer, &s); byte_vec.extend(s.bytes()); } }; } // take an iterator to a string, // and return a wrapper around a Vec of unescaped bytes // break on encounter of sub symbol ('%[^%]') unless called // through %b subst. #[allow(clippy::cognitive_complexity)] pub fn from_it_core( writer: &mut W, it: &mut PutBackN, subs_mode: bool, ) -> Option where W: Write, { let mut addchar = false; let mut new_text = Self::new(); let mut tmp_str = String::new(); { let new_vec: &mut Vec = &mut (new_text.0); while let Some(ch) = it.next() { if !addchar { addchar = true; } match ch { x if x != '\\' && x != '%' => { // lazy branch eval // remember this fn could be called // many times in a single exec through %b write_and_flush!(writer, ch); tmp_str.push(ch); } '\\' => { // the literal may be a literal bytecode // and not valid utf-8. Str only supports // valid utf-8. // if we find the unnecessary drain // on non hex or octal escapes is costly // then we can make it faster/more complex // with as-necessary draining. 
if !tmp_str.is_empty() { new_vec.extend(tmp_str.bytes()); tmp_str = String::new(); } Self::handle_escaped(writer, new_vec, it, subs_mode); } x if x == '%' && !subs_mode => { if let Some(follow) = it.next() { if follow == '%' { write_and_flush!(writer, ch); tmp_str.push(ch); } else { it.put_back(follow); it.put_back(ch); break; } } else { it.put_back(ch); break; } } _ => { write_and_flush!(writer, ch); tmp_str.push(ch); } } } if !tmp_str.is_empty() { new_vec.extend(tmp_str.bytes()); } } if addchar { Some(token::Token::UnescapedText(new_text)) } else { None } } } impl UnescapedText { pub(crate) fn write(&self, writer: &mut W) where W: Write, { flush_bytes(writer, &self.0[..]); } } uucore-0.0.23/src/lib/features/update_control.rs000064400000000000000000000100531046102023000177520ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Implement GNU-style update functionality. //! //! - pre-defined [`clap`-Arguments][1] for inclusion in utilities that //! implement updates //! - determination of the [update mode][2] //! //! Update-functionality is implemented by the following utilities: //! //! - `cp` //! - `mv` //! //! //! [1]: arguments //! [2]: `determine_update_mode()` //! //! //! # Usage example //! //! ``` //! #[macro_use] //! extern crate uucore; //! //! use clap::{Command, Arg, ArgMatches}; //! use uucore::update_control::{self, UpdateMode}; //! //! fn main() { //! let matches = Command::new("command") //! .arg(update_control::arguments::update()) //! .arg(update_control::arguments::update_no_args()) //! .get_matches_from(vec![ //! "command", "--update=older" //! ]); //! //! let update_mode = update_control::determine_update_mode(&matches); //! //! // handle cases //! if update_mode == UpdateMode::ReplaceIfOlder { //! // do //! } else { //! unreachable!() //! } //! } //! 
``` use clap::ArgMatches; // Available update mode #[derive(Clone, Debug, Eq, PartialEq)] pub enum UpdateMode { // --update=`all`, `` ReplaceAll, // --update=`none` ReplaceNone, // --update=`older` // -u ReplaceIfOlder, } pub mod arguments { use clap::ArgAction; pub static OPT_UPDATE: &str = "update"; pub static OPT_UPDATE_NO_ARG: &str = "u"; // `--update` argument, defaults to `older` if no values are provided pub fn update() -> clap::Arg { clap::Arg::new(OPT_UPDATE) .long("update") .help("move only when the SOURCE file is newer than the destination file or when the destination file is missing") .value_parser(["none", "all", "older"]) .num_args(0..=1) .default_missing_value("older") .require_equals(true) .overrides_with("update") .action(clap::ArgAction::Set) } // `-u` argument pub fn update_no_args() -> clap::Arg { clap::Arg::new(OPT_UPDATE_NO_ARG) .short('u') .help("like --update but does not accept an argument") .action(ArgAction::SetTrue) } } /// Determine the "mode" for the update operation to perform, if any. /// /// Parses the backup options and converts them to an instance of /// `UpdateMode` for further processing. /// /// Takes [`clap::ArgMatches`] as argument which **must** contain the options /// from [`arguments::update()`] or [`arguments::update_no_args()`]. Otherwise /// the `ReplaceAll` mode is returned unconditionally. /// /// # Examples /// /// Here's how one would integrate the update mode determination into an /// application. 
/// /// ``` /// #[macro_use] /// extern crate uucore; /// use uucore::update_control::{self, UpdateMode}; /// use clap::{Command, Arg, ArgMatches}; /// /// fn main() { /// let matches = Command::new("command") /// .arg(update_control::arguments::update()) /// .arg(update_control::arguments::update_no_args()) /// .get_matches_from(vec![ /// "command", "--update=all" /// ]); /// /// let update_mode = update_control::determine_update_mode(&matches); /// assert_eq!(update_mode, UpdateMode::ReplaceAll) /// } pub fn determine_update_mode(matches: &ArgMatches) -> UpdateMode { if let Some(mode) = matches.get_one::(arguments::OPT_UPDATE) { match mode.as_str() { "all" => UpdateMode::ReplaceAll, "none" => UpdateMode::ReplaceNone, "older" => UpdateMode::ReplaceIfOlder, _ => unreachable!("other args restricted by clap"), } } else if matches.get_flag(arguments::OPT_UPDATE_NO_ARG) { // short form of this option is equivalent to using --update=older UpdateMode::ReplaceIfOlder } else { // no option was present UpdateMode::ReplaceAll } } uucore-0.0.23/src/lib/features/utmpx.rs000064400000000000000000000246521046102023000161170ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // //! Aims to provide platform-independent methods to obtain login records //! //! **ONLY** support linux, macos and freebsd for the time being //! //! # Examples: //! //! ``` //! use uucore::utmpx::Utmpx; //! for ut in Utmpx::iter_all_records() { //! if ut.is_user_process() { //! println!("{}: {}", ut.host(), ut.user()) //! } //! } //! ``` //! //! Specifying the path to login record: //! //! ``` //! use uucore::utmpx::Utmpx; //! for ut in Utmpx::iter_all_records_from("/some/where/else") { //! if ut.is_user_process() { //! println!("{}: {}", ut.host(), ut.user()) //! } //! } //! 
// Converts a fixed-size C character array into an owned `String`.
//
// The array is read up to the first NUL byte, or to its end in case the
// c_char array doesn't end with NULL. The terminator test is `!= 0` rather
// than `> 0`: where `c_char` is a signed type every byte >= 0x80 is negative,
// so a `> 0` test would cut the string at the first non-ASCII byte.
//
// The collected bytes are decoded as UTF-8; invalid sequences become U+FFFD
// instead of being reinterpreted byte-by-byte as separate characters.
macro_rules! chars2string {
    ($arr:expr) => {{
        let bytes: Vec<u8> = $arr
            .iter()
            .take_while(|&&c| c != 0)
            .map(|&c| c as u8)
            .collect();
        String::from_utf8_lossy(&bytes).into_owned()
    }};
}
mod ut { pub static DEFAULT_FILE: &str = ""; pub const UT_LINESIZE: usize = 16; pub const UT_NAMESIZE: usize = 32; pub const UT_IDSIZE: usize = 8; pub const UT_HOSTSIZE: usize = 128; pub use libc::BOOT_TIME; pub use libc::DEAD_PROCESS; pub use libc::EMPTY; pub use libc::INIT_PROCESS; pub use libc::LOGIN_PROCESS; pub use libc::NEW_TIME; pub use libc::OLD_TIME; pub use libc::SHUTDOWN_TIME; pub use libc::USER_PROCESS; } #[cfg(target_os = "netbsd")] mod ut { pub static DEFAULT_FILE: &str = "/var/run/utmpx"; pub const ACCOUNTING: usize = 9; pub const SHUTDOWN_TIME: usize = 11; pub use libc::_UTX_HOSTSIZE as UT_HOSTSIZE; pub use libc::_UTX_IDSIZE as UT_IDSIZE; pub use libc::_UTX_LINESIZE as UT_LINESIZE; pub use libc::_UTX_USERSIZE as UT_NAMESIZE; pub use libc::ACCOUNTING; pub use libc::DEAD_PROCESS; pub use libc::EMPTY; pub use libc::INIT_PROCESS; pub use libc::LOGIN_PROCESS; pub use libc::NEW_TIME; pub use libc::OLD_TIME; pub use libc::RUN_LVL; pub use libc::SIGNATURE; pub use libc::USER_PROCESS; } pub struct Utmpx { inner: utmpx, } impl Utmpx { /// A.K.A. ut.ut_type pub fn record_type(&self) -> i16 { self.inner.ut_type } /// A.K.A. ut.ut_pid pub fn pid(&self) -> i32 { self.inner.ut_pid } /// A.K.A. ut.ut_id pub fn terminal_suffix(&self) -> String { chars2string!(self.inner.ut_id) } /// A.K.A. ut.ut_user pub fn user(&self) -> String { chars2string!(self.inner.ut_user) } /// A.K.A. ut.ut_host pub fn host(&self) -> String { chars2string!(self.inner.ut_host) } /// A.K.A. ut.ut_line pub fn tty_device(&self) -> String { chars2string!(self.inner.ut_line) } /// A.K.A. ut.ut_tv pub fn login_time(&self) -> time::OffsetDateTime { #[allow(clippy::unnecessary_cast)] let ts_nanos: i128 = (1_000_000_000_i64 * self.inner.ut_tv.tv_sec as i64 + 1_000_i64 * self.inner.ut_tv.tv_usec as i64) .into(); let local_offset = time::OffsetDateTime::now_local().unwrap().offset(); time::OffsetDateTime::from_unix_timestamp_nanos(ts_nanos) .unwrap() .to_offset(local_offset) } /// A.K.A. 
ut.ut_exit /// /// Return (e_termination, e_exit) #[cfg(target_os = "linux")] pub fn exit_status(&self) -> (i16, i16) { (self.inner.ut_exit.e_termination, self.inner.ut_exit.e_exit) } /// A.K.A. ut.ut_exit /// /// Return (0, 0) on Non-Linux platform #[cfg(not(target_os = "linux"))] pub fn exit_status(&self) -> (i16, i16) { (0, 0) } /// Consumes the `Utmpx`, returning the underlying C struct utmpx pub fn into_inner(self) -> utmpx { self.inner } pub fn is_user_process(&self) -> bool { !self.user().is_empty() && self.record_type() == USER_PROCESS } /// Canonicalize host name using DNS pub fn canon_host(&self) -> IOResult { let host = self.host(); let (hostname, display) = host.split_once(':').unwrap_or((&host, "")); if !hostname.is_empty() { use dns_lookup::{getaddrinfo, AddrInfoHints}; const AI_CANONNAME: i32 = 0x2; let hints = AddrInfoHints { flags: AI_CANONNAME, ..AddrInfoHints::default() }; if let Ok(sockets) = getaddrinfo(Some(hostname), None, Some(hints)) { let sockets = sockets.collect::>>()?; for socket in sockets { if let Some(ai_canonname) = socket.canonname { return Ok(if display.is_empty() { ai_canonname } else { format!("{ai_canonname}:{display}") }); } } } else { // GNU coreutils has this behavior return Ok(hostname.to_string()); } } Ok(host.to_string()) } /// Iterate through all the utmp records. /// /// This will use the default location, or the path [`Utmpx::iter_all_records_from`] /// was most recently called with. /// /// Only one instance of [`UtmpxIter`] may be active at a time. This /// function will block as long as one is still active. Beware! pub fn iter_all_records() -> UtmpxIter { let iter = UtmpxIter::new(); unsafe { // This can technically fail, and it would be nice to detect that, // but it doesn't return anything so we'd have to do nasty things // with errno. setutxent(); } iter } /// Iterate through all the utmp records from a specific file. /// /// No failure is reported or detected. 
/// /// This function affects subsequent calls to [`Utmpx::iter_all_records`]. /// /// The same caveats as for [`Utmpx::iter_all_records`] apply. pub fn iter_all_records_from>(path: P) -> UtmpxIter { let iter = UtmpxIter::new(); let path = CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); unsafe { // In glibc, utmpxname() only fails if there's not enough memory // to copy the string. // Solaris returns 1 on success instead of 0. Supposedly there also // exist systems where it returns void. // GNU who on Debian seems to output nothing if an invalid filename // is specified, no warning or anything. // So this function is pretty crazy and we don't try to detect errors. // Not much we can do besides pray. utmpxname(path.as_ptr()); setutxent(); } iter } } // On some systems these functions are not thread-safe. On others they're // thread-local. Therefore we use a mutex to allow only one guard to exist at // a time, and make sure UtmpxIter cannot be sent across threads. // // I believe the only technical memory unsafety that could happen is a data // race while copying the data out of the pointer returned by getutxent(), but // ordinary race conditions are also very much possible. static LOCK: Lazy> = Lazy::new(|| Mutex::new(())); /// Iterator of login records pub struct UtmpxIter { #[allow(dead_code)] guard: MutexGuard<'static, ()>, /// Ensure UtmpxIter is !Send. Technically redundant because MutexGuard /// is also !Send. phantom: PhantomData>, } impl UtmpxIter { fn new() -> Self { // PoisonErrors can safely be ignored let guard = LOCK.lock().unwrap_or_else(|err| err.into_inner()); Self { guard, phantom: PhantomData, } } } impl Iterator for UtmpxIter { type Item = Utmpx; fn next(&mut self) -> Option { unsafe { let res = getutxent(); if res.is_null() { None } else { // The data behind this pointer will be replaced by the next // call to getutxent(), so we have to read it now. // All the strings live inline in the struct as arrays, which // makes things easier. 
/// Compares the non-digit parts of a version.
/// Special cases: ~ are before everything else, even ends ("a~" < "a")
/// Letters are before non-letters
///
/// Returns `Ordering::Equal` only when both inputs are identical.
fn version_non_digit_cmp(a: &str, b: &str) -> Ordering {
    let mut a_chars = a.chars();
    let mut b_chars = b.chars();
    loop {
        match (a_chars.next(), b_chars.next()) {
            // identical characters decide nothing, keep going
            (Some(c1), Some(c2)) if c1 == c2 => {}
            (None, None) => return Ordering::Equal,
            // a tilde sorts before everything, including the end of the string
            (_, Some('~')) => return Ordering::Greater,
            (Some('~'), _) => return Ordering::Less,
            // otherwise the shorter string comes first
            (None, Some(_)) => return Ordering::Less,
            (Some(_), None) => return Ordering::Greater,
            // letters sort before non-letters
            (Some(c1), Some(c2)) if c1.is_ascii_alphabetic() && !c2.is_ascii_alphabetic() => {
                return Ordering::Less
            }
            (Some(c1), Some(c2)) if !c1.is_ascii_alphabetic() && c2.is_ascii_alphabetic() => {
                return Ordering::Greater
            }
            (Some(c1), Some(c2)) => return c1.cmp(&c2),
        }
    }
}

/// Remove file endings matching the regex (\.[A-Za-z~][A-Za-z0-9~]*)*$
fn remove_file_ending(a: &str) -> &str {
    // Byte index of the dot that starts the candidate ending, if any.
    let mut ending_start = None;
    let mut prev_was_dot = false;
    for (idx, ch) in a.char_indices() {
        if ch == '.' {
            // A dot directly after a dot restarts the ending at the later dot.
            if ending_start.is_none() || prev_was_dot {
                ending_start = Some(idx);
            }
            prev_was_dot = true;
        } else if prev_was_dot {
            prev_was_dot = false;
            // The first character after a dot must be a letter or a tilde.
            if !ch.is_ascii_alphabetic() && ch != '~' {
                ending_start = None;
            }
        } else if !ch.is_ascii_alphanumeric() && ch != '~' {
            ending_start = None;
        }
    }
    // A trailing dot is not an ending.
    if prev_was_dot {
        ending_start = None;
    }
    match ending_start {
        Some(start) => &a[..start],
        None => a,
    }
}

/// Compares two strings as version strings.
///
/// Byte-identical strings are equal. Otherwise the empty string, `.`, `..`
/// and names starting with a dot sort first, in that order. The remaining
/// text is compared as alternating runs of non-digits (see
/// `version_non_digit_cmp`) and digits; digit runs are compared numerically
/// with leading zeros ignored and without parsing, so they may be
/// arbitrarily long. File endings are stripped before the comparison unless
/// both strings would become identical by doing so.
pub fn version_cmp(mut a: &str, mut b: &str) -> Ordering {
    let str_cmp = a.cmp(b);
    if str_cmp == Ordering::Equal {
        return str_cmp;
    }

    // Special cases:
    // 1. Empty strings
    match (a.is_empty(), b.is_empty()) {
        (true, false) => return Ordering::Less,
        (false, true) => return Ordering::Greater,
        (true, true) => unreachable!(),
        (false, false) => {}
    }
    // 2. Dots
    match (a == ".", b == ".") {
        (true, false) => return Ordering::Less,
        (false, true) => return Ordering::Greater,
        (true, true) => unreachable!(),
        (false, false) => {}
    }
    // 3. Two Dots
    match (a == "..", b == "..") {
        (true, false) => return Ordering::Less,
        (false, true) => return Ordering::Greater,
        (true, true) => unreachable!(),
        (false, false) => {}
    }
    // 4. Strings starting with a dot
    match (a.starts_with('.'), b.starts_with('.')) {
        (true, false) => return Ordering::Less,
        (false, true) => return Ordering::Greater,
        (true, true) => {
            // Strip the leading dot for later comparisons
            a = &a[1..];
            b = &b[1..];
        }
        _ => {}
    }

    // Try to strip file extensions
    let (mut a, mut b) = match (remove_file_ending(a), remove_file_ending(b)) {
        (a_stripped, b_stripped) if a_stripped == b_stripped => {
            // If both would be the same after stripping file extensions, don't strip them.
            (a, b)
        }
        stripped => stripped,
    };

    // 1. Compare leading non-numerical part
    // 2. Compare leading numerical part
    // 3. Repeat
    while !a.is_empty() || !b.is_empty() {
        let a_numerical_start = a.find(|c: char| c.is_ascii_digit()).unwrap_or(a.len());
        let b_numerical_start = b.find(|c: char| c.is_ascii_digit()).unwrap_or(b.len());

        let a_str = &a[..a_numerical_start];
        let b_str = &b[..b_numerical_start];

        match version_non_digit_cmp(a_str, b_str) {
            Ordering::Equal => {}
            ord => return ord,
        }

        // Each string is advanced by its own prefix length.
        a = &a[a_numerical_start..];
        b = &b[b_numerical_start..];

        let a_numerical_end = a.find(|c: char| !c.is_ascii_digit()).unwrap_or(a.len());
        let b_numerical_end = b.find(|c: char| !c.is_ascii_digit()).unwrap_or(b.len());

        let a_str = a[..a_numerical_end].trim_start_matches('0');
        let b_str = b[..b_numerical_end].trim_start_matches('0');

        // Without leading zeros a longer digit run is the larger number;
        // runs of equal length compare correctly as plain strings.
        match a_str.len().cmp(&b_str.len()) {
            Ordering::Equal => {}
            ord => return ord,
        }

        match a_str.cmp(b_str) {
            Ordering::Equal => {}
            ord => return ord,
        }

        a = &a[a_numerical_end..];
        b = &b[b_numerical_end..];
    }

    Ordering::Equal
}
Ordering::Greater); // Numbers assert_eq!( version_cmp("100", "20"), Ordering::Greater, "Greater numbers are greater even if they start with a smaller digit", ); assert_eq!( version_cmp("20", "20"), Ordering::Equal, "Equal numbers are equal" ); assert_eq!( version_cmp("15", "200"), Ordering::Less, "Small numbers are smaller" ); // Comparing numbers with other characters assert_eq!( version_cmp("1000", "apple"), Ordering::Less, "Numbers are sorted before other characters" ); assert_eq!( // spell-checker:disable-next-line version_cmp("file1000", "fileapple"), Ordering::Less, "Numbers in the middle of the name are sorted before other characters" ); // Leading zeroes assert_eq!( version_cmp("012", "12"), Ordering::Equal, "A single leading zero does not make a difference" ); assert_eq!( version_cmp("000800", "0000800"), Ordering::Equal, "Multiple leading zeros do not make a difference" ); // Numbers and other characters combined assert_eq!(version_cmp("ab10", "aa11"), Ordering::Greater); assert_eq!( version_cmp("aa10", "aa11"), Ordering::Less, "Numbers after other characters are handled correctly." ); assert_eq!( version_cmp("aa2", "aa100"), Ordering::Less, "Numbers after alphabetical characters are handled correctly." ); assert_eq!( version_cmp("aa10bb", "aa11aa"), Ordering::Less, "Number is used even if alphabetical characters after it differ." ); assert_eq!( version_cmp("aa10aa0010", "aa11aa1"), Ordering::Less, "Second number is ignored if the first number differs." ); assert_eq!( version_cmp("aa10aa0010", "aa10aa1"), Ordering::Greater, "Second number is used if the rest is equal." ); assert_eq!( version_cmp("aa10aa0010", "aa00010aa1"), Ordering::Greater, "Second number is used if the rest is equal up to leading zeroes of the first number." 
); assert_eq!( version_cmp("aa10aa0022", "aa010aa022"), Ordering::Equal, "Test multiple numeric values with leading zeros" ); assert_eq!( version_cmp("file-1.4", "file-1.13"), Ordering::Less, "Periods are handled as normal text, not as a decimal point." ); // Greater than u64::Max // u64 == 18446744073709551615 so this should be plenty: // 20000000000000000000000 assert_eq!( version_cmp("aa2000000000000000000000bb", "aa002000000000000000000001bb"), Ordering::Less, "Numbers larger than u64::MAX are handled correctly without crashing" ); assert_eq!( version_cmp("aa2000000000000000000000bb", "aa002000000000000000000000bb"), Ordering::Equal, "Leading zeroes for numbers larger than u64::MAX are \ handled correctly without crashing" ); assert_eq!( version_cmp(" a", "a"), Ordering::Greater, "Whitespace is after letters because letters are before non-letters" ); assert_eq!( version_cmp("a~", "ab"), Ordering::Less, "A tilde is before other letters" ); assert_eq!( version_cmp("a~", "a"), Ordering::Less, "A tilde is before the line end" ); assert_eq!( version_cmp("~", ""), Ordering::Greater, "A tilde is after the empty string" ); assert_eq!( version_cmp(".f", ".1"), Ordering::Greater, "if both start with a dot it is ignored for the comparison" ); // The following tests are incompatible with GNU as of 2021/06. // I think that's because of a bug in GNU, reported as https://lists.gnu.org/archive/html/bug-coreutils/2021-06/msg00045.html assert_eq!( version_cmp("a..a", "a.+"), Ordering::Less, ".a is stripped before the comparison" ); assert_eq!( version_cmp("a.", "a+"), Ordering::Greater, ". is not stripped before the comparison" ); assert_eq!( version_cmp("a\0a", "a"), Ordering::Greater, "NULL bytes are handled comparison" ); } } uucore-0.0.23/src/lib/features/wide.rs000064400000000000000000000020661046102023000156650ustar 00000000000000// This file is part of the uutils coreutils package. 
// // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. use std::ffi::{OsStr, OsString}; use std::os::windows::ffi::{OsStrExt, OsStringExt}; pub trait ToWide { fn to_wide(&self) -> Vec; fn to_wide_null(&self) -> Vec; } impl ToWide for T where T: AsRef, { fn to_wide(&self) -> Vec { self.as_ref().encode_wide().collect() } fn to_wide_null(&self) -> Vec { self.as_ref().encode_wide().chain(Some(0)).collect() } } pub trait FromWide { fn from_wide(wide: &[u16]) -> Self; fn from_wide_null(wide: &[u16]) -> Self; } impl FromWide for String { fn from_wide(wide: &[u16]) -> Self { OsString::from_wide(wide).to_string_lossy().into_owned() } fn from_wide_null(wide: &[u16]) -> Self { let len = wide.iter().take_while(|&&c| c != 0).count(); OsString::from_wide(&wide[..len]) .to_string_lossy() .into_owned() } } uucore-0.0.23/src/lib/features.rs000064400000000000000000000030341046102023000147310ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // features ~ feature-gated modules (core/bundler file) #[cfg(feature = "backup-control")] pub mod backup_control; #[cfg(feature = "encoding")] pub mod encoding; #[cfg(feature = "fs")] pub mod fs; #[cfg(feature = "fsext")] pub mod fsext; #[cfg(feature = "lines")] pub mod lines; #[cfg(feature = "memo")] pub mod memo; #[cfg(feature = "quoting-style")] pub mod quoting_style; #[cfg(feature = "ranges")] pub mod ranges; #[cfg(feature = "ringbuffer")] pub mod ringbuffer; #[cfg(feature = "sum")] pub mod sum; #[cfg(feature = "memo")] mod tokenize; #[cfg(feature = "update-control")] pub mod update_control; #[cfg(feature = "version-cmp")] pub mod version_cmp; // * (platform-specific) feature-gated modules // ** non-windows (i.e. 
Unix + Fuchsia) #[cfg(all(not(windows), feature = "mode"))] pub mod mode; // ** unix-only #[cfg(all(unix, feature = "entries"))] pub mod entries; #[cfg(all(unix, feature = "perms"))] pub mod perms; #[cfg(all(unix, feature = "pipes"))] pub mod pipes; #[cfg(all(unix, feature = "process"))] pub mod process; #[cfg(all(unix, not(target_os = "fuchsia"), feature = "signals"))] pub mod signals; #[cfg(all( unix, not(target_os = "android"), not(target_os = "fuchsia"), not(target_os = "redox"), not(target_env = "musl"), feature = "utmpx" ))] pub mod utmpx; // ** windows-only #[cfg(all(windows, feature = "wide"))] pub mod wide; uucore-0.0.23/src/lib/lib.rs000064400000000000000000000233421046102023000136650ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // library ~ (core/bundler file) // * feature-gated external crates (re-shared as public internal modules) #[cfg(feature = "libc")] pub extern crate libc; #[cfg(all(feature = "windows-sys", target_os = "windows"))] pub extern crate windows_sys; //## internal modules mod features; // feature-gated code modules mod macros; // crate macros (macro_rules-type; exported to `crate::...`) mod mods; // core cross-platform modules mod parser; // string parsing modules pub use uucore_procs::*; // * cross-platform modules pub use crate::mods::display; pub use crate::mods::error; pub use crate::mods::line_ending; pub use crate::mods::os; pub use crate::mods::panic; // * string parsing modules pub use crate::parser::parse_glob; pub use crate::parser::parse_size; pub use crate::parser::parse_time; pub use crate::parser::shortcut_value_parser; // * feature-gated modules #[cfg(feature = "backup-control")] pub use crate::features::backup_control; #[cfg(feature = "encoding")] pub use crate::features::encoding; #[cfg(feature = "fs")] pub use crate::features::fs; #[cfg(feature = "fsext")] pub use 
/// Execute utility code for `util`.
///
/// This macro expands to a main function that invokes the `uumain` function in `util`
/// Exits with code returned by `uumain`.
///
/// Library items are referenced through `$crate`, so the expansion does not
/// depend on the name under which the calling crate imports this library.
#[macro_export]
macro_rules! bin {
    ($util:ident) => {
        pub fn main() {
            use std::io::Write;
            // suppress extraneous error output for SIGPIPE failures/panics
            $crate::panic::mute_sigpipe_panic();
            // execute utility code
            let code = $util::uumain($crate::args_os());
            // (defensively) flush stdout for utility prior to exit
            std::io::stdout().flush().expect("could not flush stdout");
            std::process::exit(code);
        }
    };
}
/// Convenience conversions for iterators over `OsString` arguments.
pub trait Args: Iterator<Item = OsString> + Sized {
    /// Collects the iterator into a `Vec<String>`, lossily converting the `OsString`s to `Strings`.
    fn collect_lossy(self) -> Vec<String> {
        let mut converted = Vec::new();
        for arg in self {
            converted.push(arg.to_string_lossy().into_owned());
        }
        converted
    }

    /// Collects the iterator into a `Vec<String>`, removing any elements that contain invalid encoding.
    fn collect_ignore(self) -> Vec<String> {
        self.map(OsString::into_string)
            .filter_map(Result::ok)
            .collect()
    }
}

impl<I> Args for I where I: Iterator<Item = OsString> + Sized {}
prompt_yes( ($($args:tt)+) => ({ use std::io::Write; eprint!("{}: ", uucore::util_name()); eprint!($($args)+); eprint!(" "); uucore::crash_if_err!(1, std::io::stderr().flush()); uucore::read_yes() }) ); #[cfg(test)] mod tests { use super::*; use std::ffi::OsStr; fn make_os_vec(os_str: &OsStr) -> Vec { vec![ OsString::from("test"), OsString::from("สวัสดี"), // spell-checker:disable-line os_str.to_os_string(), ] } #[cfg(any(unix, target_os = "redox"))] fn test_invalid_utf8_args_lossy(os_str: &OsStr) { // assert our string is invalid utf8 assert!(os_str.to_os_string().into_string().is_err()); let test_vec = make_os_vec(os_str); let collected_to_str = test_vec.clone().into_iter().collect_lossy(); // conservation of length - when accepting lossy conversion no arguments may be dropped assert_eq!(collected_to_str.len(), test_vec.len()); // first indices identical for index in 0..2 { assert_eq!(collected_to_str[index], test_vec[index].to_str().unwrap()); } // lossy conversion for string with illegal encoding is done assert_eq!( *collected_to_str[2], os_str.to_os_string().to_string_lossy() ); } #[cfg(any(unix, target_os = "redox"))] fn test_invalid_utf8_args_ignore(os_str: &OsStr) { // assert our string is invalid utf8 assert!(os_str.to_os_string().into_string().is_err()); let test_vec = make_os_vec(os_str); let collected_to_str = test_vec.clone().into_iter().collect_ignore(); // assert that the broken entry is filtered out assert_eq!(collected_to_str.len(), test_vec.len() - 1); // assert that the unbroken indices are converted as expected for index in 0..2 { assert_eq!( collected_to_str.get(index).unwrap(), test_vec.get(index).unwrap().to_str().unwrap() ); } } #[test] fn valid_utf8_encoding_args() { // create a vector containing only correct encoding let test_vec = make_os_vec(&OsString::from("test2")); // expect complete conversion without losses, even when lossy conversion is accepted let _ = test_vec.into_iter().collect_lossy(); } #[cfg(any(unix, target_os = "redox"))] 
#[test] fn invalid_utf8_args_unix() { use std::os::unix::ffi::OsStrExt; let source = [0x66, 0x6f, 0x80, 0x6f]; let os_str = OsStr::from_bytes(&source[..]); test_invalid_utf8_args_lossy(os_str); test_invalid_utf8_args_ignore(os_str); } #[test] fn test_format_usage() { assert_eq!(format_usage("expr EXPRESSION"), "expr EXPRESSION"); assert_eq!( format_usage("expr EXPRESSION\nexpr OPTION"), "expr EXPRESSION\n expr OPTION" ); } } uucore-0.0.23/src/lib/macros.rs000064400000000000000000000142241046102023000144020ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // TODO fix broken links #![allow(rustdoc::broken_intra_doc_links)] //! Macros for the uucore utilities. //! //! This module bundles all macros used across the uucore utilities. These //! include macros for reporting errors in various formats, aborting program //! execution and more. //! //! To make use of all macros in this module, they must be imported like so: //! //! ```ignore //! #[macro_use] //! extern crate uucore; //! ``` //! //! Alternatively, you can import single macros by importing them through their //! fully qualified name like this: //! //! ```no_run //! use uucore::{show, crash}; //! ``` //! //! Here's an overview of the macros sorted by purpose //! //! - Print errors //! - From types implementing [`crate::error::UError`]: [`show!`], //! [`show_if_err!`] //! - From custom messages: [`show_error!`] //! - Print warnings: [`show_warning!`] //! - Terminate util execution //! - Crash program: [`crash!`], [`crash_if_err!`] // spell-checker:ignore sourcepath targetpath rustdoc use std::sync::atomic::AtomicBool; // This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
/// Whether we were called as a multicall binary (`coreutils `) pub static UTILITY_IS_SECOND_ARG: AtomicBool = AtomicBool::new(false); //==== /// Display a [`crate::error::UError`] and set global exit code. /// /// Prints the error message contained in an [`crate::error::UError`] to stderr /// and sets the exit code through [`crate::error::set_exit_code`]. The printed /// error message is prepended with the calling utility's name. A call to this /// macro will not finish program execution. /// /// # Examples /// /// The following example would print a message "Some error occurred" and set /// the utility's exit code to 2. /// /// ``` /// # #[macro_use] /// # extern crate uucore; /// /// use uucore::error::{self, USimpleError}; /// /// fn main() { /// let err = USimpleError::new(2, "Some error occurred."); /// show!(err); /// assert_eq!(error::get_exit_code(), 2); /// } /// ``` /// /// If not using [`crate::error::UError`], one may achieve the same behavior /// like this: /// /// ``` /// # #[macro_use] /// # extern crate uucore; /// /// use uucore::error::set_exit_code; /// /// fn main() { /// set_exit_code(2); /// show_error!("Some error occurred."); /// } /// ``` #[macro_export] macro_rules! show( ($err:expr) => ({ let e = $err; $crate::error::set_exit_code(e.code()); eprintln!("{}: {}", $crate::util_name(), e); }) ); /// Display an error and set global exit code in error case. /// /// Wraps around [`show!`] and takes a [`crate::error::UResult`] instead of a /// [`crate::error::UError`] type. This macro invokes [`show!`] if the /// [`crate::error::UResult`] is an `Err`-variant. 
This can be invoked directly /// on the result of a function call, like in the `install` utility: /// /// ```ignore /// show_if_err!(copy(sourcepath, &targetpath, b)); /// ``` /// /// # Examples /// /// ```ignore /// # #[macro_use] /// # extern crate uucore; /// # use uucore::error::{UError, UIoError, UResult, USimpleError}; /// /// # fn main() { /// let is_ok = Ok(1); /// // This does nothing at all /// show_if_err!(is_ok); /// /// let is_err = Err(USimpleError::new(1, "I'm an error").into()); /// // Calls `show!` on the contained USimpleError /// show_if_err!(is_err); /// # } /// ``` /// /// #[macro_export] macro_rules! show_if_err( ($res:expr) => ({ if let Err(e) = $res { show!(e); } }) ); /// Show an error to stderr in a similar style to GNU coreutils. /// /// Takes a [`format!`]-like input and prints it to stderr. The output is /// prepended with the current utility's name. /// /// # Examples /// /// ``` /// # #[macro_use] /// # extern crate uucore; /// # fn main() { /// show_error!("Couldn't apply {} to {}", "foo", "bar"); /// # } /// ``` #[macro_export] macro_rules! show_error( ($($args:tt)+) => ({ eprint!("{}: ", $crate::util_name()); eprintln!($($args)+); }) ); /// Print a warning message to stderr. /// /// Takes [`format!`]-compatible input and prepends it with the current /// utility's name and "warning: " before printing to stderr. /// /// # Examples /// /// ``` /// # #[macro_use] /// # extern crate uucore; /// # fn main() { /// // outputs : warning: Couldn't apply foo to bar /// show_warning!("Couldn't apply {} to {}", "foo", "bar"); /// # } /// ``` #[macro_export] macro_rules! show_warning( ($($args:tt)+) => ({ eprint!("{}: warning: ", $crate::util_name()); eprintln!($($args)+); }) ); /// Display an error and [`std::process::exit`] /// /// Displays the provided error message using [`show_error!`], then invokes /// [`std::process::exit`] with the provided exit code. 
///
/// # Examples
///
/// ```should_panic
/// # #[macro_use]
/// # extern crate uucore;
/// # fn main() {
/// // outputs <name>: Couldn't apply foo to bar
/// // and terminates execution
/// crash!(1, "Couldn't apply {} to {}", "foo", "bar");
/// # }
/// ```
#[macro_export]
macro_rules! crash(
    ($exit_code:expr, $($args:tt)+) => ({
        $crate::show_error!($($args)+);
        // Absolute path: immune to a local item named `std` at the call site.
        ::std::process::exit($exit_code);
    })
);

/// Unwrap a [`std::result::Result`], crashing instead of panicking.
///
/// If the result is an `Ok`-variant, returns the value contained inside. If it
/// is an `Err`-variant, invokes [`crash!`] with the formatted error instead.
///
/// # Examples
///
/// ```should_panic
/// # #[macro_use]
/// # extern crate uucore;
/// # fn main() {
/// let is_ok: Result<u32, &str> = Ok(1);
/// // Does nothing
/// crash_if_err!(1, is_ok);
///
/// let is_err: Result<u32, &str> = Err("This didn't work...");
/// // Calls `crash!`
/// crash_if_err!(1, is_err);
/// # }
/// ```
#[macro_export]
macro_rules! crash_if_err(
    ($exit_code:expr, $exp:expr) => (
        match $exp {
            Ok(m) => m,
            // `crash!` never returns, so this arm takes the type of the `Ok` arm.
            Err(f) => $crate::crash!($exit_code, "{}", f),
        }
    )
);
// uucore-0.0.23/src/lib/mods/display.rs000064400000000000000000000052221046102023000155230ustar 00000000000000
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

/// Utilities for printing paths, with special attention paid to special
/// characters and invalid unicode.
///
/// For displaying paths in informational messages use `Quotable::quote`. This
/// will wrap quotes around the filename and add the necessary escapes to make
/// it copy/paste-able into a shell.
///
/// For writing raw paths to stdout when the output should not be quoted or escaped,
/// use `println_verbatim`. This will preserve invalid unicode.
///
/// # Examples
/// ```
/// use std::path::Path;
/// use uucore::display::{Quotable, println_verbatim};
///
/// let path = Path::new("foo/bar.baz");
///
/// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'"
/// println_verbatim(path)?; // Prints "foo/bar.baz"
/// # Ok::<(), std::io::Error>(())
/// ```
use std::ffi::OsStr;
use std::io::{self, Write as IoWrite};

#[cfg(unix)]
use std::os::unix::ffi::OsStrExt;
#[cfg(target_os = "wasi")]
use std::os::wasi::ffi::OsStrExt;

// These used to be defined here, but they live in their own crate now.
pub use os_display::{Quotable, Quoted};

/// Print a path (or `OsStr`-like object) directly to stdout, with a trailing newline,
/// without losing any information if its encoding is invalid.
///
/// This function is appropriate for commands where printing paths is the point and the
/// output is likely to be captured, like `pwd` and `basename`. For informational output
/// use `Quotable::quote`.
///
/// FIXME: This is lossy on Windows. It could probably be implemented using some low-level
/// API that takes UTF-16, without going through io::Write. This is not a big priority
/// because broken filenames are much rarer on Windows than on Unix.
///
/// # Errors
///
/// Returns any error reported while writing to stdout.
pub fn println_verbatim<S: AsRef<OsStr>>(text: S) -> io::Result<()> {
    // Lock once so the text and its newline are written under the same lock.
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    #[cfg(any(unix, target_os = "wasi"))]
    {
        // On these platforms an `OsStr` is a plain byte string: write it untouched.
        stdout.write_all(text.as_ref().as_bytes())?;
        stdout.write_all(b"\n")?;
    }
    #[cfg(not(any(unix, target_os = "wasi")))]
    {
        writeln!(stdout, "{}", std::path::Path::new(text.as_ref()).display())?;
    }
    Ok(())
}

/// Like `println_verbatim`, without the trailing newline.
pub fn print_verbatim>(text: S) -> io::Result<()> { let mut stdout = io::stdout(); #[cfg(any(unix, target_os = "wasi"))] { stdout.write_all(text.as_ref().as_bytes()) } #[cfg(not(any(unix, target_os = "wasi")))] { write!(stdout, "{}", std::path::Path::new(text.as_ref()).display()) } } uucore-0.0.23/src/lib/mods/error.rs000064400000000000000000000552061046102023000152160ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // TODO fix broken links #![allow(rustdoc::broken_intra_doc_links)] //! All utils return exit with an exit code. Usually, the following scheme is used: //! * `0`: succeeded //! * `1`: minor problems //! * `2`: major problems //! //! This module provides types to reconcile these exit codes with idiomatic Rust error //! handling. This has a couple advantages over manually using [`std::process::exit`]: //! 1. It enables the use of `?`, `map_err`, `unwrap_or`, etc. in `uumain`. //! 1. It encourages the use of [`UResult`]/[`Result`] in functions in the utils. //! 1. The error messages are largely standardized across utils. //! 1. Standardized error messages can be created from external result types //! (i.e. [`std::io::Result`] & `clap::ClapResult`). //! 1. [`set_exit_code`] takes away the burden of manually tracking exit codes for non-fatal errors. //! //! # Usage //! The signature of a typical util should be: //! ```ignore //! fn uumain(args: impl uucore::Args) -> UResult<()> { //! ... //! } //! ``` //! [`UResult`] is a simple wrapper around [`Result`] with a custom error trait: [`UError`]. The //! most important difference with types implementing [`std::error::Error`] is that [`UError`]s //! can specify the exit code of the program when they are returned from `uumain`: //! * When `Ok` is returned, the code set with [`set_exit_code`] is used as exit code. If //! 
[`set_exit_code`] was not used, then `0` is used. //! * When `Err` is returned, the code corresponding with the error is used as exit code and the //! error message is displayed. //! //! Additionally, the errors can be displayed manually with the [`show`] and [`show_if_err`] macros: //! ```ignore //! let res = Err(USimpleError::new(1, "Error!!")); //! show_if_err!(res); //! // or //! if let Err(e) = res { //! show!(e); //! } //! ``` //! //! **Note**: The [`show`] and [`show_if_err`] macros set the exit code of the program using //! [`set_exit_code`]. See the documentation on that function for more information. //! //! # Guidelines //! * Use error types from `uucore` where possible. //! * Add error types to `uucore` if an error appears in multiple utils. //! * Prefer proper custom error types over [`ExitCode`] and [`USimpleError`]. //! * [`USimpleError`] may be used in small utils with simple error handling. //! * Using [`ExitCode`] is not recommended but can be useful for converting utils to use //! [`UResult`]. // spell-checker:ignore uioerror rustdoc use clap; use std::{ error::Error, fmt::{Display, Formatter}, sync::atomic::{AtomicI32, Ordering}, }; static EXIT_CODE: AtomicI32 = AtomicI32::new(0); /// Get the last exit code set with [`set_exit_code`]. /// The default value is `0`. pub fn get_exit_code() -> i32 { EXIT_CODE.load(Ordering::SeqCst) } /// Set the exit code for the program if `uumain` returns `Ok(())`. /// /// This function is most useful for non-fatal errors, for example when applying an operation to /// multiple files: /// ```ignore /// use uucore::error::{UResult, set_exit_code}; /// /// fn uumain(args: impl uucore::Args) -> UResult<()> { /// ... /// for file in files { /// let res = some_operation_that_might_fail(file); /// match res { /// Ok() => {}, /// Err(_) => set_exit_code(1), /// } /// } /// Ok(()) // If any of the operations failed, 1 is returned. 
/// } /// ``` pub fn set_exit_code(code: i32) { EXIT_CODE.store(code, Ordering::SeqCst); } /// Result type that should be returned by all utils. pub type UResult = Result>; /// Custom errors defined by the utils and `uucore`. /// /// All errors should implement [`std::error::Error`], [`std::fmt::Display`] and /// [`std::fmt::Debug`] and have an additional `code` method that specifies the /// exit code of the program if the error is returned from `uumain`. /// /// An example of a custom error from `ls`: /// /// ``` /// use uucore::{ /// display::Quotable, /// error::{UError, UResult} /// }; /// use std::{ /// error::Error, /// fmt::{Display, Debug}, /// path::PathBuf /// }; /// /// #[derive(Debug)] /// enum LsError { /// InvalidLineWidth(String), /// NoMetadata(PathBuf), /// } /// /// impl UError for LsError { /// fn code(&self) -> i32 { /// match self { /// LsError::InvalidLineWidth(_) => 2, /// LsError::NoMetadata(_) => 1, /// } /// } /// } /// /// impl Error for LsError {} /// /// impl Display for LsError { /// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { /// match self { /// LsError::InvalidLineWidth(s) => write!(f, "invalid line width: {}", s.quote()), /// LsError::NoMetadata(p) => write!(f, "could not open file: {}", p.quote()), /// } /// } /// } /// ``` /// /// The main routine would look like this: /// /// ```ignore /// #[uucore::main] /// pub fn uumain(args: impl uucore::Args) -> UResult<()> { /// // Perform computations here ... /// return Err(LsError::InvalidLineWidth(String::from("test")).into()) /// } /// ``` /// /// The call to `into()` is required to convert the `LsError` to /// [`Box`]. The implementation for `From` is provided automatically. /// /// A crate like [`quick_error`](https://crates.io/crates/quick-error) might /// also be used, but will still require an `impl` for the `code` method. pub trait UError: Error + Send { /// Error code of a custom error. 
/// /// Set a return value for each variant of an enum-type to associate an /// error code (which is returned to the system shell) with an error /// variant. /// /// # Example /// /// ``` /// use uucore::{ /// display::Quotable, /// error::UError /// }; /// use std::{ /// error::Error, /// fmt::{Display, Debug}, /// path::PathBuf /// }; /// /// #[derive(Debug)] /// enum MyError { /// Foo(String), /// Bar(PathBuf), /// Bing(), /// } /// /// impl UError for MyError { /// fn code(&self) -> i32 { /// match self { /// MyError::Foo(_) => 2, /// // All other errors yield the same error code, there's no /// // need to list them explicitly. /// _ => 1, /// } /// } /// } /// /// impl Error for MyError {} /// /// impl Display for MyError { /// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { /// use MyError as ME; /// match self { /// ME::Foo(s) => write!(f, "Unknown Foo: {}", s.quote()), /// ME::Bar(p) => write!(f, "Couldn't find Bar: {}", p.quote()), /// ME::Bing() => write!(f, "Exterminate!"), /// } /// } /// } /// ``` fn code(&self) -> i32 { 1 } /// Print usage help to a custom error. /// /// Return true or false to control whether a short usage help is printed /// below the error message. The usage help is in the format: "Try `{name} /// --help` for more information." and printed only if `true` is returned. 
/// /// # Example /// /// ``` /// use uucore::{ /// display::Quotable, /// error::UError /// }; /// use std::{ /// error::Error, /// fmt::{Display, Debug}, /// path::PathBuf /// }; /// /// #[derive(Debug)] /// enum MyError { /// Foo(String), /// Bar(PathBuf), /// Bing(), /// } /// /// impl UError for MyError { /// fn usage(&self) -> bool { /// match self { /// // This will have a short usage help appended /// MyError::Bar(_) => true, /// // These matches won't have a short usage help appended /// _ => false, /// } /// } /// } /// /// impl Error for MyError {} /// /// impl Display for MyError { /// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { /// use MyError as ME; /// match self { /// ME::Foo(s) => write!(f, "Unknown Foo: {}", s.quote()), /// ME::Bar(p) => write!(f, "Couldn't find Bar: {}", p.quote()), /// ME::Bing() => write!(f, "Exterminate!"), /// } /// } /// } /// ``` fn usage(&self) -> bool { false } } impl From for Box where T: UError + 'static, { fn from(t: T) -> Self { Box::new(t) } } /// A simple error type with an exit code and a message that implements [`UError`]. 
/// /// ``` /// use uucore::error::{UResult, USimpleError}; /// let err = USimpleError { code: 1, message: "error!".into()}; /// let res: UResult<()> = Err(err.into()); /// // or using the `new` method: /// let res: UResult<()> = Err(USimpleError::new(1, "error!")); /// ``` #[derive(Debug)] pub struct USimpleError { pub code: i32, pub message: String, } impl USimpleError { #[allow(clippy::new_ret_no_self)] pub fn new>(code: i32, message: S) -> Box { Box::new(Self { code, message: message.into(), }) } } impl Error for USimpleError {} impl Display for USimpleError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { self.message.fmt(f) } } impl UError for USimpleError { fn code(&self) -> i32 { self.code } } #[derive(Debug)] pub struct UUsageError { pub code: i32, pub message: String, } impl UUsageError { #[allow(clippy::new_ret_no_self)] pub fn new>(code: i32, message: S) -> Box { Box::new(Self { code, message: message.into(), }) } } impl Error for UUsageError {} impl Display for UUsageError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { self.message.fmt(f) } } impl UError for UUsageError { fn code(&self) -> i32 { self.code } fn usage(&self) -> bool { true } } /// Wrapper type around [`std::io::Error`]. /// /// The messages displayed by [`UIoError`] should match the error messages displayed by GNU /// coreutils. /// /// There are two ways to construct this type: with [`UIoError::new`] or by calling the /// [`FromIo::map_err_context`] method on a [`std::io::Result`] or [`std::io::Error`]. /// ``` /// use uucore::{ /// display::Quotable, /// error::{FromIo, UResult, UIoError, UError} /// }; /// use std::fs::File; /// use std::path::Path; /// let path = Path::new("test.txt"); /// /// // Manual construction /// let e: Box = UIoError::new( /// std::io::ErrorKind::NotFound, /// format!("cannot access {}", path.quote()) /// ); /// let res: UResult<()> = Err(e.into()); /// /// // Converting from an `std::io::Error`. 
/// let res: UResult = File::open(path).map_err_context(|| format!("cannot access {}", path.quote())); /// ``` #[derive(Debug)] pub struct UIoError { context: Option, inner: std::io::Error, } impl UIoError { #[allow(clippy::new_ret_no_self)] pub fn new>(kind: std::io::ErrorKind, context: S) -> Box { Box::new(Self { context: Some(context.into()), inner: kind.into(), }) } } impl UError for UIoError {} impl Error for UIoError {} impl Display for UIoError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { use std::io::ErrorKind::*; let message; let message = if self.inner.raw_os_error().is_some() { // These are errors that come directly from the OS. // We want to normalize their messages across systems, // and we want to strip the "(os error X)" suffix. match self.inner.kind() { NotFound => "No such file or directory", PermissionDenied => "Permission denied", ConnectionRefused => "Connection refused", ConnectionReset => "Connection reset", ConnectionAborted => "Connection aborted", NotConnected => "Not connected", AddrInUse => "Address in use", AddrNotAvailable => "Address not available", BrokenPipe => "Broken pipe", AlreadyExists => "Already exists", WouldBlock => "Would block", InvalidInput => "Invalid input", InvalidData => "Invalid data", TimedOut => "Timed out", WriteZero => "Write zero", Interrupted => "Interrupted", UnexpectedEof => "Unexpected end of file", _ => { // TODO: When the new error variants // (https://github.com/rust-lang/rust/issues/86442) // are stabilized, we should add them to the match statement. message = strip_errno(&self.inner); &message } } } else { // These messages don't need as much normalization, and the above // messages wouldn't always be a good substitute. // For example, ErrorKind::NotFound doesn't necessarily mean it was // a file that was not found. // There are also errors with entirely custom messages. 
message = self.inner.to_string(); &message }; if let Some(ctx) = &self.context { write!(f, "{ctx}: {message}") } else { write!(f, "{message}") } } } /// Strip the trailing " (os error XX)" from io error strings. pub fn strip_errno(err: &std::io::Error) -> String { let mut msg = err.to_string(); if let Some(pos) = msg.find(" (os error ") { msg.truncate(pos); } msg } /// Enables the conversion from [`std::io::Error`] to [`UError`] and from [`std::io::Result`] to /// [`UResult`]. pub trait FromIo { fn map_err_context(self, context: impl FnOnce() -> String) -> T; } impl FromIo> for std::io::Error { fn map_err_context(self, context: impl FnOnce() -> String) -> Box { Box::new(UIoError { context: Some((context)()), inner: self, }) } } impl FromIo> for std::io::Result { fn map_err_context(self, context: impl FnOnce() -> String) -> UResult { self.map_err(|e| e.map_err_context(context) as Box) } } impl FromIo> for std::io::ErrorKind { fn map_err_context(self, context: impl FnOnce() -> String) -> Box { Box::new(UIoError { context: Some((context)()), inner: std::io::Error::new(self, ""), }) } } impl From for UIoError { fn from(f: std::io::Error) -> Self { Self { context: None, inner: f, } } } impl From for Box { fn from(f: std::io::Error) -> Self { let u_error: UIoError = f.into(); Box::new(u_error) as Self } } /// Enables the conversion from [`Result`] to [`UResult`]. 
/// /// # Examples /// /// ``` /// use uucore::error::FromIo; /// use nix::errno::Errno; /// /// let nix_err = Err::<(), nix::Error>(Errno::EACCES); /// let uio_result = nix_err.map_err_context(|| String::from("fix me please!")); /// /// // prints "fix me please!: Permission denied" /// println!("{}", uio_result.unwrap_err()); /// ``` #[cfg(unix)] impl FromIo> for Result { fn map_err_context(self, context: impl FnOnce() -> String) -> UResult { self.map_err(|e| { Box::new(UIoError { context: Some((context)()), inner: std::io::Error::from_raw_os_error(e as i32), }) as Box }) } } #[cfg(unix)] impl FromIo> for nix::Error { fn map_err_context(self, context: impl FnOnce() -> String) -> UResult { Err(Box::new(UIoError { context: Some((context)()), inner: std::io::Error::from_raw_os_error(self as i32), }) as Box) } } #[cfg(unix)] impl From for UIoError { fn from(f: nix::Error) -> Self { Self { context: None, inner: std::io::Error::from_raw_os_error(f as i32), } } } #[cfg(unix)] impl From for Box { fn from(f: nix::Error) -> Self { let u_error: UIoError = f.into(); Box::new(u_error) as Self } } /// Shorthand to construct [`UIoError`]-instances. /// /// This macro serves as a convenience call to quickly construct instances of /// [`UIoError`]. It takes: /// /// - An instance of [`std::io::Error`] /// - A `format!`-compatible string and /// - An arbitrary number of arguments to the format string /// /// In exactly this order. It is equivalent to the more verbose code seen in the /// example. /// /// # Examples /// /// ``` /// use uucore::error::UIoError; /// use uucore::uio_error; /// /// let io_err = std::io::Error::new( /// std::io::ErrorKind::PermissionDenied, "fix me please!" 
/// ); /// /// let uio_err = UIoError::new( /// io_err.kind(), /// format!("Error code: {}", 2) /// ); /// /// let other_uio_err = uio_error!(io_err, "Error code: {}", 2); /// /// // prints "fix me please!: Permission denied" /// println!("{}", uio_err); /// // prints "Error code: 2: Permission denied" /// println!("{}", other_uio_err); /// ``` /// /// The [`std::fmt::Display`] impl of [`UIoError`] will then ensure that an /// appropriate error message relating to the actual error kind of the /// [`std::io::Error`] is appended to whatever error message is defined in /// addition (as secondary argument). /// /// If you want to show only the error message for the [`std::io::ErrorKind`] /// that's contained in [`UIoError`], pass the second argument as empty string: /// /// ``` /// use uucore::error::UIoError; /// use uucore::uio_error; /// /// let io_err = std::io::Error::new( /// std::io::ErrorKind::PermissionDenied, "fix me please!" /// ); /// /// let other_uio_err = uio_error!(io_err, ""); /// /// // prints: ": Permission denied" /// println!("{}", other_uio_err); /// ``` //#[macro_use] #[macro_export] macro_rules! uio_error( ($err:expr, $($args:tt)+) => ({ UIoError::new( $err.kind(), format!($($args)+) ) }) ); /// A special error type that does not print any message when returned from /// `uumain`. Especially useful for porting utilities to using [`UResult`]. /// /// There are two ways to construct an [`ExitCode`]: /// ``` /// use uucore::error::{ExitCode, UResult}; /// // Explicit /// let res: UResult<()> = Err(ExitCode(1).into()); /// /// // Using into on `i32`: /// let res: UResult<()> = Err(1.into()); /// ``` /// This type is especially useful for a trivial conversion from utils returning [`i32`] to /// returning [`UResult`]. 
#[derive(Debug)] pub struct ExitCode(pub i32); impl ExitCode { #[allow(clippy::new_ret_no_self)] pub fn new(code: i32) -> Box { Box::new(Self(code)) } } impl Error for ExitCode {} impl Display for ExitCode { fn fmt(&self, _: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { Ok(()) } } impl UError for ExitCode { fn code(&self) -> i32 { self.0 } } impl From for Box { fn from(i: i32) -> Self { ExitCode::new(i) } } /// A wrapper for `clap::Error` that implements [`UError`] /// /// Contains a custom error code. When `Display::fmt` is called on this struct /// the [`clap::Error`] will be printed _directly to `stdout` or `stderr`_. /// This is because `clap` only supports colored output when it prints directly. /// /// [`ClapErrorWrapper`] is generally created by calling the /// [`UClapError::with_exit_code`] method on [`clap::Error`] or using the [`From`] /// implementation from [`clap::Error`] to `Box`, which constructs /// a [`ClapErrorWrapper`] with an exit code of `1`. /// /// ```rust /// use uucore::error::{ClapErrorWrapper, UError, UClapError}; /// let command = clap::Command::new("test"); /// let result: Result<_, ClapErrorWrapper> = command.try_get_matches().with_exit_code(125); /// /// let command = clap::Command::new("test"); /// let result: Result<_, Box> = command.try_get_matches().map_err(Into::into); /// ``` #[derive(Debug)] pub struct ClapErrorWrapper { code: i32, error: clap::Error, } /// Extension trait for `clap::Error` to adjust the exit code. 
pub trait UClapError { fn with_exit_code(self, code: i32) -> T; } impl From for Box { fn from(e: clap::Error) -> Self { Box::new(ClapErrorWrapper { code: 1, error: e }) } } impl UClapError for clap::Error { fn with_exit_code(self, code: i32) -> ClapErrorWrapper { ClapErrorWrapper { code, error: self } } } impl UClapError> for Result { fn with_exit_code(self, code: i32) -> Result { self.map_err(|e| e.with_exit_code(code)) } } impl UError for ClapErrorWrapper { fn code(&self) -> i32 { // If the error is a DisplayHelp or DisplayVersion variant, // we don't want to apply the custom error code, but leave // it 0. if let clap::error::ErrorKind::DisplayHelp | clap::error::ErrorKind::DisplayVersion = self.error.kind() { 0 } else { self.code } } } impl Error for ClapErrorWrapper {} // This is abuse of the Display trait impl Display for ClapErrorWrapper { fn fmt(&self, _f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { self.error.print().unwrap(); Ok(()) } } #[cfg(test)] mod tests { #[test] #[cfg(unix)] fn test_nix_error_conversion() { use super::{FromIo, UIoError}; use nix::errno::Errno; use std::io::ErrorKind; for (nix_error, expected_error_kind) in [ (Errno::EACCES, ErrorKind::PermissionDenied), (Errno::ENOENT, ErrorKind::NotFound), (Errno::EEXIST, ErrorKind::AlreadyExists), ] { let error = UIoError::from(nix_error); assert_eq!(expected_error_kind, error.inner.kind()); } assert_eq!( "test: Permission denied", Err::<(), nix::Error>(Errno::EACCES) .map_err_context(|| String::from("test")) .unwrap_err() .to_string() ); } } uucore-0.0.23/src/lib/mods/line_ending.rs000064400000000000000000000027731046102023000163410ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Provides consistent newline/zero terminator handling for `-z`/`--zero` flags. //! //! See the [`LineEnding`] struct for more information. 
use std::fmt::Display;

/// Line ending of either `\n` or `\0`
///
/// Used by various utilities that have the option to separate lines by nul
/// characters instead of `\n`. Usually, this is specified with the `-z` or
/// `--zero` flag.
///
/// The [`Display`] implementation writes the character corresponding to the
/// variant to the formatter.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum LineEnding {
    #[default]
    Newline = b'\n',
    Nul = 0,
}

impl Display for LineEnding {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Newline => writeln!(f),
            Self::Nul => write!(f, "\0"),
        }
    }
}

/// The byte value of a [`LineEnding`] is its `#[repr(u8)]` discriminant.
impl From<LineEnding> for u8 {
    fn from(line_ending: LineEnding) -> Self {
        line_ending as Self
    }
}

impl LineEnding {
    /// Create a [`LineEnding`] from a `-z`/`--zero` flag
    ///
    /// If `is_zero_terminated` is true, [`LineEnding::Nul`] is returned,
    /// otherwise [`LineEnding::Newline`].
    pub fn from_zero_flag(is_zero_terminated: bool) -> Self {
        if is_zero_terminated {
            Self::Nul
        } else {
            Self::Newline
        }
    }
}
// uucore-0.0.23/src/lib/mods/os.rs000064400000000000000000000020761046102023000145030ustar 00000000000000
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
/// Test if the program is running under WSL // ref: @@ // spell-checker:ignore (path) osrelease pub fn is_wsl_1() -> bool { #[cfg(target_os = "linux")] { if is_wsl_2() { return false; } if let Ok(b) = std::fs::read("/proc/sys/kernel/osrelease") { if let Ok(s) = std::str::from_utf8(&b) { let a = s.to_ascii_lowercase(); return a.contains("microsoft") || a.contains("wsl"); } } } false } pub fn is_wsl_2() -> bool { #[cfg(target_os = "linux")] { if let Ok(b) = std::fs::read("/proc/sys/kernel/osrelease") { if let Ok(s) = std::str::from_utf8(&b) { let a = s.to_ascii_lowercase(); return a.contains("wsl2"); } } } false } uucore-0.0.23/src/lib/mods/panic.rs000064400000000000000000000027531046102023000151560ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Custom panic hooks that allow silencing certain types of errors. //! //! Use the [`mute_sigpipe_panic`] function to silence panics caused by //! broken pipe errors. This can happen when a process is still //! producing data when the consuming process terminates and closes the //! pipe. For example, //! //! ```sh //! $ seq inf | head -n 1 //! ``` //! use std::panic; use std::panic::PanicInfo; /// Decide whether a panic was caused by a broken pipe (SIGPIPE) error. fn is_broken_pipe(info: &PanicInfo) -> bool { if let Some(res) = info.payload().downcast_ref::() { if res.contains("BrokenPipe") || res.contains("Broken pipe") { return true; } } false } /// Terminate without error on panics that occur due to broken pipe errors. 
/// /// For background discussions on `SIGPIPE` handling, see /// /// * `` /// * `` /// * `` /// * `` /// * `` /// pub fn mute_sigpipe_panic() { let hook = panic::take_hook(); panic::set_hook(Box::new(move |info| { if !is_broken_pipe(info) { hook(info); } })); } uucore-0.0.23/src/lib/mods.rs000064400000000000000000000004771046102023000140650ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // mods ~ cross-platforms modules (core/bundler file) pub mod display; pub mod error; pub mod line_ending; pub mod os; pub mod panic; uucore-0.0.23/src/lib/parser/parse_glob.rs000064400000000000000000000070621046102023000165310ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Parsing a glob Pattern from a string. //! //! Use the [`from_str`] function to parse a [`Pattern`] from a string. // cSpell:words fnmatch use glob::{Pattern, PatternError}; fn fix_negation(glob: &str) -> String { let mut chars = glob.chars().collect::>(); let mut i = 0; // Add 3 to prevent out of bounds in loop while i + 3 < chars.len() { if chars[i] == '[' && chars[i + 1] == '^' { match chars[i + 3..].iter().position(|x| *x == ']') { None => (), Some(j) => { chars[i + 1] = '!'; i += j + 4; continue; } } } i += 1; } chars.into_iter().collect::() } /// Parse a glob Pattern from a string. /// /// This function amends the input string to replace any caret or circumflex /// character (^) used to negate a set of characters with an exclamation mark /// (!), which adapts rust's glob matching to function the way the GNU utils' /// fnmatch does. 
/// /// # Examples /// /// ```rust /// use std::time::Duration; /// use uucore::parse_glob::from_str; /// assert!(!from_str("[^abc]").unwrap().matches("a")); /// assert!(from_str("[^abc]").unwrap().matches("x")); /// ``` pub fn from_str(glob: &str) -> Result { Pattern::new(&fix_negation(glob)) } #[cfg(test)] mod tests { use super::*; #[test] fn test_from_str() { assert_eq!(from_str("[^abc]").unwrap(), Pattern::new("[!abc]").unwrap()); } #[test] fn test_fix_negation() { // Happy/Simple case assert_eq!(fix_negation("[^abc]"), "[!abc]"); // Should fix negations in a long regex assert_eq!(fix_negation("foo[abc] bar[^def]"), "foo[abc] bar[!def]"); // Should fix multiple negations in a regex assert_eq!(fix_negation("foo[^abc]bar[^def]"), "foo[!abc]bar[!def]"); // Should fix negation of the single character ] assert_eq!(fix_negation("[^]]"), "[!]]"); // Should fix negation of the single character ^ assert_eq!(fix_negation("[^^]"), "[!^]"); // Should fix negation of the space character assert_eq!(fix_negation("[^ ]"), "[! 
]"); // Complicated patterns assert_eq!(fix_negation("[^][]"), "[!][]"); assert_eq!(fix_negation("[^[]]"), "[![]]"); // More complex patterns that should be replaced assert_eq!(fix_negation("[[]] [^a]"), "[[]] [!a]"); assert_eq!(fix_negation("[[] [^a]"), "[[] [!a]"); assert_eq!(fix_negation("[]] [^a]"), "[]] [!a]"); } #[test] fn test_fix_negation_should_not_amend() { assert_eq!(fix_negation("abc"), "abc"); // Regex specifically matches either [ or ^ assert_eq!(fix_negation("[[^]"), "[[^]"); // Regex that specifically matches either space or ^ assert_eq!(fix_negation("[ ^]"), "[ ^]"); // Regex that specifically matches either [, space or ^ assert_eq!(fix_negation("[[ ^]"), "[[ ^]"); assert_eq!(fix_negation("[ [^]"), "[ [^]"); // Invalid globs (according to rust's glob implementation) will remain unamended assert_eq!(fix_negation("[^]"), "[^]"); assert_eq!(fix_negation("[^"), "[^"); assert_eq!(fix_negation("[][^]"), "[][^]"); // Issue #4479 assert_eq!(fix_negation("ààà[^"), "ààà[^"); } } uucore-0.0.23/src/lib/parser/parse_size.rs000064400000000000000000000574121046102023000165640ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (ToDO) hdsf ghead gtail ACDBK hexdigit use std::error::Error; use std::fmt; use std::num::IntErrorKind; use crate::display::Quotable; /// Parser for sizes in SI or IEC units (multiples of 1000 or 1024 bytes). /// /// The [`Parser::parse`] function performs the parse. #[derive(Default)] pub struct Parser<'parser> { /// Whether to treat the suffix "B" as meaning "bytes". 
pub capital_b_bytes: bool,
    /// Whether to treat "b" as a "byte count" instead of "block"
    pub b_byte_count: bool,
    /// Whitelist for the suffix
    pub allow_list: Option<&'parser [&'parser str]>,
    /// Default unit when no suffix is provided
    pub default_unit: Option<&'parser str>,
}

/// Radix of the numeric part of a size string, as detected from its prefix.
enum NumberSystem {
    Decimal,
    Octal,
    Hexadecimal,
}

impl<'parser> Parser<'parser> {
    /// Restrict the accepted unit suffixes to `allow_list`.
    pub fn with_allow_list(&mut self, allow_list: &'parser [&str]) -> &mut Self {
        self.allow_list = Some(allow_list);
        self
    }

    /// Set the unit that is assumed when the size string has no suffix.
    pub fn with_default_unit(&mut self, default_unit: &'parser str) -> &mut Self {
        self.default_unit = Some(default_unit);
        self
    }

    /// Choose whether a trailing "b" is stripped as a byte-count marker.
    pub fn with_b_byte_count(&mut self, value: bool) -> &mut Self {
        self.b_byte_count = value;
        self
    }

    /// Parse a size string into a number of bytes.
    ///
    /// A size string comprises an integer and an optional unit. The unit
    /// may be K, M, G, T, P, E, Z, Y, R or Q (powers of 1024), or KB, MB,
    /// etc. (powers of 1000), or b which is 512.
    /// Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
    ///
    /// The integer may be written in decimal, in hexadecimal with a `0x`
    /// prefix, or in octal with a leading `0`. A unit without an integer
    /// (e.g. "K") is read as one of that unit.
    ///
    /// # Errors
    ///
    /// Will return `ParseSizeError` if it's not possible to parse this
    /// string into a number, e.g. if the string does not begin with a
    /// numeral, or if the unit is not one of the supported units described
    /// in the preceding section.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use uucore::parse_size::Parser;
    /// let parser = Parser {
    ///     default_unit: Some("M"),
    ///     ..Default::default()
    /// };
    /// assert_eq!(Ok(123 * 1024 * 1024), parser.parse("123M")); // M is 1024^2
    /// assert_eq!(Ok(123 * 1024 * 1024), parser.parse("123")); // default unit set to "M" on parser instance
    /// assert_eq!(Ok(9 * 1000), parser.parse("9kB")); // kB is 1000
    /// assert_eq!(Ok(2 * 1024), parser.parse("2K")); // K is 1024
    /// assert_eq!(Ok(44251 * 1024), parser.parse("0xACDBK")); // 0xACDB is 44251 in decimal
    /// ```
    pub fn parse(&self, size: &str) -> Result<u128, ParseSizeError> {
        if size.is_empty() {
            return Err(ParseSizeError::parse_failure(size));
        }

        let number_system = Self::determine_number_system(size);

        // Split the size argument into numeric and unit parts
        // For example, if the argument is "123K", the numeric part is "123", and
        // the unit is "K"
        let numeric_string: String = match number_system {
            // Keep the "0x" prefix so that the unit slice below starts after the digits.
            NumberSystem::Hexadecimal => size
                .chars()
                .take(2)
                .chain(size.chars().skip(2).take_while(|c| c.is_ascii_hexdigit()))
                .collect(),
            NumberSystem::Decimal | NumberSystem::Octal => {
                size.chars().take_while(|c| c.is_ascii_digit()).collect()
            }
        };
        // The numeric part is pure ASCII, so its byte length is a valid char boundary.
        let mut unit: &str = &size[numeric_string.len()..];

        if let Some(default_unit) = self.default_unit {
            // Check if `unit` is empty then assigns `default_unit` to `unit`
            if unit.is_empty() {
                unit = default_unit;
            }
        }

        // Check if `b` is a byte count and remove `b`
        if self.b_byte_count && unit.ends_with('b') {
            // If `unit` = 'b' then return error
            if numeric_string.is_empty() {
                return Err(ParseSizeError::parse_failure(size));
            }
            unit = &unit[0..unit.len() - 1];
        }

        if let Some(allow_list) = self.allow_list {
            // Check if `unit` appears in `allow_list`, if not return error
            if !allow_list.contains(&unit) && !unit.is_empty() {
                if numeric_string.is_empty() {
                    return Err(ParseSizeError::parse_failure(size));
                }
                return Err(ParseSizeError::invalid_suffix(size));
            }
        }

        // Compute the factor the unit represents.
        // empty string means the factor is 1.
        //
        // The lowercase "b" (used by `od`, `head`, `tail`, etc.) means
        // "block" and the Posix block size is 512. The uppercase "B"
        // means "byte".
        let (base, exponent): (u128, u32) = match unit {
            "" => (1, 0),
            "B" if self.capital_b_bytes => (1, 0),
            "b" => (512, 1),
            "KiB" | "kiB" | "K" | "k" => (1024, 1),
            "MiB" | "miB" | "M" | "m" => (1024, 2),
            "GiB" | "giB" | "G" | "g" => (1024, 3),
            "TiB" | "tiB" | "T" | "t" => (1024, 4),
            "PiB" | "piB" | "P" | "p" => (1024, 5),
            "EiB" | "eiB" | "E" | "e" => (1024, 6),
            "ZiB" | "ziB" | "Z" | "z" => (1024, 7),
            "YiB" | "yiB" | "Y" | "y" => (1024, 8),
            "RiB" | "riB" | "R" | "r" => (1024, 9),
            "QiB" | "qiB" | "Q" | "q" => (1024, 10),
            "KB" | "kB" => (1000, 1),
            "MB" | "mB" => (1000, 2),
            "GB" | "gB" => (1000, 3),
            "TB" | "tB" => (1000, 4),
            "PB" | "pB" => (1000, 5),
            "EB" | "eB" => (1000, 6),
            "ZB" | "zB" => (1000, 7),
            "YB" | "yB" => (1000, 8),
            "RB" | "rB" => (1000, 9),
            "QB" | "qB" => (1000, 10),
            _ if numeric_string.is_empty() => return Err(ParseSizeError::parse_failure(size)),
            _ => return Err(ParseSizeError::invalid_suffix(size)),
        };
        let factor = base.pow(exponent);

        // parse string into u128
        let number: u128 = match number_system {
            NumberSystem::Decimal => {
                if numeric_string.is_empty() {
                    // A bare unit such as "K" means one of that unit.
                    1
                } else {
                    Self::parse_number(&numeric_string, 10, size)?
                }
            }
            NumberSystem::Octal => {
                let trimmed_string = numeric_string.trim_start_matches('0');
                if trimmed_string.is_empty() {
                    // Only zeros were given before the unit (e.g. "00K"): the value is
                    // zero. Handing the empty remainder to the integer parser would fail.
                    0
                } else {
                    Self::parse_number(trimmed_string, 8, size)?
                }
            }
            NumberSystem::Hexadecimal => {
                let trimmed_string = numeric_string
                    .strip_prefix("0x")
                    .unwrap_or(&numeric_string);
                Self::parse_number(trimmed_string, 16, size)?
            }
        };

        number
            .checked_mul(factor)
            .ok_or_else(|| ParseSizeError::size_too_big(size))
    }

    /// Explicit u128 alias for `parse()`
    pub fn parse_u128(&self, size: &str) -> Result<u128, ParseSizeError> {
        self.parse(size)
    }

    /// Same as `parse()` but tries to return u64
    pub fn parse_u64(&self, size: &str) -> Result<u64, ParseSizeError> {
        let num_u128 = self.parse(size)?;
        u64::try_from(num_u128).map_err(|_| ParseSizeError::size_too_big(size))
    }

    /// Same as `parse_u64()`, except returns `u64::MAX` on overflow
    /// GNU lib/coreutils include similar functionality
    /// and GNU test suite checks this behavior for some utils (`split` for example)
    pub fn parse_u64_max(&self, size: &str) -> Result<u64, ParseSizeError> {
        match self.parse_u64(size) {
            Err(ParseSizeError::SizeTooBig(_)) => Ok(u64::MAX),
            other => other,
        }
    }

    /// Same as `parse_u64_max()`, except for u128, i.e. returns `u128::MAX` on overflow
    pub fn parse_u128_max(&self, size: &str) -> Result<u128, ParseSizeError> {
        match self.parse_u128(size) {
            Err(ParseSizeError::SizeTooBig(_)) => Ok(u128::MAX),
            other => other,
        }
    }

    /// Detect the radix of `size` from its prefix.
    ///
    /// "0x…" is hexadecimal; a leading "0" followed by at least one more digit is
    /// octal unless the whole string consists of zeros; everything else is decimal.
    fn determine_number_system(size: &str) -> NumberSystem {
        if size.len() <= 1 {
            return NumberSystem::Decimal;
        }

        if size.starts_with("0x") {
            return NumberSystem::Hexadecimal;
        }

        let num_digits = size.chars().take_while(|c| c.is_ascii_digit()).count();
        let all_zeros = size.chars().all(|c| c == '0');
        if size.starts_with('0') && num_digits > 1 && !all_zeros {
            return NumberSystem::Octal;
        }

        NumberSystem::Decimal
    }

    /// Parse `numeric_string` in the given `radix`, reporting errors against `original_size`.
    fn parse_number(
        numeric_string: &str,
        radix: u32,
        original_size: &str,
    ) -> Result<u128, ParseSizeError> {
        u128::from_str_radix(numeric_string, radix).map_err(|e| match e.kind() {
            IntErrorKind::PosOverflow => ParseSizeError::size_too_big(original_size),
            // NOTE(review): unlike `ParseSizeError::parse_failure`, this message is not
            // quoted; confirm with downstream callers before unifying the two.
            _ => ParseSizeError::ParseFailure(original_size.to_string()),
        })
    }
}

/// Parse a size string into a number of bytes
/// using Default Parser (no custom settings)
///
/// # Examples
///
/// ```rust
/// use uucore::parse_size::parse_size_u128;
/// assert_eq!(Ok(123), parse_size_u128("123"));
/// assert_eq!(Ok(9 * 1000), parse_size_u128("9kB")); // kB is 1000
/// assert_eq!(Ok(2 * 1024), parse_size_u128("2K")); // K is 1024
/// assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK"));
/// ```
pub fn parse_size_u128(size: &str) -> Result<u128, ParseSizeError> {
    Parser::default().parse(size)
}

/// Same as `parse_size_u128()`, but for u64
pub fn parse_size_u64(size: &str) -> Result<u64, ParseSizeError> {
    Parser::default().parse_u64(size)
}

#[deprecated = "Please use parse_size_u64(size: &str) -> Result<u64, ParseSizeError> OR parse_size_u128(size: &str) -> Result<u128, ParseSizeError> instead."]
pub fn parse_size(size: &str) -> Result<u64, ParseSizeError> {
    parse_size_u64(size)
}

/// Same as `parse_size_u64()`, except returns `u64::MAX` on overflow
/// GNU lib/coreutils include similar functionality
/// and GNU test suite checks this behavior for some utils
pub fn parse_size_u64_max(size: &str) -> Result<u64, ParseSizeError> {
    Parser::default().parse_u64_max(size)
}

/// Same as `parse_size_u128()`, except returns `u128::MAX` on overflow
pub fn parse_size_u128_max(size: &str) -> Result<u128, ParseSizeError> {
    Parser::default().parse_u128_max(size)
}

/// Error returned by the size parsing functions; each variant carries the message text.
#[derive(Debug, PartialEq, Eq)]
pub enum ParseSizeError {
    InvalidSuffix(String), // Suffix
    ParseFailure(String),  // Syntax
    SizeTooBig(String),    // Overflow
}

impl Error for ParseSizeError {
    fn description(&self) -> &str {
        match *self {
            Self::InvalidSuffix(ref s) => s,
            Self::ParseFailure(ref s) => s,
            Self::SizeTooBig(ref s) => s,
        }
    }
}

impl fmt::Display for ParseSizeError {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        let s = match self {
            Self::InvalidSuffix(s) | Self::ParseFailure(s) | Self::SizeTooBig(s) => s,
        };
        write!(f, "{s}")
    }
}

// FIXME: It's more idiomatic to move the formatting into the Display impl,
// but there's a lot of downstream code that constructs these errors manually
// that would be affected
impl ParseSizeError {
    fn invalid_suffix(s: &str) -> Self {
        Self::InvalidSuffix(s.quote().to_string())
    }

    fn parse_failure(s: &str) -> Self {
        // stderr on linux (GNU coreutils 8.32) (LC_ALL=C)
        // has to be handled in the respective uutils because strings differ, e.g.:
        //
        // `NUM`
        // head:     invalid number of bytes: '1fb'
        // tail:     invalid number of bytes: '1fb'
        //
        // `SIZE`
        // split:    invalid number of bytes: '1fb'
        // truncate: Invalid number: '1fb'
        //
        // `MODE`
        // stdbuf:   invalid mode '1fb'
        //
        // `SIZE`
        // sort:     invalid suffix in --buffer-size argument '1fb'
        // sort:     invalid --buffer-size argument 'fb'
        //
        // `SIZE`
        // du:       invalid suffix in --buffer-size argument '1fb'
        // du:       invalid suffix in --threshold argument '1fb'
        // du:       invalid --buffer-size argument 'fb'
        // du:       invalid --threshold argument 'fb'
        //
        // `BYTES`
        // od:       invalid suffix in --read-bytes argument '1fb'
        // od:       invalid --read-bytes argument 'fb'
        //                   --skip-bytes
        //                   --width
        //                   --strings
        // etc.
        Self::ParseFailure(s.quote().to_string())
    }

    fn size_too_big(s: &str) -> Self {
        // stderr on linux (GNU coreutils 8.32) (LC_ALL=C)
        // has to be handled in the respective uutils because strings differ, e.g.:
        //
        // head:     invalid number of bytes: '1Y': Value too large for defined data type
        // tail:     invalid number of bytes: '1Y': Value too large for defined data type
        // split:    invalid number of bytes: '1Y': Value too large for defined data type
        // truncate:          Invalid number: '1Y': Value too large for defined data type
        // stdbuf:               invalid mode '1Y': Value too large for defined data type
        // sort:     -S argument '1Y' too large
        // du:       -B argument '1Y' too large
        // od:       -N argument '1Y' too large
        // etc.
        //
        // stderr on macos (brew - GNU coreutils 8.32) also differs for the same version, e.g.:
        // ghead:   invalid number of bytes: '1Y': Value too large to be stored in data type
        // gtail:   invalid number of bytes: '1Y': Value too large to be stored in data type
        Self::SizeTooBig(format!(
            "{}: Value too large for defined data type",
            s.quote()
        ))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn variant_eq(a: &ParseSizeError, b: &ParseSizeError) -> bool {
        std::mem::discriminant(a) == std::mem::discriminant(b)
    }

    #[test]
    fn all_suffixes() {
        // Units are K,M,G,T,P,E,Z,Y,R,Q (powers of 1024) or KB,MB,... (powers of 1000).
        // Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
        let suffixes = [
            ('K', 1u32),
            ('M', 2u32),
            ('G', 3u32),
            ('T', 4u32),
            ('P', 5u32),
            ('E', 6u32),
            ('Z', 7u32),
            ('Y', 8u32),
            ('R', 9u32),
            ('Q', 10u32),
        ];

        for &(c, exp) in &suffixes {
            let s = format!("2{c}B"); // KB
            assert_eq!(Ok((2 * (1000_u128).pow(exp)) as u128), parse_size_u128(&s));
            let s = format!("2{c}"); // K
            assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u128), parse_size_u128(&s));
            let s = format!("2{c}iB"); // KiB
            assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u128), parse_size_u128(&s));
            let s = format!("2{}iB", c.to_lowercase()); // kiB
            assert_eq!(Ok((2 * (1024_u128).pow(exp)) as u128), parse_size_u128(&s));

            // suffix only
            let s = format!("{c}B"); // KB
            assert_eq!(Ok(((1000_u128).pow(exp)) as u128), parse_size_u128(&s));
            let s = format!("{c}"); // K
            assert_eq!(Ok(((1024_u128).pow(exp)) as u128), parse_size_u128(&s));
            let s = format!("{c}iB"); // KiB
            assert_eq!(Ok(((1024_u128).pow(exp)) as u128), parse_size_u128(&s));
            let s = format!("{}iB", c.to_lowercase()); // kiB
            assert_eq!(Ok(((1024_u128).pow(exp)) as u128), parse_size_u128(&s));
        }
    }

    #[test]
    #[cfg(not(target_pointer_width = "128"))]
    fn overflow_x64() {
        assert!(parse_size_u64("10000000000000000000000").is_err());
        assert!(parse_size_u64("1000000000T").is_err());
        assert!(parse_size_u64("100000P").is_err());
        assert!(parse_size_u64("100E").is_err());
assert!(parse_size_u64("1Z").is_err());
        assert!(parse_size_u64("1Y").is_err());
        assert!(parse_size_u64("1R").is_err());
        assert!(parse_size_u64("1Q").is_err());

        // Only the variant matters here, not the message it carries.
        assert!(variant_eq(
            &parse_size_u64("1Z").unwrap_err(),
            &ParseSizeError::SizeTooBig(String::new())
        ));

        // The overflow message embeds the quoted input.
        assert_eq!(
            ParseSizeError::SizeTooBig("'1Y': Value too large for defined data type".to_string()),
            parse_size_u64("1Y").unwrap_err()
        );
        assert_eq!(
            ParseSizeError::SizeTooBig("'1R': Value too large for defined data type".to_string()),
            parse_size_u64("1R").unwrap_err()
        );
        assert_eq!(
            ParseSizeError::SizeTooBig("'1Q': Value too large for defined data type".to_string()),
            parse_size_u64("1Q").unwrap_err()
        );
    }

    // Values that do not fit into u64 are clamped to `u64::MAX`.
    #[test]
    #[cfg(not(target_pointer_width = "128"))]
    fn overflow_to_max_u64() {
        assert_eq!(Ok(1_099_511_627_776), parse_size_u64_max("1T"));
        assert_eq!(Ok(1_125_899_906_842_624), parse_size_u64_max("1P"));
        assert_eq!(Ok(u64::MAX), parse_size_u64_max("18446744073709551616"));
        assert_eq!(Ok(u64::MAX), parse_size_u64_max("10000000000000000000000"));
        assert_eq!(Ok(u64::MAX), parse_size_u64_max("1Y"));
        assert_eq!(Ok(u64::MAX), parse_size_u64_max("1R"));
        assert_eq!(Ok(u64::MAX), parse_size_u64_max("1Q"));
    }

    // Values that do not fit into u128 are clamped to `u128::MAX`.
    #[test]
    #[cfg(not(target_pointer_width = "128"))]
    fn overflow_to_max_u128() {
        assert_eq!(
            Ok(12_379_400_392_853_802_748_991_242_240),
            parse_size_u128_max("10R")
        );
        assert_eq!(
            Ok(12_676_506_002_282_294_014_967_032_053_760),
            parse_size_u128_max("10Q")
        );
        assert_eq!(Ok(u128::MAX), parse_size_u128_max("1000000000000R"));
        assert_eq!(Ok(u128::MAX), parse_size_u128_max("1000000000Q"));
    }

    // A number followed by an unknown unit is reported as an invalid suffix.
    #[test]
    fn invalid_suffix() {
        let test_strings = ["5mib", "1eb", "1H"];
        for &test_string in &test_strings {
            assert_eq!(
                parse_size_u64(test_string).unwrap_err(),
                ParseSizeError::InvalidSuffix(format!("{}", test_string.quote()))
            );
        }
    }

    // Input without a leading number and without a known unit is a parse failure.
    #[test]
    fn invalid_syntax() {
        let test_strings = ["biB", "-", "+", "", "-1", "∞"];
        for &test_string in &test_strings {
            assert_eq!(
                parse_size_u64(test_string).unwrap_err(),
                ParseSizeError::ParseFailure(format!("{}", test_string.quote()))
            );
        }
    }

    #[test]
    fn b_suffix() {
        assert_eq!(Ok(3 * 512), parse_size_u64("3b")); // b is 512
    }

    #[test]
    fn no_suffix() {
        assert_eq!(Ok(1234), parse_size_u64("1234"));
        assert_eq!(Ok(0), parse_size_u64("0"));
        assert_eq!(Ok(5), parse_size_u64("5"));
        assert_eq!(Ok(999), parse_size_u64("999"));
    }

    #[test]
    fn kilobytes_suffix() {
        assert_eq!(Ok(123 * 1000), parse_size_u64("123KB")); // KB is 1000
        assert_eq!(Ok(9 * 1000), parse_size_u64("9kB")); // kB is 1000
        assert_eq!(Ok(2 * 1024), parse_size_u64("2K")); // K is 1024
        assert_eq!(Ok(0), parse_size_u64("0K"));
        assert_eq!(Ok(0), parse_size_u64("0KB"));
        assert_eq!(Ok(1000), parse_size_u64("KB"));
        assert_eq!(Ok(1024), parse_size_u64("K"));
        assert_eq!(Ok(2000), parse_size_u64("2kB"));
        assert_eq!(Ok(4000), parse_size_u64("4KB"));
    }

    #[test]
    fn megabytes_suffix() {
        assert_eq!(Ok(123 * 1024 * 1024), parse_size_u64("123M"));
        assert_eq!(Ok(123 * 1000 * 1000), parse_size_u64("123MB"));
        assert_eq!(Ok(1024 * 1024), parse_size_u64("M"));
        assert_eq!(Ok(1000 * 1000), parse_size_u64("MB"));
        assert_eq!(Ok(2 * 1_048_576), parse_size_u64("2m"));
        assert_eq!(Ok(4 * 1_048_576), parse_size_u64("4M"));
        assert_eq!(Ok(2_000_000), parse_size_u64("2mB"));
        assert_eq!(Ok(4_000_000), parse_size_u64("4MB"));
    }

    #[test]
    fn gigabytes_suffix() {
        assert_eq!(Ok(1_073_741_824), parse_size_u64("1G"));
        assert_eq!(Ok(2_000_000_000), parse_size_u64("2GB"));
    }

    // Largest units: T, P and E still fit into u64; Z, Y, R and Q are checked through the u128 API.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn x64() {
        assert_eq!(Ok(1_099_511_627_776), parse_size_u64("1T"));
        assert_eq!(Ok(1_125_899_906_842_624), parse_size_u64("1P"));
        assert_eq!(Ok(1_152_921_504_606_846_976), parse_size_u64("1E"));

        assert_eq!(Ok(1_180_591_620_717_411_303_424), parse_size_u128("1Z"));
        assert_eq!(Ok(1_208_925_819_614_629_174_706_176), parse_size_u128("1Y"));
        assert_eq!(
            Ok(1_237_940_039_285_380_274_899_124_224),
            parse_size_u128("1R")
        );
        assert_eq!(
            Ok(1_267_650_600_228_229_401_496_703_205_376),
            parse_size_u128("1Q")
        );

        assert_eq!(Ok(2_000_000_000_000), parse_size_u64("2TB"));
        assert_eq!(Ok(2_000_000_000_000_000), parse_size_u64("2PB"));
        assert_eq!(Ok(2_000_000_000_000_000_000), parse_size_u64("2EB"));

        assert_eq!(Ok(2_000_000_000_000_000_000_000), parse_size_u128("2ZB"));
        assert_eq!(
            Ok(2_000_000_000_000_000_000_000_000),
            parse_size_u128("2YB")
        );
        assert_eq!(
            Ok(2_000_000_000_000_000_000_000_000_000),
            parse_size_u128("2RB")
        );
        assert_eq!(
            Ok(2_000_000_000_000_000_000_000_000_000_000),
            parse_size_u128("2QB")
        );
    }

    // Exercises the allow list, the default unit and the `b` byte-count option.
    #[test]
    fn parse_size_options() {
        let mut parser = Parser::default();

        parser
            .with_allow_list(&["k", "K", "G", "MB", "M"])
            .with_default_unit("K");

        assert_eq!(Ok(1024), parser.parse("1"));
        assert_eq!(Ok(2 * 1024), parser.parse("2"));
        assert_eq!(Ok(1000 * 1000), parser.parse("1MB"));
        assert_eq!(Ok(1024 * 1024), parser.parse("1M"));
        assert_eq!(Ok(1024 * 1024 * 1024), parser.parse("1G"));

        // Units outside the allow list are rejected.
        assert!(parser.parse("1T").is_err());
        assert!(parser.parse("1P").is_err());
        assert!(parser.parse("1E").is_err());

        parser
            .with_allow_list(&[
                "b", "k", "K", "m", "M", "MB", "g", "G", "t", "T", "P", "E", "Z", "Y", "R", "Q",
            ])
            .with_default_unit("K")
            .with_b_byte_count(true);

        assert_eq!(Ok(1024), parser.parse("1"));
        assert_eq!(Ok(2 * 1024), parser.parse("2"));
        assert_eq!(Ok(1000 * 1000), parser.parse("1MB"));
        assert_eq!(Ok(1024 * 1024), parser.parse("1M"));
        assert_eq!(Ok(1024 * 1024 * 1024), parser.parse("1G"));
        assert_eq!(
            Ok(1_237_940_039_285_380_274_899_124_224),
            parser.parse_u128("1R")
        );
        assert_eq!(
            Ok(1_267_650_600_228_229_401_496_703_205_376),
            parser.parse_u128("1Q")
        );

        // With `b_byte_count`, a trailing "b" is dropped before the unit is looked up.
        assert_eq!(Ok(1), parser.parse("1b"));
        assert_eq!(Ok(1024), parser.parse("1024b"));
        assert_eq!(Ok(1024 * 1024 * 1024), parser.parse("1024Mb"));

        assert!(parser.parse("b").is_err());
        assert!(parser.parse("1B").is_err());
        assert!(parser.parse("B").is_err());
    }

    // A leading zero followed by more digits selects octal.
    #[test]
    fn parse_octal_size() {
        assert_eq!(Ok(63), parse_size_u64("077"));
        assert_eq!(Ok(528), parse_size_u64("01020"));
        assert_eq!(Ok(668 * 1024),
parse_size_u128("01234K"));
    }

    #[test]
    fn parse_hex_size() {
        assert_eq!(Ok(10), parse_size_u64("0xA"));
        assert_eq!(Ok(94722), parse_size_u64("0x17202"));
        assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK"));
    }
}
uucore-0.0.23/src/lib/parser/parse_time.rs000064400000000000000000000075661046102023000165550ustar 00000000000000// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

// spell-checker:ignore (vars) NANOS numstr
//! Parsing a duration from a string.
//!
//! Use the [`from_str`] function to parse a [`Duration`] from a string.

use std::time::Duration;

use crate::display::Quotable;

/// Parse a duration from a string.
///
/// The string may contain only a number, like "123" or "4.5", or it
/// may contain a number with a unit specifier, like "123s" meaning
/// one hundred twenty three seconds or "4.5d" meaning four and a half
/// days. If no unit is specified, the unit is assumed to be seconds.
///
/// The only allowed suffixes are
///
/// * "s" for seconds,
/// * "m" for minutes,
/// * "h" for hours,
/// * "d" for days.
///
/// This function uses [`Duration::saturating_mul`] to compute the
/// number of seconds, so it does not overflow. If overflow would have
/// occurred, [`Duration::MAX`] is returned instead.
///
/// # Errors
///
/// This function returns an error if the input string is empty, the
/// input is not a valid number, the number is negative or not a number
/// (NaN), or the unit specifier is invalid or unknown.
///
/// # Examples
///
/// ```rust
/// use std::time::Duration;
/// use uucore::parse_time::from_str;
/// assert_eq!(from_str("123"), Ok(Duration::from_secs(123)));
/// assert_eq!(from_str("2d"), Ok(Duration::from_secs(60 * 60 * 24 * 2)));
/// ```
pub fn from_str(string: &str) -> Result<Duration, String> {
    let len = string.len();
    if len == 0 {
        return Err("empty string".to_owned());
    }
    // Everything but the last byte; `None` when the last character is multi-byte,
    // in which case it cannot be one of the ASCII unit suffixes.
    let slice = match string.get(..len - 1) {
        Some(s) => s,
        None => return Err(format!("invalid time interval {}", string.quote())),
    };
    let (numstr, times) = match string.chars().next_back().unwrap() {
        's' => (slice, 1),
        'm' => (slice, 60),
        'h' => (slice, 60 * 60),
        'd' => (slice, 60 * 60 * 24),
        // No unit given: the whole string is the number, in seconds.
        val if !val.is_alphabetic() => (string, 1),
        _ => {
            if string == "inf" || string == "infinity" {
                ("inf", 1)
            } else {
                return Err(format!("invalid time interval {}", string.quote()));
            }
        }
    };
    let num = numstr
        .parse::<f64>()
        .map_err(|e| format!("invalid time interval {}: {}", string.quote(), e))?;

    // The float parser accepts "NaN" (e.g. in "NaNs"). A NaN is not below zero, so it
    // has to be rejected explicitly; otherwise it would be turned into a zero duration.
    if num.is_nan() || num < 0. {
        return Err(format!("invalid time interval {}", string.quote()));
    }

    const NANOS_PER_SEC: u32 = 1_000_000_000;
    let whole_secs = num.trunc();
    let nanos = (num.fract() * (NANOS_PER_SEC as f64)).trunc();
    // Float-to-integer `as` casts saturate, so huge values clamp instead of wrapping.
    let duration = Duration::new(whole_secs as u64, nanos as u32);
    Ok(duration.saturating_mul(times))
}

#[cfg(test)]
mod tests {

    use crate::parse_time::from_str;
    use std::time::Duration;

    #[test]
    fn test_no_units() {
        assert_eq!(from_str("123"), Ok(Duration::from_secs(123)));
    }

    #[test]
    fn test_units() {
        assert_eq!(from_str("2d"), Ok(Duration::from_secs(60 * 60 * 24 * 2)));
    }

    #[test]
    fn test_saturating_mul() {
        assert_eq!(from_str("9223372036854775808d"), Ok(Duration::MAX));
    }

    #[test]
    fn test_error_empty() {
        assert!(from_str("").is_err());
    }

    #[test]
    fn test_error_invalid_unit() {
        assert!(from_str("123X").is_err());
    }

    #[test]
    fn test_error_multi_bytes_characters() {
        assert!(from_str("10€").is_err());
    }

    #[test]
    fn test_error_invalid_magnitude() {
        assert!(from_str("12abc3s").is_err());
    }

    #[test]
    fn test_negative() {
        assert!(from_str("-1").is_err());
    }

    /// Test that a "not a number" magnitude is rejected.
    #[test]
    fn test_error_nan() {
        assert!(from_str("NaNs").is_err());
        assert!(from_str("nanm").is_err());
    }

    /// Test that capital letters are not allowed in suffixes.
    #[test]
    fn test_no_capital_letters() {
        assert!(from_str("1S").is_err());
        assert!(from_str("1M").is_err());
        assert!(from_str("1H").is_err());
        assert!(from_str("1D").is_err());
    }
}
uucore-0.0.23/src/lib/parser/shortcut_value_parser.rs000064400000000000000000000121161046102023000210330ustar 00000000000000// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore abcdefgh
use clap::{
    builder::{PossibleValue, TypedValueParser},
    error::{ContextKind, ContextValue, ErrorKind},
};

/// `ShortcutValueParser` is similar to clap's `PossibleValuesParser`: it verifies that the value is
/// from an enumerated set of `PossibleValue`.
///
/// Whereas `PossibleValuesParser` only accepts exact matches, `ShortcutValueParser` also accepts
/// shortcuts as long as they are unambiguous.
#[derive(Clone)]
pub struct ShortcutValueParser(Vec<PossibleValue>);
impl ShortcutValueParser { pub fn new(values: impl Into) -> Self { values.into() } fn generate_clap_error( &self, cmd: &clap::Command, arg: Option<&clap::Arg>, value: &str, ) -> clap::Error { let mut err = clap::Error::new(ErrorKind::InvalidValue).with_cmd(cmd); if let Some(arg) = arg { err.insert( ContextKind::InvalidArg, ContextValue::String(arg.to_string()), ); } err.insert( ContextKind::InvalidValue, ContextValue::String(value.to_string()), ); err.insert( ContextKind::ValidValue, ContextValue::Strings(self.0.iter().map(|x| x.get_name().to_string()).collect()), ); err } } impl TypedValueParser for ShortcutValueParser { type Value = String; fn parse_ref( &self, cmd: &clap::Command, arg: Option<&clap::Arg>, value: &std::ffi::OsStr, ) -> Result { let value = value .to_str() .ok_or(clap::Error::new(ErrorKind::InvalidUtf8))?; let matched_values: Vec<_> = self .0 .iter() .filter(|x| x.get_name().starts_with(value)) .collect(); match matched_values.len() { 0 => Err(self.generate_clap_error(cmd, arg, value)), 1 => Ok(matched_values[0].get_name().to_string()), _ => { if let Some(direct_match) = matched_values.iter().find(|x| x.get_name() == value) { Ok(direct_match.get_name().to_string()) } else { Err(self.generate_clap_error(cmd, arg, value)) } } } } fn possible_values(&self) -> Option + '_>> { Some(Box::new(self.0.iter().cloned())) } } impl From for ShortcutValueParser where I: IntoIterator, T: Into, { fn from(values: I) -> Self { Self(values.into_iter().map(|t| t.into()).collect()) } } #[cfg(test)] mod tests { use std::ffi::OsStr; use clap::{builder::TypedValueParser, error::ErrorKind, Command}; use super::ShortcutValueParser; #[test] fn test_parse_ref() { let cmd = Command::new("cmd"); let parser = ShortcutValueParser::new(["abcd"]); let values = ["a", "ab", "abc", "abcd"]; for value in values { let result = parser.parse_ref(&cmd, None, OsStr::new(value)); assert_eq!("abcd", result.unwrap()); } } #[test] fn test_parse_ref_with_invalid_value() { let cmd = 
Command::new("cmd"); let parser = ShortcutValueParser::new(["abcd"]); let invalid_values = ["e", "abe", "abcde"]; for invalid_value in invalid_values { let result = parser.parse_ref(&cmd, None, OsStr::new(invalid_value)); assert_eq!(ErrorKind::InvalidValue, result.unwrap_err().kind()); } } #[test] fn test_parse_ref_with_ambiguous_value() { let cmd = Command::new("cmd"); let parser = ShortcutValueParser::new(["abcd", "abef"]); let ambiguous_values = ["a", "ab"]; for ambiguous_value in ambiguous_values { let result = parser.parse_ref(&cmd, None, OsStr::new(ambiguous_value)); assert_eq!(ErrorKind::InvalidValue, result.unwrap_err().kind()); } let result = parser.parse_ref(&cmd, None, OsStr::new("abc")); assert_eq!("abcd", result.unwrap()); let result = parser.parse_ref(&cmd, None, OsStr::new("abe")); assert_eq!("abef", result.unwrap()); } #[test] fn test_parse_ref_with_ambiguous_value_that_is_a_possible_value() { let cmd = Command::new("cmd"); let parser = ShortcutValueParser::new(["abcd", "abcdefgh"]); let result = parser.parse_ref(&cmd, None, OsStr::new("abcd")); assert_eq!("abcd", result.unwrap()); } #[test] #[cfg(unix)] fn test_parse_ref_with_invalid_utf8() { use std::os::unix::prelude::OsStrExt; let parser = ShortcutValueParser::new(["abcd"]); let cmd = Command::new("cmd"); let result = parser.parse_ref(&cmd, None, OsStr::from_bytes(&[0xc3 as u8, 0x28 as u8])); assert_eq!(ErrorKind::InvalidUtf8, result.unwrap_err().kind()); } } uucore-0.0.23/src/lib/parser.rs000064400000000000000000000004231046102023000144060ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. pub mod parse_glob; pub mod parse_size; pub mod parse_time; pub mod shortcut_value_parser;