uucore-0.0.30/.cargo_vcs_info.json0000644000000001500000000000100124120ustar { "git": { "sha1": "088599f41602e0b0505543a010ec59f5f81e74b1" }, "path_in_vcs": "src/uucore" }uucore-0.0.30/Cargo.toml0000644000000114770000000000100104260ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "uucore" version = "0.0.30" authors = ["uutils developers"] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "uutils ~ 'core' uutils code library (cross-platform)" homepage = "https://github.com/uutils/coreutils" readme = false keywords = [ "coreutils", "uutils", "cross-platform", "cli", "utility", ] categories = ["command-line-utilities"] license = "MIT" repository = "https://github.com/uutils/coreutils/tree/main/src/uucore" [package.metadata.docs.rs] all-features = true [lib] name = "uucore" path = "src/lib/lib.rs" [dependencies.blake2b_simd] version = "1.0.2" optional = true [dependencies.blake3] version = "1.5.1" optional = true [dependencies.chrono] version = "0.4.38" features = [ "std", "alloc", "clock", ] default-features = false [dependencies.chrono-tz] version = "0.10.0" [dependencies.clap] version = "4.5" features = [ "wrap_help", "cargo", ] [dependencies.crc32fast] version = "1.4.2" optional = true [dependencies.data-encoding] version = "2.6" optional = true [dependencies.data-encoding-macro] version = "0.1.15" optional = true [dependencies.digest] version = "0.10.7" optional = true [dependencies.dns-lookup] version = "2.0.4" optional = true 
[dependencies.dunce] version = "1.0.4" optional = true [dependencies.glob] version = "0.3.1" [dependencies.hex] version = "0.4.3" optional = true [dependencies.iana-time-zone] version = "0.1.57" [dependencies.itertools] version = "0.14.0" optional = true [dependencies.libc] version = "0.2.153" optional = true [dependencies.md-5] version = "0.10.6" optional = true [dependencies.memchr] version = "2.7.2" optional = true [dependencies.number_prefix] version = "0.4" [dependencies.os_display] version = "0.1.3" [dependencies.regex] version = "1.10.4" optional = true [dependencies.sha1] version = "0.10.6" optional = true [dependencies.sha2] version = "0.10.8" optional = true [dependencies.sha3] version = "0.10.8" optional = true [dependencies.sm3] version = "0.4.2" optional = true [dependencies.thiserror] version = "2.0.3" optional = true [dependencies.time] version = "0.3.36" features = [ "formatting", "local-offset", "macros", ] optional = true [dependencies.uucore_procs] version = "0.0.30" package = "uucore_procs" [dependencies.wild] version = "2.2.1" [dependencies.z85] version = "3.0.5" optional = true [dev-dependencies.clap] version = "4.5" features = [ "wrap_help", "cargo", ] [dev-dependencies.tempfile] version = "3.15.0" [features] backup-control = [] buf-copy = [] checksum = [ "data-encoding", "thiserror", "regex", "sum", ] colors = [] custom-tz-fmt = [] default = [] encoding = [ "data-encoding", "data-encoding-macro", "z85", ] entries = ["libc"] format = [ "itertools", "quoting-style", ] fs = [ "dunce", "libc", "winapi-util", "windows-sys", ] fsext = [ "libc", "windows-sys", ] fsxattr = ["xattr"] lines = [] mode = ["libc"] perms = [ "entries", "libc", "walkdir", ] pipes = [] proc-info = [ "tty", "walkdir", ] process = ["libc"] quoting-style = [] ranges = [] ringbuffer = [] signals = [] sum = [ "digest", "hex", "memchr", "md-5", "sha1", "sha2", "sha3", "blake2b_simd", "blake3", "sm3", "crc32fast", ] tty = [] update-control = [] uptime = [ "libc", "windows-sys", 
"utmpx", "utmp-classic", "thiserror", ] utf8 = [] utmpx = [ "time", "time/macros", "libc", "dns-lookup", ] version-cmp = [] wide = [] [target.'cfg(target_os = "openbsd")'.dependencies.utmp-classic] version = "0.1.6" optional = true [target.'cfg(target_os = "windows")'.dependencies.winapi-util] version = "0.1.8" optional = true [target.'cfg(target_os = "windows")'.dependencies.windows-sys] version = "0.59.0" features = [ "Wdk_System_SystemInformation", "Win32_Storage_FileSystem", "Win32_Foundation", "Win32_System_RemoteDesktop", "Win32_System_WindowsProgramming", ] optional = true default-features = false [target."cfg(unix)".dependencies.nix] version = "0.29" features = [ "fs", "uio", "zerocopy", "signal", ] default-features = false [target."cfg(unix)".dependencies.walkdir] version = "2.5" optional = true [target."cfg(unix)".dependencies.xattr] version = "1.3.1" optional = true uucore-0.0.30/Cargo.toml.orig000064400000000000000000000070331046102023000141000ustar 00000000000000# spell-checker:ignore (features) zerocopy [package] name = "uucore" version = "0.0.30" authors = ["uutils developers"] license = "MIT" description = "uutils ~ 'core' uutils code library (cross-platform)" homepage = "https://github.com/uutils/coreutils" repository = "https://github.com/uutils/coreutils/tree/main/src/uucore" # readme = "README.md" keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"] categories = ["command-line-utilities"] edition = "2021" [package.metadata.docs.rs] all-features = true [lib] path = "src/lib/lib.rs" [dependencies] chrono = { workspace = true } chrono-tz = { workspace = true } clap = { workspace = true } uucore_procs = { workspace = true } number_prefix = { workspace = true } dns-lookup = { workspace = true, optional = true } dunce = { version = "1.0.4", optional = true } wild = "2.2.1" glob = { workspace = true } iana-time-zone = { workspace = true } # * optional itertools = { workspace = true, optional = true } thiserror = { workspace = true, 
optional = true } time = { workspace = true, optional = true, features = [ "formatting", "local-offset", "macros", ] } # * "problem" dependencies (pinned) data-encoding = { version = "2.6", optional = true } data-encoding-macro = { version = "0.1.15", optional = true } z85 = { version = "3.0.5", optional = true } libc = { workspace = true, optional = true } os_display = "0.1.3" digest = { workspace = true, optional = true } hex = { workspace = true, optional = true } memchr = { workspace = true, optional = true } md-5 = { workspace = true, optional = true } sha1 = { workspace = true, optional = true } sha2 = { workspace = true, optional = true } sha3 = { workspace = true, optional = true } blake2b_simd = { workspace = true, optional = true } blake3 = { workspace = true, optional = true } sm3 = { workspace = true, optional = true } crc32fast = { workspace = true, optional = true } regex = { workspace = true, optional = true } [target.'cfg(unix)'.dependencies] walkdir = { workspace = true, optional = true } nix = { workspace = true, features = ["fs", "uio", "zerocopy", "signal"] } xattr = { workspace = true, optional = true } [dev-dependencies] clap = { workspace = true } tempfile = { workspace = true } [target.'cfg(target_os = "windows")'.dependencies] winapi-util = { workspace = true, optional = true } windows-sys = { workspace = true, optional = true, default-features = false, features = [ "Wdk_System_SystemInformation", "Win32_Storage_FileSystem", "Win32_Foundation", "Win32_System_RemoteDesktop", "Win32_System_WindowsProgramming", ] } [target.'cfg(target_os = "openbsd")'.dependencies] utmp-classic = { workspace = true, optional = true } [features] default = [] # * non-default features backup-control = [] colors = [] checksum = ["data-encoding", "thiserror", "regex", "sum"] encoding = ["data-encoding", "data-encoding-macro", "z85"] entries = ["libc"] fs = ["dunce", "libc", "winapi-util", "windows-sys"] fsext = ["libc", "windows-sys"] fsxattr = ["xattr"] lines = [] 
format = ["itertools", "quoting-style"] mode = ["libc"] perms = ["entries", "libc", "walkdir"] buf-copy = [] pipes = [] process = ["libc"] proc-info = ["tty", "walkdir"] quoting-style = [] ranges = [] ringbuffer = [] signals = [] sum = [ "digest", "hex", "memchr", "md-5", "sha1", "sha2", "sha3", "blake2b_simd", "blake3", "sm3", "crc32fast", ] update-control = [] utf8 = [] utmpx = ["time", "time/macros", "libc", "dns-lookup"] version-cmp = [] wide = [] custom-tz-fmt = [] tty = [] uptime = ["libc", "windows-sys", "utmpx", "utmp-classic", "thiserror"] uucore-0.0.30/LICENSE000064400000000000000000000020401046102023000122070ustar 00000000000000Copyright (c) uutils developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. uucore-0.0.30/src/lib/features/backup_control.rs000064400000000000000000000605371046102023000177470ustar 00000000000000// This file is part of the uutils coreutils package. 
// // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Implement GNU-style backup functionality. //! //! This module implements the backup functionality as described in the [GNU //! manual][1]. It provides //! //! - pre-defined [`clap`-Arguments][2] for inclusion in utilities that //! implement backups //! - determination of the [backup mode][3] //! - determination of the [backup suffix][4] //! - [backup target path construction][5] //! - [Error types][6] for backup-related errors //! - GNU-compliant [help texts][7] for backup-related errors //! //! Backup-functionality is implemented by the following utilities: //! //! - `cp` //! - `install` //! - `ln` //! - `mv` //! //! //! [1]: https://www.gnu.org/software/coreutils/manual/html_node/Backup-options.html //! [2]: arguments //! [3]: `determine_backup_mode()` //! [4]: `determine_backup_suffix()` //! [5]: `get_backup_path()` //! [6]: `BackupError` //! [7]: `BACKUP_CONTROL_LONG_HELP` //! //! //! # Usage example //! //! ``` //! #[macro_use] //! extern crate uucore; //! //! use clap::{Command, Arg, ArgMatches}; //! use std::path::{Path, PathBuf}; //! use uucore::backup_control::{self, BackupMode}; //! use uucore::error::{UError, UResult}; //! //! fn main() { //! let usage = String::from("command [OPTION]... ARG"); //! let long_usage = String::from("And here's a detailed explanation"); //! //! let matches = Command::new("command") //! .arg(backup_control::arguments::backup()) //! .arg(backup_control::arguments::backup_no_args()) //! .arg(backup_control::arguments::suffix()) //! .override_usage(usage) //! .after_help(format!( //! "{}\n{}", //! long_usage, //! backup_control::BACKUP_CONTROL_LONG_HELP //! )) //! .get_matches_from(vec![ //! "command", "--backup=t", "--suffix=bak~" //! ]); //! //! let backup_mode = match backup_control::determine_backup_mode(&matches) { //! Err(e) => { //! show!(e); //! return; //! }, //! Ok(mode) => mode, //! 
}; //! let backup_suffix = backup_control::determine_backup_suffix(&matches); //! let target_path = Path::new("/tmp/example"); //! //! let backup_path = backup_control::get_backup_path( //! backup_mode, target_path, &backup_suffix //! ); //! //! // Perform your backups here. //! //! } //! ``` // spell-checker:ignore backupopt use crate::{ display::Quotable, error::{UError, UResult}, }; use clap::ArgMatches; use std::{ env, error::Error, fmt::{Debug, Display}, path::{Path, PathBuf}, }; pub static BACKUP_CONTROL_VALUES: &[&str] = &[ "simple", "never", "numbered", "t", "existing", "nil", "none", "off", ]; pub const BACKUP_CONTROL_LONG_HELP: &str = "The backup suffix is '~', unless set with --suffix or SIMPLE_BACKUP_SUFFIX. The version control method may be selected via the --backup option or through the VERSION_CONTROL environment variable. Here are the values: none, off never make backups (even if --backup is given) numbered, t make numbered backups existing, nil numbered if numbered backups exist, simple otherwise simple, never always make simple backups"; static VALID_ARGS_HELP: &str = "Valid arguments are: - 'none', 'off' - 'simple', 'never' - 'existing', 'nil' - 'numbered', 't'"; /// Available backup modes. /// /// The mapping of the backup modes to the CLI arguments is annotated on the /// enum variants. #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum BackupMode { /// Argument 'none', 'off' NoBackup, /// Argument 'simple', 'never' SimpleBackup, /// Argument 'numbered', 't' NumberedBackup, /// Argument 'existing', 'nil' ExistingBackup, } /// Backup error types. /// /// Errors are currently raised by [`determine_backup_mode`] only. All errors /// are implemented as [`UError`] for uniform handling across utilities. #[derive(Debug, Eq, PartialEq)] pub enum BackupError { /// An invalid argument (e.g. 'foo') was given as backup type. 
First /// parameter is the argument, second is the arguments origin (CLI or /// ENV-var) InvalidArgument(String, String), /// An ambiguous argument (e.g. 'n') was given as backup type. First /// parameter is the argument, second is the arguments origin (CLI or /// ENV-var) AmbiguousArgument(String, String), /// Currently unused BackupImpossible(), // BackupFailed(PathBuf, PathBuf, std::io::Error), } impl UError for BackupError { fn code(&self) -> i32 { match self { Self::BackupImpossible() => 2, _ => 1, } } fn usage(&self) -> bool { // Suggested by clippy. matches!( self, Self::InvalidArgument(_, _) | Self::AmbiguousArgument(_, _) ) } } impl Error for BackupError {} impl Display for BackupError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::InvalidArgument(arg, origin) => write!( f, "invalid argument {} for '{}'\n{}", arg.quote(), origin, VALID_ARGS_HELP ), Self::AmbiguousArgument(arg, origin) => write!( f, "ambiguous argument {} for '{}'\n{}", arg.quote(), origin, VALID_ARGS_HELP ), Self::BackupImpossible() => write!(f, "cannot create backup"), // Placeholder for later // Self::BackupFailed(from, to, e) => Display::fmt( // &uio_error!(e, "failed to backup {} to {}", from.quote(), to.quote()), // f // ), } } } /// Arguments for backup-related functionality. /// /// Rather than implementing the `clap`-Arguments for every utility, it is /// recommended to include the `clap` arguments via the functions provided here. /// This way the backup-specific arguments are handled uniformly across /// utilities and can be maintained in one central place. 
pub mod arguments { use clap::ArgAction; pub static OPT_BACKUP: &str = "backupopt_backup"; pub static OPT_BACKUP_NO_ARG: &str = "backupopt_b"; pub static OPT_SUFFIX: &str = "backupopt_suffix"; /// '--backup' argument pub fn backup() -> clap::Arg { clap::Arg::new(OPT_BACKUP) .long("backup") .help("make a backup of each existing destination file") .action(clap::ArgAction::Set) .require_equals(true) .num_args(0..=1) .value_name("CONTROL") } /// '-b' argument pub fn backup_no_args() -> clap::Arg { clap::Arg::new(OPT_BACKUP_NO_ARG) .short('b') .help("like --backup but does not accept an argument") .action(ArgAction::SetTrue) } /// '-S, --suffix' argument pub fn suffix() -> clap::Arg { clap::Arg::new(OPT_SUFFIX) .short('S') .long("suffix") .help("override the usual backup suffix") .action(clap::ArgAction::Set) .value_name("SUFFIX") .allow_hyphen_values(true) } } /// Obtain the suffix to use for a backup. /// /// In order of precedence, this function obtains the backup suffix /// /// 1. From the '-S' or '--suffix' CLI argument, if present /// 2. From the "SIMPLE_BACKUP_SUFFIX" environment variable, if present /// 3. By using the default '~' if none of the others apply /// /// This function directly takes [`clap::ArgMatches`] as argument and looks for /// the '-S' and '--suffix' arguments itself. pub fn determine_backup_suffix(matches: &ArgMatches) -> String { let supplied_suffix = matches.get_one::(arguments::OPT_SUFFIX); if let Some(suffix) = supplied_suffix { String::from(suffix) } else { env::var("SIMPLE_BACKUP_SUFFIX").unwrap_or_else(|_| "~".to_owned()) } } /// Determine the "mode" for the backup operation to perform, if any. /// /// Parses the backup options according to the [GNU manual][1], and converts /// them to an instance of `BackupMode` for further processing. /// /// Takes [`clap::ArgMatches`] as argument which **must** contain the options /// from [`arguments::backup()`] and [`arguments::backup_no_args()`]. 
Otherwise /// the `NoBackup` mode is returned unconditionally. /// /// It is recommended for anyone who would like to implement the /// backup-functionality to use the arguments prepared in the `arguments` /// submodule (see examples) /// /// [1]: https://www.gnu.org/software/coreutils/manual/html_node/Backup-options.html /// /// /// # Errors /// /// If an argument supplied directly to the long `backup` option, or read in /// through the `VERSION CONTROL` env var is ambiguous (i.e. may resolve to /// multiple backup modes) or invalid, an [`InvalidArgument`][10] or /// [`AmbiguousArgument`][11] error is returned, respectively. /// /// [10]: BackupError::InvalidArgument /// [11]: BackupError::AmbiguousArgument /// /// /// # Examples /// /// Here's how one would integrate the backup mode determination into an /// application. /// /// ``` /// #[macro_use] /// extern crate uucore; /// use uucore::backup_control::{self, BackupMode}; /// use clap::{Command, Arg, ArgMatches}; /// /// fn main() { /// let matches = Command::new("command") /// .arg(backup_control::arguments::backup()) /// .arg(backup_control::arguments::backup_no_args()) /// .get_matches_from(vec![ /// "command", "-b", "--backup=t" /// ]); /// /// let backup_mode = backup_control::determine_backup_mode(&matches).unwrap(); /// assert_eq!(backup_mode, BackupMode::NumberedBackup) /// } /// ``` /// /// This example shows an ambiguous input, as 'n' may resolve to 4 different /// backup modes. 
/// /// /// ``` /// #[macro_use] /// extern crate uucore; /// use uucore::backup_control::{self, BackupMode, BackupError}; /// use clap::{Command, Arg, ArgMatches}; /// /// fn main() { /// let matches = Command::new("command") /// .arg(backup_control::arguments::backup()) /// .arg(backup_control::arguments::backup_no_args()) /// .get_matches_from(vec![ /// "command", "-b", "--backup=n" /// ]); /// /// let backup_mode = backup_control::determine_backup_mode(&matches); /// /// assert!(backup_mode.is_err()); /// let err = backup_mode.unwrap_err(); /// // assert_eq!(err, BackupError::AmbiguousArgument); /// // Use uucore functionality to show the error to the user /// show!(err); /// } /// ``` pub fn determine_backup_mode(matches: &ArgMatches) -> UResult { if matches.contains_id(arguments::OPT_BACKUP) { // Use method to determine the type of backups to make. When this option // is used but method is not specified, then the value of the // VERSION_CONTROL environment variable is used. And if VERSION_CONTROL // is not set, the default backup type is 'existing'. if let Some(method) = matches.get_one::(arguments::OPT_BACKUP) { // Second argument is for the error string that is returned. match_method(method, "backup type") } else if let Ok(method) = env::var("VERSION_CONTROL") { // Second argument is for the error string that is returned. match_method(&method, "$VERSION_CONTROL") } else { // Default if no argument is provided to '--backup' Ok(BackupMode::ExistingBackup) } } else if matches.get_flag(arguments::OPT_BACKUP_NO_ARG) { // the short form of this option, -b does not accept any argument. // if VERSION_CONTROL is not set then using -b is equivalent to // using --backup=existing. if let Ok(method) = env::var("VERSION_CONTROL") { match_method(&method, "$VERSION_CONTROL") } else { Ok(BackupMode::ExistingBackup) } } else { // No option was present at all Ok(BackupMode::NoBackup) } } /// Match a backup option string to a `BackupMode`. 
/// /// The GNU manual specifies that abbreviations to options are valid as long as /// they aren't ambiguous. This function matches the given `method` argument /// against all valid backup options (via `starts_with`), and returns a valid /// [`BackupMode`] if exactly one backup option matches the `method` given. /// /// `origin` is required in order to format the generated error message /// properly, when an error occurs. /// /// /// # Errors /// /// If `method` is invalid or ambiguous (i.e. may resolve to multiple backup /// modes), an [`InvalidArgument`][10] or [`AmbiguousArgument`][11] error is /// returned, respectively. /// /// [10]: BackupError::InvalidArgument /// [11]: BackupError::AmbiguousArgument fn match_method(method: &str, origin: &str) -> UResult { let matches: Vec<&&str> = BACKUP_CONTROL_VALUES .iter() .filter(|val| val.starts_with(method)) .collect(); if matches.len() == 1 { match *matches[0] { "simple" | "never" => Ok(BackupMode::SimpleBackup), "numbered" | "t" => Ok(BackupMode::NumberedBackup), "existing" | "nil" => Ok(BackupMode::ExistingBackup), "none" | "off" => Ok(BackupMode::NoBackup), _ => unreachable!(), // cannot happen as we must have exactly one match // from the list above. 
} } else if matches.is_empty() { Err(BackupError::InvalidArgument(method.to_string(), origin.to_string()).into()) } else { Err(BackupError::AmbiguousArgument(method.to_string(), origin.to_string()).into()) } } pub fn get_backup_path( backup_mode: BackupMode, backup_path: &Path, suffix: &str, ) -> Option { match backup_mode { BackupMode::NoBackup => None, BackupMode::SimpleBackup => Some(simple_backup_path(backup_path, suffix)), BackupMode::NumberedBackup => Some(numbered_backup_path(backup_path)), BackupMode::ExistingBackup => Some(existing_backup_path(backup_path, suffix)), } } fn simple_backup_path(path: &Path, suffix: &str) -> PathBuf { let mut file_name = path.file_name().unwrap_or_default().to_os_string(); file_name.push(suffix); path.with_file_name(file_name) } fn numbered_backup_path(path: &Path) -> PathBuf { let file_name = path.file_name().unwrap_or_default(); for i in 1_u64.. { let mut numbered_file_name = file_name.to_os_string(); numbered_file_name.push(format!(".~{}~", i)); let path = path.with_file_name(numbered_file_name); if !path.exists() { return path; } } panic!("cannot create backup") } fn existing_backup_path(path: &Path, suffix: &str) -> PathBuf { let file_name = path.file_name().unwrap_or_default(); let mut numbered_file_name = file_name.to_os_string(); numbered_file_name.push(".~1~"); let test_path = path.with_file_name(numbered_file_name); if test_path.exists() { numbered_backup_path(path) } else { simple_backup_path(path, suffix) } } /// Returns true if the source file is likely to be the simple backup file for the target file. /// /// # Arguments /// /// * `source` - A Path reference that holds the source (backup) file path. /// * `target` - A Path reference that holds the target file path. /// * `suffix` - Str that holds the backup suffix. 
/// /// # Examples /// /// ``` /// use std::path::Path; /// use uucore::backup_control::source_is_target_backup; /// let source = Path::new("data.txt~"); /// let target = Path::new("data.txt"); /// let suffix = String::from("~"); /// /// assert_eq!(source_is_target_backup(&source, &target, &suffix), true); /// ``` /// pub fn source_is_target_backup(source: &Path, target: &Path, suffix: &str) -> bool { let source_filename = source.to_string_lossy(); let target_backup_filename = format!("{}{suffix}", target.to_string_lossy()); source_filename == target_backup_filename } // // Tests for this module // #[cfg(test)] mod tests { use super::*; // Required to instantiate mutex in shared context use clap::Command; use std::sync::{LazyLock, Mutex}; // The mutex is required here as by default all tests are run as separate // threads under the same parent process. As environment variables are // specific to processes (and thus shared among threads), data races *will* // occur if no precautions are taken. Thus we have all tests that rely on // environment variables lock this empty mutex to ensure they don't access // it concurrently. 
static TEST_MUTEX: LazyLock> = LazyLock::new(|| Mutex::new(())); // Environment variable for "VERSION_CONTROL" static ENV_VERSION_CONTROL: &str = "VERSION_CONTROL"; fn make_app() -> clap::Command { Command::new("command") .arg(arguments::backup()) .arg(arguments::backup_no_args()) .arg(arguments::suffix()) } // Defaults to --backup=existing #[test] fn test_backup_mode_short_only() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "-b"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::ExistingBackup); } // --backup takes precedence over -b #[test] fn test_backup_mode_long_preferred_over_short() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "-b", "--backup=none"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::NoBackup); } // --backup can be passed without an argument #[test] fn test_backup_mode_long_without_args_no_env() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "--backup"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::ExistingBackup); } // --backup can be passed with an argument only #[test] fn test_backup_mode_long_with_args() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "--backup=simple"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::SimpleBackup); } // --backup errors on invalid argument #[test] fn test_backup_mode_long_with_args_invalid() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "--backup=foobar"]); let result = determine_backup_mode(&matches); assert!(result.is_err()); let text = format!("{}", result.unwrap_err()); assert!(text.contains("invalid argument 'foobar' for 'backup type'")); } // --backup errors on ambiguous 
argument #[test] fn test_backup_mode_long_with_args_ambiguous() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "--backup=n"]); let result = determine_backup_mode(&matches); assert!(result.is_err()); let text = format!("{}", result.unwrap_err()); assert!(text.contains("ambiguous argument 'n' for 'backup type'")); } // --backup accepts shortened arguments (si for simple) #[test] fn test_backup_mode_long_with_arg_shortened() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "--backup=si"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::SimpleBackup); } // -b doesn't ignores the "VERSION_CONTROL" environment variable #[test] fn test_backup_mode_short_does_not_ignore_env() { let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "numbered"); let matches = make_app().get_matches_from(vec!["command", "-b"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::NumberedBackup); env::remove_var(ENV_VERSION_CONTROL); } // --backup can be passed without an argument, but reads env var if existent #[test] fn test_backup_mode_long_without_args_with_env() { let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "none"); let matches = make_app().get_matches_from(vec!["command", "--backup"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::NoBackup); env::remove_var(ENV_VERSION_CONTROL); } // --backup errors on invalid VERSION_CONTROL env var #[test] fn test_backup_mode_long_with_env_var_invalid() { let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "foobar"); let matches = make_app().get_matches_from(vec!["command", "--backup"]); let result = determine_backup_mode(&matches); assert!(result.is_err()); let text = format!("{}", result.unwrap_err()); assert!(text.contains("invalid argument 'foobar' for 
'$VERSION_CONTROL'")); env::remove_var(ENV_VERSION_CONTROL); } // --backup errors on ambiguous VERSION_CONTROL env var #[test] fn test_backup_mode_long_with_env_var_ambiguous() { let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "n"); let matches = make_app().get_matches_from(vec!["command", "--backup"]); let result = determine_backup_mode(&matches); assert!(result.is_err()); let text = format!("{}", result.unwrap_err()); assert!(text.contains("ambiguous argument 'n' for '$VERSION_CONTROL'")); env::remove_var(ENV_VERSION_CONTROL); } // --backup accepts shortened env vars (si for simple) #[test] fn test_backup_mode_long_with_env_var_shortened() { let _dummy = TEST_MUTEX.lock().unwrap(); env::set_var(ENV_VERSION_CONTROL, "si"); let matches = make_app().get_matches_from(vec!["command", "--backup"]); let result = determine_backup_mode(&matches).unwrap(); assert_eq!(result, BackupMode::SimpleBackup); env::remove_var(ENV_VERSION_CONTROL); } #[test] fn test_suffix_takes_hyphen_value() { let _dummy = TEST_MUTEX.lock().unwrap(); let matches = make_app().get_matches_from(vec!["command", "-b", "--suffix", "-v"]); let result = determine_backup_suffix(&matches); assert_eq!(result, "-v"); } #[test] fn test_numbered_backup_path() { assert_eq!(numbered_backup_path(Path::new("")), PathBuf::from(".~1~")); assert_eq!(numbered_backup_path(Path::new("/")), PathBuf::from("/.~1~")); assert_eq!( numbered_backup_path(Path::new("/hello/world")), PathBuf::from("/hello/world.~1~") ); assert_eq!( numbered_backup_path(Path::new("/hello/world/")), PathBuf::from("/hello/world.~1~") ); } #[test] fn test_simple_backup_path() { assert_eq!( simple_backup_path(Path::new(""), ".bak"), PathBuf::from(".bak") ); assert_eq!( simple_backup_path(Path::new("/"), ".bak"), PathBuf::from("/.bak") ); assert_eq!( simple_backup_path(Path::new("/hello/world"), ".bak"), PathBuf::from("/hello/world.bak") ); assert_eq!( simple_backup_path(Path::new("/hello/world/"), ".bak"), 
PathBuf::from("/hello/world.bak") ); } #[test] fn test_source_is_target_backup() { let source = Path::new("data.txt.bak"); let target = Path::new("data.txt"); let suffix = String::from(".bak"); assert!(source_is_target_backup(source, target, &suffix)); } #[test] fn test_source_is_not_target_backup() { let source = Path::new("data.txt"); let target = Path::new("backup.txt"); let suffix = String::from(".bak"); assert!(!source_is_target_backup(source, target, &suffix)); } #[test] fn test_source_is_target_backup_with_tilde_suffix() { let source = Path::new("example~"); let target = Path::new("example"); let suffix = String::from("~"); assert!(source_is_target_backup(source, target, &suffix)); } } uucore-0.0.30/src/lib/features/buf_copy/common.rs000064400000000000000000000014711046102023000200300ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. use crate::error::UError; /// Error types used by buffer-copying functions from the `buf_copy` module. #[derive(Debug)] pub enum Error { Io(std::io::Error), WriteError(String), } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Error::WriteError(msg) => write!(f, "splice() write error: {}", msg), Error::Io(err) => write!(f, "I/O error: {}", err), } } } impl std::error::Error for Error {} impl UError for Error { fn code(&self) -> i32 { 1 } fn usage(&self) -> bool { false } } uucore-0.0.30/src/lib/features/buf_copy/linux.rs000064400000000000000000000107451046102023000177030ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Buffer-based copying implementation for Linux and Android. 
use crate::{
    error::UResult,
    pipes::{pipe, splice, splice_exact},
};

use std::{
    io::{Read, Write},
    os::fd::{AsFd, AsRawFd, RawFd},
};

use super::common::Error;

/// A readable file descriptor.
pub trait FdReadable: Read + AsRawFd + AsFd {}

impl<T> FdReadable for T where T: Read + AsFd + AsRawFd {}

/// A writable file descriptor.
pub trait FdWritable: Write + AsFd + AsRawFd {}

impl<T> FdWritable for T where T: Write + AsFd + AsRawFd {}

/// Maximum number of bytes requested from a single `splice` call.
const SPLICE_SIZE: usize = 1024 * 128;
/// Size of the user-space buffer used by the `read`/`write` fallback.
const BUF_SIZE: usize = 1024 * 16;

/// Conversion from a `nix::Error` into our `Error` which implements `UError`.
impl From<nix::Error> for Error {
    fn from(error: nix::Error) -> Self {
        Self::Io(std::io::Error::from_raw_os_error(error as i32))
    }
}

/// Copy data from `Read` implementor `src` into a `Write` implementor
/// `dest`.
///
/// This function first tries the Linux-specific `splice` call, which does
/// not use any intermediate user-space buffer. When `splice` reports that it
/// cannot be used, the remainder of the stream is copied with
/// `std::io::copy` and `dest` is flushed.
///
/// # Arguments
/// * `src` - `Read` implementor to copy data from.
/// * `dest` - `Write` implementor to copy data to.
///
/// # Returns
///
/// The total number of bytes written to `dest` (as a `u64`), counting both
/// the bytes moved by `splice` and the bytes moved by the fallback copy.
///
/// # Errors
///
/// Returns an error when creating the intermediate pipe fails, when the
/// read/write rescue of an already-spliced chunk fails, or when the fallback
/// copy or the final flush fails.
pub fn copy_stream<R, S>(src: &mut R, dest: &mut S) -> UResult<u64>
where
    R: Read + AsFd + AsRawFd,
    S: Write + AsFd + AsRawFd,
{
    // Try the splice() system call first for faster writing. If it handled
    // the whole stream, we're done.
    let (spliced, needs_fallback) = splice_write(src, &dest.as_fd())?;
    if !needs_fallback {
        return Ok(spliced);
    }

    // splice() gave up part-way (or immediately): copy whatever is left the
    // slow way.
    let copied = std::io::copy(src, dest)?;

    // Data written through `dest` may sit in a user-space buffer, while data
    // pushed through splice() goes straight to the file descriptor. Flush
    // explicitly so that a later splice() on the same descriptor cannot
    // overtake the bytes buffered here.
    dest.flush()?;

    // Bytes that splice() already delivered are part of the stream too; the
    // caller is promised the number of bytes written, not just the tail.
    Ok(spliced + copied)
}

/// Write from source `handle` into destination `write_fd` using Linux-specific
/// `splice` system call.
///
/// Data travels `source` -> intermediate pipe -> `dest`.
///
/// # Arguments
/// - `source` - source handle
/// - `dest` - destination handle
///
/// # Returns
///
/// `(bytes, fallback)` where `bytes` is the number of bytes delivered to
/// `dest` so far and `fallback` is `true` when the caller must copy the rest
/// of the stream by other means.
#[inline]
pub(crate) fn splice_write<R, S>(source: &R, dest: &S) -> UResult<(u64, bool)>
where
    R: Read + AsFd + AsRawFd,
    S: AsRawFd + AsFd,
{
    let (pipe_rd, pipe_wr) = pipe()?;
    let mut bytes: u64 = 0;

    loop {
        match splice(&source, &pipe_wr, SPLICE_SIZE) {
            // End of input: everything went through splice().
            Ok(0) => return Ok((bytes, false)),
            Ok(n) => {
                if splice_exact(&pipe_rd, dest, n).is_err() {
                    // If the first splice manages to copy to the intermediate
                    // pipe, but the second splice to the destination fails for
                    // some reason we can recover by copying the data that we
                    // have from the intermediate pipe using normal read/write.
                    // Then we tell the caller to fall back.
                    copy_exact(pipe_rd.as_raw_fd(), dest, n)?;
                    // The rescued chunk did reach `dest`, so it counts.
                    bytes += n as u64;
                    return Ok((bytes, true));
                }

                bytes += n as u64;
            }
            // splice() is not usable on this pair of descriptors.
            Err(_) => return Ok((bytes, true)),
        }
    }
}

/// Move exactly `num_bytes` bytes from `read_fd` to `write_fd` using the `read`
/// and `write` calls.
#[cfg(any(target_os = "linux", target_os = "android"))] pub(crate) fn copy_exact( read_fd: RawFd, write_fd: &impl AsFd, num_bytes: usize, ) -> std::io::Result { use nix::unistd; let mut left = num_bytes; let mut buf = [0; BUF_SIZE]; let mut written = 0; while left > 0 { let read = unistd::read(read_fd, &mut buf)?; assert_ne!(read, 0, "unexpected end of pipe"); while written < read { let n = unistd::write(write_fd, &buf[written..read])?; written += n; } left -= read; } Ok(written) } uucore-0.0.30/src/lib/features/buf_copy/other.rs000064400000000000000000000017201046102023000176560ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! //! Buffer-based copying implementation for other platforms. use std::io::{Read, Write}; use crate::error::UResult; /// Copy data from `Read` implementor `source` into a `Write` implementor /// `dest`. This works by reading a chunk of data from `source` and writing the /// data to `dest` in a loop, using std::io::copy. This is implemented for /// non-Linux platforms. /// /// # Arguments /// * `source` - `Read` implementor to copy data from. /// * `dest` - `Write` implementor to copy data to. /// /// # Returns /// /// Result of operation and bytes successfully written (as a `u64`) when /// operation is successful. pub fn copy_stream(src: &mut R, dest: &mut S) -> UResult where R: Read, S: Write, { let result = std::io::copy(src, dest)?; Ok(result) } uucore-0.0.30/src/lib/features/buf_copy.rs000064400000000000000000000072251046102023000165430ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! This module provides several buffer-based copy/write functions that leverage //! 
the `splice` system call in Linux systems, thus increasing the I/O //! performance of copying between two file descriptors. This module is mostly //! used by utilities to work around the limitations of Rust's `fs::copy` which //! does not handle copying special files (e.g pipes, character/block devices). pub mod common; #[cfg(any(target_os = "linux", target_os = "android"))] pub mod linux; #[cfg(any(target_os = "linux", target_os = "android"))] pub use linux::*; #[cfg(not(any(target_os = "linux", target_os = "android")))] pub mod other; #[cfg(not(any(target_os = "linux", target_os = "android")))] pub use other::copy_stream; #[cfg(test)] mod tests { use super::*; use std::fs::File; use tempfile::tempdir; #[cfg(unix)] use { crate::pipes, std::fs::OpenOptions, std::{ io::{Seek, SeekFrom}, thread, }, }; #[cfg(any(target_os = "linux", target_os = "android"))] use std::os::fd::AsRawFd; use std::io::{Read, Write}; #[cfg(unix)] fn new_temp_file() -> File { let temp_dir = tempdir().unwrap(); OpenOptions::new() .read(true) .write(true) .create(true) .open(temp_dir.path().join("file.txt")) .unwrap() } #[cfg(any(target_os = "linux", target_os = "android"))] #[test] fn test_copy_exact() { let (mut pipe_read, mut pipe_write) = pipes::pipe().unwrap(); let data = b"Hello, world!"; let n = pipe_write.write(data).unwrap(); assert_eq!(n, data.len()); let mut buf = [0; 1024]; let n = copy_exact(pipe_read.as_raw_fd(), &pipe_write, data.len()).unwrap(); let n2 = pipe_read.read(&mut buf).unwrap(); assert_eq!(n, n2); assert_eq!(&buf[..n], data); } #[test] #[cfg(unix)] fn test_copy_stream() { let mut dest_file = new_temp_file(); let (mut pipe_read, mut pipe_write) = pipes::pipe().unwrap(); let data = b"Hello, world!"; let thread = thread::spawn(move || { pipe_write.write_all(data).unwrap(); }); let result = copy_stream(&mut pipe_read, &mut dest_file).unwrap(); thread.join().unwrap(); assert!(result == data.len() as u64); // We would have been at the end already, so seek again to the start. 
dest_file.seek(SeekFrom::Start(0)).unwrap(); let mut buf = Vec::new(); dest_file.read_to_end(&mut buf).unwrap(); assert_eq!(buf, data); } #[test] #[cfg(not(unix))] // Test for non-unix platforms. We use regular files instead. fn test_copy_stream() { let temp_dir = tempdir().unwrap(); let src_path = temp_dir.path().join("src.txt"); let dest_path = temp_dir.path().join("dest.txt"); let mut src_file = File::create(&src_path).unwrap(); let mut dest_file = File::create(&dest_path).unwrap(); let data = b"Hello, world!"; src_file.write_all(data).unwrap(); src_file.sync_all().unwrap(); let mut src_file = File::open(&src_path).unwrap(); let bytes_copied = copy_stream(&mut src_file, &mut dest_file).unwrap(); let mut dest_file = File::open(&dest_path).unwrap(); let mut buf = Vec::new(); dest_file.read_to_end(&mut buf).unwrap(); assert_eq!(bytes_copied as usize, data.len()); assert_eq!(buf, data); } } uucore-0.0.30/src/lib/features/checksum.rs000064400000000000000000001477141046102023000165470ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
// spell-checker:ignore anotherfile invalidchecksum regexes JWZG FFFD xffname prefixfilename bytelen bitlen hexdigit use data_encoding::BASE64; use os_display::Quotable; use regex::bytes::{Match, Regex}; use std::{ borrow::Cow, ffi::OsStr, fmt::Display, fs::File, io::{self, stdin, BufReader, Read, Write}, path::Path, str, sync::LazyLock, }; use crate::{ error::{FromIo, UError, UResult, USimpleError}, os_str_as_bytes, os_str_from_bytes, read_os_string_lines, show, show_error, show_warning_caps, sum::{ Blake2b, Blake3, Digest, DigestWriter, Md5, Sha1, Sha224, Sha256, Sha384, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Sha512, Shake128, Shake256, Sm3, BSD, CRC, CRC32B, SYSV, }, util_name, }; use thiserror::Error; pub const ALGORITHM_OPTIONS_SYSV: &str = "sysv"; pub const ALGORITHM_OPTIONS_BSD: &str = "bsd"; pub const ALGORITHM_OPTIONS_CRC: &str = "crc"; pub const ALGORITHM_OPTIONS_CRC32B: &str = "crc32b"; pub const ALGORITHM_OPTIONS_MD5: &str = "md5"; pub const ALGORITHM_OPTIONS_SHA1: &str = "sha1"; pub const ALGORITHM_OPTIONS_SHA3: &str = "sha3"; pub const ALGORITHM_OPTIONS_SHA224: &str = "sha224"; pub const ALGORITHM_OPTIONS_SHA256: &str = "sha256"; pub const ALGORITHM_OPTIONS_SHA384: &str = "sha384"; pub const ALGORITHM_OPTIONS_SHA512: &str = "sha512"; pub const ALGORITHM_OPTIONS_BLAKE2B: &str = "blake2b"; pub const ALGORITHM_OPTIONS_BLAKE3: &str = "blake3"; pub const ALGORITHM_OPTIONS_SM3: &str = "sm3"; pub const ALGORITHM_OPTIONS_SHAKE128: &str = "shake128"; pub const ALGORITHM_OPTIONS_SHAKE256: &str = "shake256"; pub const SUPPORTED_ALGORITHMS: [&str; 16] = [ ALGORITHM_OPTIONS_SYSV, ALGORITHM_OPTIONS_BSD, ALGORITHM_OPTIONS_CRC, ALGORITHM_OPTIONS_CRC32B, ALGORITHM_OPTIONS_MD5, ALGORITHM_OPTIONS_SHA1, ALGORITHM_OPTIONS_SHA3, ALGORITHM_OPTIONS_SHA224, ALGORITHM_OPTIONS_SHA256, ALGORITHM_OPTIONS_SHA384, ALGORITHM_OPTIONS_SHA512, ALGORITHM_OPTIONS_BLAKE2B, ALGORITHM_OPTIONS_BLAKE3, ALGORITHM_OPTIONS_SM3, ALGORITHM_OPTIONS_SHAKE128, ALGORITHM_OPTIONS_SHAKE256, ]; pub 
struct HashAlgorithm { pub name: &'static str, pub create_fn: Box Box>, pub bits: usize, } /// This structure holds the count of checksum test lines' outcomes. #[derive(Default)] struct ChecksumResult { /// Number of lines in the file where the computed checksum MATCHES /// the expectation. pub correct: u32, /// Number of lines in the file where the computed checksum DIFFERS /// from the expectation. pub failed_cksum: u32, pub failed_open_file: u32, /// Number of improperly formatted lines. pub bad_format: u32, /// Total number of non-empty, non-comment lines. pub total: u32, } impl ChecksumResult { #[inline] fn total_properly_formatted(&self) -> u32 { self.total - self.bad_format } } /// Represents a reason for which the processing of a checksum line /// could not proceed to digest comparison. enum LineCheckError { /// a generic UError was encountered in sub-functions UError(Box), /// the computed checksum digest differs from the expected one DigestMismatch, /// the line is empty or is a comment Skipped, /// the line has a formatting error ImproperlyFormatted, /// file exists but is impossible to read CantOpenFile, /// there is nothing at the given path FileNotFound, /// the given path leads to a directory FileIsDirectory, } impl From> for LineCheckError { fn from(value: Box) -> Self { Self::UError(value) } } impl From for LineCheckError { fn from(value: ChecksumError) -> Self { Self::UError(Box::new(value)) } } /// Represents an error that was encountered when processing a checksum file. enum FileCheckError { /// a generic UError was encountered in sub-functions UError(Box), /// reading of the checksum file failed CantOpenChecksumFile, /// processing of the file is considered as a failure regarding the /// provided flags. This however does not stop the processing of /// further files. 
Failed, } impl From> for FileCheckError { fn from(value: Box) -> Self { Self::UError(value) } } impl From for FileCheckError { fn from(value: ChecksumError) -> Self { Self::UError(Box::new(value)) } } #[derive(Debug, PartialEq, Eq, PartialOrd, Clone, Copy)] pub enum ChecksumVerbose { Status, Quiet, Normal, Warning, } impl ChecksumVerbose { pub fn new(status: bool, quiet: bool, warn: bool) -> Self { use ChecksumVerbose::*; // Assume only one of the three booleans will be enabled at once. // This is ensured by clap's overriding arguments. match (status, quiet, warn) { (true, _, _) => Status, (_, true, _) => Quiet, (_, _, true) => Warning, _ => Normal, } } #[inline] pub fn over_status(self) -> bool { self > Self::Status } #[inline] pub fn over_quiet(self) -> bool { self > Self::Quiet } #[inline] pub fn at_least_warning(self) -> bool { self >= Self::Warning } } impl Default for ChecksumVerbose { fn default() -> Self { Self::Normal } } /// This struct regroups CLI flags. #[derive(Debug, Default, Clone, Copy)] pub struct ChecksumOptions { pub binary: bool, pub ignore_missing: bool, pub strict: bool, pub verbose: ChecksumVerbose, } #[derive(Debug, Error)] pub enum ChecksumError { #[error("the --raw option is not supported with multiple files")] RawMultipleFiles, #[error("the --ignore-missing option is meaningful only when verifying checksums")] IgnoreNotCheck, #[error("the --strict option is meaningful only when verifying checksums")] StrictNotCheck, #[error("the --quiet option is meaningful only when verifying checksums")] QuietNotCheck, #[error("Invalid output size for SHA3 (expected 224, 256, 384, or 512)")] InvalidOutputSizeForSha3, #[error("--bits required for SHA3")] BitsRequiredForSha3, #[error("--bits required for SHAKE128")] BitsRequiredForShake128, #[error("--bits required for SHAKE256")] BitsRequiredForShake256, #[error("unknown algorithm: clap should have prevented this case")] UnknownAlgorithm, #[error("length is not a multiple of 8")] InvalidLength, 
#[error("--length is only supported with --algorithm=blake2b")] LengthOnlyForBlake2b, #[error("the --binary and --text options are meaningless when verifying checksums")] BinaryTextConflict, #[error("--check is not supported with --algorithm={{bsd,sysv,crc,crc32b}}")] AlgorithmNotSupportedWithCheck, #[error("You cannot combine multiple hash algorithms!")] CombineMultipleAlgorithms, #[error("Needs an algorithm to hash with.\nUse --help for more information.")] NeedAlgorithmToHash, } impl UError for ChecksumError { fn code(&self) -> i32 { 1 } } /// Creates a SHA3 hasher instance based on the specified bits argument. /// /// # Returns /// /// Returns a UResult of a tuple containing the algorithm name, the hasher instance, and /// the output length in bits or an Err if an unsupported output size is provided, or if /// the `--bits` flag is missing. pub fn create_sha3(bits: Option) -> UResult { match bits { Some(224) => Ok(HashAlgorithm { name: "SHA3_224", create_fn: Box::new(|| Box::new(Sha3_224::new())), bits: 224, }), Some(256) => Ok(HashAlgorithm { name: "SHA3_256", create_fn: Box::new(|| Box::new(Sha3_256::new())), bits: 256, }), Some(384) => Ok(HashAlgorithm { name: "SHA3_384", create_fn: Box::new(|| Box::new(Sha3_384::new())), bits: 384, }), Some(512) => Ok(HashAlgorithm { name: "SHA3_512", create_fn: Box::new(|| Box::new(Sha3_512::new())), bits: 512, }), Some(_) => Err(ChecksumError::InvalidOutputSizeForSha3.into()), None => Err(ChecksumError::BitsRequiredForSha3.into()), } } #[allow(clippy::comparison_chain)] fn print_cksum_report(res: &ChecksumResult) { if res.bad_format == 1 { show_warning_caps!("{} line is improperly formatted", res.bad_format); } else if res.bad_format > 1 { show_warning_caps!("{} lines are improperly formatted", res.bad_format); } if res.failed_cksum == 1 { show_warning_caps!("{} computed checksum did NOT match", res.failed_cksum); } else if res.failed_cksum > 1 { show_warning_caps!("{} computed checksums did NOT match", res.failed_cksum); 
} if res.failed_open_file == 1 { show_warning_caps!("{} listed file could not be read", res.failed_open_file); } else if res.failed_open_file > 1 { show_warning_caps!("{} listed files could not be read", res.failed_open_file); } } /// Print a "no properly formatted lines" message in stderr #[inline] fn log_no_properly_formatted(filename: String) { show_error!("{filename}: no properly formatted checksum lines found"); } /// Represents the different outcomes that can happen to a file /// that is being checked. #[derive(Debug, Clone, Copy)] enum FileChecksumResult { Ok, Failed, CantOpen, } impl FileChecksumResult { /// Creates a `FileChecksumResult` from a digest comparison that /// either succeeded or failed. fn from_bool(checksum_correct: bool) -> Self { if checksum_correct { FileChecksumResult::Ok } else { FileChecksumResult::Failed } } /// The cli options might prevent to display on the outcome of the /// comparison on STDOUT. fn can_display(&self, verbose: ChecksumVerbose) -> bool { match self { FileChecksumResult::Ok => verbose.over_quiet(), FileChecksumResult::Failed => verbose.over_status(), FileChecksumResult::CantOpen => true, } } } impl Display for FileChecksumResult { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { FileChecksumResult::Ok => write!(f, "OK"), FileChecksumResult::Failed => write!(f, "FAILED"), FileChecksumResult::CantOpen => write!(f, "FAILED open or read"), } } } /// Print to the given buffer the checksum validation status of a file which /// name might contain non-utf-8 characters. 
fn print_file_report( mut w: W, filename: &[u8], result: FileChecksumResult, prefix: &str, verbose: ChecksumVerbose, ) { if result.can_display(verbose) { let _ = write!(w, "{prefix}"); let _ = w.write_all(filename); let _ = writeln!(w, ": {result}"); } } pub fn detect_algo(algo: &str, length: Option) -> UResult { match algo { ALGORITHM_OPTIONS_SYSV => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SYSV, create_fn: Box::new(|| Box::new(SYSV::new())), bits: 512, }), ALGORITHM_OPTIONS_BSD => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_BSD, create_fn: Box::new(|| Box::new(BSD::new())), bits: 1024, }), ALGORITHM_OPTIONS_CRC => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_CRC, create_fn: Box::new(|| Box::new(CRC::new())), bits: 256, }), ALGORITHM_OPTIONS_CRC32B => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_CRC32B, create_fn: Box::new(|| Box::new(CRC32B::new())), bits: 32, }), ALGORITHM_OPTIONS_MD5 | "md5sum" => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_MD5, create_fn: Box::new(|| Box::new(Md5::new())), bits: 128, }), ALGORITHM_OPTIONS_SHA1 | "sha1sum" => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SHA1, create_fn: Box::new(|| Box::new(Sha1::new())), bits: 160, }), ALGORITHM_OPTIONS_SHA224 | "sha224sum" => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SHA224, create_fn: Box::new(|| Box::new(Sha224::new())), bits: 224, }), ALGORITHM_OPTIONS_SHA256 | "sha256sum" => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SHA256, create_fn: Box::new(|| Box::new(Sha256::new())), bits: 256, }), ALGORITHM_OPTIONS_SHA384 | "sha384sum" => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SHA384, create_fn: Box::new(|| Box::new(Sha384::new())), bits: 384, }), ALGORITHM_OPTIONS_SHA512 | "sha512sum" => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SHA512, create_fn: Box::new(|| Box::new(Sha512::new())), bits: 512, }), ALGORITHM_OPTIONS_BLAKE2B | "b2sum" => { // Set default length to 512 if None let bits = length.unwrap_or(512); if bits == 512 { Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_BLAKE2B, create_fn: 
Box::new(move || Box::new(Blake2b::new())), bits: 512, }) } else { Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_BLAKE2B, create_fn: Box::new(move || Box::new(Blake2b::with_output_bytes(bits))), bits, }) } } ALGORITHM_OPTIONS_BLAKE3 | "b3sum" => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_BLAKE3, create_fn: Box::new(|| Box::new(Blake3::new())), bits: 256, }), ALGORITHM_OPTIONS_SM3 => Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SM3, create_fn: Box::new(|| Box::new(Sm3::new())), bits: 512, }), ALGORITHM_OPTIONS_SHAKE128 | "shake128sum" => { let bits = length.ok_or_else(|| USimpleError::new(1, "--bits required for SHAKE128"))?; Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SHAKE128, create_fn: Box::new(|| Box::new(Shake128::new())), bits, }) } ALGORITHM_OPTIONS_SHAKE256 | "shake256sum" => { let bits = length.ok_or_else(|| USimpleError::new(1, "--bits required for SHAKE256"))?; Ok(HashAlgorithm { name: ALGORITHM_OPTIONS_SHAKE256, create_fn: Box::new(|| Box::new(Shake256::new())), bits, }) } //ALGORITHM_OPTIONS_SHA3 | "sha3" => ( _ if algo.starts_with("sha3") => create_sha3(length), _ => Err(ChecksumError::UnknownAlgorithm.into()), } } // Regexp to handle the three input formats: // 1. [-] () = // algo must be uppercase or b (for blake2b) // 2. [* ] // 3. 
[*] (only one space) const ALGO_BASED_REGEX: &str = r"^\s*\\?(?P(?:[A-Z0-9]+|BLAKE2b))(?:-(?P\d+))?\s?\((?P(?-u:.*))\)\s*=\s*(?P[A-Za-z0-9+/]+={0,2})$"; const DOUBLE_SPACE_REGEX: &str = r"^(?P[a-fA-F0-9]+)\s{2}(?P(?-u:.*))$"; // In this case, we ignore the * const SINGLE_SPACE_REGEX: &str = r"^(?P[a-fA-F0-9]+)\s(?P\*?(?-u:.*))$"; static R_ALGO_BASED: LazyLock = LazyLock::new(|| Regex::new(ALGO_BASED_REGEX).unwrap()); static R_DOUBLE_SPACE: LazyLock = LazyLock::new(|| Regex::new(DOUBLE_SPACE_REGEX).unwrap()); static R_SINGLE_SPACE: LazyLock = LazyLock::new(|| Regex::new(SINGLE_SPACE_REGEX).unwrap()); #[derive(Debug, PartialEq, Eq, Clone, Copy)] enum LineFormat { AlgoBased, SingleSpace, DoubleSpace, } impl LineFormat { fn to_regex(self) -> &'static Regex { match self { LineFormat::AlgoBased => &R_ALGO_BASED, LineFormat::SingleSpace => &R_SINGLE_SPACE, LineFormat::DoubleSpace => &R_DOUBLE_SPACE, } } } /// Hold the data extracted from a checksum line. struct LineInfo { algo_name: Option, algo_bit_len: Option, checksum: String, filename: Vec, format: LineFormat, } impl LineInfo { /// Returns a `LineInfo` parsed from a checksum line. /// The function will run 3 regexes against the line and select the first one that matches /// to populate the fields of the struct. /// However, there is a catch to handle regarding the handling of `cached_regex`. /// In case of non-algo-based regex, if `cached_regex` is Some, it must take the priority /// over the detected regex. Otherwise, we must set it the the detected regex. /// This specific behavior is emphasized by the test /// `test_hashsum::test_check_md5sum_only_one_space`. 
fn parse(s: impl AsRef, cached_regex: &mut Option) -> Option { let regexes: &[(&'static Regex, LineFormat)] = &[ (&R_ALGO_BASED, LineFormat::AlgoBased), (&R_DOUBLE_SPACE, LineFormat::DoubleSpace), (&R_SINGLE_SPACE, LineFormat::SingleSpace), ]; let line_bytes = os_str_as_bytes(s.as_ref()).expect("UTF-8 decoding failed"); for (regex, format) in regexes { if !regex.is_match(line_bytes) { continue; } let mut r = *regex; if *format != LineFormat::AlgoBased { // The cached regex ensures that when processing non-algo based regexes, // it cannot be changed (can't have single and double space regexes // used in the same file). if cached_regex.is_some() { r = cached_regex.unwrap().to_regex(); } else { *cached_regex = Some(*format); } } if let Some(caps) = r.captures(line_bytes) { // These unwraps are safe thanks to the regex let match_to_string = |m: Match| String::from_utf8(m.as_bytes().into()).unwrap(); return Some(Self { algo_name: caps.name("algo").map(match_to_string), algo_bit_len: caps .name("bits") .map(|m| match_to_string(m).parse::().unwrap()), checksum: caps.name("checksum").map(match_to_string).unwrap(), filename: caps.name("filename").map(|m| m.as_bytes().into()).unwrap(), format: *format, }); } } None } } fn get_filename_for_output(filename: &OsStr, input_is_stdin: bool) -> String { if input_is_stdin { "standard input" } else { filename.to_str().unwrap() } .maybe_quote() .to_string() } /// Extract the expected digest from the checksum string fn get_expected_digest_as_hex_string( line_info: &LineInfo, len_hint: Option, ) -> Option> { let ck = &line_info.checksum; let against_hint = |len| len_hint.is_none_or(|l| l == len); if ck.len() % 2 != 0 { // If the length of the digest is not a multiple of 2, then it // must be improperly formatted (1 hex digit is 2 characters) return None; } // If the digest can be decoded as hexadecimal AND its length matches the // one expected (in case it's given), just go with it. 
if ck.as_bytes().iter().all(u8::is_ascii_hexdigit) && against_hint(ck.len()) { return Some(Cow::Borrowed(ck)); } // If hexadecimal digest fails for any reason, interpret the digest as base 64. BASE64 .decode(ck.as_bytes()) // Decode the string as encoded base64 .map(hex::encode) // Encode it back as hexadecimal .map(Cow::::Owned) .ok() .and_then(|s| { // Check the digest length if against_hint(s.len()) { Some(s) } else { None } }) } /// Returns a reader that reads from the specified file, or from stdin if `filename_to_check` is "-". fn get_file_to_check( filename: &OsStr, opts: ChecksumOptions, ) -> Result, LineCheckError> { let filename_bytes = os_str_as_bytes(filename).expect("UTF-8 error"); let filename_lossy = String::from_utf8_lossy(filename_bytes); if filename == "-" { Ok(Box::new(stdin())) // Use stdin if "-" is specified in the checksum file } else { let failed_open = || { print_file_report( std::io::stdout(), filename_bytes, FileChecksumResult::CantOpen, "", opts.verbose, ); }; match File::open(filename) { Ok(f) => { if f.metadata() .map_err(|_| LineCheckError::CantOpenFile)? 
.is_dir() { show!(USimpleError::new( 1, format!("{filename_lossy}: Is a directory") )); // also regarded as a failed open failed_open(); Err(LineCheckError::FileIsDirectory) } else { Ok(Box::new(f)) } } Err(err) => { if !opts.ignore_missing { // yes, we have both stderr and stdout here show!(err.map_err_context(|| filename_lossy.to_string())); failed_open(); } // we could not open the file but we want to continue Err(LineCheckError::FileNotFound) } } } } /// Returns a reader to the list of checksums fn get_input_file(filename: &OsStr) -> UResult> { match File::open(filename) { Ok(f) => { if f.metadata()?.is_dir() { Err(io::Error::new( io::ErrorKind::Other, format!("{}: Is a directory", filename.to_string_lossy()), ) .into()) } else { Ok(Box::new(f)) } } Err(_) => Err(io::Error::new( io::ErrorKind::Other, format!("{}: No such file or directory", filename.to_string_lossy()), ) .into()), } } /// Gets the algorithm name and length from the `LineInfo` if the algo-based format is matched. fn identify_algo_name_and_length( line_info: &LineInfo, algo_name_input: Option<&str>, last_algo: &mut Option, ) -> Result<(String, Option), LineCheckError> { let algo_from_line = line_info.algo_name.clone().unwrap_or_default(); let algorithm = algo_from_line.to_lowercase(); *last_algo = Some(algo_from_line); // check if we are called with XXXsum (example: md5sum) but we detected a different algo parsing the file // (for example SHA1 (f) = d...) 
// Also handle the case cksum -s sm3 but the file contains other formats if algo_name_input.is_some() && algo_name_input != Some(&algorithm) { return Err(LineCheckError::ImproperlyFormatted); } if !SUPPORTED_ALGORITHMS.contains(&algorithm.as_str()) { // Not supported algo, leave early return Err(LineCheckError::ImproperlyFormatted); } let bytes = if let Some(bitlen) = line_info.algo_bit_len { if algorithm != ALGORITHM_OPTIONS_BLAKE2B || bitlen % 8 != 0 { // Either // the algo based line is provided with a bit length // with an algorithm that does not support it (only Blake2B does). // // eg: MD5-128 (foo.txt) = fffffffff // ^ This is illegal // OR // the given length is wrong because it's not a multiple of 8. return Err(LineCheckError::ImproperlyFormatted); } Some(bitlen / 8) } else if algorithm == ALGORITHM_OPTIONS_BLAKE2B { // Default length with BLAKE2b, Some(64) } else { None }; Ok((algorithm, bytes)) } /// Given a filename and an algorithm, compute the digest and compare it with /// the expected one. 
fn compute_and_check_digest_from_file( filename: &[u8], expected_checksum: &str, mut algo: HashAlgorithm, opts: ChecksumOptions, ) -> Result<(), LineCheckError> { let (filename_to_check_unescaped, prefix) = unescape_filename(filename); let real_filename_to_check = os_str_from_bytes(&filename_to_check_unescaped)?; // Open the input file let file_to_check = get_file_to_check(&real_filename_to_check, opts)?; let mut file_reader = BufReader::new(file_to_check); // Read the file and calculate the checksum let create_fn = &mut algo.create_fn; let mut digest = create_fn(); let (calculated_checksum, _) = digest_reader(&mut digest, &mut file_reader, opts.binary, algo.bits).unwrap(); // Do the checksum validation let checksum_correct = expected_checksum == calculated_checksum; print_file_report( std::io::stdout(), filename, FileChecksumResult::from_bool(checksum_correct), prefix, opts.verbose, ); if checksum_correct { Ok(()) } else { Err(LineCheckError::DigestMismatch) } } /// Check a digest checksum with non-algo based pre-treatment. fn process_algo_based_line( line_info: &LineInfo, cli_algo_name: Option<&str>, opts: ChecksumOptions, last_algo: &mut Option, ) -> Result<(), LineCheckError> { let filename_to_check = line_info.filename.as_slice(); let (algo_name, algo_byte_len) = identify_algo_name_and_length(line_info, cli_algo_name, last_algo)?; // If the digest bitlen is known, we can check the format of the expected // checksum with it. let digest_char_length_hint = match (algo_name.as_str(), algo_byte_len) { (ALGORITHM_OPTIONS_BLAKE2B, Some(bytelen)) => Some(bytelen * 2), _ => None, }; let expected_checksum = get_expected_digest_as_hex_string(line_info, digest_char_length_hint) .ok_or(LineCheckError::ImproperlyFormatted)?; let algo = detect_algo(&algo_name, algo_byte_len)?; compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) } /// Check a digest checksum with non-algo based pre-treatment. 
///
/// The algorithm comes from the command line (`cli_algo_name`), because the
/// line only holds a digest and a file name.
fn process_non_algo_based_line(
    line_number: usize,
    line_info: &LineInfo,
    cli_algo_name: &str,
    cli_algo_length: Option<usize>,
    opts: ChecksumOptions,
) -> Result<(), LineCheckError> {
    let mut filename_to_check = line_info.filename.as_slice();
    if filename_to_check.starts_with(b"*")
        && line_number == 0
        && line_info.format == LineFormat::SingleSpace
    {
        // Remove the leading asterisk if present - only for the first line
        filename_to_check = &filename_to_check[1..];
    }
    let expected_checksum = get_expected_digest_as_hex_string(line_info, None)
        .ok_or(LineCheckError::ImproperlyFormatted)?;

    // When a specific algorithm name is input, use it and use the provided bits
    // except when dealing with blake2b, where we will detect the length
    let (algo_name, algo_byte_len) = if cli_algo_name == ALGORITHM_OPTIONS_BLAKE2B {
        // division by 2 converts the length of the Blake2b checksum from hexadecimal
        // characters to bytes, as each byte is represented by two hexadecimal characters.
        let length = Some(expected_checksum.len() / 2);
        (ALGORITHM_OPTIONS_BLAKE2B.to_string(), length)
    } else {
        (cli_algo_name.to_lowercase(), cli_algo_length)
    };

    let algo = detect_algo(&algo_name, algo_byte_len)?;

    compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts)
}

/// Parses a checksum line, detect the algorithm to use, read the file and produce
/// its digest, and compare it to the expected value.
///
/// Returns `Ok(())` if the comparison happened and the digest matched the
/// expected one.
/// In every other case a `LineCheckError` tells why: the digest differed,
/// the line was skipped or improperly formatted, or the file was unusable.
fn process_checksum_line(
    line: &OsStr,
    i: usize,
    cli_algo_name: Option<&str>,
    cli_algo_length: Option<usize>,
    opts: ChecksumOptions,
    cached_regex: &mut Option<LineFormat>,
    last_algo: &mut Option<String>,
) -> Result<(), LineCheckError> {
    let line_bytes = os_str_as_bytes(line)?;

    // Early return on empty or commented lines.
    if line.is_empty() || line_bytes.starts_with(b"#") {
        return Err(LineCheckError::Skipped);
    }

    // Use `LineInfo` to extract the data of a line.
    // Then, depending on its format, apply a different pre-treatment.
    let Some(line_info) = LineInfo::parse(line, cached_regex) else {
        return Err(LineCheckError::ImproperlyFormatted);
    };

    if line_info.format == LineFormat::AlgoBased {
        process_algo_based_line(&line_info, cli_algo_name, opts, last_algo)
    } else if let Some(cli_algo) = cli_algo_name {
        // If we match a non-algo based regex, we expect a cli argument
        // to give us the algorithm to use
        process_non_algo_based_line(i, &line_info, cli_algo, cli_algo_length, opts)
    } else {
        // We have no clue of what algorithm to use
        Err(LineCheckError::ImproperlyFormatted)
    }
}

/// Verifies every line of one checksum file and prints the per-file summary.
///
/// Returns `Ok(())` when the file counts as verified under `opts`,
/// `FileCheckError::CantOpenChecksumFile` when the checksum file itself
/// cannot be opened, `FileCheckError::Failed` when verification failed, and
/// `FileCheckError::UError` when a sub-function raised a generic error.
fn process_checksum_file(
    filename_input: &OsStr,
    cli_algo_name: Option<&str>,
    cli_algo_length: Option<usize>,
    opts: ChecksumOptions,
) -> Result<(), FileCheckError> {
    let mut res = ChecksumResult::default();

    let input_is_stdin = filename_input == OsStr::new("-");

    let file: Box<dyn Read> = if input_is_stdin {
        // Use stdin if "-" is specified
        Box::new(stdin())
    } else {
        match get_input_file(filename_input) {
            Ok(f) => f,
            Err(e) => {
                // Could not read the file, show the error and continue to the next file
                show_error!("{e}");
                return Err(FileCheckError::CantOpenChecksumFile);
            }
        }
    };

    let reader = BufReader::new(file);
    // NOTE(review): the whole list is read before any line is checked;
    // presumably to finish with the input before listed files are opened —
    // confirm before turning this into a streaming loop.
    let lines = read_os_string_lines(reader).collect::<Vec<_>>();

    // cached_regex is used to ensure that several non algo-based checksum line
    // will use the same regex.
    let mut cached_regex = None;
    // last_algo caches the algorithm used in the last line to print a warning
    // message for the current line if improperly formatted.
    // Behavior tested in gnu_cksum_c::test_warn
    let mut last_algo = None;

    for (i, line) in lines.iter().enumerate() {
        let line_result = process_checksum_line(
            line,
            i,
            cli_algo_name,
            cli_algo_length,
            opts,
            &mut cached_regex,
            &mut last_algo,
        );

        // Critical errors abort the file, skipped lines are not counted, and
        // every other outcome updates its own counter.
        match line_result {
            Err(LineCheckError::UError(e)) => return Err(e.into()),
            Err(LineCheckError::Skipped) => continue,
            Ok(()) => res.correct += 1,
            Err(LineCheckError::DigestMismatch) => res.failed_cksum += 1,
            Err(LineCheckError::ImproperlyFormatted) => {
                res.bad_format += 1;

                if opts.verbose.at_least_warning() {
                    let algo = if let Some(algo_name_input) = cli_algo_name {
                        Cow::Owned(algo_name_input.to_uppercase())
                    } else if let Some(algo) = &last_algo {
                        Cow::Borrowed(algo.as_str())
                    } else {
                        Cow::Borrowed("Unknown algorithm")
                    };
                    eprintln!(
                        "{}: {}: {}: improperly formatted {} checksum line",
                        util_name(),
                        &filename_input.maybe_quote(),
                        i + 1,
                        algo
                    );
                }
            }
            Err(LineCheckError::CantOpenFile | LineCheckError::FileIsDirectory) => {
                res.failed_open_file += 1;
            }
            Err(LineCheckError::FileNotFound) => {
                if !opts.ignore_missing {
                    res.failed_open_file += 1;
                }
            }
        }

        // Reached for every line that was neither critical nor skipped.
        res.total += 1;
    }

    // not a single line correctly formatted found
    // return an error
    if res.total_properly_formatted() == 0 {
        if opts.verbose.over_status() {
            log_no_properly_formatted(get_filename_for_output(filename_input, input_is_stdin));
        }
        return Err(FileCheckError::Failed);
    }

    // if any incorrectly formatted line, show it
    if opts.verbose.over_status() {
        print_cksum_report(&res);
    }

    if opts.ignore_missing && res.correct == 0 {
        // we have only bad format
        // and we had ignore-missing
        if opts.verbose.over_status() {
            eprintln!(
                "{}: {}: no file was verified",
                util_name(),
                filename_input.maybe_quote(),
            );
        }
        return Err(FileCheckError::Failed);
    }

    // strict means that we should have an exit code.
    if opts.strict && res.bad_format > 0 {
        return Err(FileCheckError::Failed);
    }

    // If a file was missing, return an error unless we explicitly ignore it.
    if res.failed_open_file > 0 && !opts.ignore_missing {
        return Err(FileCheckError::Failed);
    }

    // Obviously, if a checksum failed at some point, report the error.
    if res.failed_cksum > 0 {
        return Err(FileCheckError::Failed);
    }

    Ok(())
}

/// Do the checksum validation (can be strict or not)
///
/// Every checksum file in `files` is processed, even after one of them
/// failed; only a generic `UError` from a sub-function stops the run early.
///
/// # Errors
///
/// Returns an error with exit code 1 and an empty message when at least one
/// checksum file failed verification or could not be opened.
pub fn perform_checksum_validation<'a, I>(
    files: I,
    algo_name_input: Option<&str>,
    length_input: Option<usize>,
    opts: ChecksumOptions,
) -> UResult<()>
where
    I: Iterator<Item = &'a OsStr>,
{
    let mut failed = false;

    // if cksum has several input files, it will print the result for each file
    for filename_input in files {
        match process_checksum_file(filename_input, algo_name_input, length_input, opts) {
            Err(FileCheckError::UError(e)) => return Err(e),
            Err(FileCheckError::Failed | FileCheckError::CantOpenChecksumFile) => failed = true,
            Ok(()) => (),
        }
    }

    if failed {
        Err(USimpleError::new(1, ""))
    } else {
        Ok(())
    }
}

pub fn digest_reader<T: Read>(
    digest: &mut Box<dyn Digest>,
    reader: &mut BufReader<T>,
    binary: bool,
    output_bits: usize,
) -> io::Result<(String, usize)> {
    digest.reset();

    // Read bytes from `reader` and write those bytes to `digest`.
    //
    // If `binary` is `false` and the operating system is Windows, then
    // `DigestWriter` replaces "\r\n" with "\n" before it writes the
    // bytes into `digest`. Otherwise, it just inserts the bytes as-is.
    //
    // In order to support replacing "\r\n", we must call `finalize()`
    // in order to support the possibility that the last character read
    // from the reader was "\r". (This character gets buffered by
    // `DigestWriter` and only written if the following character is
    // "\n". But when "\r" is the last character read, we need to force
    // it to be written.)
    let mut digest_writer = DigestWriter::new(digest, binary);
    let output_size = std::io::copy(reader, &mut digest_writer)?
as usize; digest_writer.finalize(); if digest.output_bits() > 0 { Ok((digest.result_str(), output_size)) } else { // Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016) let mut bytes = vec![0; output_bits.div_ceil(8)]; digest.hash_finalize(&mut bytes); Ok((hex::encode(bytes), output_size)) } } /// Calculates the length of the digest. pub fn calculate_blake2b_length(length: usize) -> UResult> { match length { 0 => Ok(None), n if n % 8 != 0 => { show_error!("invalid length: \u{2018}{length}\u{2019}"); Err(io::Error::new(io::ErrorKind::InvalidInput, "length is not a multiple of 8").into()) } n if n > 512 => { show_error!("invalid length: \u{2018}{length}\u{2019}"); Err(io::Error::new( io::ErrorKind::InvalidInput, "maximum digest length for \u{2018}BLAKE2b\u{2019} is 512 bits", ) .into()) } n => { // Divide by 8, as our blake2b implementation expects bytes instead of bits. if n == 512 { // When length is 512, it is blake2b's default. // So, don't show it Ok(None) } else { Ok(Some(n / 8)) } } } } pub fn unescape_filename(filename: &[u8]) -> (Vec, &'static str) { let mut unescaped = Vec::with_capacity(filename.len()); let mut byte_iter = filename.iter().peekable(); loop { let Some(byte) = byte_iter.next() else { break; }; if *byte == b'\\' { match byte_iter.next() { Some(b'\\') => unescaped.push(b'\\'), Some(b'n') => unescaped.push(b'\n'), Some(b'r') => unescaped.push(b'\r'), Some(x) => { unescaped.push(b'\\'); unescaped.push(*x); } _ => {} } } else { unescaped.push(*byte); } } let prefix = if unescaped == filename { "" } else { "\\" }; (unescaped, prefix) } pub fn escape_filename(filename: &Path) -> (String, &'static str) { let original = filename.as_os_str().to_string_lossy(); let escaped = original .replace('\\', "\\\\") .replace('\n', "\\n") .replace('\r', "\\r"); let prefix = if escaped == original { "" } else { "\\" }; (escaped, prefix) } #[cfg(test)] mod tests { use super::*; use std::ffi::OsString; #[test] fn test_unescape_filename() { let 
(unescaped, prefix) = unescape_filename(b"test\\nfile.txt"); assert_eq!(unescaped, b"test\nfile.txt"); assert_eq!(prefix, "\\"); let (unescaped, prefix) = unescape_filename(b"test\\nfile.txt"); assert_eq!(unescaped, b"test\nfile.txt"); assert_eq!(prefix, "\\"); let (unescaped, prefix) = unescape_filename(b"test\\rfile.txt"); assert_eq!(unescaped, b"test\rfile.txt"); assert_eq!(prefix, "\\"); let (unescaped, prefix) = unescape_filename(b"test\\\\file.txt"); assert_eq!(unescaped, b"test\\file.txt"); assert_eq!(prefix, "\\"); } #[test] fn test_escape_filename() { let (escaped, prefix) = escape_filename(Path::new("testfile.txt")); assert_eq!(escaped, "testfile.txt"); assert_eq!(prefix, ""); let (escaped, prefix) = escape_filename(Path::new("test\nfile.txt")); assert_eq!(escaped, "test\\nfile.txt"); assert_eq!(prefix, "\\"); let (escaped, prefix) = escape_filename(Path::new("test\rfile.txt")); assert_eq!(escaped, "test\\rfile.txt"); assert_eq!(prefix, "\\"); let (escaped, prefix) = escape_filename(Path::new("test\\file.txt")); assert_eq!(escaped, "test\\\\file.txt"); assert_eq!(prefix, "\\"); } #[test] fn test_calculate_blake2b_length() { assert_eq!(calculate_blake2b_length(0).unwrap(), None); assert!(calculate_blake2b_length(10).is_err()); assert!(calculate_blake2b_length(520).is_err()); assert_eq!(calculate_blake2b_length(512).unwrap(), None); assert_eq!(calculate_blake2b_length(256).unwrap(), Some(32)); } #[test] fn test_detect_algo() { assert_eq!( detect_algo(ALGORITHM_OPTIONS_SYSV, None).unwrap().name, ALGORITHM_OPTIONS_SYSV ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_BSD, None).unwrap().name, ALGORITHM_OPTIONS_BSD ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_CRC, None).unwrap().name, ALGORITHM_OPTIONS_CRC ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_MD5, None).unwrap().name, ALGORITHM_OPTIONS_MD5 ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_SHA1, None).unwrap().name, ALGORITHM_OPTIONS_SHA1 ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_SHA224, None).unwrap().name, 
ALGORITHM_OPTIONS_SHA224 ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_SHA256, None).unwrap().name, ALGORITHM_OPTIONS_SHA256 ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_SHA384, None).unwrap().name, ALGORITHM_OPTIONS_SHA384 ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_SHA512, None).unwrap().name, ALGORITHM_OPTIONS_SHA512 ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_BLAKE2B, None).unwrap().name, ALGORITHM_OPTIONS_BLAKE2B ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_BLAKE3, None).unwrap().name, ALGORITHM_OPTIONS_BLAKE3 ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_SM3, None).unwrap().name, ALGORITHM_OPTIONS_SM3 ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_SHAKE128, Some(128)) .unwrap() .name, ALGORITHM_OPTIONS_SHAKE128 ); assert_eq!( detect_algo(ALGORITHM_OPTIONS_SHAKE256, Some(256)) .unwrap() .name, ALGORITHM_OPTIONS_SHAKE256 ); assert_eq!(detect_algo("sha3_224", Some(224)).unwrap().name, "SHA3_224"); assert_eq!(detect_algo("sha3_256", Some(256)).unwrap().name, "SHA3_256"); assert_eq!(detect_algo("sha3_384", Some(384)).unwrap().name, "SHA3_384"); assert_eq!(detect_algo("sha3_512", Some(512)).unwrap().name, "SHA3_512"); assert!(detect_algo("sha3_512", None).is_err()); } #[test] fn test_algo_based_regex() { let algo_based_regex = Regex::new(ALGO_BASED_REGEX).unwrap(); #[allow(clippy::type_complexity)] let test_cases: &[(&[u8], Option<(&[u8], Option<&[u8]>, &[u8], &[u8])>)] = &[ (b"SHA256 (example.txt) = d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2", Some((b"SHA256", None, b"example.txt", b"d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2"))), // cspell:disable-next-line (b"BLAKE2b-512 (file) = abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef", Some((b"BLAKE2b", Some(b"512"), b"file", 
b"abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef"))), (b" MD5 (test) = 9e107d9d372bb6826bd81d3542a419d6", Some((b"MD5", None, b"test", b"9e107d9d372bb6826bd81d3542a419d6"))), (b"SHA-1 (anotherfile) = a9993e364706816aba3e25717850c26c9cd0d89d", Some((b"SHA", Some(b"1"), b"anotherfile", b"a9993e364706816aba3e25717850c26c9cd0d89d"))), ]; for (input, expected) in test_cases { let captures = algo_based_regex.captures(input); match expected { Some((algo, bits, filename, checksum)) => { assert!(captures.is_some()); let captures = captures.unwrap(); assert_eq!(&captures.name("algo").unwrap().as_bytes(), algo); assert_eq!(&captures.name("bits").map(|m| m.as_bytes()), bits); assert_eq!(&captures.name("filename").unwrap().as_bytes(), filename); assert_eq!(&captures.name("checksum").unwrap().as_bytes(), checksum); } None => { assert!(captures.is_none()); } } } } #[test] fn test_double_space_regex() { let double_space_regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap(); #[allow(clippy::type_complexity)] let test_cases: &[(&[u8], Option<(&[u8], &[u8])>)] = &[ ( b"60b725f10c9c85c70d97880dfe8191b3 a", Some((b"60b725f10c9c85c70d97880dfe8191b3", b"a")), ), ( b"bf35d7536c785cf06730d5a40301eba2 b", Some((b"bf35d7536c785cf06730d5a40301eba2", b" b")), ), ( b"f5b61709718c1ecf8db1aea8547d4698 *c", Some((b"f5b61709718c1ecf8db1aea8547d4698", b"*c")), ), ( b"b064a020db8018f18ff5ae367d01b212 dd", Some((b"b064a020db8018f18ff5ae367d01b212", b"dd")), ), ( b"b064a020db8018f18ff5ae367d01b212 ", Some((b"b064a020db8018f18ff5ae367d01b212", b" ")), ), (b"invalidchecksum test", None), ]; for (input, expected) in test_cases { let captures = double_space_regex.captures(input); match expected { Some((checksum, filename)) => { assert!(captures.is_some()); let captures = captures.unwrap(); assert_eq!(&captures.name("checksum").unwrap().as_bytes(), 
checksum); assert_eq!(&captures.name("filename").unwrap().as_bytes(), filename); } None => { assert!(captures.is_none()); } } } } #[test] fn test_single_space_regex() { let single_space_regex = Regex::new(SINGLE_SPACE_REGEX).unwrap(); #[allow(clippy::type_complexity)] let test_cases: &[(&[u8], Option<(&[u8], &[u8])>)] = &[ ( b"60b725f10c9c85c70d97880dfe8191b3 a", Some((b"60b725f10c9c85c70d97880dfe8191b3", b"a")), ), ( b"bf35d7536c785cf06730d5a40301eba2 b", Some((b"bf35d7536c785cf06730d5a40301eba2", b"b")), ), ( b"f5b61709718c1ecf8db1aea8547d4698 *c", Some((b"f5b61709718c1ecf8db1aea8547d4698", b"*c")), ), ( b"b064a020db8018f18ff5ae367d01b212 dd", Some((b"b064a020db8018f18ff5ae367d01b212", b"dd")), ), (b"invalidchecksum test", None), ]; for (input, expected) in test_cases { let captures = single_space_regex.captures(input); match expected { Some((checksum, filename)) => { assert!(captures.is_some()); let captures = captures.unwrap(); assert_eq!(&captures.name("checksum").unwrap().as_bytes(), checksum); assert_eq!(&captures.name("filename").unwrap().as_bytes(), filename); } None => { assert!(captures.is_none()); } } } } #[test] fn test_line_info() { let mut cached_regex = None; // Test algo-based regex let line_algo_based = OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"); let line_info = LineInfo::parse(&line_algo_based, &mut cached_regex).unwrap(); assert_eq!(line_info.algo_name.as_deref(), Some("MD5")); assert!(line_info.algo_bit_len.is_none()); assert_eq!(line_info.filename, b"example.txt"); assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e"); assert_eq!(line_info.format, LineFormat::AlgoBased); assert!(cached_regex.is_none()); // Test double-space regex let line_double_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt"); let line_info = LineInfo::parse(&line_double_space, &mut cached_regex).unwrap(); assert!(line_info.algo_name.is_none()); assert!(line_info.algo_bit_len.is_none()); 
assert_eq!(line_info.filename, b"example.txt"); assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e"); assert_eq!(line_info.format, LineFormat::DoubleSpace); assert!(cached_regex.is_some()); cached_regex = None; // Test single-space regex let line_single_space = OsString::from("d41d8cd98f00b204e9800998ecf8427e example.txt"); let line_info = LineInfo::parse(&line_single_space, &mut cached_regex).unwrap(); assert!(line_info.algo_name.is_none()); assert!(line_info.algo_bit_len.is_none()); assert_eq!(line_info.filename, b"example.txt"); assert_eq!(line_info.checksum, "d41d8cd98f00b204e9800998ecf8427e"); assert_eq!(line_info.format, LineFormat::SingleSpace); assert!(cached_regex.is_some()); cached_regex = None; // Test invalid checksum line let line_invalid = OsString::from("invalid checksum line"); assert!(LineInfo::parse(&line_invalid, &mut cached_regex).is_none()); assert!(cached_regex.is_none()); // Test leading space before checksum line let line_algo_based_leading_space = OsString::from(" MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e"); let line_info = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex).unwrap(); assert_eq!(line_info.format, LineFormat::AlgoBased); assert!(cached_regex.is_none()); // Test trailing space after checksum line (should fail) let line_algo_based_leading_space = OsString::from("MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e "); let res = LineInfo::parse(&line_algo_based_leading_space, &mut cached_regex); assert!(res.is_none()); assert!(cached_regex.is_none()); } #[test] fn test_get_expected_digest() { let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="); let mut cached_regex = None; let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap(); let result = get_expected_digest_as_hex_string(&line_info, None); assert_eq!( result.unwrap(), "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" ); } #[test] fn test_get_expected_checksum_invalid() 
{ // The line misses a '=' at the end to be valid base64 let line = OsString::from("SHA256 (empty) = 47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU"); let mut cached_regex = None; let line_info = LineInfo::parse(&line, &mut cached_regex).unwrap(); let result = get_expected_digest_as_hex_string(&line_info, None); assert!(result.is_none()); } #[test] fn test_print_file_report() { let opts = ChecksumOptions::default(); let cases: &[(&[u8], FileChecksumResult, &str, &[u8])] = &[ (b"filename", FileChecksumResult::Ok, "", b"filename: OK\n"), ( b"filename", FileChecksumResult::Failed, "", b"filename: FAILED\n", ), ( b"filename", FileChecksumResult::CantOpen, "", b"filename: FAILED open or read\n", ), ( b"filename", FileChecksumResult::Ok, "prefix", b"prefixfilename: OK\n", ), ( b"funky\xffname", FileChecksumResult::Ok, "", b"funky\xffname: OK\n", ), ]; for (filename, result, prefix, expected) in cases { let mut buffer: Vec = vec![]; print_file_report(&mut buffer, filename, *result, prefix, opts.verbose); assert_eq!(&buffer, expected) } } } uucore-0.0.30/src/lib/features/colors.rs000064400000000000000000000170351046102023000162360ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // cSpell:disable //! Provides color handling for `ls` and other utilities. /// The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the /// slackware version of dircolors) are recognized but ignored. /// Global config options can be specified before TERM or COLORTERM entries /// below are TERM or COLORTERM entries, which can be glob patterns, which /// restrict following config to systems with matching environment variables. 
pub static TERMS: &[&str] = &[ "Eterm", "alacritty*", "ansi", "*color*", "con[0-9]*x[0-9]*", "cons25", "console", "cygwin", "*direct*", "dtterm", "foot", "gnome", "hurd", "jfbterm", "konsole", "kterm", "linux", "linux-c", "mlterm", "putty", "rxvt*", "screen*", "st", "terminator", "tmux*", "vt100", "xterm*", ]; /// Below are the color init strings for the basic file types. /// One can use codes for 256 or more colors supported by modern terminals. /// The default color codes use the capabilities of an 8 color terminal /// with some additional attributes as per the following codes: /// Attribute codes: /// 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed /// Text color codes: /// 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white /// Background color codes: /// 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white /// #NORMAL 00 /// no color code at all /// #FILE 00 /// regular file: use no color at all pub static FILE_TYPES: &[(&str, &str, &str)] = &[ ("RESET", "rs", "0"), // reset to "normal" color ("DIR", "di", "01;34"), // directory ("LINK", "ln", "01;36"), // symbolic link ("MULTIHARDLINK", "mh", "00"), // regular file with more than one link ("FIFO", "pi", "40;33"), // pipe ("SOCK", "so", "01;35"), // socket ("DOOR", "do", "01;35"), // door ("BLK", "bd", "40;33;01"), // block device driver ("CHR", "cd", "40;33;01"), // character device driver ("ORPHAN", "or", "40;31;01"), // symlink to nonexistent file, or non-stat'able file ("MISSING", "mi", "00"), // ... 
and the files they point to ("SETUID", "su", "37;41"), // file that is setuid (u+s) ("SETGID", "sg", "30;43"), // file that is setgid (g+s) ("CAPABILITY", "ca", "00"), // file with capability ("STICKY_OTHER_WRITABLE", "tw", "30;42"), // dir that is sticky and other-writable (+t,o+w) ("OTHER_WRITABLE", "ow", "34;42"), // dir that is other-writable (o+w) and not sticky ("STICKY", "st", "37;44"), // dir with the sticky bit set (+t) and not other-writable ("EXEC", "ex", "01;32"), // files with execute permission ]; /// Colors for file types /// /// List any file extensions like '.gz' or '.tar' that you would like ls /// to color below. Put the extension, a space, and the color init string. /// (and any comments you want to add after a '#') pub static FILE_COLORS: &[(&str, &str)] = &[ /* // Executables (Windows) (".cmd", "01;32"), (".exe", "01;32"), (".com", "01;32"), (".btm", "01;32"), (".bat", "01;32"), (".sh", "01;32"), (".csh", "01;32"),*/ // Archives or compressed (".tar", "01;31"), (".tgz", "01;31"), (".arc", "01;31"), (".arj", "01;31"), (".taz", "01;31"), (".lha", "01;31"), (".lz4", "01;31"), (".lzh", "01;31"), (".lzma", "01;31"), (".tlz", "01;31"), (".txz", "01;31"), (".tzo", "01;31"), (".t7z", "01;31"), (".zip", "01;31"), (".z", "01;31"), (".dz", "01;31"), (".gz", "01;31"), (".lrz", "01;31"), (".lz", "01;31"), (".lzo", "01;31"), (".xz", "01;31"), (".zst", "01;31"), (".tzst", "01;31"), (".bz2", "01;31"), (".bz", "01;31"), (".tbz", "01;31"), (".tbz2", "01;31"), (".tz", "01;31"), (".deb", "01;31"), (".rpm", "01;31"), (".jar", "01;31"), (".war", "01;31"), (".ear", "01;31"), (".sar", "01;31"), (".rar", "01;31"), (".alz", "01;31"), (".ace", "01;31"), (".zoo", "01;31"), (".cpio", "01;31"), (".7z", "01;31"), (".rz", "01;31"), (".cab", "01;31"), (".wim", "01;31"), (".swm", "01;31"), (".dwm", "01;31"), (".esd", "01;31"), // Image formats (".avif", "01;35"), (".jpg", "01;35"), (".jpeg", "01;35"), (".mjpg", "01;35"), (".mjpeg", "01;35"), (".gif", "01;35"), (".bmp", 
"01;35"), (".pbm", "01;35"), (".pgm", "01;35"), (".ppm", "01;35"), (".tga", "01;35"), (".xbm", "01;35"), (".xpm", "01;35"), (".tif", "01;35"), (".tiff", "01;35"), (".png", "01;35"), (".svg", "01;35"), (".svgz", "01;35"), (".mng", "01;35"), (".pcx", "01;35"), (".mov", "01;35"), (".mpg", "01;35"), (".mpeg", "01;35"), (".m2v", "01;35"), (".mkv", "01;35"), (".webm", "01;35"), (".webp", "01;35"), (".ogm", "01;35"), (".mp4", "01;35"), (".m4v", "01;35"), (".mp4v", "01;35"), (".vob", "01;35"), (".qt", "01;35"), (".nuv", "01;35"), (".wmv", "01;35"), (".asf", "01;35"), (".rm", "01;35"), (".rmvb", "01;35"), (".flc", "01;35"), (".avi", "01;35"), (".fli", "01;35"), (".flv", "01;35"), (".gl", "01;35"), (".dl", "01;35"), (".xcf", "01;35"), (".xwd", "01;35"), (".yuv", "01;35"), (".cgm", "01;35"), (".emf", "01;35"), // https://wiki.xiph.org/MIME_Types_and_File_Extensions (".ogv", "01;35"), (".ogx", "01;35"), // Audio formats (".aac", "00;36"), (".au", "00;36"), (".flac", "00;36"), (".m4a", "00;36"), (".mid", "00;36"), (".midi", "00;36"), (".mka", "00;36"), (".mp3", "00;36"), (".mpc", "00;36"), (".ogg", "00;36"), (".ra", "00;36"), (".wav", "00;36"), // https://wiki.xiph.org/MIME_Types_and_File_Extensions (".oga", "00;36"), (".opus", "00;36"), (".spx", "00;36"), (".xspf", "00;36"), // Backup files ("*~", "00;90"), ("*#", "00;90"), (".bak", "00;90"), (".old", "00;90"), (".orig", "00;90"), (".part", "00;90"), (".rej", "00;90"), (".swp", "00;90"), (".tmp", "00;90"), (".dpkg-dist", "00;90"), (".dpkg-old", "00;90"), (".ucf-dist", "00;90"), (".ucf-new", "00;90"), (".ucf-old", "00;90"), (".rpmnew", "00;90"), (".rpmorig", "00;90"), (".rpmsave", "00;90"), ]; /// Below are the terminal color capabilities pub static FILE_ATTRIBUTE_CODES: &[(&str, &str)] = &[ ("normal", "no"), ("norm", "no"), ("file", "fi"), ("reset", "rs"), ("dir", "di"), ("lnk", "ln"), ("link", "ln"), ("symlink", "ln"), ("orphan", "or"), ("missing", "mi"), ("fifo", "pi"), ("pipe", "pi"), ("sock", "so"), ("blk", "bd"), 
("block", "bd"), ("chr", "cd"), ("char", "cd"), ("door", "do"), ("exec", "ex"), ("left", "lc"), ("leftcode", "lc"), ("right", "rc"), ("rightcode", "rc"), ("end", "ec"), ("endcode", "ec"), ("suid", "su"), ("setuid", "su"), ("sgid", "sg"), ("setgid", "sg"), ("sticky", "st"), ("other_writable", "ow"), ("owr", "ow"), ("sticky_other_writable", "tw"), ("owt", "tw"), ("capability", "ca"), ("multihardlink", "mh"), ("clrtoeol", "cl"), ]; uucore-0.0.30/src/lib/features/custom_tz_fmt.rs000064400000000000000000000036701046102023000176320ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. use chrono::{TimeZone, Utc}; use chrono_tz::{OffsetName, Tz}; use iana_time_zone::get_timezone; /// Get the alphabetic abbreviation of the current timezone. /// /// For example, "UTC" or "CET" or "PDT" fn timezone_abbreviation() -> String { let tz = match std::env::var("TZ") { // TODO Support other time zones... Ok(s) if s == "UTC0" || s.is_empty() => Tz::Etc__UTC, _ => match get_timezone() { Ok(tz_str) => tz_str.parse().unwrap(), Err(_) => Tz::Etc__UTC, }, }; let offset = tz.offset_from_utc_date(&Utc::now().date_naive()); offset.abbreviation().unwrap_or("UTC").to_string() } /// Adapt the given string to be accepted by the chrono library crate. /// /// # Arguments /// /// fmt: the format of the string /// /// # Return /// /// A string that can be used as parameter of the chrono functions that use formats pub fn custom_time_format(fmt: &str) -> String { // TODO - Revisit when chrono 0.5 is released. https://github.com/chronotope/chrono/issues/970 // GNU `date` uses `%N` for nano seconds, however the `chrono` crate uses `%f`. 
fmt.replace("%N", "%f") .replace("%Z", timezone_abbreviation().as_ref()) } #[cfg(test)] mod tests { use super::{custom_time_format, timezone_abbreviation}; #[test] fn test_custom_time_format() { assert_eq!(custom_time_format("%Y-%m-%d %H-%M-%S"), "%Y-%m-%d %H-%M-%S"); assert_eq!(custom_time_format("%d-%m-%Y %H-%M-%S"), "%d-%m-%Y %H-%M-%S"); assert_eq!(custom_time_format("%Y-%m-%d %H-%M-%S"), "%Y-%m-%d %H-%M-%S"); assert_eq!( custom_time_format("%Y-%m-%d %H-%M-%S.%N"), "%Y-%m-%d %H-%M-%S.%f" ); assert_eq!(custom_time_format("%Z"), timezone_abbreviation()); } } uucore-0.0.30/src/lib/features/encoding.rs000064400000000000000000000141071046102023000165200ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (encodings) lsbf msbf // spell-checker:ignore unpadded use crate::error::{UResult, USimpleError}; use data_encoding::Encoding; use data_encoding_macro::new_encoding; use std::collections::VecDeque; // Re-export for the faster decoding/encoding logic pub mod for_base_common { pub use data_encoding::*; } pub mod for_cksum { pub use data_encoding::BASE64; } #[derive(Clone, Copy, Debug)] pub enum Format { Base64, Base64Url, Base32, Base32Hex, Base16, Base2Lsbf, Base2Msbf, Z85, } pub const BASE2LSBF: Encoding = new_encoding! { symbols: "01", bit_order: LeastSignificantFirst, }; pub const BASE2MSBF: Encoding = new_encoding! 
{ symbols: "01", bit_order: MostSignificantFirst, }; pub struct Z85Wrapper {} pub struct EncodingWrapper { pub alphabet: &'static [u8], pub encoding: Encoding, pub unpadded_multiple: usize, pub valid_decoding_multiple: usize, } impl EncodingWrapper { pub fn new( encoding: Encoding, valid_decoding_multiple: usize, unpadded_multiple: usize, alphabet: &'static [u8], ) -> Self { assert!(valid_decoding_multiple > 0); assert!(unpadded_multiple > 0); assert!(!alphabet.is_empty()); Self { alphabet, encoding, unpadded_multiple, valid_decoding_multiple, } } } pub trait SupportsFastDecodeAndEncode { /// Returns the list of characters used by this encoding fn alphabet(&self) -> &'static [u8]; fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()>; fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque) -> UResult<()>; /// Inputs with a length that is a multiple of this number do not have padding when encoded. For instance: /// /// "The quick brown" /// /// is 15 characters (divisible by 3), so it is encoded in Base64 without padding: /// /// "VGhlIHF1aWNrIGJyb3du" /// /// While: /// /// "The quick brown fox" /// /// is 19 characters, which is not divisible by 3, so its Base64 representation has padding: /// /// "VGhlIHF1aWNrIGJyb3duIGZveA==" /// /// The encoding performed by `fast_encode` depends on this number being correct. fn unpadded_multiple(&self) -> usize; /// Data to decode must be a length that is multiple of this number /// /// The decoding performed by `fast_decode` depends on this number being correct. 
fn valid_decoding_multiple(&self) -> usize; } impl SupportsFastDecodeAndEncode for Z85Wrapper { fn alphabet(&self) -> &'static [u8] { // Z85 alphabet b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#" } fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()> { if input.first() == Some(&b'#') { return Err(USimpleError::new(1, "error: invalid input".to_owned())); } let decode_result = match z85::decode(input) { Ok(ve) => ve, Err(_de) => { return Err(USimpleError::new(1, "error: invalid input".to_owned())); } }; output.extend_from_slice(&decode_result); Ok(()) } fn valid_decoding_multiple(&self) -> usize { 5 } fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque) -> UResult<()> { // According to the spec we should not accept inputs whose len is not a multiple of 4. // However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them. if input.len() % 4 != 0 { return Err(USimpleError::new( 1, "error: invalid input (length must be multiple of 4 characters)".to_owned(), )); } let string = z85::encode(input); output.extend(string.as_bytes()); Ok(()) } fn unpadded_multiple(&self) -> usize { 4 } } impl SupportsFastDecodeAndEncode for EncodingWrapper { fn alphabet(&self) -> &'static [u8] { self.alphabet } // Adapted from `decode` in the "data-encoding" crate fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()> { let decode_len_result = match self.encoding.decode_len(input.len()) { Ok(us) => us, Err(_de) => { return Err(USimpleError::new(1, "error: invalid input".to_owned())); } }; let output_len = output.len(); output.resize(output_len + decode_len_result, 0); match self.encoding.decode_mut(input, &mut (output[output_len..])) { Ok(us) => { // See: // https://docs.rs/data-encoding/latest/data_encoding/struct.Encoding.html#method.decode_mut // "Returns the length of the decoded output. 
This length may be smaller than the output length if the input contained padding or ignored characters. The output bytes after the returned length are not initialized and should not be read." output.truncate(output_len + us); } Err(_de) => { return Err(USimpleError::new(1, "error: invalid input".to_owned())); } } Ok(()) } fn valid_decoding_multiple(&self) -> usize { self.valid_decoding_multiple } // Adapted from `encode_append` in the "data-encoding" crate fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque) -> UResult<()> { let output_len = output.len(); output.resize(output_len + self.encoding.encode_len(input.len()), 0); let make_contiguous_result = output.make_contiguous(); self.encoding .encode_mut(input, &mut (make_contiguous_result[output_len..])); Ok(()) } fn unpadded_multiple(&self) -> usize { self.unpadded_multiple } } uucore-0.0.30/src/lib/features/entries.rs000064400000000000000000000327701046102023000164110ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) Passwd cstr fnam gecos ngroups egid //! Get password/group file entry //! //! # Examples: //! //! ``` //! use uucore::entries::{self, Locate}; //! //! let root_group = if cfg!(any(target_os = "linux", target_os = "android")) { //! "root" //! } else { //! "wheel" //! }; //! //! assert_eq!("root", entries::uid2usr(0).unwrap()); //! assert_eq!(0, entries::usr2uid("root").unwrap()); //! assert!(entries::gid2grp(0).is_ok()); //! assert!(entries::grp2gid(root_group).is_ok()); //! //! assert!(entries::Passwd::locate(0).is_ok()); //! assert!(entries::Passwd::locate("0").is_ok()); //! assert!(entries::Passwd::locate("root").is_ok()); //! //! assert!(entries::Group::locate(0).is_ok()); //! assert!(entries::Group::locate("0").is_ok()); //! assert!(entries::Group::locate(root_group).is_ok()); //! 
``` #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] use libc::time_t; use libc::{c_char, c_int, gid_t, uid_t}; use libc::{getgrgid, getgrnam, getgroups}; use libc::{getpwnam, getpwuid, group, passwd}; use std::ffi::{CStr, CString}; use std::io::Error as IOError; use std::io::ErrorKind; use std::io::Result as IOResult; use std::ptr; use std::sync::{LazyLock, Mutex}; extern "C" { /// From: `` /// > The getgrouplist() function scans the group database to obtain /// > the list of groups that user belongs to. fn getgrouplist( name: *const c_char, gid: gid_t, groups: *mut gid_t, ngroups: *mut c_int, ) -> c_int; } /// From: `` /// > getgroups() returns the supplementary group IDs of the calling /// > process in list. /// > If size is zero, list is not modified, but the total number of /// > supplementary group IDs for the process is returned. This allows /// > the caller to determine the size of a dynamically allocated list /// > to be used in a further call to getgroups(). pub fn get_groups() -> IOResult> { let mut groups = Vec::new(); loop { let ngroups = match unsafe { getgroups(0, ptr::null_mut()) } { -1 => return Err(IOError::last_os_error()), // Not just optimization; 0 would mess up the next call 0 => return Ok(Vec::new()), n => n, }; // This is a small buffer, so we can afford to zero-initialize it and // use safe Vec operations groups.resize(ngroups.try_into().unwrap(), 0); let res = unsafe { getgroups(ngroups, groups.as_mut_ptr()) }; if res == -1 { let err = IOError::last_os_error(); if err.raw_os_error() == Some(libc::EINVAL) { // Number of groups has increased, retry continue; } else { return Err(err); } } else { // Number of groups may have decreased groups.truncate(res.try_into().unwrap()); return Ok(groups); } } } /// The list of group IDs returned from GNU's `groups` and GNU's `id --groups` /// starts with the effective group ID (egid). /// This is a wrapper for `get_groups()` to mimic this behavior. 
/// /// If `arg_id` is `None` (default), `get_groups_gnu` moves the effective /// group id (egid) to the first entry in the returned Vector. /// If `arg_id` is `Some(x)`, `get_groups_gnu` moves the id with value `x` /// to the first entry in the returned Vector. This might be necessary /// for `id --groups --real` if `gid` and `egid` are not equal. /// /// From: `` /// > As implied by the definition of supplementary groups, the /// > effective group ID may appear in the array returned by /// > getgroups() or it may be returned only by getegid(). Duplication /// > may exist, but the application needs to call getegid() to be sure /// > of getting all of the information. Various implementation /// > variations and administrative sequences cause the set of groups /// > appearing in the result of getgroups() to vary in order and as to /// > whether the effective group ID is included, even when the set of /// > groups is the same (in the mathematical sense of ``set''). (The /// > history of a process and its parents could affect the details of /// > the result.) 
#[cfg(all(unix, not(target_os = "redox"), feature = "process"))] pub fn get_groups_gnu(arg_id: Option) -> IOResult> { let groups = get_groups()?; let egid = arg_id.unwrap_or_else(crate::features::process::getegid); Ok(sort_groups(groups, egid)) } #[cfg(all(unix, not(target_os = "redox"), feature = "process"))] fn sort_groups(mut groups: Vec, egid: gid_t) -> Vec { if let Some(index) = groups.iter().position(|&x| x == egid) { groups[..=index].rotate_right(1); } else { groups.insert(0, egid); } groups } #[derive(Clone, Debug)] pub struct Passwd { /// AKA passwd.pw_name pub name: String, /// AKA passwd.pw_uid pub uid: uid_t, /// AKA passwd.pw_gid pub gid: gid_t, /// AKA passwd.pw_gecos pub user_info: Option, /// AKA passwd.pw_shell pub user_shell: Option, /// AKA passwd.pw_dir pub user_dir: Option, /// AKA passwd.pw_passwd pub user_passwd: Option, /// AKA passwd.pw_class #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] pub user_access_class: Option, /// AKA passwd.pw_change #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] pub passwd_change_time: time_t, /// AKA passwd.pw_expire #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] pub expiration: time_t, } /// # Safety /// ptr must point to a valid C string. /// /// Returns None if ptr is null. unsafe fn cstr2string(ptr: *const c_char) -> Option { if ptr.is_null() { None } else { Some(CStr::from_ptr(ptr).to_string_lossy().into_owned()) } } impl Passwd { /// # Safety /// All the pointed-to strings must be valid and not change while /// the function runs. That means PW_LOCK must be held. 
unsafe fn from_raw(raw: passwd) -> Self { Self { name: cstr2string(raw.pw_name).expect("passwd without name"), uid: raw.pw_uid, gid: raw.pw_gid, #[cfg(not(all( target_os = "android", any(target_arch = "x86", target_arch = "arm") )))] user_info: cstr2string(raw.pw_gecos), #[cfg(all(target_os = "android", any(target_arch = "x86", target_arch = "arm")))] user_info: None, user_shell: cstr2string(raw.pw_shell), user_dir: cstr2string(raw.pw_dir), user_passwd: cstr2string(raw.pw_passwd), #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] user_access_class: cstr2string(raw.pw_class), #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] passwd_change_time: raw.pw_change, #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] expiration: raw.pw_expire, } } /// This is a wrapper function for `libc::getgrouplist`. /// /// From: `` /// > If the number of groups of which user is a member is less than or /// > equal to *ngroups, then the value *ngroups is returned. /// > If the user is a member of more than *ngroups groups, then /// > getgrouplist() returns -1. In this case, the value returned in /// > *ngroups can be used to resize the buffer passed to a further /// > call getgrouplist(). /// /// However, on macOS/darwin (and maybe others?) `getgrouplist` does /// not update `ngroups` if `ngroups` is too small. Therefore, if not /// updated by `getgrouplist`, `ngroups` needs to be increased in a /// loop until `getgrouplist` stops returning -1. 
pub fn belongs_to(&self) -> Vec { let mut ngroups: c_int = 8; let mut ngroups_old: c_int; let mut groups = vec![0; ngroups.try_into().unwrap()]; let name = CString::new(self.name.as_bytes()).unwrap(); loop { ngroups_old = ngroups; if unsafe { getgrouplist(name.as_ptr(), self.gid, groups.as_mut_ptr(), &mut ngroups) } == -1 { if ngroups == ngroups_old { ngroups *= 2; } groups.resize(ngroups.try_into().unwrap(), 0); } else { break; } } let ngroups = ngroups.try_into().unwrap(); assert!(ngroups <= groups.len()); groups.truncate(ngroups); groups } } #[derive(Clone, Debug)] pub struct Group { /// AKA group.gr_name pub name: String, /// AKA group.gr_gid pub gid: gid_t, } impl Group { /// # Safety /// gr_name must be valid and not change while /// the function runs. That means PW_LOCK must be held. unsafe fn from_raw(raw: group) -> Self { Self { name: cstr2string(raw.gr_name).expect("group without name"), gid: raw.gr_gid, } } } /// Fetch desired entry. pub trait Locate { fn locate(key: K) -> IOResult where Self: ::std::marker::Sized; } // These functions are not thread-safe: // > The return value may point to a static area, and may be // > overwritten by subsequent calls to getpwent(3), getpwnam(), // > or getpwuid(). // This applies not just to the struct but also the strings it points // to, so we must copy all the data we want before releasing the lock. // (Technically we must also ensure that the raw functions aren't being called // anywhere else in the program.) static PW_LOCK: LazyLock> = LazyLock::new(|| Mutex::new(())); macro_rules! f { ($fnam:ident, $fid:ident, $t:ident, $st:ident) => { impl Locate<$t> for $st { fn locate(k: $t) -> IOResult { let _guard = PW_LOCK.lock(); // SAFETY: We're holding PW_LOCK. unsafe { let data = $fid(k); if !data.is_null() { Ok($st::from_raw(ptr::read(data as *const _))) } else { // FIXME: Resource limits, signals and I/O failure may // cause this too. See getpwnam(3). // errno must be set to zero before the call. 
We can // use libc::__errno_location() on some platforms. // The same applies for the two cases below. Err(IOError::new( ErrorKind::NotFound, format!("No such id: {}", k), )) } } } } impl<'a> Locate<&'a str> for $st { fn locate(k: &'a str) -> IOResult { let _guard = PW_LOCK.lock(); if let Ok(id) = k.parse::<$t>() { // SAFETY: We're holding PW_LOCK. unsafe { let data = $fid(id); if !data.is_null() { Ok($st::from_raw(ptr::read(data as *const _))) } else { Err(IOError::new( ErrorKind::NotFound, format!("No such id: {}", id), )) } } } else { // SAFETY: We're holding PW_LOCK. unsafe { let cstring = CString::new(k).unwrap(); let data = $fnam(cstring.as_ptr()); if !data.is_null() { Ok($st::from_raw(ptr::read(data as *const _))) } else { Err(IOError::new( ErrorKind::NotFound, format!("Not found: {}", k), )) } } } } } }; } f!(getpwnam, getpwuid, uid_t, Passwd); f!(getgrnam, getgrgid, gid_t, Group); #[inline] pub fn uid2usr(id: uid_t) -> IOResult { Passwd::locate(id).map(|p| p.name) } #[inline] pub fn gid2grp(id: gid_t) -> IOResult { Group::locate(id).map(|p| p.name) } #[inline] pub fn usr2uid(name: &str) -> IOResult { Passwd::locate(name).map(|p| p.uid) } #[inline] pub fn grp2gid(name: &str) -> IOResult { Group::locate(name).map(|p| p.gid) } #[cfg(test)] mod test { use super::*; #[test] fn test_sort_groups() { assert_eq!(sort_groups(vec![1, 2, 3], 4), vec![4, 1, 2, 3]); assert_eq!(sort_groups(vec![1, 2, 3], 3), vec![3, 1, 2]); assert_eq!(sort_groups(vec![1, 2, 3], 2), vec![2, 1, 3]); assert_eq!(sort_groups(vec![1, 2, 3], 1), vec![1, 2, 3]); assert_eq!(sort_groups(vec![1, 2, 3], 0), vec![0, 1, 2, 3]); } #[test] fn test_entries_get_groups_gnu() { if let Ok(mut groups) = get_groups() { if let Some(last) = groups.pop() { groups.insert(0, last); assert_eq!(get_groups_gnu(Some(last)).unwrap(), groups); } } } } uucore-0.0.30/src/lib/features/format/argument.rs000064400000000000000000000077071046102023000200540ustar 00000000000000// This file is part of the uutils coreutils 
package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. use crate::{ error::set_exit_code, features::format::num_parser::{ParseError, ParsedNumber}, quoting_style::{escape_name, Quotes, QuotingStyle}, show_error, show_warning, }; use os_display::Quotable; use std::ffi::OsStr; /// An argument for formatting /// /// Each of these variants is only accepted by their respective directives. For /// example, [`FormatArgument::Char`] requires a `%c` directive. /// /// The [`FormatArgument::Unparsed`] variant contains a string that can be /// parsed into other types. This is used by the `printf` utility. #[derive(Clone, Debug)] pub enum FormatArgument { Char(char), String(String), UnsignedInt(u64), SignedInt(i64), Float(f64), /// Special argument that gets coerced into the other variants Unparsed(String), } pub trait ArgumentIter<'a>: Iterator { fn get_char(&mut self) -> u8; fn get_i64(&mut self) -> i64; fn get_u64(&mut self) -> u64; fn get_f64(&mut self) -> f64; fn get_str(&mut self) -> &'a str; } impl<'a, T: Iterator> ArgumentIter<'a> for T { fn get_char(&mut self) -> u8 { let Some(next) = self.next() else { return b'\0'; }; match next { FormatArgument::Char(c) => *c as u8, FormatArgument::Unparsed(s) => s.bytes().next().unwrap_or(b'\0'), _ => b'\0', } } fn get_u64(&mut self) -> u64 { let Some(next) = self.next() else { return 0; }; match next { FormatArgument::UnsignedInt(n) => *n, FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_u64(s), s), _ => 0, } } fn get_i64(&mut self) -> i64 { let Some(next) = self.next() else { return 0; }; match next { FormatArgument::SignedInt(n) => *n, FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_i64(s), s), _ => 0, } } fn get_f64(&mut self) -> f64 { let Some(next) = self.next() else { return 0.0; }; match next { FormatArgument::Float(n) => *n, FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_f64(s), s), _ => 
0.0, } } fn get_str(&mut self) -> &'a str { match self.next() { Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s, _ => "", } } } fn extract_value(p: Result>, input: &str) -> T { match p { Ok(v) => v, Err(e) => { set_exit_code(1); let input = escape_name( OsStr::new(input), &QuotingStyle::C { quotes: Quotes::None, }, ); match e { ParseError::Overflow => { show_error!("{}: Numerical result out of range", input.quote()); Default::default() } ParseError::NotNumeric => { show_error!("{}: expected a numeric value", input.quote()); Default::default() } ParseError::PartialMatch(v, rest) => { let bytes = input.as_encoded_bytes(); if !bytes.is_empty() && bytes[0] == b'\'' { show_warning!( "{}: character(s) following character constant have been ignored", &rest, ); } else { show_error!("{}: value not completely converted", input.quote()); } v } } } } } uucore-0.0.30/src/lib/features/format/escape.rs000064400000000000000000000110311046102023000174530ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Parsing of escape sequences #[derive(Debug)] pub enum EscapedChar { /// A single byte Byte(u8), /// A unicode character Char(char), /// A character prefixed with a backslash (i.e. 
an invalid escape sequence) Backslash(u8), /// Specifies that the string should stop (`\c`) End, } #[repr(u8)] #[derive(Clone, Copy)] enum Base { Oct = 8, Hex = 16, } impl Base { fn max_digits(&self) -> u8 { match self { Self::Oct => 3, Self::Hex => 2, } } fn convert_digit(&self, c: u8) -> Option { match self { Self::Oct => { if matches!(c, b'0'..=b'7') { Some(c - b'0') } else { None } } Self::Hex => match c { b'0'..=b'9' => Some(c - b'0'), b'A'..=b'F' => Some(c - b'A' + 10), b'a'..=b'f' => Some(c - b'a' + 10), _ => None, }, } } } /// Parse the numeric part of the `\xHHH` and `\0NNN` escape sequences fn parse_code(input: &mut &[u8], base: Base) -> Option { // All arithmetic on `ret` needs to be wrapping, because octal input can // take 3 digits, which is 9 bits, and therefore more than what fits in a // `u8`. GNU just seems to wrap these values. // Note that if we instead make `ret` a `u32` and use `char::from_u32` will // yield incorrect results because it will interpret values larger than // `u8::MAX` as unicode. let [c, rest @ ..] = input else { return None }; let mut ret = base.convert_digit(*c)?; *input = rest; for _ in 1..base.max_digits() { let [c, rest @ ..] = input else { break }; let Some(n) = base.convert_digit(*c) else { break; }; ret = ret.wrapping_mul(base as u8).wrapping_add(n); *input = rest; } Some(ret) } // spell-checker:disable-next /// Parse `\uHHHH` and `\UHHHHHHHH` // TODO: This should print warnings and possibly halt execution when it fails to parse // TODO: If the character cannot be converted to u32, the input should be printed. fn parse_unicode(input: &mut &[u8], digits: u8) -> Option { let (c, rest) = input.split_first()?; let mut ret = Base::Hex.convert_digit(*c)? as u32; *input = rest; for _ in 1..digits { let (c, rest) = input.split_first()?; let n = Base::Hex.convert_digit(*c)?; ret = ret.wrapping_mul(Base::Hex as u32).wrapping_add(n as u32); *input = rest; } char::from_u32(ret) } /// Represents an invalid escape sequence. 
#[derive(Debug)] pub struct EscapeError {} /// Parse an escape sequence, like `\n` or `\xff`, etc. pub fn parse_escape_code(rest: &mut &[u8]) -> Result { if let [c, new_rest @ ..] = rest { // This is for the \NNN syntax for octal sequences. // Note that '0' is intentionally omitted because that // would be the \0NNN syntax. if let b'1'..=b'7' = c { if let Some(parsed) = parse_code(rest, Base::Oct) { return Ok(EscapedChar::Byte(parsed)); } } *rest = new_rest; match c { b'\\' => Ok(EscapedChar::Byte(b'\\')), b'"' => Ok(EscapedChar::Byte(b'"')), b'a' => Ok(EscapedChar::Byte(b'\x07')), b'b' => Ok(EscapedChar::Byte(b'\x08')), b'c' => Ok(EscapedChar::End), b'e' => Ok(EscapedChar::Byte(b'\x1b')), b'f' => Ok(EscapedChar::Byte(b'\x0c')), b'n' => Ok(EscapedChar::Byte(b'\n')), b'r' => Ok(EscapedChar::Byte(b'\r')), b't' => Ok(EscapedChar::Byte(b'\t')), b'v' => Ok(EscapedChar::Byte(b'\x0b')), b'x' => { if let Some(c) = parse_code(rest, Base::Hex) { Ok(EscapedChar::Byte(c)) } else { Err(EscapeError {}) } } b'0' => Ok(EscapedChar::Byte( parse_code(rest, Base::Oct).unwrap_or(b'\0'), )), b'u' => Ok(EscapedChar::Char(parse_unicode(rest, 4).unwrap_or('\0'))), b'U' => Ok(EscapedChar::Char(parse_unicode(rest, 8).unwrap_or('\0'))), c => Ok(EscapedChar::Backslash(*c)), } } else { Ok(EscapedChar::Byte(b'\\')) } } uucore-0.0.30/src/lib/features/format/human.rs000064400000000000000000000043201046102023000173260ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore gnulibs sfmt //! `human`-size formatting //! //! Format sizes like gnulibs human_readable() would use number_prefix::NumberPrefix; #[derive(Copy, Clone, PartialEq)] pub enum SizeFormat { Bytes, Binary, // Powers of 1024, --human-readable, -h Decimal, // Powers of 1000, --si } // There are a few peculiarities to how GNU formats the sizes: // 1. 
One decimal place is given if and only if the size is smaller than 10 // 2. It rounds sizes up. // 3. The human-readable format uses powers for 1024, but does not display the "i" // that is commonly used to denote Kibi, Mebi, etc. // 4. Kibi and Kilo are denoted differently ("k" and "K", respectively) fn format_prefixed(prefixed: &NumberPrefix) -> String { match prefixed { NumberPrefix::Standalone(bytes) => bytes.to_string(), NumberPrefix::Prefixed(prefix, bytes) => { // Remove the "i" from "Ki", "Mi", etc. if present let prefix_str = prefix.symbol().trim_end_matches('i'); // Check whether we get more than 10 if we round up to the first decimal // because we want do display 9.81 as "9.9", not as "10". if (10.0 * bytes).ceil() >= 100.0 { format!("{:.0}{}", bytes.ceil(), prefix_str) } else { format!("{:.1}{}", (10.0 * bytes).ceil() / 10.0, prefix_str) } } } } pub fn human_readable(size: u64, sfmt: SizeFormat) -> String { match sfmt { SizeFormat::Binary => format_prefixed(&NumberPrefix::binary(size as f64)), SizeFormat::Decimal => format_prefixed(&NumberPrefix::decimal(size as f64)), SizeFormat::Bytes => size.to_string(), } } #[cfg(test)] #[test] fn test_human_readable() { let test_cases = [ (133_456_345, SizeFormat::Binary, "128M"), (12 * 1024 * 1024, SizeFormat::Binary, "12M"), (8500, SizeFormat::Binary, "8.4K"), ]; for &(size, sfmt, expected_str) in &test_cases { assert_eq!(human_readable(size, sfmt), expected_str); } } uucore-0.0.30/src/lib/features/format/mod.rs000064400000000000000000000262101046102023000167770ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! `printf`-style formatting //! //! Rust has excellent formatting capabilities, but the coreutils require very //! specific formatting that needs to work exactly like the GNU utilities. //! 
Naturally, the GNU behavior is based on the C `printf` functionality. //! //! Additionally, we need support for escape sequences for the `printf` utility. //! //! The [`printf`] and [`sprintf`] functions closely match the behavior of the //! corresponding C functions: the former renders a formatted string //! to stdout, the latter renders to a new [`String`] object. //! //! There are three kinds of parsing that we might want to do: //! //! 1. Parse only `printf` directives (for e.g. `seq`, `dd`) //! 2. Parse only escape sequences (for e.g. `echo`) //! 3. Parse both `printf` specifiers and escape sequences (for e.g. `printf`) //! //! This module aims to combine all three use cases. An iterator parsing each //! of these cases is provided by [`parse_spec_only`], [`parse_escape_only`] //! and [`parse_spec_and_escape`], respectively. //! //! There is a special [`Format`] type, which can be used to parse a format //! string containing exactly one directive and does not use any `*` in that //! directive. This format can be printed in a type-safe manner without failing //! (modulo IO errors). mod argument; mod escape; pub mod human; pub mod num_format; pub mod num_parser; mod spec; pub use argument::*; pub use spec::Spec; use std::{ error::Error, fmt::Display, io::{stdout, Write}, ops::ControlFlow, }; use os_display::Quotable; use crate::error::UError; pub use self::{ escape::{parse_escape_code, EscapedChar}, num_format::Formatter, }; #[derive(Debug)] pub enum FormatError { SpecError(Vec), IoError(std::io::Error), NoMoreArguments, InvalidArgument(FormatArgument), TooManySpecs(Vec), NeedAtLeastOneSpec(Vec), WrongSpecType, InvalidPrecision(String), /// The format specifier ends with a %, as in `%f%`. EndsWithPercent(Vec), /// The escape sequence `\x` appears without a literal hexadecimal value. 
MissingHex, } impl Error for FormatError {} impl UError for FormatError {} impl From for FormatError { fn from(value: std::io::Error) -> Self { Self::IoError(value) } } impl Display for FormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::SpecError(s) => write!( f, "%{}: invalid conversion specification", String::from_utf8_lossy(s) ), Self::TooManySpecs(s) => write!( f, "format '{}' has too many % directives", String::from_utf8_lossy(s) ), Self::NeedAtLeastOneSpec(s) => write!( f, "format '{}' has no % directive", String::from_utf8_lossy(s) ), Self::EndsWithPercent(s) => { write!(f, "format {} ends in %", String::from_utf8_lossy(s).quote()) } Self::InvalidPrecision(precision) => write!(f, "invalid precision: '{precision}'"), // TODO: Error message below needs some work Self::WrongSpecType => write!(f, "wrong % directive type was given"), Self::IoError(_) => write!(f, "io error"), Self::NoMoreArguments => write!(f, "no more arguments"), Self::InvalidArgument(_) => write!(f, "invalid argument"), Self::MissingHex => write!(f, "missing hexadecimal number in escape"), } } } /// A single item to format pub enum FormatItem { /// A format specifier Spec(Spec), /// A single character Char(C), } pub trait FormatChar { fn write(&self, writer: impl Write) -> std::io::Result>; } impl FormatChar for u8 { fn write(&self, mut writer: impl Write) -> std::io::Result> { writer.write_all(&[*self])?; Ok(ControlFlow::Continue(())) } } impl FormatChar for EscapedChar { fn write(&self, mut writer: impl Write) -> std::io::Result> { match self { Self::Byte(c) => { writer.write_all(&[*c])?; } Self::Char(c) => { write!(writer, "{c}")?; } Self::Backslash(c) => { writer.write_all(&[b'\\', *c])?; } Self::End => return Ok(ControlFlow::Break(())), } Ok(ControlFlow::Continue(())) } } impl FormatItem { pub fn write<'a>( &self, writer: impl Write, args: &mut impl Iterator, ) -> Result, FormatError> { match self { Self::Spec(spec) => spec.write(writer, 
args)?, Self::Char(c) => return c.write(writer).map_err(FormatError::IoError), }; Ok(ControlFlow::Continue(())) } } /// Parse a format string containing % directives and escape sequences pub fn parse_spec_and_escape( fmt: &[u8], ) -> impl Iterator, FormatError>> + '_ { let mut current = fmt; std::iter::from_fn(move || match current { [] => None, [b'%', b'%', rest @ ..] => { current = rest; Some(Ok(FormatItem::Char(EscapedChar::Byte(b'%')))) } [b'%', rest @ ..] => { current = rest; let spec = match Spec::parse(&mut current) { Ok(spec) => spec, Err(slice) => return Some(Err(FormatError::SpecError(slice.to_vec()))), }; Some(Ok(FormatItem::Spec(spec))) } [b'\\', rest @ ..] => { current = rest; Some(match parse_escape_code(&mut current) { Ok(c) => Ok(FormatItem::Char(c)), Err(_) => Err(FormatError::MissingHex), }) } [c, rest @ ..] => { current = rest; Some(Ok(FormatItem::Char(EscapedChar::Byte(*c)))) } }) } /// Parse a format string containing % directives pub fn parse_spec_only( fmt: &[u8], ) -> impl Iterator, FormatError>> + '_ { let mut current = fmt; std::iter::from_fn(move || match current { [] => None, [b'%'] => Some(Err(FormatError::EndsWithPercent(fmt.to_vec()))), [b'%', b'%', rest @ ..] => { current = rest; Some(Ok(FormatItem::Char(b'%'))) } [b'%', rest @ ..] => { current = rest; let spec = match Spec::parse(&mut current) { Ok(spec) => spec, Err(slice) => return Some(Err(FormatError::SpecError(slice.to_vec()))), }; Some(Ok(FormatItem::Spec(spec))) } [c, rest @ ..] => { current = rest; Some(Ok(FormatItem::Char(*c))) } }) } /// Parse a format string containing escape sequences pub fn parse_escape_only(fmt: &[u8]) -> impl Iterator + '_ { let mut current = fmt; std::iter::from_fn(move || match current { [] => None, [b'\\', rest @ ..] => { current = rest; Some(parse_escape_code(&mut current).unwrap_or(EscapedChar::Backslash(b'x'))) } [c, rest @ ..] => { current = rest; Some(EscapedChar::Byte(*c)) } }) } /// Write a formatted string to stdout. 
/// /// `format_string` contains the template and `args` contains the /// arguments to render into the template. /// /// See also [`sprintf`], which creates a new formatted [`String`]. /// /// # Examples /// /// ```rust /// use uucore::format::{printf, FormatArgument}; /// /// printf("hello %s", &[FormatArgument::String("world".into())]).unwrap(); /// // prints "hello world" /// ``` pub fn printf<'a>( format_string: impl AsRef<[u8]>, arguments: impl IntoIterator, ) -> Result<(), FormatError> { printf_writer(stdout(), format_string, arguments) } fn printf_writer<'a>( mut writer: impl Write, format_string: impl AsRef<[u8]>, args: impl IntoIterator, ) -> Result<(), FormatError> { let mut args = args.into_iter(); for item in parse_spec_only(format_string.as_ref()) { item?.write(&mut writer, &mut args)?; } Ok(()) } /// Create a new formatted string. /// /// `format_string` contains the template and `args` contains the /// arguments to render into the template. /// /// See also [`printf`], which prints to stdout. /// /// # Examples /// /// ```rust /// use uucore::format::{sprintf, FormatArgument}; /// /// let s = sprintf("hello %s", &[FormatArgument::String("world".into())]).unwrap(); /// let s = std::str::from_utf8(&s).unwrap(); /// assert_eq!(s, "hello world"); /// ``` pub fn sprintf<'a>( format_string: impl AsRef<[u8]>, arguments: impl IntoIterator, ) -> Result, FormatError> { let mut writer = Vec::new(); printf_writer(&mut writer, format_string, arguments)?; Ok(writer) } /// A parsed format for a single float value /// /// This is used by `seq`. It can be constructed with [`Format::parse`] /// and can write a value with [`Format::fmt`]. /// /// It can only accept a single specification without any asterisk parameters. /// If it does get more specifications, it will return an error. 
pub struct Format { prefix: Vec, suffix: Vec, formatter: F, } impl Format { pub fn parse(format_string: impl AsRef<[u8]>) -> Result { let mut iter = parse_spec_only(format_string.as_ref()); let mut prefix = Vec::new(); let mut spec = None; for item in &mut iter { match item? { FormatItem::Spec(s) => { spec = Some(s); break; } FormatItem::Char(c) => prefix.push(c), } } let Some(spec) = spec else { return Err(FormatError::NeedAtLeastOneSpec( format_string.as_ref().to_vec(), )); }; let formatter = F::try_from_spec(spec)?; let mut suffix = Vec::new(); for item in &mut iter { match item { // If the `format_string` is of the form `%f%f` or // `%f%`, then return an error. Ok(FormatItem::Spec(_)) | Err(FormatError::EndsWithPercent(_)) => { return Err(FormatError::TooManySpecs(format_string.as_ref().to_vec())); } Ok(FormatItem::Char(c)) => suffix.push(c), Err(e) => return Err(e), } } Ok(Self { prefix, suffix, formatter, }) } pub fn fmt(&self, mut w: impl Write, f: F::Input) -> std::io::Result<()> { w.write_all(&self.prefix)?; self.formatter.fmt(&mut w, f)?; w.write_all(&self.suffix)?; Ok(()) } } uucore-0.0.30/src/lib/features/format/num_format.rs000064400000000000000000000547221046102023000204000ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! 
Utilities for formatting numbers in various formats use std::cmp::min; use std::io::Write; use super::{ spec::{CanAsterisk, Spec}, FormatError, }; pub trait Formatter { type Input; fn fmt(&self, writer: impl Write, x: Self::Input) -> std::io::Result<()>; fn try_from_spec(s: Spec) -> Result where Self: Sized; } #[derive(Clone, Copy, Debug)] pub enum UnsignedIntVariant { Decimal, Octal(Prefix), Hexadecimal(Case, Prefix), } #[derive(Clone, Copy, Debug)] pub enum FloatVariant { Decimal, Scientific, Shortest, Hexadecimal, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Case { Lowercase, Uppercase, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Prefix { No, Yes, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ForceDecimal { No, Yes, } #[derive(Clone, Copy, Debug)] pub enum PositiveSign { None, Plus, Space, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum NumberAlignment { Left, RightSpace, RightZero, } pub struct SignedInt { pub width: usize, pub precision: usize, pub positive_sign: PositiveSign, pub alignment: NumberAlignment, } impl Formatter for SignedInt { type Input = i64; fn fmt(&self, writer: impl Write, x: Self::Input) -> std::io::Result<()> { let s = if self.precision > 0 { format!("{:0>width$}", x.abs(), width = self.precision) } else { x.abs().to_string() }; let sign_indicator = get_sign_indicator(self.positive_sign, &x); write_output(writer, sign_indicator, s, self.width, self.alignment) } fn try_from_spec(s: Spec) -> Result { let Spec::SignedInt { width, precision, positive_sign, alignment, } = s else { return Err(FormatError::WrongSpecType); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; Ok(Self { width, precision, positive_sign, alignment, }) } } pub struct UnsignedInt { pub 
variant: UnsignedIntVariant, pub width: usize, pub precision: usize, pub alignment: NumberAlignment, } impl Formatter for UnsignedInt { type Input = u64; fn fmt(&self, mut writer: impl Write, x: Self::Input) -> std::io::Result<()> { let mut s = match self.variant { UnsignedIntVariant::Decimal => format!("{x}"), UnsignedIntVariant::Octal(_) => format!("{x:o}"), UnsignedIntVariant::Hexadecimal(Case::Lowercase, _) => { format!("{x:x}") } UnsignedIntVariant::Hexadecimal(Case::Uppercase, _) => { format!("{x:X}") } }; // Zeroes do not get a prefix. An octal value does also not get a // prefix if the padded value will not start with a zero. let prefix = match (x, self.variant) { (1.., UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes)) => "0x", (1.., UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes)) => "0X", (1.., UnsignedIntVariant::Octal(Prefix::Yes)) if s.len() >= self.precision => "0", _ => "", }; s = format!("{prefix}{s:0>width$}", width = self.precision); match self.alignment { NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}", width = self.width), NumberAlignment::RightZero => write!(writer, "{s:0>width$}", width = self.width), } } fn try_from_spec(s: Spec) -> Result { // A signed int spec might be mapped to an unsigned int spec if no sign is specified let s = if let Spec::SignedInt { width, precision, positive_sign: PositiveSign::None, alignment, } = s { Spec::UnsignedInt { variant: UnsignedIntVariant::Decimal, width, precision, alignment, } } else { s }; let Spec::UnsignedInt { variant, width, precision, alignment, } = s else { return Err(FormatError::WrongSpecType); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => 0, Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; Ok(Self { variant, width, precision, alignment, }) } } 
pub struct Float { pub variant: FloatVariant, pub case: Case, pub force_decimal: ForceDecimal, pub width: usize, pub positive_sign: PositiveSign, pub alignment: NumberAlignment, pub precision: usize, } impl Default for Float { fn default() -> Self { Self { variant: FloatVariant::Decimal, case: Case::Lowercase, force_decimal: ForceDecimal::No, width: 0, positive_sign: PositiveSign::None, alignment: NumberAlignment::Left, precision: 6, } } } impl Formatter for Float { type Input = f64; fn fmt(&self, writer: impl Write, x: Self::Input) -> std::io::Result<()> { let mut s = if x.is_finite() { match self.variant { FloatVariant::Decimal => { format_float_decimal(x, self.precision, self.force_decimal) } FloatVariant::Scientific => { format_float_scientific(x, self.precision, self.case, self.force_decimal) } FloatVariant::Shortest => { format_float_shortest(x, self.precision, self.case, self.force_decimal) } FloatVariant::Hexadecimal => { format_float_hexadecimal(x, self.precision, self.case, self.force_decimal) } } } else { format_float_non_finite(x, self.case) }; // The format function will parse `x` together with its sign char, // which should be placed in `sign_indicator`. So drop it here s = if x < 0. 
{ s[1..].to_string() } else { s }; let sign_indicator = get_sign_indicator(self.positive_sign, &x); write_output(writer, sign_indicator, s, self.width, self.alignment) } fn try_from_spec(s: Spec) -> Result where Self: Sized, { let Spec::Float { variant, case, force_decimal, width, positive_sign, alignment, precision, } = s else { return Err(FormatError::WrongSpecType); }; let width = match width { Some(CanAsterisk::Fixed(x)) => x, None => 0, Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; let precision = match precision { Some(CanAsterisk::Fixed(x)) => x, None => { if matches!(variant, FloatVariant::Shortest) { 6 } else { 0 } } Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType), }; Ok(Self { variant, case, force_decimal, width, positive_sign, alignment, precision, }) } } fn get_sign_indicator(sign: PositiveSign, x: &T) -> String { if *x >= T::default() { match sign { PositiveSign::None => String::new(), PositiveSign::Plus => String::from("+"), PositiveSign::Space => String::from(" "), } } else { String::from("-") } } fn format_float_non_finite(f: f64, case: Case) -> String { debug_assert!(!f.is_finite()); let mut s = format!("{f}"); if case == Case::Uppercase { s.make_ascii_uppercase(); } s } fn format_float_decimal(f: f64, precision: usize, force_decimal: ForceDecimal) -> String { if precision == 0 && force_decimal == ForceDecimal::Yes { format!("{f:.0}.") } else { format!("{f:.precision$}") } } fn format_float_scientific( f: f64, precision: usize, case: Case, force_decimal: ForceDecimal, ) -> String { if f == 0.0 { return if force_decimal == ForceDecimal::Yes && precision == 0 { "0.e+00".into() } else { format!("{:.*}e+00", precision, 0.0) }; } let mut exponent: i32 = f.log10().floor() as i32; let mut normalized = f / 10.0_f64.powi(exponent); // If the normalized value will be rounded to a value greater than 10 // we need to correct. 
if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 { normalized /= 10.0; exponent += 1; } let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal { "." } else { "" }; let exp_char = match case { Case::Lowercase => 'e', Case::Uppercase => 'E', }; format!("{normalized:.precision$}{additional_dot}{exp_char}{exponent:+03}") } fn format_float_shortest( f: f64, precision: usize, case: Case, force_decimal: ForceDecimal, ) -> String { // Precision here is about how many digits should be displayed // instead of how many digits for the fractional part, this means that if // we pass this to rust's format string, it's always gonna be one less. let precision = precision.saturating_sub(1); if f == 0.0 { return match (force_decimal, precision) { (ForceDecimal::Yes, 0) => "0.".into(), (ForceDecimal::Yes, _) => { format!("{:.*}", precision, 0.0) } (ForceDecimal::No, _) => "0".into(), }; } // Retrieve the exponent. Note that log10 is undefined for negative numbers. // To avoid NaN or zero (due to i32 conversion), use the absolute value of f. let mut exponent = f.abs().log10().floor() as i32; if f != 0.0 && exponent < -4 || exponent > precision as i32 { // Scientific-ish notation (with a few differences) let mut normalized = f / 10.0_f64.powi(exponent); // If the normalized value will be rounded to a value greater than 10 // we need to correct. if (normalized * 10_f64.powi(precision as i32)).round() / 10_f64.powi(precision as i32) >= 10.0 { normalized /= 10.0; exponent += 1; } let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal { "." 
} else { "" }; let mut normalized = format!("{normalized:.precision$}"); if force_decimal == ForceDecimal::No { strip_fractional_zeroes_and_dot(&mut normalized); } let exp_char = match case { Case::Lowercase => 'e', Case::Uppercase => 'E', }; format!("{normalized}{additional_dot}{exp_char}{exponent:+03}") } else { // Decimal-ish notation with a few differences: // - The precision works differently and specifies the total number // of digits instead of the digits in the fractional part. // - If we don't force the decimal, `.` and trailing `0` in the fractional part // are trimmed. let decimal_places = (precision as i32 - exponent) as usize; let mut formatted = if decimal_places == 0 && force_decimal == ForceDecimal::Yes { format!("{f:.0}.") } else { format!("{f:.decimal_places$}") }; if force_decimal == ForceDecimal::No { strip_fractional_zeroes_and_dot(&mut formatted); } formatted } } fn format_float_hexadecimal( f: f64, precision: usize, case: Case, force_decimal: ForceDecimal, ) -> String { let (sign, first_digit, mantissa, exponent) = if f == 0.0 { ("", 0, 0, 0) } else { let bits = f.to_bits(); let sign = if (bits >> 63) == 1 { "-" } else { "" }; let exponent_bits = ((bits >> 52) & 0x7ff) as i64; let exponent = exponent_bits - 1023; let mantissa = bits & 0xf_ffff_ffff_ffff; (sign, 1, mantissa, exponent) }; let mut s = match (precision, force_decimal) { (0, ForceDecimal::No) => format!("{sign}0x{first_digit}p{exponent:+}"), (0, ForceDecimal::Yes) => format!("{sign}0x{first_digit}.p{exponent:+}"), _ => format!("{sign}0x{first_digit}.{mantissa:0>13x}p{exponent:+}"), }; if case == Case::Uppercase { s.make_ascii_uppercase(); } s } fn strip_fractional_zeroes_and_dot(s: &mut String) { let mut trim_to = s.len(); for (pos, c) in s.char_indices().rev() { if pos + c.len_utf8() == trim_to && (c == '0' || c == '.') { trim_to = pos; } if c == '.' 
{ s.truncate(trim_to); break; } } } fn write_output( mut writer: impl Write, sign_indicator: String, mut s: String, width: usize, alignment: NumberAlignment, ) -> std::io::Result<()> { // Take length of `sign_indicator`, which could be 0 or 1, into consideration when padding // by storing remaining_width indicating the actual width needed. // Using min() because self.width could be 0, 0usize - 1usize should be avoided let remaining_width = width - min(width, sign_indicator.len()); match alignment { NumberAlignment::Left => write!(writer, "{sign_indicator}{s: { let is_sign = sign_indicator.starts_with('-') || sign_indicator.starts_with('+'); // When sign_indicator is in ['-', '+'] if is_sign && remaining_width > 0 { // Make sure sign_indicator is just next to number, e.g. "% +5.1f" 1 ==> $ +1.0 s = sign_indicator + s.as_str(); write!(writer, "{s:>width$}", width = remaining_width + 1) // Since we now add sign_indicator and s together, plus 1 } else { write!(writer, "{sign_indicator}{s:>remaining_width$}") } } NumberAlignment::RightZero => { write!(writer, "{sign_indicator}{s:0>remaining_width$}") } } } #[cfg(test)] mod test { use crate::format::num_format::{Case, ForceDecimal}; #[test] fn unsigned_octal() { use super::{Formatter, NumberAlignment, Prefix, UnsignedInt, UnsignedIntVariant}; let f = |x| { let mut s = Vec::new(); UnsignedInt { variant: UnsignedIntVariant::Octal(Prefix::Yes), width: 0, precision: 0, alignment: NumberAlignment::Left, } .fmt(&mut s, x) .unwrap(); String::from_utf8(s).unwrap() }; assert_eq!(f(0), "0"); assert_eq!(f(5), "05"); assert_eq!(f(8), "010"); } #[test] fn decimal_float() { use super::format_float_decimal; let f = |x| format_float_decimal(x, 6, ForceDecimal::No); assert_eq!(f(0.0), "0.000000"); assert_eq!(f(1.0), "1.000000"); assert_eq!(f(100.0), "100.000000"); assert_eq!(f(123_456.789), "123456.789000"); assert_eq!(f(12.345_678_9), "12.345679"); assert_eq!(f(1_000_000.0), "1000000.000000"); assert_eq!(f(99_999_999.0), 
"99999999.000000"); assert_eq!(f(1.999_999_5), "1.999999"); assert_eq!(f(1.999_999_6), "2.000000"); } #[test] fn scientific_float() { use super::format_float_scientific; let f = |x| format_float_scientific(x, 6, Case::Lowercase, ForceDecimal::No); assert_eq!(f(0.0), "0.000000e+00"); assert_eq!(f(1.0), "1.000000e+00"); assert_eq!(f(100.0), "1.000000e+02"); assert_eq!(f(123_456.789), "1.234568e+05"); assert_eq!(f(12.345_678_9), "1.234568e+01"); assert_eq!(f(1_000_000.0), "1.000000e+06"); assert_eq!(f(99_999_999.0), "1.000000e+08"); } #[test] fn scientific_float_zero_precision() { use super::format_float_scientific; let f = |x| format_float_scientific(x, 0, Case::Lowercase, ForceDecimal::No); assert_eq!(f(0.0), "0e+00"); assert_eq!(f(1.0), "1e+00"); assert_eq!(f(100.0), "1e+02"); assert_eq!(f(123_456.789), "1e+05"); assert_eq!(f(12.345_678_9), "1e+01"); assert_eq!(f(1_000_000.0), "1e+06"); assert_eq!(f(99_999_999.0), "1e+08"); let f = |x| format_float_scientific(x, 0, Case::Lowercase, ForceDecimal::Yes); assert_eq!(f(0.0), "0.e+00"); assert_eq!(f(1.0), "1.e+00"); assert_eq!(f(100.0), "1.e+02"); assert_eq!(f(123_456.789), "1.e+05"); assert_eq!(f(12.345_678_9), "1.e+01"); assert_eq!(f(1_000_000.0), "1.e+06"); assert_eq!(f(99_999_999.0), "1.e+08"); } #[test] fn shortest_float() { use super::format_float_shortest; let f = |x| format_float_shortest(x, 6, Case::Lowercase, ForceDecimal::No); assert_eq!(f(0.0), "0"); assert_eq!(f(1.0), "1"); assert_eq!(f(100.0), "100"); assert_eq!(f(123_456.789), "123457"); assert_eq!(f(12.345_678_9), "12.3457"); assert_eq!(f(1_000_000.0), "1e+06"); assert_eq!(f(99_999_999.0), "1e+08"); } #[test] fn shortest_float_force_decimal() { use super::format_float_shortest; let f = |x| format_float_shortest(x, 6, Case::Lowercase, ForceDecimal::Yes); assert_eq!(f(0.0), "0.00000"); assert_eq!(f(1.0), "1.00000"); assert_eq!(f(100.0), "100.000"); assert_eq!(f(123_456.789), "123457."); assert_eq!(f(12.345_678_9), "12.3457"); assert_eq!(f(1_000_000.0), 
"1.00000e+06"); assert_eq!(f(99_999_999.0), "1.00000e+08"); } #[test] fn shortest_float_force_decimal_zero_precision() { use super::format_float_shortest; let f = |x| format_float_shortest(x, 0, Case::Lowercase, ForceDecimal::No); assert_eq!(f(0.0), "0"); assert_eq!(f(1.0), "1"); assert_eq!(f(100.0), "1e+02"); assert_eq!(f(123_456.789), "1e+05"); assert_eq!(f(12.345_678_9), "1e+01"); assert_eq!(f(1_000_000.0), "1e+06"); assert_eq!(f(99_999_999.0), "1e+08"); let f = |x| format_float_shortest(x, 0, Case::Lowercase, ForceDecimal::Yes); assert_eq!(f(0.0), "0."); assert_eq!(f(1.0), "1."); assert_eq!(f(100.0), "1.e+02"); assert_eq!(f(123_456.789), "1.e+05"); assert_eq!(f(12.345_678_9), "1.e+01"); assert_eq!(f(1_000_000.0), "1.e+06"); assert_eq!(f(99_999_999.0), "1.e+08"); } #[test] fn hexadecimal_float() { use super::format_float_hexadecimal; let f = |x| format_float_hexadecimal(x, 6, Case::Lowercase, ForceDecimal::No); // TODO(#7364): These values do not match coreutils output, but are possible correct representations. 
assert_eq!(f(0.00001), "0x1.4f8b588e368f1p-17"); assert_eq!(f(0.125), "0x1.0000000000000p-3"); assert_eq!(f(256.0), "0x1.0000000000000p+8"); assert_eq!(f(65536.0), "0x1.0000000000000p+16"); assert_eq!(f(-0.00001), "-0x1.4f8b588e368f1p-17"); assert_eq!(f(-0.125), "-0x1.0000000000000p-3"); assert_eq!(f(-256.0), "-0x1.0000000000000p+8"); assert_eq!(f(-65536.0), "-0x1.0000000000000p+16"); let f = |x| format_float_hexadecimal(x, 0, Case::Lowercase, ForceDecimal::No); assert_eq!(f(0.125), "0x1p-3"); assert_eq!(f(256.0), "0x1p+8"); assert_eq!(f(-0.125), "-0x1p-3"); assert_eq!(f(-256.0), "-0x1p+8"); let f = |x| format_float_hexadecimal(x, 0, Case::Lowercase, ForceDecimal::Yes); assert_eq!(f(0.125), "0x1.p-3"); assert_eq!(f(256.0), "0x1.p+8"); assert_eq!(f(-0.125), "-0x1.p-3"); assert_eq!(f(-256.0), "-0x1.p+8"); } #[test] fn strip_insignificant_end() { use super::strip_fractional_zeroes_and_dot; let f = |s| { let mut s = String::from(s); strip_fractional_zeroes_and_dot(&mut s); s }; assert_eq!(&f("1000"), "1000"); assert_eq!(&f("1000."), "1000"); assert_eq!(&f("1000.02030"), "1000.0203"); assert_eq!(&f("1000.00000"), "1000"); } #[test] fn shortest_float_abs_value_less_than_one() { use super::format_float_shortest; let f = |x| format_float_shortest(x, 6, Case::Lowercase, ForceDecimal::No); assert_eq!(f(0.1171875), "0.117188"); assert_eq!(f(0.01171875), "0.0117188"); assert_eq!(f(0.001171875), "0.00117187"); assert_eq!(f(0.0001171875), "0.000117187"); assert_eq!(f(0.001171875001), "0.00117188"); assert_eq!(f(-0.1171875), "-0.117188"); assert_eq!(f(-0.01171875), "-0.0117188"); assert_eq!(f(-0.001171875), "-0.00117187"); assert_eq!(f(-0.0001171875), "-0.000117187"); assert_eq!(f(-0.001171875001), "-0.00117188"); } #[test] fn shortest_float_switch_decimal_scientific() { use super::format_float_shortest; let f = |x| format_float_shortest(x, 6, Case::Lowercase, ForceDecimal::No); assert_eq!(f(0.001), "0.001"); assert_eq!(f(0.0001), "0.0001"); assert_eq!(f(0.00001), "1e-05"); 
/// Base for number parsing
#[derive(Clone, Copy, PartialEq)]
pub enum Base {
    /// Binary base
    Binary = 2,

    /// Octal base
    Octal = 8,

    /// Decimal base
    Decimal = 10,

    /// Hexadecimal base
    Hexadecimal = 16,
}

impl Base {
    /// Return the digit value of a character in the given base, or `None`
    /// if `c` is not a valid digit in that base. Hexadecimal digits are
    /// accepted in either case.
    pub fn digit(&self, c: char) -> Option<u64> {
        // The discriminant of each variant is its radix.
        c.to_digit(*self as u32).map(u64::from)
    }
}

/// Type returned if a number could not be parsed in its entirety
#[derive(Debug, PartialEq)]
pub enum ParseError<'a, T> {
    /// The input as a whole makes no sense
    NotNumeric,
    /// The beginning of the input made sense and has been parsed,
    /// while the remaining doesn't.
    PartialMatch(T, &'a str),
    /// The integral part has overflowed the requested type, or
    /// has overflowed the `u64` internal storage when parsing the
    /// integral part of a floating point number.
    Overflow,
}

impl<'a, T> ParseError<'a, T> {
    /// Converts the payload of a `PartialMatch` with `f`; the other variants
    /// carry no payload and are passed through unchanged.
    fn map<U>(self, f: impl FnOnce(T, &'a str) -> ParseError<'a, U>) -> ParseError<'a, U> {
        match self {
            Self::NotNumeric => ParseError::NotNumeric,
            Self::Overflow => ParseError::Overflow,
            Self::PartialMatch(v, s) => f(v, s),
        }
    }
}

/// A number parser for binary, octal, decimal, hexadecimal and single characters.
///
/// Internally, in order to get the maximum possible precision and cover the full
/// range of u64 and i64 without losing precision for f64, the returned number is
/// decomposed into:
///   - A `base` value
///   - A `neg` sign bit
///   - A `integral` positive part
///   - A `fractional` positive part
///   - A `precision` representing the number of digits in the fractional part
///
/// If the fractional part cannot be represented on a `u64`, parsing continues
/// silently by ignoring non-significant digits.
pub struct ParsedNumber {
    base: Base,
    negative: bool,
    integral: u64,
    fractional: u64,
    precision: usize,
}

impl ParsedNumber {
    /// Signed value of the integral part, or `None` if it does not fit an `i64`.
    fn into_i64(self) -> Option<i64> {
        if self.negative {
            i64::try_from(-i128::from(self.integral)).ok()
        } else {
            i64::try_from(self.integral).ok()
        }
    }

    /// Parse a number as i64. No fractional part is allowed.
    pub fn parse_i64(input: &str) -> Result<i64, ParseError<'_, i64>> {
        match Self::parse(input, true) {
            Ok(v) => v.into_i64().ok_or(ParseError::Overflow),
            Err(e) => Err(e.map(|v, rest| {
                v.into_i64()
                    .map(|v| ParseError::PartialMatch(v, rest))
                    .unwrap_or(ParseError::Overflow)
            })),
        }
    }

    /// Parse a number as u64. No fractional part is allowed.
    ///
    /// Any input with a leading minus sign is rejected as `NotNumeric`.
    pub fn parse_u64(input: &str) -> Result<u64, ParseError<'_, u64>> {
        match Self::parse(input, true) {
            Ok(v) | Err(ParseError::PartialMatch(v, _)) if v.negative => {
                Err(ParseError::NotNumeric)
            }
            Ok(v) => Ok(v.integral),
            Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.integral, rest))),
        }
    }

    /// Recombines the integral and fractional parts into a float.
    fn into_f64(self) -> f64 {
        let n = self.integral as f64
            + (self.fractional as f64) / (self.base as u8 as f64).powf(self.precision as f64);
        if self.negative {
            -n
        } else {
            n
        }
    }

    /// Parse a number as f64
    ///
    /// Besides ordinary numbers, `inf` and `nan` (any case, optional leading
    /// `-`) are recognised.
    pub fn parse_f64(input: &str) -> Result<f64, ParseError<'_, f64>> {
        match Self::parse(input, false) {
            Ok(v) => Ok(v.into_f64()),
            Err(ParseError::NotNumeric) => Self::parse_f64_special_values(input),
            Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.into_f64(), rest))),
        }
    }

    /// Recognises a three-letter `inf`/`nan` prefix; anything following it is
    /// reported through `PartialMatch`.
    fn parse_f64_special_values(input: &str) -> Result<f64, ParseError<'_, f64>> {
        let (sign, rest) = if let Some(input) = input.strip_prefix('-') {
            (-1.0, input)
        } else {
            (1.0, input)
        };
        let prefix = rest
            .chars()
            .take(3)
            .map(|c| c.to_ascii_lowercase())
            .collect::<String>();
        let special = match prefix.as_str() {
            "inf" => f64::INFINITY,
            "nan" => f64::NAN,
            _ => return Err(ParseError::NotNumeric),
        }
        .copysign(sign);
        // The matched prefix is pure ASCII, so byte offset 3 is a char boundary.
        if rest.len() == 3 {
            Ok(special)
        } else {
            Err(ParseError::PartialMatch(special, &rest[3..]))
        }
    }

    /// Core parser shared by the typed entry points.
    ///
    /// With `integral_only` set, a leading `0` selects octal and no fractional
    /// part is consumed. Returns `NotNumeric` when no digit at all could be
    /// read (this includes a lone `.`), `Overflow` when the integral part does
    /// not fit a `u64`, and `PartialMatch` when trailing characters remain.
    #[allow(clippy::cognitive_complexity)]
    fn parse(input: &str, integral_only: bool) -> Result<Self, ParseError<'_, Self>> {
        // Parse the "'" prefix separately
        if let Some(rest) = input.strip_prefix('\'') {
            let mut chars = rest.char_indices().fuse();
            let v = chars.next().map(|(_, c)| Self {
                base: Base::Decimal,
                negative: false,
                integral: u64::from(c),
                fractional: 0,
                precision: 0,
            });
            return match (v, chars.next()) {
                (Some(v), None) => Ok(v),
                (Some(v), Some((i, _))) => Err(ParseError::PartialMatch(v, &rest[i..])),
                (None, _) => Err(ParseError::NotNumeric),
            };
        }

        // Initial minus sign
        let (negative, unsigned) = if let Some(input) = input.strip_prefix('-') {
            (true, input)
        } else {
            (false, input)
        };

        // Parse an optional base prefix ("0b" / "0B" / "0" / "0x" / "0X"). "0" is octal unless a
        // fractional part is allowed in which case it is an insignificant leading 0. A "0" prefix
        // will not be consumed in case the parsable string contains only "0": the leading extra "0"
        // will have no influence on the result.
        let (base, rest) = if let Some(rest) = unsigned.strip_prefix('0') {
            if let Some(rest) = rest.strip_prefix(['b', 'B']) {
                (Base::Binary, rest)
            } else if let Some(rest) = rest.strip_prefix(['x', 'X']) {
                (Base::Hexadecimal, rest)
            } else if integral_only {
                (Base::Octal, unsigned)
            } else {
                (Base::Decimal, unsigned)
            }
        } else {
            (Base::Decimal, unsigned)
        };
        if rest.is_empty() {
            return Err(ParseError::NotNumeric);
        }

        // `char_indices` yields byte offsets, which is what slicing `rest` needs.
        let mut chars = rest.char_indices().fuse().peekable();
        // Number of digits consumed, integral and fractional together.
        let mut digits = 0usize;

        // Parse the integral part of the number
        let mut integral = 0u64;
        while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
            chars.next();
            digits += 1;
            integral = integral
                .checked_mul(base as u64)
                .and_then(|n| n.checked_add(d))
                .ok_or(ParseError::Overflow)?;
        }

        // Parse the fractional part of the number if there can be one and the input contains
        // a '.' decimal separator.
        let (mut fractional, mut precision) = (0u64, 0);
        if matches!(chars.peek(), Some(&(_, '.')))
            && matches!(base, Base::Decimal | Base::Hexadecimal)
            && !integral_only
        {
            chars.next();
            // Once the fractional part no longer fits a u64, further digits are
            // consumed but ignored.
            let mut ended = false;
            while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
                chars.next();
                digits += 1;
                if !ended {
                    if let Some(f) = fractional
                        .checked_mul(base as u64)
                        .and_then(|n| n.checked_add(d))
                    {
                        (fractional, precision) = (f, precision + 1);
                    } else {
                        ended = true;
                    }
                }
            }
        }

        // If no digit has been parsed, declare the parsing unsuccessful. A
        // decimal separator on its own is not a number.
        if digits == 0 {
            return Err(ParseError::NotNumeric);
        }

        // Return what has been parsed so far. If there are extra characters, mark the
        // parsing as a partial match.
        let parsed = Self {
            base,
            negative,
            integral,
            fractional,
            precision,
        };
        if let Some((first_unparsed, _)) = chars.next() {
            Err(ParseError::PartialMatch(parsed, &rest[first_unparsed..]))
        } else {
            Ok(parsed)
        }
    }
}
assert!(matches!(ParsedNumber::parse_f64("1.2.3"), Err(ParseError::PartialMatch(f, ".3")) if f == 1.2)); assert_eq!(Ok(f64::INFINITY), ParsedNumber::parse_f64("inf")); assert_eq!(Ok(f64::NEG_INFINITY), ParsedNumber::parse_f64("-inf")); assert!(ParsedNumber::parse_f64("NaN").unwrap().is_nan()); assert!(ParsedNumber::parse_f64("NaN").unwrap().is_sign_positive()); assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_nan()); assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_sign_negative()); assert!(matches!(ParsedNumber::parse_f64("-infinity"), Err(ParseError::PartialMatch(f, "inity")) if f == f64::NEG_INFINITY)); assert!(ParsedNumber::parse_f64(&format!("{}", u64::MAX)).is_ok()); assert!(ParsedNumber::parse_f64(&format!("{}", i64::MIN)).is_ok()); } #[test] fn test_hexadecimal() { assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0x123")); assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0X123")); assert_eq!(Ok(0xfe), ParsedNumber::parse_u64("0xfE")); assert_eq!(Ok(-0x123), ParsedNumber::parse_i64("-0x123")); assert_eq!(Ok(0.5), ParsedNumber::parse_f64("0x.8")); assert_eq!(Ok(0.0625), ParsedNumber::parse_f64("0x.1")); assert_eq!(Ok(15.007_812_5), ParsedNumber::parse_f64("0xf.02")); } #[test] fn test_octal() { assert_eq!(Ok(0), ParsedNumber::parse_u64("0")); assert_eq!(Ok(0o123), ParsedNumber::parse_u64("0123")); assert_eq!(Ok(0o123), ParsedNumber::parse_u64("00123")); assert_eq!(Ok(0), ParsedNumber::parse_u64("00")); assert!(matches!( ParsedNumber::parse_u64("008"), Err(ParseError::PartialMatch(0, "8")) )); assert!(matches!( ParsedNumber::parse_u64("08"), Err(ParseError::PartialMatch(0, "8")) )); assert!(matches!( ParsedNumber::parse_u64("0."), Err(ParseError::PartialMatch(0, ".")) )); } #[test] fn test_binary() { assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0b1011")); assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0B1011")); } } uucore-0.0.30/src/lib/features/format/spec.rs000064400000000000000000000417151046102023000171610ustar 00000000000000// This file is part of 
the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) intmax ptrdiff padlen use crate::quoting_style::{escape_name, QuotingStyle}; use super::{ num_format::{ self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix, UnsignedIntVariant, }, parse_escape_only, ArgumentIter, FormatChar, FormatError, }; use std::{io::Write, ops::ControlFlow}; /// A parsed specification for formatting a value /// /// This might require more than one argument to resolve width or precision /// values that are given as `*`. #[derive(Debug)] pub enum Spec { Char { width: Option>, align_left: bool, }, String { precision: Option>, width: Option>, align_left: bool, }, EscapedString, QuotedString, SignedInt { width: Option>, precision: Option>, positive_sign: PositiveSign, alignment: NumberAlignment, }, UnsignedInt { variant: UnsignedIntVariant, width: Option>, precision: Option>, alignment: NumberAlignment, }, Float { variant: FloatVariant, case: Case, force_decimal: ForceDecimal, width: Option>, positive_sign: PositiveSign, alignment: NumberAlignment, precision: Option>, }, } /// Precision and width specified might use an asterisk to indicate that they are /// determined by an argument. #[derive(Clone, Copy, Debug)] pub enum CanAsterisk { Fixed(T), Asterisk, } /// Size of the expected type (ignored) /// /// We ignore this parameter entirely, but we do parse it. /// It could be used in the future if the need arises. 
/// The set of flag characters (`-`, `+`, space, `#`, `0`) seen directly
/// after the `%` of a format directive.
#[derive(Default, PartialEq, Eq)]
struct Flags {
    minus: bool,
    plus: bool,
    space: bool,
    hash: bool,
    zero: bool,
}

impl Flags {
    /// Reads flag characters from `rest` starting at `*index`, advancing
    /// `*index` past every flag consumed. Stops at the first byte that is not
    /// a flag or at the end of the input. Repeated flags are accepted.
    pub fn parse(rest: &mut &[u8], index: &mut usize) -> Self {
        let mut parsed = Self::default();
        loop {
            // Pick the field that corresponds to the current byte, if any.
            let slot = match rest.get(*index) {
                Some(b'-') => &mut parsed.minus,
                Some(b'+') => &mut parsed.plus,
                Some(b' ') => &mut parsed.space,
                Some(b'#') => &mut parsed.hash,
                Some(b'0') => &mut parsed.zero,
                _ => return parsed,
            };
            *slot = true;
            *index += 1;
        }
    }

    /// Whether any of the flags is set to true
    fn any(&self) -> bool {
        self.minus || self.plus || self.space || self.hash || self.zero
    }
}
It's not really relevant to printf let _ = Self::parse_length(rest, &mut index); let Some(type_spec) = rest.get(index) else { return Err(&start[..index]); }; index += 1; *rest = &start[index..]; Ok(match type_spec { // GNU accepts minus, plus and space even though they are not used b'c' => { if flags.zero || flags.hash || precision.is_some() { return Err(&start[..index]); } Self::Char { width, align_left: flags.minus, } } b's' => { if flags.zero || flags.hash { return Err(&start[..index]); } Self::String { precision, width, align_left: flags.minus, } } b'b' => { if flags.any() || width.is_some() || precision.is_some() { return Err(&start[..index]); } Self::EscapedString } b'q' => { if flags.any() || width.is_some() || precision.is_some() { return Err(&start[..index]); } Self::QuotedString } b'd' | b'i' => { if flags.hash { return Err(&start[..index]); } Self::SignedInt { width, precision, alignment, positive_sign, } } c @ (b'u' | b'o' | b'x' | b'X') => { // Normal unsigned integer cannot have a prefix if *c == b'u' && flags.hash { return Err(&start[..index]); } let prefix = if flags.hash { Prefix::Yes } else { Prefix::No }; let variant = match c { b'u' => UnsignedIntVariant::Decimal, b'o' => UnsignedIntVariant::Octal(prefix), b'x' => UnsignedIntVariant::Hexadecimal(Case::Lowercase, prefix), b'X' => UnsignedIntVariant::Hexadecimal(Case::Uppercase, prefix), _ => unreachable!(), }; Self::UnsignedInt { variant, precision, width, alignment, } } c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Self::Float { width, precision, variant: match c { b'f' | b'F' => FloatVariant::Decimal, b'e' | b'E' => FloatVariant::Scientific, b'g' | b'G' => FloatVariant::Shortest, b'a' | b'A' => FloatVariant::Hexadecimal, _ => unreachable!(), }, force_decimal: if flags.hash { ForceDecimal::Yes } else { ForceDecimal::No }, case: if c.is_ascii_uppercase() { Case::Uppercase } else { Case::Lowercase }, alignment: if flags.zero && !flags.minus { NumberAlignment::RightZero // float 
should always try to zero pad despite the precision } else { alignment }, positive_sign, }, _ => return Err(&start[..index]), }) } fn parse_length(rest: &mut &[u8], index: &mut usize) -> Option { // Parse 0..N length options, keep the last one // Even though it is just ignored. We might want to use it later and we // should parse those characters. // // TODO: This needs to be configurable: `seq` accepts only one length // param let mut length = None; loop { let new_length = rest.get(*index).and_then(|c| { Some(match c { b'h' => { if let Some(b'h') = rest.get(*index + 1) { *index += 1; Length::Char } else { Length::Short } } b'l' => { if let Some(b'l') = rest.get(*index + 1) { *index += 1; Length::Long } else { Length::LongLong } } b'j' => Length::IntMaxT, b'z' => Length::SizeT, b't' => Length::PtfDiffT, b'L' => Length::LongDouble, _ => return None, }) }); if new_length.is_some() { *index += 1; length = new_length; } else { break; } } length } pub fn write<'a>( &self, mut writer: impl Write, mut args: impl ArgumentIter<'a>, ) -> Result<(), FormatError> { match self { Self::Char { width, align_left } => { let (width, neg_width) = resolve_asterisk_maybe_negative(*width, &mut args).unwrap_or_default(); write_padded(writer, &[args.get_char()], width, *align_left || neg_width) } Self::String { width, align_left, precision, } => { let (width, neg_width) = resolve_asterisk_maybe_negative(*width, &mut args).unwrap_or_default(); // GNU does do this truncation on a byte level, see for instance: // printf "%.1s" 🙃 // > � // For now, we let printf panic when we truncate within a code point. // TODO: We need to not use Rust's formatting for aligning the output, // so that we can just write bytes to stdout without panicking. 
let precision = resolve_asterisk(*precision, &mut args); let s = args.get_str(); let truncated = match precision { Some(p) if p < s.len() => &s[..p], _ => s, }; write_padded( writer, truncated.as_bytes(), width, *align_left || neg_width, ) } Self::EscapedString => { let s = args.get_str(); let mut parsed = Vec::new(); for c in parse_escape_only(s.as_bytes()) { match c.write(&mut parsed)? { ControlFlow::Continue(()) => {} ControlFlow::Break(()) => { // TODO: This should break the _entire execution_ of printf break; } }; } writer.write_all(&parsed).map_err(FormatError::IoError) } Self::QuotedString => { let s = escape_name( args.get_str().as_ref(), &QuotingStyle::Shell { escape: true, always_quote: false, show_control: false, }, ); #[cfg(unix)] let bytes = std::os::unix::ffi::OsStringExt::into_vec(s); #[cfg(not(unix))] let bytes = s.to_string_lossy().as_bytes().to_owned(); writer.write_all(&bytes).map_err(FormatError::IoError) } Self::SignedInt { width, precision, positive_sign, alignment, } => { let width = resolve_asterisk(*width, &mut args).unwrap_or(0); let precision = resolve_asterisk(*precision, &mut args).unwrap_or(0); let i = args.get_i64(); if precision as u64 > i32::MAX as u64 { return Err(FormatError::InvalidPrecision(precision.to_string())); } num_format::SignedInt { width, precision, positive_sign: *positive_sign, alignment: *alignment, } .fmt(writer, i) .map_err(FormatError::IoError) } Self::UnsignedInt { variant, width, precision, alignment, } => { let width = resolve_asterisk(*width, &mut args).unwrap_or(0); let precision = resolve_asterisk(*precision, &mut args).unwrap_or(0); let i = args.get_u64(); if precision as u64 > i32::MAX as u64 { return Err(FormatError::InvalidPrecision(precision.to_string())); } num_format::UnsignedInt { variant: *variant, precision, width, alignment: *alignment, } .fmt(writer, i) .map_err(FormatError::IoError) } Self::Float { variant, case, force_decimal, width, positive_sign, alignment, precision, } => { let width = 
/// Parses a run of ASCII digits from `rest` starting at `*index` as a
/// decimal `usize`, advancing `*index` past the digits on success.
///
/// Returns `None`, leaving `*index` untouched, when:
///  - there is no digit at `*index`,
///  - the digits run to the end of the input (a directive still needs a
///    conversion specifier after the number), or
///  - the number does not fit in a `usize`. The caller then sees the digits
///    as an unexpected specifier and reports an invalid directive instead of
///    the process panicking.
fn eat_number(rest: &mut &[u8], index: &mut usize) -> Option<usize> {
    let tail = rest.get(*index..)?;
    let len = match tail.iter().position(|b| !b.is_ascii_digit()) {
        None | Some(0) => return None,
        Some(len) => len,
    };
    // Accumulate with overflow checks; every byte in `tail[..len]` is an
    // ASCII digit, so `b - b'0'` is in 0..=9.
    let value = tail[..len].iter().try_fold(0usize, |acc, &b| {
        acc.checked_mul(10)?.checked_add(usize::from(b - b'0'))
    })?;
    *index += len;
    Some(value)
}
/// /// If `path` points to a symlink and `dereference` is true, information about /// the link's target will be returned. pub fn from_path(path: impl AsRef, dereference: bool) -> IOResult { #[cfg(unix)] { let stat = if dereference { nix::sys::stat::stat(path.as_ref()) } else { nix::sys::stat::lstat(path.as_ref()) }; Ok(Self(stat?)) } #[cfg(target_os = "windows")] { use std::fs::OpenOptions; use std::os::windows::prelude::*; let mut open_options = OpenOptions::new(); let mut custom_flags = 0; if !dereference { custom_flags |= windows_sys::Win32::Storage::FileSystem::FILE_FLAG_OPEN_REPARSE_POINT; } custom_flags |= windows_sys::Win32::Storage::FileSystem::FILE_FLAG_BACKUP_SEMANTICS; open_options.custom_flags(custom_flags); let file = open_options.read(true).open(path.as_ref())?; Self::from_file(&file) } } pub fn file_size(&self) -> u64 { #[cfg(unix)] { assert!(self.0.st_size >= 0, "File size is negative"); self.0.st_size.try_into().unwrap() } #[cfg(target_os = "windows")] { self.0.file_size() } } #[cfg(windows)] pub fn file_index(&self) -> u64 { self.0.file_index() } pub fn number_of_links(&self) -> u64 { #[cfg(all( unix, not(target_vendor = "apple"), not(target_os = "aix"), not(target_os = "android"), not(target_os = "freebsd"), not(target_os = "netbsd"), not(target_os = "openbsd"), not(target_os = "illumos"), not(target_os = "solaris"), not(target_arch = "aarch64"), not(target_arch = "riscv64"), not(target_arch = "loongarch64"), not(target_arch = "sparc64"), target_pointer_width = "64" ))] return self.0.st_nlink; #[cfg(all( unix, any( target_vendor = "apple", target_os = "android", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd", target_os = "illumos", target_os = "solaris", target_arch = "aarch64", target_arch = "riscv64", target_arch = "loongarch64", target_arch = "sparc64", not(target_pointer_width = "64") ) ))] return self.0.st_nlink.into(); #[cfg(target_os = "aix")] return self.0.st_nlink.try_into().unwrap(); #[cfg(windows)] return 
/// Controls how [`canonicalize`] reacts to path components that cannot be
/// resolved (for example because they do not exist).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MissingHandling {
    /// Return an error if a non-final component cannot be resolved; a failure
    /// on the final component is tolerated, but the parent of the resulting
    /// path must then be readable as a directory.
    Normal,

    /// Return an error if any component, including the final one, cannot be
    /// resolved.
    Existing,

    /// Never fail because a component cannot be resolved; such components are
    /// kept in the result as written.
    Missing,
}
/// Lexically normalize `path` without touching the file system.
///
/// `.` components are dropped and each `..` removes the component collected
/// before it (a `..` with nothing left to remove is discarded), so
/// `bar/../foo/bar.txt` becomes `foo/bar.txt`. Symbolic links are not
/// consulted.
///
/// Copied from an MIT-licensed upstream implementation; replace it once the
/// standard library offers an equivalent.
pub fn normalize_path(path: &Path) -> PathBuf {
    let mut parts = path.components().peekable();

    // A prefix (e.g. a Windows drive) can only come first; seed the result with it.
    let mut normalized = match parts.peek().copied() {
        Some(prefix @ Component::Prefix(..)) => {
            parts.next();
            PathBuf::from(prefix.as_os_str())
        }
        _ => PathBuf::new(),
    };

    for part in parts {
        match part {
            Component::Prefix(..) => unreachable!(),
            Component::CurDir => {}
            Component::ParentDir => {
                normalized.pop();
            }
            Component::RootDir | Component::Normal(_) => normalized.push(part.as_os_str()),
        }
    }
    normalized
}
/// The `miss_mode` parameter controls how missing path elements are handled /// /// * [`MissingHandling::Normal`] makes this function behave like /// [`std::fs::canonicalize`], resolving symbolic links and returning /// an error if the path does not exist. /// * [`MissingHandling::Missing`] makes this function ignore non-final /// components of the path that could not be resolved. /// * [`MissingHandling::Existing`] makes this function return an error /// if the final component of the path does not exist. /// /// The `res_mode` parameter controls how symbolic links are /// resolved: /// /// * [`ResolveMode::None`] makes this function not try to resolve /// any symbolic links. /// * [`ResolveMode::Physical`] makes this function resolve symlinks as they /// are encountered /// * [`ResolveMode::Logical`] makes this function resolve '..' components /// before symlinks /// #[allow(clippy::cognitive_complexity)] pub fn canonicalize>( original: P, miss_mode: MissingHandling, res_mode: ResolveMode, ) -> IOResult { const SYMLINKS_TO_LOOK_FOR_LOOPS: i32 = 20; let original = original.as_ref(); let has_to_be_directory = (miss_mode == MissingHandling::Normal || miss_mode == MissingHandling::Existing) && { let path_str = original.to_string_lossy(); path_str.ends_with(MAIN_SEPARATOR) || path_str.ends_with('/') }; let original = if original.is_absolute() { original.to_path_buf() } else { let current_dir = env::current_dir()?; dunce::canonicalize(current_dir)?.join(original) }; let path = if res_mode == ResolveMode::Logical { normalize_path(&original) } else { original }; let mut parts: VecDeque = path.components().map(|part| part.into()).collect(); let mut result = PathBuf::new(); let mut followed_symlinks = 0; let mut visited_files = HashSet::new(); while let Some(part) = parts.pop_front() { match part { OwningComponent::Prefix(s) => { result.push(s); continue; } OwningComponent::RootDir | OwningComponent::Normal(..) 
=> { result.push(part.as_os_str()); } OwningComponent::CurDir => {} OwningComponent::ParentDir => { result.pop(); } } if res_mode == ResolveMode::None { continue; } match resolve_symlink(&result) { Ok(Some(link_path)) => { for link_part in link_path.components().rev() { parts.push_front(link_part.into()); } if followed_symlinks < SYMLINKS_TO_LOOK_FOR_LOOPS { followed_symlinks += 1; } else { let file_info = FileInformation::from_path(result.parent().unwrap(), false).unwrap(); let mut path_to_follow = PathBuf::new(); for part in &parts { path_to_follow.push(part.as_os_str()); } if !visited_files.insert((file_info, path_to_follow)) { return Err(Error::new( ErrorKind::InvalidInput, "Too many levels of symbolic links", )); // TODO use ErrorKind::FilesystemLoop when stable } } result.pop(); } Err(e) => { if miss_mode == MissingHandling::Existing || (miss_mode == MissingHandling::Normal && !parts.is_empty()) { return Err(e); } } _ => {} } } // raise Not a directory if required match miss_mode { MissingHandling::Existing => { if has_to_be_directory { read_dir(&result)?; } } MissingHandling::Normal => { if result.exists() { if has_to_be_directory { read_dir(&result)?; } } else if let Some(parent) = result.parent() { read_dir(parent)?; } } MissingHandling::Missing => {} } Ok(result) } #[cfg(not(unix))] /// Display the permissions of a file pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String { let write = if metadata.permissions().readonly() { '-' } else { 'w' }; if display_file_type { let file_type = if metadata.is_symlink() { 'l' } else if metadata.is_dir() { 'd' } else { '-' }; format!("{file_type}r{write}xr{write}xr{write}x") } else { format!("r{write}xr{write}xr{write}x") } } #[cfg(unix)] /// Display the permissions of a file pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String { let mode: mode_t = metadata.mode() as mode_t; display_permissions_unix(mode, display_file_type) } /// Returns a character 
representation of the file type based on its mode. /// This function is specific to Unix-like systems. /// /// - `mode`: The mode of the file, typically obtained from file metadata. /// /// # Returns /// - 'd' for directories /// - 'c' for character devices /// - 'b' for block devices /// - '-' for regular files /// - 'p' for FIFOs (named pipes) /// - 'l' for symbolic links /// - 's' for sockets /// - '?' for any other unrecognized file types #[cfg(unix)] fn get_file_display(mode: mode_t) -> char { match mode & S_IFMT { S_IFDIR => 'd', S_IFCHR => 'c', S_IFBLK => 'b', S_IFREG => '-', S_IFIFO => 'p', S_IFLNK => 'l', S_IFSOCK => 's', // TODO: Other file types _ => '?', } } // The logic below is more readable written this way. #[allow(clippy::if_not_else)] #[allow(clippy::cognitive_complexity)] #[cfg(unix)] /// Display the permissions of a file on a unix like system pub fn display_permissions_unix(mode: mode_t, display_file_type: bool) -> String { let mut result; if display_file_type { result = String::with_capacity(10); result.push(get_file_display(mode)); } else { result = String::with_capacity(9); } result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' }); result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' }); result.push(if has!(mode, S_ISUID as mode_t) { if has!(mode, S_IXUSR) { 's' } else { 'S' } } else if has!(mode, S_IXUSR) { 'x' } else { '-' }); result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' }); result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' }); result.push(if has!(mode, S_ISGID as mode_t) { if has!(mode, S_IXGRP) { 's' } else { 'S' } } else if has!(mode, S_IXGRP) { 'x' } else { '-' }); result.push(if has!(mode, S_IROTH) { 'r' } else { '-' }); result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' }); result.push(if has!(mode, S_ISVTX as mode_t) { if has!(mode, S_IXOTH) { 't' } else { 'T' } } else if has!(mode, S_IXOTH) { 'x' } else { '-' }); result } /// For some programs like install or mkdir, dir/. 
/// Express absolute `path` relative to absolute `to`.
///
/// The leading components shared by both paths are dropped, one `..` is
/// emitted for every remaining component of `to`, and the remaining components
/// of `path` are appended. If nothing is left, `.` is returned.
pub fn make_path_relative_to<P1: AsRef<Path>, P2: AsRef<Path>>(path: P1, to: P2) -> PathBuf {
    let mut target_parts = path.as_ref().components().peekable();
    let mut base_parts = to.as_ref().components().peekable();

    // Skip the common prefix of both paths.
    while let (Some(target_part), Some(base_part)) = (target_parts.peek(), base_parts.peek()) {
        if target_part != base_part {
            break;
        }
        target_parts.next();
        base_parts.next();
    }

    let mut relative = PathBuf::new();
    // Climb out of what is left of `to` ...
    for _ in base_parts {
        relative.push(Component::ParentDir.as_os_str());
    }
    // ... then descend into what is left of `path`.
    for part in target_parts {
        relative.push(part.as_os_str());
    }
    if relative.as_os_str().is_empty() {
        relative.push(Component::CurDir.as_os_str());
    }
    relative
}
/// Checks if there is a symlink loop in the given path.
///
/// Starting at `path`, the chain of symlinks is followed one link at a time;
/// a loop is reported as soon as a link is reached for the second time.
/// A relative link target is interpreted relative to the directory that
/// contains the link, not relative to the current working directory.
///
/// # Arguments
///
/// * `path` - A reference to a `Path` representing the starting path to check for symlink loops.
///
/// # Returns
///
/// * `bool` - Returns `true` if a symlink loop is detected, `false` otherwise
///   (including when the chain ends at a non-symlink or at an entry that
///   cannot be read).
pub fn is_symlink_loop(path: &Path) -> bool {
    let mut visited_symlinks = HashSet::new();
    let mut current_path = path.to_path_buf();

    while let (Ok(metadata), Ok(link)) = (
        current_path.symlink_metadata(),
        fs::read_link(&current_path),
    ) {
        if !metadata.file_type().is_symlink() {
            return false;
        }

        // Resolve a relative target against the link's own directory.
        let next_path = match current_path.parent() {
            Some(link_dir) if link.is_relative() => link_dir.join(link),
            _ => link,
        };

        // Record the link we just left; seeing it again means we went in a circle.
        let seen_path = std::mem::replace(&mut current_path, next_path);
        if !visited_symlinks.insert(seen_path) {
            return true;
        }
    }

    false
}
#[cfg(unix)] pub fn are_hardlinks_or_one_way_symlink_to_same_file(source: &Path, target: &Path) -> bool { let (Ok(source_metadata), Ok(target_metadata)) = (fs::metadata(source), fs::symlink_metadata(target)) else { return false; }; source_metadata.ino() == target_metadata.ino() && source_metadata.dev() == target_metadata.dev() } /// Returns true if the passed `path` ends with a path terminator. /// /// This function examines the last character of the path to determine /// if it is a directory separator. It supports both Unix-style (`/`) /// and Windows-style (`\`) separators. /// /// # Arguments /// /// * `path` - A reference to the path to be checked. #[cfg(unix)] pub fn path_ends_with_terminator(path: &Path) -> bool { use std::os::unix::prelude::OsStrExt; path.as_os_str() .as_bytes() .last() .is_some_and(|&byte| byte == b'/' || byte == b'\\') } #[cfg(windows)] pub fn path_ends_with_terminator(path: &Path) -> bool { use std::os::windows::prelude::OsStrExt; path.as_os_str() .encode_wide() .last() .is_some_and(|wide| wide == b'/'.into() || wide == b'\\'.into()) } /// Checks if the standard input (stdin) is a directory. /// /// # Arguments /// /// * `stdin` - A reference to the standard input handle. /// /// # Returns /// /// * `bool` - Returns `true` if stdin is a directory, `false` otherwise. pub fn is_stdin_directory(stdin: &Stdin) -> bool { #[cfg(unix)] { use nix::sys::stat::fstat; let mode = fstat(stdin.as_raw_fd()).unwrap().st_mode as mode_t; has!(mode, S_IFDIR) } #[cfg(windows)] { use std::os::windows::io::AsRawHandle; let handle = stdin.as_raw_handle(); if let Ok(metadata) = fs::metadata(format!("{}", handle as usize)) { return metadata.is_dir(); } false } } pub mod sane_blksize { #[cfg(not(target_os = "windows"))] use std::os::unix::fs::MetadataExt; use std::{fs::metadata, path::Path}; pub const DEFAULT: u64 = 512; pub const MAX: u64 = (u32::MAX / 8 + 1) as u64; /// Provides sanity checked blksize value from the provided value. 
/// Returns the final component of `file` as a string slice.
///
/// # Arguments
///
/// * `file`: The path whose file name is wanted.
///
/// # Returns
///
/// * `Some(filename)`: The file name, when the path has one and it is valid UTF-8.
/// * `None`: When the path has no file name component or the name is not
///   valid UTF-8.
pub fn get_filename(file: &Path) -> Option<&str> {
    let name = file.file_name()?;
    name.to_str()
}
use super::*; #[cfg(unix)] use std::io::Write; #[cfg(unix)] use std::os::unix; #[cfg(unix)] use tempfile::{tempdir, NamedTempFile}; struct NormalizePathTestCase<'a> { path: &'a str, test: &'a str, } const NORMALIZE_PATH_TESTS: [NormalizePathTestCase; 8] = [ NormalizePathTestCase { path: "./foo/bar.txt", test: "foo/bar.txt", }, NormalizePathTestCase { path: "bar/../foo/bar.txt", test: "foo/bar.txt", }, NormalizePathTestCase { path: "foo///bar.txt", test: "foo/bar.txt", }, NormalizePathTestCase { path: "foo///bar", test: "foo/bar", }, NormalizePathTestCase { path: "foo//./bar", test: "foo/bar", }, NormalizePathTestCase { path: "/foo//./bar", test: "/foo/bar", }, NormalizePathTestCase { path: r"C:/you/later/", test: "C:/you/later", }, NormalizePathTestCase { path: "\\networkShare/a//foo//./bar", test: "\\networkShare/a/foo/bar", }, ]; #[test] fn test_normalize_path() { for test in &NORMALIZE_PATH_TESTS { let path = Path::new(test.path); let normalized = normalize_path(path); assert_eq!( test.test .replace('/', std::path::MAIN_SEPARATOR.to_string().as_str()), normalized.to_str().expect("Path is not valid utf-8!") ); } } #[cfg(unix)] #[test] fn test_display_permissions() { // spell-checker:ignore (perms) brwsr drwxr rwxr assert_eq!( "drwxr-xr-x", display_permissions_unix(S_IFDIR | 0o755, true) ); assert_eq!( "rwxr-xr-x", display_permissions_unix(S_IFDIR | 0o755, false) ); assert_eq!( "-rw-r--r--", display_permissions_unix(S_IFREG | 0o644, true) ); assert_eq!( "srw-r-----", display_permissions_unix(S_IFSOCK | 0o640, true) ); assert_eq!( "lrw-r-xr-x", display_permissions_unix(S_IFLNK | 0o655, true) ); assert_eq!("?rw-r-xr-x", display_permissions_unix(0o655, true)); assert_eq!( "brwSr-xr-x", display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o655, true) ); assert_eq!( "brwsr-xr-x", display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o755, true) ); assert_eq!( "prw---sr--", display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o614, true) ); assert_eq!( 
"prw---Sr--", display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o604, true) ); assert_eq!( "c---r-xr-t", display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o055, true) ); assert_eq!( "c---r-xr-T", display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o054, true) ); } #[cfg(unix)] #[test] fn test_is_symlink_loop_no_loop() { let temp_dir = tempdir().unwrap(); let file_path = temp_dir.path().join("file.txt"); let symlink_path = temp_dir.path().join("symlink"); fs::write(&file_path, "test content").unwrap(); unix::fs::symlink(&file_path, &symlink_path).unwrap(); assert!(!is_symlink_loop(&symlink_path)); } #[cfg(unix)] #[test] fn test_is_symlink_loop_direct_loop() { let temp_dir = tempdir().unwrap(); let symlink_path = temp_dir.path().join("loop"); unix::fs::symlink(&symlink_path, &symlink_path).unwrap(); assert!(is_symlink_loop(&symlink_path)); } #[cfg(unix)] #[test] fn test_is_symlink_loop_indirect_loop() { let temp_dir = tempdir().unwrap(); let symlink1_path = temp_dir.path().join("symlink1"); let symlink2_path = temp_dir.path().join("symlink2"); unix::fs::symlink(&symlink1_path, &symlink2_path).unwrap(); unix::fs::symlink(&symlink2_path, &symlink1_path).unwrap(); assert!(is_symlink_loop(&symlink1_path)); } #[cfg(unix)] #[test] fn test_are_hardlinks_to_same_file_same_file() { let mut temp_file = NamedTempFile::new().unwrap(); writeln!(temp_file, "Test content").unwrap(); let path1 = temp_file.path(); let path2 = temp_file.path(); assert!(are_hardlinks_to_same_file(path1, path2)); } #[cfg(unix)] #[test] fn test_are_hardlinks_to_same_file_different_files() { let mut temp_file1 = NamedTempFile::new().unwrap(); writeln!(temp_file1, "Test content 1").unwrap(); let mut temp_file2 = NamedTempFile::new().unwrap(); writeln!(temp_file2, "Test content 2").unwrap(); let path1 = temp_file1.path(); let path2 = temp_file2.path(); assert!(!are_hardlinks_to_same_file(path1, path2)); } #[cfg(unix)] #[test] fn test_are_hardlinks_to_same_file_hard_link() { let mut temp_file 
= NamedTempFile::new().unwrap(); writeln!(temp_file, "Test content").unwrap(); let path1 = temp_file.path(); let path2 = temp_file.path().with_extension("hardlink"); fs::hard_link(path1, &path2).unwrap(); assert!(are_hardlinks_to_same_file(path1, &path2)); } #[cfg(unix)] #[test] fn test_get_file_display() { assert_eq!(get_file_display(S_IFDIR | 0o755), 'd'); assert_eq!(get_file_display(S_IFCHR | 0o644), 'c'); assert_eq!(get_file_display(S_IFBLK | 0o600), 'b'); assert_eq!(get_file_display(S_IFREG | 0o777), '-'); assert_eq!(get_file_display(S_IFIFO | 0o666), 'p'); assert_eq!(get_file_display(S_IFLNK | 0o777), 'l'); assert_eq!(get_file_display(S_IFSOCK | 0o600), 's'); assert_eq!(get_file_display(0o777), '?'); } #[test] fn test_path_ends_with_terminator() { // Path ends with a forward slash assert!(path_ends_with_terminator(Path::new("/some/path/"))); // Path ends with a backslash assert!(path_ends_with_terminator(Path::new("C:\\some\\path\\"))); // Path does not end with a terminator assert!(!path_ends_with_terminator(Path::new("/some/path"))); assert!(!path_ends_with_terminator(Path::new("C:\\some\\path"))); // Empty path assert!(!path_ends_with_terminator(Path::new(""))); // Root path assert!(path_ends_with_terminator(Path::new("/"))); assert!(path_ends_with_terminator(Path::new("C:\\"))); } #[test] fn test_sane_blksize() { assert_eq!(512, sane_blksize::sane_blksize(0)); assert_eq!(512, sane_blksize::sane_blksize(512)); assert_eq!(4096, sane_blksize::sane_blksize(4096)); assert_eq!(0x2000_0000, sane_blksize::sane_blksize(0x2000_0000)); assert_eq!(512, sane_blksize::sane_blksize(0x2000_0001)); } #[test] fn test_get_file_name() { let file_path = PathBuf::from("~/foo.txt"); assert!(matches!(get_filename(&file_path), Some("foo.txt"))); } } uucore-0.0.30/src/lib/features/fsext.rs000064400000000000000000001070451046102023000160670ustar 00000000000000// This file is part of the uutils coreutils package. 
/// Convert a NUL-terminated UTF-16 buffer into a `String`.
///
/// Only the code units before the first NUL are converted; if the buffer
/// contains no NUL the whole buffer is used. Invalid UTF-16 (such as an
/// unpaired surrogate) is replaced by U+FFFD instead of causing a panic.
#[cfg(windows)]
#[allow(non_snake_case)]
fn LPWSTR2String(buf: &[u16]) -> String {
    let len = buf.iter().position(|&n| n == 0).unwrap_or(buf.len());
    String::from_utf16_lossy(&buf[..len])
}
/// Decode the octal escapes used for special characters in mount table fields.
///
/// Replaces
///
/// * `\040` with a space,
/// * `\011` with a horizontal tab,
/// * `\012` with a newline,
/// * `\134` with a backslash.
#[cfg(any(target_os = "linux", target_os = "android"))]
fn replace_special_chars(s: String) -> String {
    // The backslash escape is decoded last so that a decoded backslash can
    // never combine with the digits following it into another escape.
    s.replace(r"\040", " ")
        .replace(r"\011", "\t")
        .replace(r"\012", "\n")
        .replace(r"\134", r"\")
}
`ERROR_MORE_DATA` return None; } let mount_root = LPWSTR2String(&mount_root_buf); let mut fs_type_buf = [0u16; MAX_PATH]; let success = unsafe { let mount_root = to_nul_terminated_wide_string(&mount_root); GetVolumeInformationW( mount_root.as_ptr(), ptr::null_mut(), 0, ptr::null_mut(), ptr::null_mut(), ptr::null_mut(), fs_type_buf.as_mut_ptr(), fs_type_buf.len() as u32, ) }; let fs_type = if 0 == success { None } else { Some(LPWSTR2String(&fs_type_buf)) }; let remote = DRIVE_REMOTE == unsafe { let mount_root = to_nul_terminated_wide_string(&mount_root); GetDriveTypeW(mount_root.as_ptr()) }; Some(Self { dev_id: volume_name, dev_name, fs_type: fs_type.unwrap_or_default(), mount_root, mount_dir: String::new(), mount_option: String::new(), remote, dummy: false, }) } } #[cfg(any( target_os = "freebsd", target_vendor = "apple", target_os = "netbsd", target_os = "openbsd", ))] impl From for MountInfo { fn from(statfs: StatFs) -> Self { let dev_name = unsafe { // spell-checker:disable-next-line CStr::from_ptr(&statfs.f_mntfromname[0]) .to_string_lossy() .into_owned() }; let fs_type = unsafe { // spell-checker:disable-next-line CStr::from_ptr(&statfs.f_fstypename[0]) .to_string_lossy() .into_owned() }; let mount_dir = unsafe { // spell-checker:disable-next-line CStr::from_ptr(&statfs.f_mntonname[0]) .to_string_lossy() .into_owned() }; let dev_id = mount_dev_id(&mount_dir); let dummy = is_dummy_filesystem(&fs_type, ""); let remote = is_remote_filesystem(&dev_name, &fs_type); Self { dev_id, dev_name, fs_type, mount_dir, mount_root: String::new(), mount_option: String::new(), remote, dummy, } } } #[cfg(all(unix, not(any(target_os = "aix", target_os = "redox"))))] fn is_dummy_filesystem(fs_type: &str, mount_option: &str) -> bool { // spell-checker:disable match fs_type { "autofs" | "proc" | "subfs" // for Linux 2.6/3.x | "debugfs" | "devpts" | "fusectl" | "mqueue" | "rpc_pipefs" | "sysfs" // FreeBSD, Linux 2.4 | "devfs" // for NetBSD 3.0 | "kernfs" // for Irix 6.5 | "ignore" => 
/// Reports whether a mount entry refers to a remote (network) file system.
///
/// An entry is considered remote when any of the following holds:
///
/// * the device name contains a `:` (e.g. `host:/export`),
/// * the device name starts with `//` **and** the file system type is
///   `smbfs` or `cifs`,
/// * the device name is exactly `-hosts`.
///
/// # Arguments
///
/// * `dev_name` - The device (source) name of the mount entry.
/// * `fs_type` - The file system type name of the mount entry.
#[cfg(all(unix, not(any(target_os = "aix", target_os = "redox"))))]
fn is_remote_filesystem(dev_name: &str, fs_type: &str) -> bool {
    // The `//` prefix check must apply to both SMB flavours; without the
    // explicit grouping `&&` binds tighter than `||` and every `cifs` entry
    // would match regardless of its device name.
    dev_name.contains(':')
        || (dev_name.starts_with("//") && matches!(fs_type, "smbfs" | "cifs"))
        || dev_name == "-hosts"
}
#[cfg(target_os = "freebsd")] #[link_name = "getmntinfo@FBSD_1.0"] fn get_mount_info(mount_buffer_p: *mut *mut StatFs, flags: c_int) -> c_int; } use crate::error::UResult; #[cfg(any( target_os = "freebsd", target_vendor = "apple", target_os = "netbsd", target_os = "openbsd", target_os = "windows" ))] use crate::error::USimpleError; #[cfg(any(target_os = "linux", target_os = "android"))] use std::fs::File; #[cfg(any(target_os = "linux", target_os = "android"))] use std::io::{BufRead, BufReader}; #[cfg(any( target_vendor = "apple", target_os = "freebsd", target_os = "windows", target_os = "netbsd", target_os = "openbsd" ))] use std::ptr; #[cfg(any( target_vendor = "apple", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd" ))] use std::slice; /// Read file system list. pub fn read_fs_list() -> UResult> { #[cfg(any(target_os = "linux", target_os = "android"))] { let (file_name, f) = File::open(LINUX_MOUNTINFO) .map(|f| (LINUX_MOUNTINFO, f)) .or_else(|_| File::open(LINUX_MTAB).map(|f| (LINUX_MTAB, f)))?; let reader = BufReader::new(f); Ok(reader .lines() .map_while(Result::ok) .filter_map(|line| { let raw_data = line.split_whitespace().collect::>(); MountInfo::new(file_name, &raw_data) }) .collect::>()) } #[cfg(any( target_os = "freebsd", target_vendor = "apple", target_os = "netbsd", target_os = "openbsd" ))] { let mut mount_buffer_ptr: *mut StatFs = ptr::null_mut(); let len = unsafe { get_mount_info(&mut mount_buffer_ptr, 1_i32) }; if len < 0 { return Err(USimpleError::new(1, "get_mount_info() failed")); } let mounts = unsafe { slice::from_raw_parts(mount_buffer_ptr, len as usize) }; Ok(mounts .iter() .map(|m| MountInfo::from(*m)) .collect::>()) } #[cfg(windows)] { let mut volume_name_buf = [0u16; MAX_PATH]; // As recommended in the MS documentation, retrieve the first volume before the others let find_handle = unsafe { FindFirstVolumeW(volume_name_buf.as_mut_ptr(), volume_name_buf.len() as u32) }; if INVALID_HANDLE_VALUE == find_handle { let os_err = 
IOError::last_os_error(); let msg = format!("FindFirstVolumeW failed: {}", os_err); return Err(USimpleError::new(EXIT_ERR, msg)); } let mut mounts = Vec::::new(); loop { let volume_name = LPWSTR2String(&volume_name_buf); if !volume_name.starts_with("\\\\?\\") || !volume_name.ends_with('\\') { show_warning!("A bad path was skipped: {}", volume_name); continue; } if let Some(m) = MountInfo::new(volume_name) { mounts.push(m); } if 0 == unsafe { FindNextVolumeW( find_handle, volume_name_buf.as_mut_ptr(), volume_name_buf.len() as u32, ) } { let err = IOError::last_os_error(); if err.raw_os_error() != Some(ERROR_NO_MORE_FILES as i32) { let msg = format!("FindNextVolumeW failed: {err}"); return Err(USimpleError::new(EXIT_ERR, msg)); } break; } } unsafe { FindVolumeClose(find_handle); } Ok(mounts) } #[cfg(any( target_os = "aix", target_os = "redox", target_os = "illumos", target_os = "solaris" ))] { // No method to read mounts, yet Ok(Vec::new()) } } #[derive(Debug, Clone)] pub struct FsUsage { pub blocksize: u64, pub blocks: u64, pub bfree: u64, pub bavail: u64, pub bavail_top_bit_set: bool, pub files: u64, pub ffree: u64, } impl FsUsage { #[cfg(unix)] pub fn new(statvfs: StatFs) -> Self { { #[cfg(all( not(any(target_os = "freebsd", target_os = "openbsd")), target_pointer_width = "64" ))] return Self { blocksize: statvfs.f_bsize as u64, // or `statvfs.f_frsize` ? blocks: statvfs.f_blocks, bfree: statvfs.f_bfree, bavail: statvfs.f_bavail, bavail_top_bit_set: ((statvfs.f_bavail) & (1u64.rotate_right(1))) != 0, files: statvfs.f_files, ffree: statvfs.f_ffree, }; #[cfg(all( not(any(target_os = "freebsd", target_os = "openbsd")), not(target_pointer_width = "64") ))] return Self { blocksize: statvfs.f_bsize as u64, // or `statvfs.f_frsize` ? 
blocks: statvfs.f_blocks.into(), bfree: statvfs.f_bfree.into(), bavail: statvfs.f_bavail.into(), bavail_top_bit_set: ((statvfs.f_bavail as u64) & (1u64.rotate_right(1))) != 0, files: statvfs.f_files.into(), ffree: statvfs.f_ffree.into(), }; #[cfg(target_os = "freebsd")] return Self { blocksize: statvfs.f_bsize, // or `statvfs.f_frsize` ? blocks: statvfs.f_blocks, bfree: statvfs.f_bfree, bavail: statvfs.f_bavail.try_into().unwrap(), bavail_top_bit_set: ((std::convert::TryInto::::try_into(statvfs.f_bavail) .unwrap()) & (1u64.rotate_right(1))) != 0, files: statvfs.f_files, ffree: statvfs.f_ffree.try_into().unwrap(), }; #[cfg(target_os = "openbsd")] return Self { blocksize: statvfs.f_bsize.into(), blocks: statvfs.f_blocks, bfree: statvfs.f_bfree, bavail: statvfs.f_bavail.try_into().unwrap(), bavail_top_bit_set: ((std::convert::TryInto::::try_into(statvfs.f_bavail) .unwrap()) & (1u64.rotate_right(1))) != 0, files: statvfs.f_files, ffree: statvfs.f_ffree, }; } } #[cfg(windows)] pub fn new(path: &Path) -> UResult { let mut root_path = [0u16; MAX_PATH]; let success = unsafe { let path = to_nul_terminated_wide_string(path); GetVolumePathNamesForVolumeNameW( //path_utf8.as_ptr(), path.as_ptr(), root_path.as_mut_ptr(), root_path.len() as u32, ptr::null_mut(), ) }; if 0 == success { let msg = format!( "GetVolumePathNamesForVolumeNameW failed: {}", IOError::last_os_error() ); return Err(USimpleError::new(EXIT_ERR, msg)); } let mut sectors_per_cluster = 0; let mut bytes_per_sector = 0; let mut number_of_free_clusters = 0; let mut total_number_of_clusters = 0; unsafe { let path = to_nul_terminated_wide_string(path); GetDiskFreeSpaceW( path.as_ptr(), &mut sectors_per_cluster, &mut bytes_per_sector, &mut number_of_free_clusters, &mut total_number_of_clusters, ); } let bytes_per_cluster = sectors_per_cluster as u64 * bytes_per_sector as u64; Ok(Self { // f_bsize File system block size. 
blocksize: bytes_per_cluster, // f_blocks - Total number of blocks on the file system, in units of f_frsize. // frsize = Fundamental file system block size (fragment size). blocks: total_number_of_clusters as u64, // Total number of free blocks. bfree: number_of_free_clusters as u64, // Total number of free blocks available to non-privileged processes. bavail: 0, bavail_top_bit_set: ((bytes_per_sector as u64) & (1u64.rotate_right(1))) != 0, // Total number of file nodes (inodes) on the file system. files: 0, // Not available on windows // Total number of free file nodes (inodes). ffree: 0, // Meaningless on Windows }) } } #[cfg(unix)] pub trait FsMeta { fn fs_type(&self) -> i64; fn io_size(&self) -> u64; fn block_size(&self) -> i64; fn total_blocks(&self) -> u64; fn free_blocks(&self) -> u64; fn avail_blocks(&self) -> u64; fn total_file_nodes(&self) -> u64; fn free_file_nodes(&self) -> u64; fn fsid(&self) -> u64; fn namelen(&self) -> u64; } #[cfg(unix)] impl FsMeta for StatFs { fn block_size(&self) -> i64 { #[cfg(all( not(target_env = "musl"), not(target_vendor = "apple"), not(target_os = "aix"), not(target_os = "android"), not(target_os = "freebsd"), not(target_os = "netbsd"), not(target_os = "openbsd"), not(target_os = "illumos"), not(target_os = "solaris"), not(target_os = "redox"), not(target_arch = "s390x"), target_pointer_width = "64" ))] return self.f_bsize; #[cfg(all( not(target_env = "musl"), not(target_os = "freebsd"), not(target_os = "netbsd"), not(target_os = "redox"), any( target_arch = "s390x", target_vendor = "apple", all(target_os = "android", target_pointer_width = "32"), target_os = "openbsd", not(target_pointer_width = "64") ) ))] return self.f_bsize.into(); #[cfg(any( target_env = "musl", target_os = "aix", target_os = "freebsd", target_os = "netbsd", target_os = "illumos", target_os = "solaris", target_os = "redox", all(target_os = "android", target_pointer_width = "64"), ))] return self.f_bsize.try_into().unwrap(); } fn total_blocks(&self) -> 
u64 { #[cfg(target_pointer_width = "64")] return self.f_blocks; #[cfg(not(target_pointer_width = "64"))] return self.f_blocks.into(); } fn free_blocks(&self) -> u64 { #[cfg(target_pointer_width = "64")] return self.f_bfree; #[cfg(not(target_pointer_width = "64"))] return self.f_bfree.into(); } fn avail_blocks(&self) -> u64 { #[cfg(all( not(target_os = "freebsd"), not(target_os = "openbsd"), target_pointer_width = "64" ))] return self.f_bavail; #[cfg(all( not(target_os = "freebsd"), not(target_os = "openbsd"), not(target_pointer_width = "64") ))] return self.f_bavail.into(); #[cfg(any(target_os = "freebsd", target_os = "openbsd"))] return self.f_bavail.try_into().unwrap(); } fn total_file_nodes(&self) -> u64 { #[cfg(target_pointer_width = "64")] return self.f_files; #[cfg(not(target_pointer_width = "64"))] return self.f_files.into(); } fn free_file_nodes(&self) -> u64 { #[cfg(all(not(target_os = "freebsd"), target_pointer_width = "64"))] return self.f_ffree; #[cfg(all(not(target_os = "freebsd"), not(target_pointer_width = "64")))] return self.f_ffree.into(); #[cfg(target_os = "freebsd")] return self.f_ffree.try_into().unwrap(); } #[cfg(any( target_os = "linux", target_os = "android", target_vendor = "apple", target_os = "freebsd" ))] fn fs_type(&self) -> i64 { #[cfg(all( not(target_env = "musl"), not(target_vendor = "apple"), not(target_os = "android"), not(target_os = "freebsd"), not(target_arch = "s390x"), target_pointer_width = "64" ))] return self.f_type; #[cfg(all( not(target_env = "musl"), any( target_vendor = "apple", all(target_os = "android", target_pointer_width = "32"), target_os = "freebsd", target_arch = "s390x", not(target_pointer_width = "64") ) ))] return self.f_type.into(); #[cfg(any( target_env = "musl", all(target_os = "android", target_pointer_width = "64"), ))] return self.f_type.try_into().unwrap(); } #[cfg(not(any( target_os = "linux", target_os = "android", target_vendor = "apple", target_os = "freebsd" )))] fn fs_type(&self) -> i64 { // 
FIXME: statvfs doesn't have an equivalent, so we need to do something else unimplemented!() } #[cfg(any(target_os = "linux", target_os = "android"))] fn io_size(&self) -> u64 { self.f_frsize as u64 } #[cfg(any(target_vendor = "apple", target_os = "freebsd", target_os = "netbsd"))] fn io_size(&self) -> u64 { #[cfg(target_os = "freebsd")] return self.f_iosize; #[cfg(not(target_os = "freebsd"))] return self.f_iosize as u64; } // XXX: dunno if this is right #[cfg(not(any( target_vendor = "apple", target_os = "freebsd", target_os = "linux", target_os = "android", target_os = "netbsd" )))] fn io_size(&self) -> u64 { self.f_bsize as u64 } // Linux, SunOS, HP-UX, 4.4BSD, FreeBSD have a system call statfs() that returns // a struct statfs, containing a fsid_t f_fsid, where fsid_t is defined // as struct { int val[2]; } // // Solaris, Irix and POSIX have a system call statvfs(2) that returns a // struct statvfs, containing an unsigned long f_fsid #[cfg(any( target_vendor = "apple", target_os = "freebsd", target_os = "linux", target_os = "android", target_os = "openbsd" ))] fn fsid(&self) -> u64 { let f_fsid: &[u32; 2] = unsafe { &*(&self.f_fsid as *const nix::sys::statfs::fsid_t as *const [u32; 2]) }; ((u64::from(f_fsid[0])) << 32) | u64::from(f_fsid[1]) } #[cfg(not(any( target_vendor = "apple", target_os = "freebsd", target_os = "linux", target_os = "android", target_os = "openbsd" )))] fn fsid(&self) -> u64 { self.f_fsid as u64 } #[cfg(any(target_os = "linux", target_os = "android"))] fn namelen(&self) -> u64 { self.f_namelen as u64 } #[cfg(target_vendor = "apple")] fn namelen(&self) -> u64 { 1024 } #[cfg(any(target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))] fn namelen(&self) -> u64 { self.f_namemax as u64 // spell-checker:disable-line } // XXX: should everything just use statvfs? 
#[cfg(not(any( target_vendor = "apple", target_os = "freebsd", target_os = "linux", target_os = "android", target_os = "netbsd", target_os = "openbsd" )))] fn namelen(&self) -> u64 { self.f_namemax as u64 // spell-checker:disable-line } } #[cfg(unix)] pub fn statfs

/// Maps a file system magic number to a human-readable file system name.
///
/// Magic numbers that are not in the table are rendered as
/// `UNKNOWN (0x…)` using lowercase hexadecimal.
///
/// Some platforms store the magic number in a signed 32-bit field, so values
/// with the top bit set (e.g. `0x9123_683E` for btrfs) reach this function
/// sign-extended to a negative `i64`. Such values are reinterpreted as their
/// unsigned 32-bit form before the lookup so that they still match the table.
///
/// # Arguments
///
/// * `fstype` - The file system magic number.
///
/// # Returns
///
/// A borrowed static name for known magic numbers, or an owned
/// `UNKNOWN (…)` string otherwise.
pub fn pretty_fstype<'a>(fstype: i64) -> Cow<'a, str> {
    // Undo sign extension of 32-bit magic numbers; the `as u32` cast is a
    // deliberate bit-for-bit reinterpretation.
    let fstype = match i32::try_from(fstype) {
        Ok(narrow) if narrow < 0 => i64::from(narrow as u32),
        _ => fstype,
    };
    // spell-checker:disable
    let name: &'static str = match fstype {
        0x6163_6673 => "acfs",
        0xADF5 => "adfs",
        0xADFF => "affs",
        0x5346_414F => "afs",
        0x0904_1934 => "anon-inode FS",
        0x6175_6673 => "aufs",
        0x0187 => "autofs",
        0x4246_5331 => "befs",
        0x6264_6576 => "bdevfs",
        0xCA45_1A4E => "bcachefs",
        0x1BAD_FACE => "bfs",
        0xCAFE_4A11 => "bpf_fs",
        0x4249_4E4D => "binfmt_misc",
        0x9123_683E => "btrfs",
        0x7372_7279 => "btrfs_test",
        0x00C3_6400 => "ceph",
        0x0027_E0EB => "cgroupfs",
        0xFF53_4D42 => "cifs",
        0x7375_7245 => "coda",
        0x012F_F7B7 => "coh",
        0x6265_6570 => "configfs",
        0x28CD_3D45 => "cramfs",
        0x453D_CD28 => "cramfs-wend",
        0x6462_6720 => "debugfs",
        0x1373 => "devfs",
        0x1CD1 => "devpts",
        0xF15F => "ecryptfs",
        0xDE5E_81E4 => "efivarfs",
        0x0041_4A53 => "efs",
        0x5DF5 => "exofs",
        0x137D => "ext",
        0xEF53 => "ext2/ext3",
        0xEF51 => "ext2",
        0xF2F5_2010 => "f2fs",
        0x4006 => "fat",
        0x1983_0326 => "fhgfs",
        0x6573_5546 => "fuseblk",
        0x6573_5543 => "fusectl",
        0x0BAD_1DEA => "futexfs",
        0x0116_1970 => "gfs/gfs2",
        0x4750_4653 => "gpfs",
        0x4244 => "hfs",
        0x482B => "hfs+",
        0x4858 => "hfsx",
        0x00C0_FFEE => "hostfs",
        0xF995_E849 => "hpfs",
        0x9584_58F6 => "hugetlbfs",
        0x1130_7854 => "inodefs",
        0x0131_11A8 => "ibrix",
        0x2BAD_1DEA => "inotifyfs",
        0x9660 | 0x4004 | 0x4000 => "isofs",
        0x07C0 => "jffs",
        0x72B6 => "jffs2",
        0x3153_464A => "jfs",
        0x6B41_4653 => "k-afs",
        0xC97E_8168 => "logfs",
        0x0BD0_0BD0 => "lustre",
        0x5346_314D => "m1fs",
        0x137F => "minix",
        0x138F => "minix (30 char.)",
        0x2468 => "minix v2",
        0x2478 => "minix v2 (30 char.)",
        0x4D5A => "minix3",
        0x1980_0202 => "mqueue",
        0x4D44 => "msdos",
        0x564C => "novell",
        0x6969 => "nfs",
        0x6E66_7364 => "nfsd",
        0x3434 => "nilfs",
        0x6E73_6673 => "nsfs",
        0x5346_544E => "ntfs",
        0x9FA1 => "openprom",
        0x7461_636F => "ocfs2",
        0x794C_7630 => "overlayfs",
        0xAAD7_AAEA => "panfs",
        0x5049_5045 => "pipefs",
        0x7C7C_6673 => "prl_fs",
        0x9FA0 => "proc",
        0x6165_676C => "pstorefs",
        0x002F => "qnx4",
        0x6819_1122 => "qnx6",
        0x8584_58F6 => "ramfs",
        0x5265_4973 => "reiserfs",
        0x7275 => "romfs",
        0x6759_6969 => "rpc_pipefs",
        0x7363_6673 => "securityfs",
        0xF97C_FF8C => "selinux",
        0x4341_5D53 => "smackfs",
        0x517B => "smb",
        0xFE53_4D42 => "smb2",
        0xBEEF_DEAD => "snfs",
        0x534F_434B => "sockfs",
        0x7371_7368 => "squashfs",
        0x6265_6572 => "sysfs",
        0x012F_F7B6 => "sysv2",
        0x012F_F7B5 => "sysv4",
        0x0102_1994 => "tmpfs",
        0x7472_6163 => "tracefs",
        0x2405_1905 => "ubifs",
        0x1501_3346 => "udf",
        0x0001_1954 | 0x5419_0100 => "ufs",
        0x9FA2 => "usbdevfs",
        0x0102_1997 => "v9fs",
        0xBACB_ACBC => "vmhgfs",
        0xA501_FCF5 => "vxfs",
        0x565A_4653 => "vzfs",
        0x5346_4846 => "wslfs",
        0xABBA_1974 => "xenfs",
        0x012F_F7B4 => "xenix",
        0x5846_5342 => "xfs",
        0x012F_D16D => "xia",
        0x2FC1_2FC1 | 0xDE => "zfs",
        other => return format!("UNKNOWN ({other:#x})").into(),
    };
    // spell-checker:enable
    Cow::Borrowed(name)
}
let info = MountInfo::new( LINUX_MOUNTINFO, &"106 109 253:6 / /mnt rw,relatime master:1 - xfs /dev/fs0 rw" .split_ascii_whitespace() .collect::>(), ) .unwrap(); assert_eq!(info.fs_type, "xfs"); assert_eq!(info.dev_name, "/dev/fs0"); let info = MountInfo::new( LINUX_MOUNTINFO, &"106 109 253:6 / /mnt rw,relatime master:1 shared:2 - xfs /dev/fs0 rw" .split_ascii_whitespace() .collect::>(), ) .unwrap(); assert_eq!(info.fs_type, "xfs"); assert_eq!(info.dev_name, "/dev/fs0"); } #[test] #[cfg(any(target_os = "linux", target_os = "android"))] fn test_mountinfo_dir_special_chars() { let info = MountInfo::new( LINUX_MOUNTINFO, &r#"317 61 7:0 / /mnt/f\134\040\011oo rw,relatime shared:641 - ext4 /dev/loop0 rw"# .split_ascii_whitespace() .collect::>(), ) .unwrap(); assert_eq!(info.mount_dir, r#"/mnt/f\ oo"#); let info = MountInfo::new( LINUX_MTAB, &r#"/dev/loop0 /mnt/f\134\040\011oo ext4 rw,relatime 0 0"# .split_ascii_whitespace() .collect::>(), ) .unwrap(); assert_eq!(info.mount_dir, r#"/mnt/f\ oo"#); } } uucore-0.0.30/src/lib/features/fsxattr.rs000064400000000000000000000204611046102023000164250ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore getxattr posix_acl_default //! Set of functions to manage xattr on files and dirs use std::collections::HashMap; use std::ffi::OsString; use std::path::Path; /// Copies extended attributes (xattrs) from one file or directory to another. /// /// # Arguments /// /// * `source` - A reference to the source path. /// * `dest` - A reference to the destination path. /// /// # Returns /// /// A result indicating success or failure. pub fn copy_xattrs>(source: P, dest: P) -> std::io::Result<()> { for attr_name in xattr::list(&source)? { if let Some(value) = xattr::get(&source, &attr_name)? 
{ xattr::set(&dest, &attr_name, &value)?; } } Ok(()) } /// Retrieves the extended attributes (xattrs) of a given file or directory. /// /// # Arguments /// /// * `source` - A reference to the path of the file or directory. /// /// # Returns /// /// A result containing a HashMap of attributes names and values, or an error. pub fn retrieve_xattrs>(source: P) -> std::io::Result>> { let mut attrs = HashMap::new(); for attr_name in xattr::list(&source)? { if let Some(value) = xattr::get(&source, &attr_name)? { attrs.insert(attr_name, value); } } Ok(attrs) } /// Applies extended attributes (xattrs) to a given file or directory. /// /// # Arguments /// /// * `dest` - A reference to the path of the file or directory. /// * `xattrs` - A HashMap containing attribute names and their corresponding values. /// /// # Returns /// /// A result indicating success or failure. pub fn apply_xattrs>( dest: P, xattrs: HashMap>, ) -> std::io::Result<()> { for (attr, value) in xattrs { xattr::set(&dest, &attr, &value)?; } Ok(()) } /// Checks if a file has an Access Control List (ACL) based on its extended attributes. /// /// # Arguments /// /// * `file` - A reference to the path of the file. /// /// # Returns /// /// `true` if the file has extended attributes (indicating an ACL), `false` otherwise. pub fn has_acl>(file: P) -> bool { // don't use exacl here, it is doing more getxattr call then needed xattr::list(file).is_ok_and(|acl| { // if we have extra attributes, we have an acl acl.count() > 0 }) } /// Returns the permissions bits of a file or directory which has Access Control List (ACL) entries based on its /// extended attributes (Only works for linux) /// /// # Arguments /// /// * `source` - A reference to the path of the file. 
/// /// # Returns /// /// `u32` the perm bits of a file having extended attributes of type 'system.posix_acl_default' with permissions /// otherwise returns a 0 if perm bits are 0 or the file has no extended attributes pub fn get_acl_perm_bits_from_xattr>(source: P) -> u32 { // TODO: Modify this to work on non linux unix systems. // Only default acl entries get inherited by objects under the path i.e. if child directories // will have their permissions modified. if let Ok(entries) = retrieve_xattrs(source) { let mut perm: u32 = 0; if let Some(value) = entries.get(&OsString::from("system.posix_acl_default")) { // value is xattr byte vector // value follows a starts with a 4 byte header, and then has posix_acl_entries, each // posix_acl_entry is separated by a u32 sequence i.e. 0xFFFFFFFF // // struct posix_acl_entries { // e_tag: u16 // e_perm: u16 // e_id: u32 // } // // Reference: `https://github.com/torvalds/linux/blob/master/include/uapi/linux/posix_acl_xattr.h` // // The value of the header is 0x0002, so we skip the first four bytes of the value and // process the rest let acl_entries = value .split_at(3) .1 .iter() .filter(|&x| *x != 255) .copied() .collect::>(); for entry in acl_entries.chunks_exact(4) { // Third byte and fourth byte will be the perm bits perm = (perm << 3) | u32::from(entry[2]) | u32::from(entry[3]); } return perm; } } 0 } // FIXME: 3 tests failed on OpenBSD #[cfg(not(target_os = "openbsd"))] #[cfg(test)] mod tests { use super::*; use std::fs::File; use tempfile::tempdir; #[test] fn test_copy_xattrs() { let temp_dir = tempdir().unwrap(); let source_path = temp_dir.path().join("source.txt"); let dest_path = temp_dir.path().join("dest.txt"); File::create(&source_path).unwrap(); File::create(&dest_path).unwrap(); let test_attr = "user.test"; let test_value = b"test value"; xattr::set(&source_path, test_attr, test_value).unwrap(); copy_xattrs(&source_path, &dest_path).unwrap(); let copied_value = xattr::get(&dest_path, 
test_attr).unwrap().unwrap(); assert_eq!(copied_value, test_value); } #[test] fn test_apply_and_retrieve_xattrs() { let temp_dir = tempdir().unwrap(); let file_path = temp_dir.path().join("test_file.txt"); File::create(&file_path).unwrap(); let mut test_xattrs = HashMap::new(); let test_attr = "user.test_attr"; let test_value = b"test value"; test_xattrs.insert(OsString::from(test_attr), test_value.to_vec()); apply_xattrs(&file_path, test_xattrs).unwrap(); let retrieved_xattrs = retrieve_xattrs(&file_path).unwrap(); assert!(retrieved_xattrs.contains_key(OsString::from(test_attr).as_os_str())); assert_eq!( retrieved_xattrs .get(OsString::from(test_attr).as_os_str()) .unwrap(), test_value ); } #[test] #[cfg(target_os = "linux")] fn test_get_perm_bits_from_xattrs() { let temp_dir = tempdir().unwrap(); let source_path = temp_dir.path().join("source_dir"); std::fs::create_dir(&source_path).unwrap(); let test_attr = "system.posix_acl_default"; // posix_acl entries are in the form of // struct posix_acl_entry{ // tag: u16, // perm: u16, // id: u32, // } // the fields are serialized in little endian. // The entries are preceded by a header of value of 0x0002 // Reference: `` // The id is undefined i.e. -1 which in u32 is 0xFFFFFFFF and tag and perm bits as given in the // header file. // Reference: `` // // // There is a bindgen bug which generates the ACL_OTHER constant whose value is 0x20 into 32. // which when the bug is fixed will need to be changed back to 20 from 32 in the vec 'test_value'. // // Reference `` // // The test_value vector is the header 0x0002 followed by tag and permissions for user_obj , tag // and permissions and for group_obj and finally the tag and permissions for ACL_OTHER. Each // entry has undefined id as mentioned above. 
// // let test_value = vec![ 2, 0, 0, 0, 1, 0, 7, 0, 255, 255, 255, 255, 4, 0, 0, 0, 255, 255, 255, 255, 32, 0, 0, 0, 255, 255, 255, 255, ]; xattr::set(&source_path, test_attr, test_value.as_slice()).unwrap(); let perm_bits = get_acl_perm_bits_from_xattr(source_path); assert_eq!(0o700, perm_bits); } #[test] fn test_file_has_acl() { let temp_dir = tempdir().unwrap(); let file_path = temp_dir.path().join("test_file.txt"); File::create(&file_path).unwrap(); assert!(!has_acl(&file_path)); let test_attr = "user.test_acl"; let test_value = b"test value"; xattr::set(&file_path, test_attr, test_value).unwrap(); assert!(has_acl(&file_path)); } } uucore-0.0.30/src/lib/features/lines.rs000064400000000000000000000071331046102023000160450ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) //! Iterate over lines, including the line ending character(s). //! //! This module provides the [`lines`] function, similar to the //! [`BufRead::lines`] method. While the [`BufRead::lines`] method //! yields [`String`] instances that do not include the line ending //! characters (`"\n"` or `"\r\n"`), our functions yield //! [`Vec`]<['u8']> instances that include the line ending //! characters. This is useful if the input data does not end with a //! newline character and you want to preserve the exact form of the //! input data. use std::io::BufRead; /// Returns an iterator over the lines, including line ending characters. /// /// This function is just like [`BufRead::lines`], but it includes the /// line ending characters in each yielded [`String`] if the input /// data has them. Set the `sep` parameter to the line ending /// character; for Unix line endings, use `b'\n'`. /// /// # Examples /// /// Use `sep` to specify an alternate character for line endings. 
For /// example, if lines are terminated by the null character `b'\0'`: /// /// ```rust,ignore /// use std::io::BufRead; /// use std::io::Cursor; /// /// let cursor = Cursor::new(b"x\0y\0z\0"); /// let mut it = lines(cursor, b'\0').map(|l| l.unwrap()); /// /// assert_eq!(it.next(), Some(Vec::from("x\0"))); /// assert_eq!(it.next(), Some(Vec::from("y\0"))); /// assert_eq!(it.next(), Some(Vec::from("z\0"))); /// assert_eq!(it.next(), None); /// ``` /// /// If the input data does not end with a newline character (`'\n'`), /// then the last [`String`] yielded by this iterator also does not /// end with a newline: /// /// ```rust,ignore /// let cursor = Cursor::new(b"x\ny\nz"); /// let mut it = lines(cursor, b'\n').map(|l| l.unwrap()); /// /// assert_eq!(it.next(), Some(Vec::from("x\n"))); /// assert_eq!(it.next(), Some(Vec::from("y\n"))); /// assert_eq!(it.next(), Some(Vec::from("z"))); /// assert_eq!(it.next(), None); /// ``` pub fn lines(reader: B, sep: u8) -> Lines where B: BufRead, { Lines { buf: reader, sep } } /// An iterator over the lines of an instance of `BufRead`. /// /// This struct is generally created by calling [`lines`] on a `BufRead`. /// Please see the documentation of [`lines`] for more details. 
pub struct Lines { buf: B, sep: u8, } impl Iterator for Lines { type Item = std::io::Result>; fn next(&mut self) -> Option>> { let mut buf = Vec::new(); match self.buf.read_until(self.sep, &mut buf) { Ok(0) => None, Ok(_n) => Some(Ok(buf)), Err(e) => Some(Err(e)), } } } #[cfg(test)] mod tests { use crate::lines::lines; use std::io::Cursor; #[test] fn test_lines() { let cursor = Cursor::new(b"x\ny\nz"); let mut it = lines(cursor, b'\n').map(|l| l.unwrap()); assert_eq!(it.next(), Some(Vec::from("x\n"))); assert_eq!(it.next(), Some(Vec::from("y\n"))); assert_eq!(it.next(), Some(Vec::from("z"))); assert_eq!(it.next(), None); } #[test] fn test_lines_zero_terminated() { use std::io::Cursor; let cursor = Cursor::new(b"x\0y\0z\0"); let mut it = lines(cursor, b'\0').map(|l| l.unwrap()); assert_eq!(it.next(), Some(Vec::from("x\0"))); assert_eq!(it.next(), Some(Vec::from("y\0"))); assert_eq!(it.next(), Some(Vec::from("z\0"))); assert_eq!(it.next(), None); } } uucore-0.0.30/src/lib/features/mode.rs000064400000000000000000000156561046102023000156700ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Set of functions to parse modes // spell-checker:ignore (vars) fperm srwx use libc::{mode_t, umask, S_IRGRP, S_IROTH, S_IRUSR, S_IWGRP, S_IWOTH, S_IWUSR}; pub fn parse_numeric(fperm: u32, mut mode: &str, considering_dir: bool) -> Result { let (op, pos) = parse_op(mode).map_or_else(|_| (None, 0), |(op, pos)| (Some(op), pos)); mode = mode[pos..].trim(); let change = if mode.is_empty() { 0 } else { u32::from_str_radix(mode, 8).map_err(|e| e.to_string())? 
}; if change > 0o7777 { Err(format!("mode is too large ({change} > 7777")) } else { Ok(match op { Some('+') => fperm | change, Some('-') => fperm & !change, // If this is a directory, we keep the setgid and setuid bits, // unless the mode contains 5 or more octal digits or the mode is "=" None if considering_dir && mode.len() < 5 => change | (fperm & (0o4000 | 0o2000)), None | Some('=') => change, Some(_) => unreachable!(), }) } } pub fn parse_symbolic( mut fperm: u32, mut mode: &str, umask: u32, considering_dir: bool, ) -> Result { let (mask, pos) = parse_levels(mode); if pos == mode.len() { return Err(format!("invalid mode ({mode})")); } let respect_umask = pos == 0; mode = &mode[pos..]; while !mode.is_empty() { let (op, pos) = parse_op(mode)?; mode = &mode[pos..]; let (mut srwx, pos) = parse_change(mode, fperm, considering_dir); if respect_umask { srwx &= !umask; } mode = &mode[pos..]; match op { '+' => fperm |= srwx & mask, '-' => fperm &= !(srwx & mask), '=' => { if considering_dir { // keep the setgid and setuid bits for directories srwx |= fperm & (0o4000 | 0o2000); } fperm = (fperm & !mask) | (srwx & mask); } _ => unreachable!(), } } Ok(fperm) } fn parse_levels(mode: &str) -> (u32, usize) { let mut mask = 0; let mut pos = 0; for ch in mode.chars() { mask |= match ch { 'u' => 0o4700, 'g' => 0o2070, 'o' => 0o1007, 'a' => 0o7777, _ => break, }; pos += 1; } if pos == 0 { mask = 0o7777; // default to 'a' } (mask, pos) } fn parse_op(mode: &str) -> Result<(char, usize), String> { let ch = mode .chars() .next() .ok_or_else(|| "unexpected end of mode".to_owned())?; match ch { '+' | '-' | '=' => Ok((ch, 1)), _ => Err(format!( "invalid operator (expected +, -, or =, but found {ch})" )), } } fn parse_change(mode: &str, fperm: u32, considering_dir: bool) -> (u32, usize) { let mut srwx = 0; let mut pos = 0; for ch in mode.chars() { match ch { 'r' => srwx |= 0o444, 'w' => srwx |= 0o222, 'x' => srwx |= 0o111, 'X' => { if considering_dir || (fperm & 0o0111) != 0 { srwx |= 
0o111; } } 's' => srwx |= 0o4000 | 0o2000, 't' => srwx |= 0o1000, 'u' => srwx = (fperm & 0o700) | ((fperm >> 3) & 0o070) | ((fperm >> 6) & 0o007), 'g' => srwx = ((fperm << 3) & 0o700) | (fperm & 0o070) | ((fperm >> 3) & 0o007), 'o' => srwx = ((fperm << 6) & 0o700) | ((fperm << 3) & 0o070) | (fperm & 0o007), _ => break, }; if ch == 'u' || ch == 'g' || ch == 'o' { // symbolic modes only allows perms to be a single letter of 'ugo' // therefore this must either be the first char or it is unexpected if pos != 0 { break; } pos = 1; break; } pos += 1; } if pos == 0 { srwx = 0; } (srwx, pos) } #[allow(clippy::unnecessary_cast)] pub fn parse_mode(mode: &str) -> Result { #[cfg(all( not(target_os = "freebsd"), not(target_vendor = "apple"), not(target_os = "android") ))] let fperm = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; #[cfg(any(target_os = "freebsd", target_vendor = "apple", target_os = "android"))] let fperm = (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) as u32; let result = if mode.chars().any(|c| c.is_ascii_digit()) { parse_numeric(fperm as u32, mode, true) } else { parse_symbolic(fperm as u32, mode, get_umask(), true) }; result.map(|mode| mode as mode_t) } pub fn get_umask() -> u32 { // There's no portable way to read the umask without changing it. // We have to replace it and then quickly set it back, hopefully before // some other thread is affected. // On modern Linux kernels the current umask could instead be read // from /proc/self/status. But that's a lot of work. // SAFETY: umask always succeeds and doesn't operate on memory. Races are // possible but it can't violate Rust's guarantees. 
let mask = unsafe { umask(0) }; unsafe { umask(mask) }; #[cfg(all( not(target_os = "freebsd"), not(target_vendor = "apple"), not(target_os = "android"), not(target_os = "redox") ))] return mask; #[cfg(any( target_os = "freebsd", target_vendor = "apple", target_os = "android", target_os = "redox" ))] return mask as u32; } // Iterate 'args' and delete the first occurrence // of a prefix '-' if it's associated with MODE // e.g. "chmod -v -xw -R FILE" -> "chmod -v xw -R FILE" pub fn strip_minus_from_mode(args: &mut Vec) -> bool { for arg in args { if arg == "--" { break; } if let Some(arg_stripped) = arg.strip_prefix('-') { if let Some('r' | 'w' | 'x' | 'X' | 's' | 't' | 'u' | 'g' | 'o' | '0'..='7') = arg.chars().nth(1) { *arg = arg_stripped.to_string(); return true; } } } false } #[cfg(test)] mod test { #[test] fn symbolic_modes() { assert_eq!(super::parse_mode("u+x").unwrap(), 0o766); assert_eq!( super::parse_mode("+x").unwrap(), if crate::os::is_wsl_1() { 0o776 } else { 0o777 } ); assert_eq!(super::parse_mode("a-w").unwrap(), 0o444); assert_eq!(super::parse_mode("g-r").unwrap(), 0o626); } #[test] fn numeric_modes() { assert_eq!(super::parse_mode("644").unwrap(), 0o644); assert_eq!(super::parse_mode("+100").unwrap(), 0o766); assert_eq!(super::parse_mode("-4").unwrap(), 0o662); } } uucore-0.0.30/src/lib/features/perms.rs000064400000000000000000000627451046102023000160730ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! 
Common functions to manage permissions // spell-checker:ignore (jargon) TOCTOU use crate::display::Quotable; use crate::error::{strip_errno, UResult, USimpleError}; pub use crate::features::entries; use crate::show_error; use clap::{Arg, ArgMatches, Command}; use libc::{gid_t, uid_t}; use options::traverse; use walkdir::WalkDir; use std::io::Error as IOError; use std::io::Result as IOResult; use std::ffi::CString; use std::fs::Metadata; use std::os::unix::fs::MetadataExt; use std::os::unix::ffi::OsStrExt; use std::path::{Path, MAIN_SEPARATOR}; /// The various level of verbosity #[derive(PartialEq, Eq, Clone, Debug)] pub enum VerbosityLevel { Silent, Changes, Verbose, Normal, } #[derive(PartialEq, Eq, Clone, Debug)] pub struct Verbosity { pub groups_only: bool, pub level: VerbosityLevel, } /// Actually perform the change of owner on a path fn chown>(path: P, uid: uid_t, gid: gid_t, follow: bool) -> IOResult<()> { let path = path.as_ref(); let s = CString::new(path.as_os_str().as_bytes()).unwrap(); let ret = unsafe { if follow { libc::chown(s.as_ptr(), uid, gid) } else { libc::lchown(s.as_ptr(), uid, gid) } }; if ret == 0 { Ok(()) } else { Err(IOError::last_os_error()) } } /// Perform the change of owner on a path /// with the various options /// and error messages management pub fn wrap_chown>( path: P, meta: &Metadata, dest_uid: Option, dest_gid: Option, follow: bool, verbosity: Verbosity, ) -> Result { let dest_uid = dest_uid.unwrap_or_else(|| meta.uid()); let dest_gid = dest_gid.unwrap_or_else(|| meta.gid()); let path = path.as_ref(); let mut out: String = String::new(); if let Err(e) = chown(path, dest_uid, dest_gid, follow) { match verbosity.level { VerbosityLevel::Silent => (), level => { out = format!( "changing {} of {}: {}", if verbosity.groups_only { "group" } else { "ownership" }, path.quote(), e ); if level == VerbosityLevel::Verbose { out = if verbosity.groups_only { let gid = meta.gid(); format!( "{}\nfailed to change group of {} from {} to {}", out, 
path.quote(), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()), entries::gid2grp(dest_gid).unwrap_or_else(|_| dest_gid.to_string()) ) } else { let uid = meta.uid(); let gid = meta.gid(); format!( "{}\nfailed to change ownership of {} from {}:{} to {}:{}", out, path.quote(), entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string()), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()), entries::uid2usr(dest_uid).unwrap_or_else(|_| dest_uid.to_string()), entries::gid2grp(dest_gid).unwrap_or_else(|_| dest_gid.to_string()) ) }; }; } } return Err(out); } else { let changed = dest_uid != meta.uid() || dest_gid != meta.gid(); if changed { match verbosity.level { VerbosityLevel::Changes | VerbosityLevel::Verbose => { let gid = meta.gid(); out = if verbosity.groups_only { format!( "changed group of {} from {} to {}", path.quote(), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()), entries::gid2grp(dest_gid).unwrap_or_else(|_| dest_gid.to_string()) ) } else { let gid = meta.gid(); let uid = meta.uid(); format!( "changed ownership of {} from {}:{} to {}:{}", path.quote(), entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string()), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()), entries::uid2usr(dest_uid).unwrap_or_else(|_| dest_uid.to_string()), entries::gid2grp(dest_gid).unwrap_or_else(|_| dest_gid.to_string()) ) }; } _ => (), }; } else if verbosity.level == VerbosityLevel::Verbose { out = if verbosity.groups_only { format!( "group of {} retained as {}", path.quote(), entries::gid2grp(dest_gid).unwrap_or_default() ) } else { format!( "ownership of {} retained as {}:{}", path.quote(), entries::uid2usr(dest_uid).unwrap_or_else(|_| dest_uid.to_string()), entries::gid2grp(dest_gid).unwrap_or_else(|_| dest_gid.to_string()) ) }; } } Ok(out) } pub enum IfFrom { All, User(u32), Group(u32), UserGroup(u32, u32), } #[derive(PartialEq, Eq)] pub enum TraverseSymlinks { None, First, All, } pub struct ChownExecutor { pub dest_uid: Option, pub dest_gid: 
Option, pub raw_owner: String, // The owner of the file as input by the user in the command line. pub traverse_symlinks: TraverseSymlinks, pub verbosity: Verbosity, pub filter: IfFrom, pub files: Vec, pub recursive: bool, pub preserve_root: bool, pub dereference: bool, } #[cfg(test)] pub fn check_root(path: &Path, would_recurse_symlink: bool) -> bool { is_root(path, would_recurse_symlink) } /// In the context of chown and chgrp, check whether we are in a "preserve-root" scenario. /// /// In particular, we want to prohibit further traversal only if: /// (--preserve-root and -R present) && /// (path canonicalizes to "/") && /// ( /// (path is a symlink && would traverse/recurse this symlink) || /// (path is not a symlink) /// ) /// The first clause is checked by the caller, the second and third clause is checked here. /// The caller has to evaluate -P/-H/-L into 'would_recurse_symlink'. /// Recall that canonicalization resolves both relative paths (e.g. "..") and symlinks. fn is_root(path: &Path, would_traverse_symlink: bool) -> bool { // The third clause can be evaluated without any syscalls, so we do that first. // If we would_recurse_symlink, then the clause is true no matter whether the path is a symlink // or not. Otherwise, we only need to check here if the path can syntactically be a symlink: if !would_traverse_symlink { // We cannot check path.is_dir() here, as this would resolve symlinks, // which we need to avoid here. // All directory-ish paths match "*/", except ".", "..", "*/.", and "*/..". let path_bytes = path.as_os_str().as_encoded_bytes(); let looks_like_dir = path_bytes == [b'.'] || path_bytes == [b'.', b'.'] || path_bytes.ends_with(&[MAIN_SEPARATOR as u8]) || path_bytes.ends_with(&[MAIN_SEPARATOR as u8, b'.']) || path_bytes.ends_with(&[MAIN_SEPARATOR as u8, b'.', b'.']); if !looks_like_dir { return false; } } // FIXME: TOCTOU bug! canonicalize() runs at a different time than WalkDir's recursion decision. 
// However, we're forced to make the decision whether to warn about --preserve-root // *before* even attempting to chown the path, let alone doing the stat inside WalkDir. if let Ok(p) = path.canonicalize() { let path_buf = path.to_path_buf(); if p.parent().is_none() { if path_buf.as_os_str() == "/" { show_error!("it is dangerous to operate recursively on '/'"); } else { show_error!( "it is dangerous to operate recursively on {} (same as '/')", path_buf.quote() ); } show_error!("use --no-preserve-root to override this failsafe"); return true; } } false } pub fn get_metadata(file: &Path, follow: bool) -> Result { if follow { file.metadata() } else { file.symlink_metadata() } } impl ChownExecutor { pub fn exec(&self) -> UResult<()> { let mut ret = 0; for f in &self.files { ret |= self.traverse(f); } if ret != 0 { return Err(ret.into()); } Ok(()) } #[allow(clippy::cognitive_complexity)] fn traverse>(&self, root: P) -> i32 { let path = root.as_ref(); let Some(meta) = self.obtain_meta(path, self.dereference) else { if self.verbosity.level == VerbosityLevel::Verbose { println!( "failed to change ownership of {} to {}", path.quote(), self.raw_owner ); } return 1; }; if self.recursive && self.preserve_root && is_root(path, self.traverse_symlinks != TraverseSymlinks::None) { // Fail-fast, do not attempt to recurse. 
return 1; } let ret = if self.matched(meta.uid(), meta.gid()) { match wrap_chown( path, &meta, self.dest_uid, self.dest_gid, self.dereference, self.verbosity.clone(), ) { Ok(n) => { if !n.is_empty() { show_error!("{}", n); } 0 } Err(e) => { if self.verbosity.level != VerbosityLevel::Silent { show_error!("{}", e); } 1 } } } else { self.print_verbose_ownership_retained_as( path, meta.uid(), self.dest_gid.map(|_| meta.gid()), ); 0 }; if self.recursive { ret | self.dive_into(&root) } else { ret } } #[allow(clippy::cognitive_complexity)] fn dive_into>(&self, root: P) -> i32 { let root = root.as_ref(); // walkdir always dereferences the root directory, so we have to check it ourselves if self.traverse_symlinks == TraverseSymlinks::None && root.is_symlink() { return 0; } let mut ret = 0; let mut iterator = WalkDir::new(root) .follow_links(self.traverse_symlinks == TraverseSymlinks::All) .min_depth(1) .into_iter(); // We can't use a for loop because we need to manipulate the iterator inside the loop. while let Some(entry) = iterator.next() { let entry = match entry { Err(e) => { ret = 1; if let Some(path) = e.path() { show_error!( "cannot access '{}': {}", path.display(), if let Some(error) = e.io_error() { strip_errno(error) } else { "Too many levels of symbolic links".into() } ); } else { show_error!("{}", e); } continue; } Ok(entry) => entry, }; let path = entry.path(); let Some(meta) = self.obtain_meta(path, self.dereference) else { ret = 1; if entry.file_type().is_dir() { // Instruct walkdir to skip this directory to avoid getting another error // when walkdir tries to query the children of this directory. iterator.skip_current_dir(); } continue; }; if self.preserve_root && is_root(path, self.traverse_symlinks == TraverseSymlinks::All) { // Fail-fast, do not recurse further. 
return 1; } if !self.matched(meta.uid(), meta.gid()) { self.print_verbose_ownership_retained_as( path, meta.uid(), self.dest_gid.map(|_| meta.gid()), ); continue; } ret = match wrap_chown( path, &meta, self.dest_uid, self.dest_gid, self.dereference, self.verbosity.clone(), ) { Ok(n) => { if !n.is_empty() { show_error!("{}", n); } 0 } Err(e) => { if self.verbosity.level != VerbosityLevel::Silent { show_error!("{}", e); } 1 } } } ret } fn obtain_meta>(&self, path: P, follow: bool) -> Option { let path = path.as_ref(); get_metadata(path, follow) .inspect_err(|e| { if self.verbosity.level != VerbosityLevel::Silent { show_error!( "cannot {} {}: {}", if follow { "dereference" } else { "access" }, path.quote(), strip_errno(e) ); } }) .ok() } #[inline] fn matched(&self, uid: uid_t, gid: gid_t) -> bool { match self.filter { IfFrom::All => true, IfFrom::User(u) => u == uid, IfFrom::Group(g) => g == gid, IfFrom::UserGroup(u, g) => u == uid && g == gid, } } fn print_verbose_ownership_retained_as(&self, path: &Path, uid: u32, gid: Option) { if self.verbosity.level == VerbosityLevel::Verbose { let ownership = match (self.dest_uid, self.dest_gid, gid) { (Some(_), Some(_), Some(gid)) => format!( "{}:{}", entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string()), entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()) ), (None, Some(_), Some(gid)) => { entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()) } _ => entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string()), }; if self.verbosity.groups_only { println!("group of {} retained as {}", path.quote(), ownership); } else { println!("ownership of {} retained as {}", path.quote(), ownership); } } } } pub mod options { pub const HELP: &str = "help"; pub mod verbosity { pub const CHANGES: &str = "changes"; pub const QUIET: &str = "quiet"; pub const SILENT: &str = "silent"; pub const VERBOSE: &str = "verbose"; } pub mod preserve_root { pub const PRESERVE: &str = "preserve-root"; pub const NO_PRESERVE: &str = 
"no-preserve-root"; } pub mod dereference { pub const DEREFERENCE: &str = "dereference"; pub const NO_DEREFERENCE: &str = "no-dereference"; } pub const FROM: &str = "from"; pub const RECURSIVE: &str = "recursive"; pub mod traverse { pub const TRAVERSE: &str = "H"; pub const NO_TRAVERSE: &str = "P"; pub const EVERY: &str = "L"; } pub const REFERENCE: &str = "reference"; pub const ARG_OWNER: &str = "OWNER"; pub const ARG_GROUP: &str = "GROUP"; pub const ARG_FILES: &str = "FILE"; } pub struct GidUidOwnerFilter { pub dest_gid: Option, pub dest_uid: Option, pub raw_owner: String, pub filter: IfFrom, } type GidUidFilterOwnerParser = fn(&ArgMatches) -> UResult; /// Determines symbolic link traversal and recursion settings based on flags. /// Returns the updated `dereference` and `traverse_symlinks` values. pub fn configure_symlink_and_recursion( matches: &ArgMatches, ) -> Result<(bool, bool, TraverseSymlinks), Box> { let mut dereference = if matches.get_flag(options::dereference::DEREFERENCE) { Some(true) // Follow symlinks } else if matches.get_flag(options::dereference::NO_DEREFERENCE) { Some(false) // Do not follow symlinks } else { None // Default behavior }; let mut traverse_symlinks = if matches.get_flag("L") { TraverseSymlinks::All } else if matches.get_flag("H") { TraverseSymlinks::First } else { TraverseSymlinks::None }; let recursive = matches.get_flag(options::RECURSIVE); if recursive { if traverse_symlinks == TraverseSymlinks::None { if dereference == Some(true) { return Err(USimpleError::new( 1, "-R --dereference requires -H or -L".to_string(), )); } dereference = Some(false); } } else { traverse_symlinks = TraverseSymlinks::None; } Ok((recursive, dereference.unwrap_or(true), traverse_symlinks)) } /// Base implementation for `chgrp` and `chown`. /// /// An argument called `add_arg_if_not_reference` will be added to `command` if /// `args` does not contain the `--reference` option. 
/// `parse_gid_uid_and_filter` will be called to obtain the target gid and uid, and the filter, /// from `ArgMatches`. /// `groups_only` determines whether verbose output will only mention the group. #[allow(clippy::cognitive_complexity)] pub fn chown_base( mut command: Command, args: impl crate::Args, add_arg_if_not_reference: &'static str, parse_gid_uid_and_filter: GidUidFilterOwnerParser, groups_only: bool, ) -> UResult<()> { let args: Vec<_> = args.collect(); let mut reference = false; let mut help = false; // stop processing options on -- for arg in args.iter().take_while(|s| *s != "--") { if arg.to_string_lossy().starts_with("--reference=") || arg == "--reference" { reference = true; } else if arg == "--help" { // we stop processing once we see --help, // as it doesn't matter if we've seen reference or not help = true; break; } } if help || !reference { // add both positional arguments // arg_group is only required if command = command.arg( Arg::new(add_arg_if_not_reference) .value_name(add_arg_if_not_reference) .required(true), ); } command = command.arg( Arg::new(options::ARG_FILES) .value_name(options::ARG_FILES) .value_hint(clap::ValueHint::FilePath) .action(clap::ArgAction::Append) .required(true) .num_args(1..), ); let matches = command.try_get_matches_from(args)?; let files: Vec = matches .get_many::(options::ARG_FILES) .map(|v| v.map(ToString::to_string).collect()) .unwrap_or_default(); let preserve_root = matches.get_flag(options::preserve_root::PRESERVE); let (recursive, dereference, traverse_symlinks) = configure_symlink_and_recursion(&matches)?; let verbosity_level = if matches.get_flag(options::verbosity::CHANGES) { VerbosityLevel::Changes } else if matches.get_flag(options::verbosity::SILENT) || matches.get_flag(options::verbosity::QUIET) { VerbosityLevel::Silent } else if matches.get_flag(options::verbosity::VERBOSE) { VerbosityLevel::Verbose } else { VerbosityLevel::Normal }; let GidUidOwnerFilter { dest_gid, dest_uid, raw_owner, filter, } = 
parse_gid_uid_and_filter(&matches)?; let executor = ChownExecutor { traverse_symlinks, dest_gid, dest_uid, raw_owner, verbosity: Verbosity { groups_only, level: verbosity_level, }, recursive, dereference, preserve_root, files, filter, }; executor.exec() } pub fn common_args() -> Vec { vec![ Arg::new(traverse::TRAVERSE) .short(traverse::TRAVERSE.chars().next().unwrap()) .help("if a command line argument is a symbolic link to a directory, traverse it") .overrides_with_all([traverse::EVERY, traverse::NO_TRAVERSE]) .action(clap::ArgAction::SetTrue), Arg::new(traverse::EVERY) .short(traverse::EVERY.chars().next().unwrap()) .help("traverse every symbolic link to a directory encountered") .overrides_with_all([traverse::TRAVERSE, traverse::NO_TRAVERSE]) .action(clap::ArgAction::SetTrue), Arg::new(traverse::NO_TRAVERSE) .short(traverse::NO_TRAVERSE.chars().next().unwrap()) .help("do not traverse any symbolic links (default)") .overrides_with_all([traverse::TRAVERSE, traverse::EVERY]) .action(clap::ArgAction::SetTrue), Arg::new(options::dereference::DEREFERENCE) .long(options::dereference::DEREFERENCE) .help( "affect the referent of each symbolic link (this is the default), \ rather than the symbolic link itself", ) .action(clap::ArgAction::SetTrue), Arg::new(options::dereference::NO_DEREFERENCE) .short('h') .long(options::dereference::NO_DEREFERENCE) .help( "affect symbolic links instead of any referenced file \ (useful only on systems that can change the ownership of a symlink)", ) .action(clap::ArgAction::SetTrue), ] } #[cfg(test)] mod tests { // Note this useful idiom: importing names from outer (for mod tests) scope. use super::*; #[cfg(unix)] use std::os::unix; use std::path::{Component, PathBuf}; #[cfg(unix)] use tempfile::tempdir; #[test] fn test_empty_string() { let path = PathBuf::new(); assert_eq!(path.to_str(), Some("")); // The main point to test here is that we don't crash. // The result should be 'false', to avoid unnecessary and confusing warnings. 
assert!(!is_root(&path, false)); assert!(!is_root(&path, true)); } #[allow(clippy::needless_borrow)] #[cfg(unix)] #[test] fn test_literal_root() { let component = Component::RootDir; let path: &Path = component.as_ref(); assert_eq!( path.to_str(), Some("/"), "cfg(unix) but using non-unix path delimiters?!" ); // Must return true, this is the main scenario that --preserve-root shall prevent. assert!(is_root(&path, false)); assert!(is_root(&path, true)); } #[cfg(unix)] #[test] fn test_symlink_slash() { let temp_dir = tempdir().unwrap(); let symlink_path = temp_dir.path().join("symlink"); unix::fs::symlink(PathBuf::from("/"), symlink_path).unwrap(); let symlink_path_slash = temp_dir.path().join("symlink/"); // Must return true, we're about to "accidentally" recurse on "/", // since "symlink/" always counts as an already-entered directory // Output from GNU: // $ chown --preserve-root -RH --dereference $(id -u) slink-to-root/ // chown: it is dangerous to operate recursively on 'slink-to-root/' (same as '/') // chown: use --no-preserve-root to override this failsafe // [$? = 1] // $ chown --preserve-root -RH --no-dereference $(id -u) slink-to-root/ // chown: it is dangerous to operate recursively on 'slink-to-root/' (same as '/') // chown: use --no-preserve-root to override this failsafe // [$? = 1] assert!(is_root(&symlink_path_slash, false)); assert!(is_root(&symlink_path_slash, true)); } #[cfg(unix)] #[test] fn test_symlink_no_slash() { // This covers both the commandline-argument case and the recursion case. let temp_dir = tempdir().unwrap(); let symlink_path = temp_dir.path().join("symlink"); unix::fs::symlink(PathBuf::from("/"), &symlink_path).unwrap(); // Only return true we're about to "accidentally" recurse on "/". assert!(!is_root(&symlink_path, false)); assert!(is_root(&symlink_path, true)); } } uucore-0.0.30/src/lib/features/pipes.rs000064400000000000000000000045301046102023000160510ustar 00000000000000// This file is part of the uutils coreutils package. 
// // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Thin pipe-related wrappers around functions from the `nix` crate. use std::fs::File; #[cfg(any(target_os = "linux", target_os = "android"))] use std::io::IoSlice; #[cfg(any(target_os = "linux", target_os = "android"))] use std::os::fd::AsFd; #[cfg(any(target_os = "linux", target_os = "android"))] use nix::fcntl::SpliceFFlags; pub use nix::{Error, Result}; /// A wrapper around [`nix::unistd::pipe`] that ensures the pipe is cleaned up. /// /// Returns two `File` objects: everything written to the second can be read /// from the first. pub fn pipe() -> Result<(File, File)> { let (read, write) = nix::unistd::pipe()?; Ok((File::from(read), File::from(write))) } /// Less noisy wrapper around [`nix::fcntl::splice`]. /// /// Up to `len` bytes are moved from `source` to `target`. Returns the number /// of successfully moved bytes. /// /// At least one of `source` and `target` must be some sort of pipe. /// To get around this requirement, consider splicing from your source into /// a [`pipe`] and then from the pipe into your target (with `splice_exact`): /// this is still very efficient. #[cfg(any(target_os = "linux", target_os = "android"))] pub fn splice(source: &impl AsFd, target: &impl AsFd, len: usize) -> Result { nix::fcntl::splice(source, None, target, None, len, SpliceFFlags::empty()) } /// Splice wrapper which fully finishes the write. /// /// Exactly `len` bytes are moved from `source` into `target`. /// /// Panics if `source` runs out of data before `len` bytes have been moved. 
#[cfg(any(target_os = "linux", target_os = "android"))] pub fn splice_exact(source: &impl AsFd, target: &impl AsFd, len: usize) -> Result<()> { let mut left = len; while left != 0 { let written = splice(source, target, left)?; assert_ne!(written, 0, "unexpected end of data"); left -= written; } Ok(()) } /// Copy data from `bytes` into `target`, which must be a pipe. /// /// Returns the number of successfully copied bytes. #[cfg(any(target_os = "linux", target_os = "android"))] pub fn vmsplice(target: &impl AsFd, bytes: &[u8]) -> Result { nix::fcntl::vmsplice(target, &[IoSlice::new(bytes)], SpliceFFlags::empty()) } uucore-0.0.30/src/lib/features/proc_info.rs000064400000000000000000000365511046102023000167170ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore exitstatus cmdline kworker pgrep pwait snice procps // spell-checker:ignore egid euid gettid ppid //! Set of functions to manage IDs //! //! This module provide [`ProcessInformation`] and [`TerminalType`] and corresponding //! functions for obtaining process information. //! //! And also provide [`walk_process`] function to collecting all the information of //! processes in current system. //! //! Utilities that rely on this module: //! `pgrep` (TBD) //! `pwait` (TBD) //! `snice` (TBD) //! // This file is currently flagged as dead code, because it isn't used anywhere // in the codebase. It may be useful in the future though, so we decide to keep // it. // The code was originally written in procps // (https://github.com/uutils/procps/blob/main/src/uu/pgrep/src/process.rs) // but was eventually moved here. // See https://github.com/uutils/coreutils/pull/6932 for discussion. 
#![allow(dead_code)] use crate::features::tty::Teletype; use std::hash::Hash; use std::{ collections::HashMap, fmt::{self, Display, Formatter}, fs, io, path::PathBuf, rc::Rc, }; use walkdir::{DirEntry, WalkDir}; /// State or process #[derive(Debug, PartialEq, Eq)] pub enum RunState { ///`R`, running Running, ///`S`, sleeping Sleeping, ///`D`, sleeping in an uninterruptible wait UninterruptibleWait, ///`Z`, zombie Zombie, ///`T`, traced or stopped Stopped, } impl Display for RunState { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Self::Running => write!(f, "R"), Self::Sleeping => write!(f, "S"), Self::UninterruptibleWait => write!(f, "D"), Self::Zombie => write!(f, "Z"), Self::Stopped => write!(f, "T"), } } } impl TryFrom for RunState { type Error = io::Error; fn try_from(value: char) -> Result { match value { 'R' => Ok(Self::Running), 'S' => Ok(Self::Sleeping), 'D' => Ok(Self::UninterruptibleWait), 'Z' => Ok(Self::Zombie), 'T' => Ok(Self::Stopped), _ => Err(io::ErrorKind::InvalidInput.into()), } } } impl TryFrom<&str> for RunState { type Error = io::Error; fn try_from(value: &str) -> Result { if value.len() != 1 { return Err(io::ErrorKind::InvalidInput.into()); } Self::try_from( value .chars() .nth(0) .ok_or::(io::ErrorKind::InvalidInput.into())?, ) } } impl TryFrom for RunState { type Error = io::Error; fn try_from(value: String) -> Result { Self::try_from(value.as_str()) } } impl TryFrom<&String> for RunState { type Error = io::Error; fn try_from(value: &String) -> Result { Self::try_from(value.as_str()) } } /// Process ID and its information #[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct ProcessInformation { pub pid: usize, pub cmdline: String, inner_status: String, inner_stat: String, /// Processed `/proc/self/status` file cached_status: Option>>, /// Processed `/proc/self/stat` file cached_stat: Option>>, cached_start_time: Option, cached_thread_ids: Option>>, } #[derive(Clone, Copy, Debug)] enum UidGid { Uid, Gid, } impl 
ProcessInformation { /// Try new with pid path such as `/proc/self` /// /// # Error /// /// If the files in path cannot be parsed into [ProcessInformation], /// it almost caused by wrong filesystem structure. /// /// - [The /proc Filesystem](https://docs.kernel.org/filesystems/proc.html#process-specific-subdirectories) pub fn try_new(value: PathBuf) -> Result { let dir_append = |mut path: PathBuf, str: String| { path.push(str); path }; let value = if value.is_symlink() { fs::read_link(value)? } else { value }; let pid = { value .iter() .last() .ok_or(io::ErrorKind::Other)? .to_str() .ok_or(io::ErrorKind::InvalidData)? .parse::() .map_err(|_| io::ErrorKind::InvalidData)? }; let cmdline = fs::read_to_string(dir_append(value.clone(), "cmdline".into()))? .replace('\0', " ") .trim_end() .into(); Ok(Self { pid, cmdline, inner_status: fs::read_to_string(dir_append(value.clone(), "status".into()))?, inner_stat: fs::read_to_string(dir_append(value, "stat".into()))?, ..Default::default() }) } pub fn proc_status(&self) -> &str { &self.inner_status } pub fn proc_stat(&self) -> &str { &self.inner_stat } /// Collect information from `/proc//status` file pub fn status(&mut self) -> Rc> { if let Some(c) = &self.cached_status { return Rc::clone(c); } let result = self .inner_status .lines() .filter_map(|it| it.split_once(':')) .map(|it| (it.0.to_string(), it.1.trim_start().to_string())) .collect::>(); let result = Rc::new(result); self.cached_status = Some(Rc::clone(&result)); Rc::clone(&result) } /// Collect information from `/proc//stat` file pub fn stat(&mut self) -> Rc> { if let Some(c) = &self.cached_stat { return Rc::clone(c); } let result: Vec<_> = stat_split(&self.inner_stat); let result = Rc::new(result); self.cached_stat = Some(Rc::clone(&result)); Rc::clone(&result) } /// Fetch start time from [ProcessInformation::cached_stat] /// /// - [The /proc Filesystem: Table 1-4](https://docs.kernel.org/filesystems/proc.html#id10) pub fn start_time(&mut self) -> Result { if let 
Some(time) = self.cached_start_time { return Ok(time); } // Kernel doc: https://docs.kernel.org/filesystems/proc.html#process-specific-subdirectories // Table 1-4 let time = self .stat() .get(21) .ok_or(io::ErrorKind::InvalidData)? .parse::() .map_err(|_| io::ErrorKind::InvalidData)?; self.cached_start_time = Some(time); Ok(time) } pub fn ppid(&mut self) -> Result { // the PPID is the fourth field in /proc//stat // (https://www.kernel.org/doc/html/latest/filesystems/proc.html#id10) self.stat() .get(3) .ok_or(io::ErrorKind::InvalidData)? .parse::() .map_err(|_| io::ErrorKind::InvalidData.into()) } fn get_uid_or_gid_field(&mut self, field: UidGid, index: usize) -> Result { self.status() .get(&format!("{:?}", field)) .ok_or(io::ErrorKind::InvalidData)? .split_whitespace() .nth(index) .ok_or(io::ErrorKind::InvalidData)? .parse::() .map_err(|_| io::ErrorKind::InvalidData.into()) } pub fn uid(&mut self) -> Result { self.get_uid_or_gid_field(UidGid::Uid, 0) } pub fn euid(&mut self) -> Result { self.get_uid_or_gid_field(UidGid::Uid, 1) } pub fn gid(&mut self) -> Result { self.get_uid_or_gid_field(UidGid::Gid, 0) } pub fn egid(&mut self) -> Result { self.get_uid_or_gid_field(UidGid::Gid, 1) } /// Fetch run state from [ProcessInformation::cached_stat] /// /// - [The /proc Filesystem: Table 1-4](https://docs.kernel.org/filesystems/proc.html#id10) /// /// # Error /// /// If parsing failed, this function will return [io::ErrorKind::InvalidInput] pub fn run_state(&mut self) -> Result { RunState::try_from(self.stat().get(2).unwrap().as_str()) } /// This function will scan the `/proc//fd` directory /// /// If the process does not belong to any terminal and mismatched permission, /// the result will contain [TerminalType::Unknown]. /// /// Otherwise [TerminalType::Unknown] does not appear in the result. 
pub fn tty(&self) -> Teletype { let path = PathBuf::from(format!("/proc/{}/fd", self.pid)); let Ok(result) = fs::read_dir(path) else { return Teletype::Unknown; }; for dir in result.flatten().filter(|it| it.path().is_symlink()) { if let Ok(path) = fs::read_link(dir.path()) { if let Ok(tty) = Teletype::try_from(path) { return tty; } } } Teletype::Unknown } pub fn thread_ids(&mut self) -> Rc> { if let Some(c) = &self.cached_thread_ids { return Rc::clone(c); } let thread_ids_dir = format!("/proc/{}/task", self.pid); let result = Rc::new( WalkDir::new(thread_ids_dir) .min_depth(1) .max_depth(1) .follow_links(false) .into_iter() .flatten() .flat_map(|it| { it.path() .file_name() .and_then(|it| it.to_str()) .and_then(|it| it.parse::().ok()) }) .collect::>(), ); self.cached_thread_ids = Some(Rc::clone(&result)); Rc::clone(&result) } } impl TryFrom for ProcessInformation { type Error = io::Error; fn try_from(value: DirEntry) -> Result { let value = value.into_path(); Self::try_new(value) } } impl Hash for ProcessInformation { fn hash(&self, state: &mut H) { // Make it faster. self.pid.hash(state); self.inner_status.hash(state); self.inner_stat.hash(state); } } /// Parsing `/proc/self/stat` file. /// /// In some case, the first pair (and the only one pair) will contains whitespace, /// so if we want to parse it, we have to write new algorithm. /// /// TODO: If possible, test and use regex to replace this algorithm. 
fn stat_split(stat: &str) -> Vec { let stat = String::from(stat); let mut buf = String::with_capacity(stat.len()); let l = stat.find('('); let r = stat.find(')'); let content = if let (Some(l), Some(r)) = (l, r) { let replaced = stat[(l + 1)..r].replace(' ', "$$"); buf.push_str(&stat[..l]); buf.push_str(&replaced); buf.push_str(&stat[(r + 1)..stat.len()]); &buf } else { &stat }; content .split_whitespace() .map(|it| it.replace("$$", " ")) .collect() } /// Iterating pid in current system pub fn walk_process() -> impl Iterator { WalkDir::new("/proc/") .max_depth(1) .follow_links(false) .into_iter() .flatten() .filter(|it| it.path().is_dir()) .flat_map(ProcessInformation::try_from) } #[cfg(test)] mod tests { use super::*; use crate::features::tty::Teletype; use std::{collections::HashSet, str::FromStr}; #[test] fn test_run_state_conversion() { assert_eq!(RunState::try_from("R").unwrap(), RunState::Running); assert_eq!(RunState::try_from("S").unwrap(), RunState::Sleeping); assert_eq!( RunState::try_from("D").unwrap(), RunState::UninterruptibleWait ); assert_eq!(RunState::try_from("T").unwrap(), RunState::Stopped); assert_eq!(RunState::try_from("Z").unwrap(), RunState::Zombie); assert!(RunState::try_from("G").is_err()); assert!(RunState::try_from("Rg").is_err()); } fn current_pid() -> usize { // Direct read link of /proc/self. // It's result must be current programs pid. 
fs::read_link("/proc/self") .unwrap() .to_str() .unwrap() .parse::() .unwrap() } #[test] fn test_walk_pid() { let current_pid = current_pid(); let find = walk_process().find(|it| it.pid == current_pid); assert!(find.is_some()); } #[test] fn test_pid_entry() { let current_pid = current_pid(); let pid_entry = ProcessInformation::try_new( PathBuf::from_str(&format!("/proc/{}", current_pid)).unwrap(), ) .unwrap(); let result = WalkDir::new(format!("/proc/{}/fd", current_pid)) .into_iter() .flatten() .map(DirEntry::into_path) .flat_map(|it| it.read_link()) .flat_map(Teletype::try_from) .collect::>(); assert_eq!(result.len(), 1); assert_eq!( pid_entry.tty(), Vec::from_iter(result.into_iter()).first().unwrap().clone() ); } #[test] fn test_thread_ids() { let main_tid = unsafe { crate::libc::gettid() }; std::thread::spawn(move || { let mut pid_entry = ProcessInformation::try_new( PathBuf::from_str(&format!("/proc/{}", current_pid())).unwrap(), ) .unwrap(); let thread_ids = pid_entry.thread_ids(); assert!(thread_ids.contains(&(main_tid as usize))); let new_thread_tid = unsafe { crate::libc::gettid() }; assert!(thread_ids.contains(&(new_thread_tid as usize))); }) .join() .unwrap(); } #[test] fn test_stat_split() { let case = "32 (idle_inject/3) S 2 0 0 0 -1 69238848 0 0 0 0 0 0 0 0 -51 0 1 0 34 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 3 50 1 0 0 0 0 0 0 0 0 0 0 0"; assert!(stat_split(case)[1] == "idle_inject/3"); let case = "3508 (sh) S 3478 3478 3478 0 -1 4194304 67 0 0 0 0 0 0 0 20 0 1 0 11911 2961408 238 18446744073709551615 94340156948480 94340157028757 140736274114368 0 0 0 0 4096 65538 1 0 0 17 8 0 0 0 0 0 94340157054704 94340157059616 94340163108864 140736274122780 140736274122976 140736274122976 140736274124784 0"; assert!(stat_split(case)[1] == "sh"); let case = "47246 (kworker /10:1-events) I 2 0 0 0 -1 69238880 0 0 0 0 17 29 0 0 20 0 1 0 1396260 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 10 0 0 0 0 0 0 0 0 0 0 0 0 0"; 
assert!(stat_split(case)[1] == "kworker /10:1-events"); } #[test] fn test_uid_gid() { let mut pid_entry = ProcessInformation::try_new( PathBuf::from_str(&format!("/proc/{}", current_pid())).unwrap(), ) .unwrap(); assert_eq!(pid_entry.uid().unwrap(), crate::process::getuid()); assert_eq!(pid_entry.euid().unwrap(), crate::process::geteuid()); assert_eq!(pid_entry.gid().unwrap(), crate::process::getgid()); assert_eq!(pid_entry.egid().unwrap(), crate::process::getegid()); } } uucore-0.0.30/src/lib/features/process.rs000064400000000000000000000114151046102023000164070ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) cvar exitstatus cmdline kworker getsid getpid // spell-checker:ignore (sys/unix) WIFSIGNALED ESRCH // spell-checker:ignore pgrep pwait snice use libc::{gid_t, pid_t, uid_t}; #[cfg(not(target_os = "redox"))] use nix::errno::Errno; use std::io; use std::process::Child; use std::process::ExitStatus; use std::thread; use std::time::{Duration, Instant}; // SAFETY: These functions always succeed and return simple integers. /// `geteuid()` returns the effective user ID of the calling process. pub fn geteuid() -> uid_t { unsafe { libc::geteuid() } } /// `getegid()` returns the effective group ID of the calling process. pub fn getegid() -> gid_t { unsafe { libc::getegid() } } /// `getgid()` returns the real group ID of the calling process. pub fn getgid() -> gid_t { unsafe { libc::getgid() } } /// `getuid()` returns the real user ID of the calling process. pub fn getuid() -> uid_t { unsafe { libc::getuid() } } /// `getpid()` returns the pid of the calling process. pub fn getpid() -> pid_t { unsafe { libc::getpid() } } /// `getsid()` returns the session ID of the process with process ID pid. /// /// If pid is 0, getsid() returns the session ID of the calling process. 
/// /// # Error /// /// - [Errno::EPERM] A process with process ID pid exists, but it is not in the same session as the calling process, and the implementation considers this an error. /// - [Errno::ESRCH] No process with process ID pid was found. /// /// /// # Platform /// /// This function only support standard POSIX implementation platform, /// so some system such as redox doesn't supported. #[cfg(not(target_os = "redox"))] pub fn getsid(pid: i32) -> Result { unsafe { let result = libc::getsid(pid); if Errno::last() == Errno::UnknownErrno { Ok(result) } else { Err(Errno::last()) } } } /// Missing methods for Child objects pub trait ChildExt { /// Send a signal to a Child process. /// /// Caller beware: if the process already exited then you may accidentally /// send the signal to an unrelated process that recycled the PID. fn send_signal(&mut self, signal: usize) -> io::Result<()>; /// Send a signal to a process group. fn send_signal_group(&mut self, signal: usize) -> io::Result<()>; /// Wait for a process to finish or return after the specified duration. /// A `timeout` of zero disables the timeout. fn wait_or_timeout(&mut self, timeout: Duration) -> io::Result>; } impl ChildExt for Child { fn send_signal(&mut self, signal: usize) -> io::Result<()> { if unsafe { libc::kill(self.id() as pid_t, signal as i32) } == 0 { Ok(()) } else { Err(io::Error::last_os_error()) } } fn send_signal_group(&mut self, signal: usize) -> io::Result<()> { // Ignore the signal, so we don't go into a signal loop. 
if unsafe { libc::signal(signal as i32, libc::SIG_IGN) } != 0 { return Err(io::Error::last_os_error()); } if unsafe { libc::kill(0, signal as i32) } == 0 { Ok(()) } else { Err(io::Error::last_os_error()) } } fn wait_or_timeout(&mut self, timeout: Duration) -> io::Result> { if timeout == Duration::from_micros(0) { return self.wait().map(Some); } // .try_wait() doesn't drop stdin, so we do it manually drop(self.stdin.take()); let start = Instant::now(); loop { if let Some(status) = self.try_wait()? { return Ok(Some(status)); } if start.elapsed() >= timeout { break; } // XXX: this is kinda gross, but it's cleaner than starting a thread just to wait // (which was the previous solution). We might want to use a different duration // here as well thread::sleep(Duration::from_millis(100)); } Ok(None) } } #[cfg(test)] mod tests { use super::*; #[test] #[cfg(not(target_os = "redox"))] fn test_getsid() { assert_eq!( getsid(getpid()).expect("getsid(getpid)"), // zero is a special value for SID. // https://pubs.opengroup.org/onlinepubs/9699919799/functions/getsid.html getsid(0).expect("getsid(0)") ); // SID never be 0. assert!(getsid(getpid()).expect("getsid(getpid)") > 0); // This might caused tests failure but the probability is low. assert!(getsid(999_999).is_err()); } } uucore-0.0.30/src/lib/features/quoting_style.rs000064400000000000000000001250531046102023000176430ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Set of functions for escaping names according to different quoting styles. use std::char::from_digit; use std::ffi::{OsStr, OsString}; use std::fmt; // These are characters with special meaning in the shell (e.g. bash). // The first const contains characters that only have a special meaning when they appear at the beginning of a name. 
const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#"; // PR#6559 : Remove `]{}` from special shell chars. const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! "; /// The quoting style to use when escaping a name. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum QuotingStyle { /// Escape the name as a shell string. /// Used in, e.g., `ls --quoting-style=shell`. Shell { /// Whether to escape characters in the name. /// True in, e.g., `ls --quoting-style=shell-escape`. escape: bool, /// Whether to always quote the name. always_quote: bool, /// Whether to show control and non-unicode characters, or replace them with `?`. show_control: bool, }, /// Escape the name as a C string. /// Used in, e.g., `ls --quote-name`. C { /// The type of quotes to use. quotes: Quotes, }, /// Do not escape the string. /// Used in, e.g., `ls --literal`. Literal { /// Whether to show control and non-unicode characters, or replace them with `?`. show_control: bool, }, } /// The type of quotes to use when escaping a name as a C string. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Quotes { /// Do not use quotes. None, /// Use single quotes. Single, /// Use double quotes. Double, // TODO: Locale } // This implementation is heavily inspired by the std::char::EscapeDefault implementation // in the Rust standard library. This custom implementation is needed because the // characters \a, \b, \e, \f & \v are not recognized by Rust. struct EscapedChar { state: EscapeState, } enum EscapeState { Done, Char(char), Backslash(char), ForceQuote(char), Octal(EscapeOctal), } /// Bytes we need to present as escaped octal, in the form of `\nnn` per byte. /// Only supports characters up to 2 bytes long in UTF-8. 
struct EscapeOctal { c: [u8; 2], state: EscapeOctalState, idx: u8, } enum EscapeOctalState { Done, FirstBackslash, FirstValue, LastBackslash, LastValue, } fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 { (byte >> (idx * 3)) & 0o7 } impl Iterator for EscapeOctal { type Item = char; fn next(&mut self) -> Option { match self.state { EscapeOctalState::Done => None, EscapeOctalState::FirstBackslash => { self.state = EscapeOctalState::FirstValue; Some('\\') } EscapeOctalState::LastBackslash => { self.state = EscapeOctalState::LastValue; Some('\\') } EscapeOctalState::FirstValue => { let octal_digit = byte_to_octal_digit(self.c[0], self.idx); if self.idx == 0 { self.state = EscapeOctalState::LastBackslash; self.idx = 2; } else { self.idx -= 1; } Some(from_digit(octal_digit.into(), 8).unwrap()) } EscapeOctalState::LastValue => { let octal_digit = byte_to_octal_digit(self.c[1], self.idx); if self.idx == 0 { self.state = EscapeOctalState::Done; } else { self.idx -= 1; } Some(from_digit(octal_digit.into(), 8).unwrap()) } } } } impl EscapeOctal { fn from_char(c: char) -> Self { if c.len_utf8() == 1 { return Self::from_byte(c as u8); } let mut buf = [0; 2]; let _s = c.encode_utf8(&mut buf); Self { c: buf, idx: 2, state: EscapeOctalState::FirstBackslash, } } fn from_byte(b: u8) -> Self { Self { c: [0, b], idx: 2, state: EscapeOctalState::LastBackslash, } } } impl EscapedChar { fn new_literal(c: char) -> Self { Self { state: EscapeState::Char(c), } } fn new_octal(b: u8) -> Self { Self { state: EscapeState::Octal(EscapeOctal::from_byte(b)), } } fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self { use EscapeState::*; let init_state = match c { '\x07' => Backslash('a'), '\x08' => Backslash('b'), '\t' => Backslash('t'), '\n' => Backslash('n'), '\x0B' => Backslash('v'), '\x0C' => Backslash('f'), '\r' => Backslash('r'), '\\' => Backslash('\\'), '\'' => match quotes { Quotes::Single => Backslash('\''), _ => Char('\''), }, '"' => match quotes { Quotes::Double => Backslash('"'), _ 
=> Char('"'), }, ' ' if !dirname => match quotes { Quotes::None => Backslash(' '), _ => Char(' '), }, ':' if dirname => Backslash(':'), _ if c.is_control() => Octal(EscapeOctal::from_char(c)), _ => Char(c), }; Self { state: init_state } } fn new_shell(c: char, escape: bool, quotes: Quotes) -> Self { use EscapeState::*; let init_state = match c { _ if !escape && c.is_control() => Char(c), '\x07' => Backslash('a'), '\x08' => Backslash('b'), '\t' => Backslash('t'), '\n' => Backslash('n'), '\x0B' => Backslash('v'), '\x0C' => Backslash('f'), '\r' => Backslash('r'), '\'' => match quotes { Quotes::Single => Backslash('\''), _ => Char('\''), }, _ if c.is_control() => Octal(EscapeOctal::from_char(c)), _ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c), _ => Char(c), }; Self { state: init_state } } fn hide_control(self) -> Self { match self.state { EscapeState::Char(c) if c.is_control() => Self { state: EscapeState::Char('?'), }, _ => self, } } } impl Iterator for EscapedChar { type Item = char; fn next(&mut self) -> Option { match self.state { EscapeState::Backslash(c) => { self.state = EscapeState::Char(c); Some('\\') } EscapeState::Char(c) | EscapeState::ForceQuote(c) => { self.state = EscapeState::Done; Some(c) } EscapeState::Done => None, EscapeState::Octal(ref mut iter) => iter.next(), } } } /// Check whether `bytes` starts with any byte in `pattern`. 
fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool { !bytes.is_empty() && pattern.contains(&bytes[0]) } fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec, bool) { let mut must_quote = false; let mut escaped_str = Vec::with_capacity(name.len()); let mut utf8_buf = vec![0; 4]; for s in name.utf8_chunks() { for c in s.valid().chars() { let escaped = { let ec = EscapedChar::new_shell(c, false, quotes); if show_control_chars { ec } else { ec.hide_control() } }; match escaped.state { EscapeState::Backslash('\'') => escaped_str.extend_from_slice(b"'\\''"), EscapeState::ForceQuote(x) => { must_quote = true; escaped_str.extend_from_slice(x.encode_utf8(&mut utf8_buf).as_bytes()); } _ => { for c in escaped { escaped_str.extend_from_slice(c.encode_utf8(&mut utf8_buf).as_bytes()); } } } } if show_control_chars { escaped_str.extend_from_slice(s.invalid()); } else { escaped_str.resize(escaped_str.len() + s.invalid().len(), b'?'); } } must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START); (escaped_str, must_quote) } fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec, bool) { // We need to keep track of whether we are in a dollar expression // because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n' let mut in_dollar = false; let mut must_quote = false; let mut escaped_str = String::with_capacity(name.len()); for s in name.utf8_chunks() { for c in s.valid().chars() { let escaped = EscapedChar::new_shell(c, true, quotes); match escaped.state { EscapeState::Char(x) => { if in_dollar { escaped_str.push_str("''"); in_dollar = false; } escaped_str.push(x); } EscapeState::ForceQuote(x) => { if in_dollar { escaped_str.push_str("''"); in_dollar = false; } must_quote = true; escaped_str.push(x); } // Single quotes are not put in dollar expressions, but are escaped // if the string also contains double quotes. In that case, they must // be handled separately. 
EscapeState::Backslash('\'') => { must_quote = true; in_dollar = false; escaped_str.push_str("'\\''"); } _ => { if !in_dollar { escaped_str.push_str("'$'"); in_dollar = true; } must_quote = true; for char in escaped { escaped_str.push(char); } } } } if !s.invalid().is_empty() { if !in_dollar { escaped_str.push_str("'$'"); in_dollar = true; } must_quote = true; let escaped_bytes: String = s .invalid() .iter() .flat_map(|b| EscapedChar::new_octal(*b)) .collect(); escaped_str.push_str(&escaped_bytes); } } must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START); (escaped_str.into(), must_quote) } /// Return a set of characters that implies quoting of the word in /// shell-quoting mode. fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] { const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r="; // the ':' colon character only induce quoting in the // context of ls displaying a directory name before listing its content. // (e.g. with the recursive flag -R) let start_index = if is_dirname { 0 } else { 1 }; &ESCAPED_CHARS[start_index..] } /// Escape a name according to the given quoting style. /// /// This inner function provides an additional flag `dirname` which /// is meant for ls' directory name display. 
fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec { match style { QuotingStyle::Literal { show_control } => { if *show_control { name.to_owned() } else { name.utf8_chunks() .map(|s| { let valid: String = s .valid() .chars() .flat_map(|c| EscapedChar::new_literal(c).hide_control()) .collect(); let invalid = "?".repeat(s.invalid().len()); valid + &invalid }) .collect::() .into() } } QuotingStyle::C { quotes } => { let escaped_str: String = name .utf8_chunks() .flat_map(|s| { let valid = s .valid() .chars() .flat_map(|c| EscapedChar::new_c(c, *quotes, dirname)); let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b)); valid.chain(invalid) }) .collect::(); match quotes { Quotes::Single => format!("'{escaped_str}'"), Quotes::Double => format!("\"{escaped_str}\""), Quotes::None => escaped_str, } .into() } QuotingStyle::Shell { escape, always_quote, show_control, } => { let (quotes, must_quote) = if name .iter() .any(|c| shell_escaped_char_set(dirname).contains(c)) { (Quotes::Single, true) } else if name.contains(&b'\'') { (Quotes::Double, true) } else if *always_quote { (Quotes::Single, true) } else { (Quotes::Single, false) }; let (escaped_str, contains_quote_chars) = if *escape { shell_with_escape(name, quotes) } else { shell_without_escape(name, quotes, *show_control) }; if must_quote | contains_quote_chars && quotes != Quotes::None { let mut quoted_str = Vec::::with_capacity(escaped_str.len() + 2); let quote = if quotes == Quotes::Single { b'\'' } else { b'"' }; quoted_str.push(quote); quoted_str.extend(escaped_str); quoted_str.push(quote); quoted_str } else { escaped_str } } } } /// Escape a filename with respect to the given style. 
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString { let name = crate::os_str_as_bytes_lossy(name); crate::os_string_from_vec(escape_name_inner(&name, style, false)) .expect("all byte sequences should be valid for platform, or already replaced in name") } /// Escape a directory name with respect to the given style. /// This is mainly meant to be used for ls' directory name printing and is not /// likely to be used elsewhere. pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString { let name = crate::os_str_as_bytes_lossy(dir_name); crate::os_string_from_vec(escape_name_inner(&name, style, true)) .expect("all byte sequences should be valid for platform, or already replaced in name") } impl fmt::Display for QuotingStyle { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Self::Shell { escape, always_quote, show_control, } => { let mut style = "shell".to_string(); if escape { style.push_str("-escape"); } if always_quote { style.push_str("-always-quote"); } if show_control { style.push_str("-show-control"); } f.write_str(&style) } Self::C { .. } => f.write_str("C"), Self::Literal { .. 
} => f.write_str("literal"), } } } impl fmt::Display for Quotes { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Self::None => f.write_str("None"), Self::Single => f.write_str("Single"), Self::Double => f.write_str("Double"), } } } #[cfg(test)] mod tests { use crate::quoting_style::{escape_name_inner, Quotes, QuotingStyle}; // spell-checker:ignore (tests/words) one\'two one'two fn get_style(s: &str) -> QuotingStyle { match s { "literal" => QuotingStyle::Literal { show_control: false, }, "literal-show" => QuotingStyle::Literal { show_control: true }, "escape" => QuotingStyle::C { quotes: Quotes::None, }, "c" => QuotingStyle::C { quotes: Quotes::Double, }, "shell" => QuotingStyle::Shell { escape: false, always_quote: false, show_control: false, }, "shell-show" => QuotingStyle::Shell { escape: false, always_quote: false, show_control: true, }, "shell-always" => QuotingStyle::Shell { escape: false, always_quote: true, show_control: false, }, "shell-always-show" => QuotingStyle::Shell { escape: false, always_quote: true, show_control: true, }, "shell-escape" => QuotingStyle::Shell { escape: true, always_quote: false, show_control: false, }, "shell-escape-always" => QuotingStyle::Shell { escape: true, always_quote: true, show_control: false, }, _ => panic!("Invalid name!"), } } fn check_names_inner(name: &[u8], map: &[(T, &str)]) -> Vec> { map.iter() .map(|(_, style)| escape_name_inner(name, &get_style(style), false)) .collect() } fn check_names(name: &str, map: &[(&str, &str)]) { assert_eq!( map.iter() .map(|(correct, _)| *correct) .collect::>(), check_names_inner(name.as_bytes(), map) .iter() .map(|bytes| std::str::from_utf8(bytes) .expect("valid str goes in, valid str comes out")) .collect::>() ); } fn check_names_raw(name: &[u8], map: &[(&[u8], &str)]) { assert_eq!( map.iter() .map(|(correct, _)| *correct) .collect::>(), check_names_inner(name, map) ); } #[test] fn test_simple_names() { check_names( "one_two", &[ ("one_two", "literal"), 
("one_two", "literal-show"), ("one_two", "escape"), ("\"one_two\"", "c"), ("one_two", "shell"), ("one_two", "shell-show"), ("'one_two'", "shell-always"), ("'one_two'", "shell-always-show"), ("one_two", "shell-escape"), ("'one_two'", "shell-escape-always"), ], ); } #[test] fn test_spaces() { check_names( "one two", &[ ("one two", "literal"), ("one two", "literal-show"), ("one\\ two", "escape"), ("\"one two\"", "c"), ("'one two'", "shell"), ("'one two'", "shell-show"), ("'one two'", "shell-always"), ("'one two'", "shell-always-show"), ("'one two'", "shell-escape"), ("'one two'", "shell-escape-always"), ], ); check_names( " one", &[ (" one", "literal"), (" one", "literal-show"), ("\\ one", "escape"), ("\" one\"", "c"), ("' one'", "shell"), ("' one'", "shell-show"), ("' one'", "shell-always"), ("' one'", "shell-always-show"), ("' one'", "shell-escape"), ("' one'", "shell-escape-always"), ], ); } #[test] fn test_quotes() { // One double quote check_names( "one\"two", &[ ("one\"two", "literal"), ("one\"two", "literal-show"), ("one\"two", "escape"), ("\"one\\\"two\"", "c"), ("'one\"two'", "shell"), ("'one\"two'", "shell-show"), ("'one\"two'", "shell-always"), ("'one\"two'", "shell-always-show"), ("'one\"two'", "shell-escape"), ("'one\"two'", "shell-escape-always"), ], ); // One single quote check_names( "one'two", &[ ("one'two", "literal"), ("one'two", "literal-show"), ("one'two", "escape"), ("\"one'two\"", "c"), ("\"one'two\"", "shell"), ("\"one'two\"", "shell-show"), ("\"one'two\"", "shell-always"), ("\"one'two\"", "shell-always-show"), ("\"one'two\"", "shell-escape"), ("\"one'two\"", "shell-escape-always"), ], ); // One single quote and one double quote check_names( "one'two\"three", &[ ("one'two\"three", "literal"), ("one'two\"three", "literal-show"), ("one'two\"three", "escape"), ("\"one'two\\\"three\"", "c"), ("'one'\\''two\"three'", "shell"), ("'one'\\''two\"three'", "shell-show"), ("'one'\\''two\"three'", "shell-always"), ("'one'\\''two\"three'", 
"shell-always-show"), ("'one'\\''two\"three'", "shell-escape"), ("'one'\\''two\"three'", "shell-escape-always"), ], ); // Consecutive quotes check_names( "one''two\"\"three", &[ ("one''two\"\"three", "literal"), ("one''two\"\"three", "literal-show"), ("one''two\"\"three", "escape"), ("\"one''two\\\"\\\"three\"", "c"), ("'one'\\'''\\''two\"\"three'", "shell"), ("'one'\\'''\\''two\"\"three'", "shell-show"), ("'one'\\'''\\''two\"\"three'", "shell-always"), ("'one'\\'''\\''two\"\"three'", "shell-always-show"), ("'one'\\'''\\''two\"\"three'", "shell-escape"), ("'one'\\'''\\''two\"\"three'", "shell-escape-always"), ], ); } #[test] fn test_control_chars() { // A simple newline check_names( "one\ntwo", &[ ("one?two", "literal"), ("one\ntwo", "literal-show"), ("one\\ntwo", "escape"), ("\"one\\ntwo\"", "c"), ("'one?two'", "shell"), ("'one\ntwo'", "shell-show"), ("'one?two'", "shell-always"), ("'one\ntwo'", "shell-always-show"), ("'one'$'\\n''two'", "shell-escape"), ("'one'$'\\n''two'", "shell-escape-always"), ], ); // A control character followed by a special shell character check_names( "one\n&two", &[ ("one?&two", "literal"), ("one\n&two", "literal-show"), ("one\\n&two", "escape"), ("\"one\\n&two\"", "c"), ("'one?&two'", "shell"), ("'one\n&two'", "shell-show"), ("'one?&two'", "shell-always"), ("'one\n&two'", "shell-always-show"), ("'one'$'\\n''&two'", "shell-escape"), ("'one'$'\\n''&two'", "shell-escape-always"), ], ); // The first 16 ASCII control characters. NUL is also included, even though it is of // no importance for file names. 
check_names( "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", &[ ("????????????????", "literal"), ( "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", "literal-show", ), ( "\\000\\001\\002\\003\\004\\005\\006\\a\\b\\t\\n\\v\\f\\r\\016\\017", "escape", ), ( "\"\\000\\001\\002\\003\\004\\005\\006\\a\\b\\t\\n\\v\\f\\r\\016\\017\"", "c", ), ("'????????????????'", "shell"), ( "'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F'", "shell-show", ), ("'????????????????'", "shell-always"), ( "'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F'", "shell-always-show", ), ( "''$'\\000\\001\\002\\003\\004\\005\\006\\a\\b\\t\\n\\v\\f\\r\\016\\017'", "shell-escape", ), ( "''$'\\000\\001\\002\\003\\004\\005\\006\\a\\b\\t\\n\\v\\f\\r\\016\\017'", "shell-escape-always", ), ], ); // The last 16 ASCII control characters. check_names( "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", &[ ("????????????????", "literal"), ( "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", "literal-show", ), ( "\\020\\021\\022\\023\\024\\025\\026\\027\\030\\031\\032\\033\\034\\035\\036\\037", "escape", ), ( "\"\\020\\021\\022\\023\\024\\025\\026\\027\\030\\031\\032\\033\\034\\035\\036\\037\"", "c", ), ("????????????????", "shell"), ( "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", "shell-show", ), ("'????????????????'", "shell-always"), ( "'\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'", "shell-always-show", ), ( "''$'\\020\\021\\022\\023\\024\\025\\026\\027\\030\\031\\032\\033\\034\\035\\036\\037'", "shell-escape", ), ( "''$'\\020\\021\\022\\023\\024\\025\\026\\027\\030\\031\\032\\033\\034\\035\\036\\037'", "shell-escape-always", ), ], ); // DEL check_names( "\x7F", &[ ("?", "literal"), ("\x7F", "literal-show"), ("\\177", "escape"), ("\"\\177\"", "c"), ("?", "shell"), ("\x7F", "shell-show"), ("'?'", "shell-always"), ("'\x7F'", "shell-always-show"), ("''$'\\177'", 
"shell-escape"), ("''$'\\177'", "shell-escape-always"), ], ); // The first 16 Unicode control characters. let test_str = std::str::from_utf8(b"\xC2\x80\xC2\x81\xC2\x82\xC2\x83\xC2\x84\xC2\x85\xC2\x86\xC2\x87\xC2\x88\xC2\x89\xC2\x8A\xC2\x8B\xC2\x8C\xC2\x8D\xC2\x8E\xC2\x8F").unwrap(); check_names( test_str, &[ ("????????????????", "literal"), (test_str, "literal-show"), ("\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217", "escape"), ("\"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217\"", "c"), ("????????????????", "shell"), (test_str, "shell-show"), ("'????????????????'", "shell-always"), (&format!("'{}'", test_str), "shell-always-show"), ("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape"), ("''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", "shell-escape-always"), ], ); // The last 16 Unicode control characters. 
let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap(); check_names( test_str, &[ ("????????????????", "literal"), (test_str, "literal-show"), ("\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237", "escape"), ("\"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237\"", "c"), ("????????????????", "shell"), (test_str, "shell-show"), ("'????????????????'", "shell-always"), (&format!("'{}'", test_str), "shell-always-show"), ("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape"), ("''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", "shell-escape-always"), ], ); } #[test] fn test_non_unicode_bytes() { let ascii = b'_'; let continuation = b'\xA7'; let first2byte = b'\xC2'; let first3byte = b'\xE0'; let first4byte = b'\xF0'; let invalid = b'\xC0'; // a single byte value invalid outside of additional context in UTF-8 check_names_raw( &[continuation], &[ (b"?", "literal"), (b"\xA7", "literal-show"), (b"\\247", "escape"), (b"\"\\247\"", "c"), (b"?", "shell"), (b"\xA7", "shell-show"), (b"'?'", "shell-always"), (b"'\xA7'", "shell-always-show"), (b"''$'\\247'", "shell-escape"), (b"''$'\\247'", "shell-escape-always"), ], ); // ...but the byte becomes valid with appropriate context // (this is just the § character in UTF-8, written as bytes) check_names_raw( &[first2byte, continuation], &[ (b"\xC2\xA7", "literal"), (b"\xC2\xA7", "literal-show"), (b"\xC2\xA7", "escape"), (b"\"\xC2\xA7\"", "c"), 
(b"\xC2\xA7", "shell"), (b"\xC2\xA7", "shell-show"), (b"'\xC2\xA7'", "shell-always"), (b"'\xC2\xA7'", "shell-always-show"), (b"\xC2\xA7", "shell-escape"), (b"'\xC2\xA7'", "shell-escape-always"), ], ); // mixed with valid characters check_names_raw( &[continuation, ascii], &[ (b"?_", "literal"), (b"\xA7_", "literal-show"), (b"\\247_", "escape"), (b"\"\\247_\"", "c"), (b"?_", "shell"), (b"\xA7_", "shell-show"), (b"'?_'", "shell-always"), (b"'\xA7_'", "shell-always-show"), (b"''$'\\247''_'", "shell-escape"), (b"''$'\\247''_'", "shell-escape-always"), ], ); check_names_raw( &[ascii, continuation], &[ (b"_?", "literal"), (b"_\xA7", "literal-show"), (b"_\\247", "escape"), (b"\"_\\247\"", "c"), (b"_?", "shell"), (b"_\xA7", "shell-show"), (b"'_?'", "shell-always"), (b"'_\xA7'", "shell-always-show"), (b"'_'$'\\247'", "shell-escape"), (b"'_'$'\\247'", "shell-escape-always"), ], ); check_names_raw( &[ascii, continuation, ascii], &[ (b"_?_", "literal"), (b"_\xA7_", "literal-show"), (b"_\\247_", "escape"), (b"\"_\\247_\"", "c"), (b"_?_", "shell"), (b"_\xA7_", "shell-show"), (b"'_?_'", "shell-always"), (b"'_\xA7_'", "shell-always-show"), (b"'_'$'\\247''_'", "shell-escape"), (b"'_'$'\\247''_'", "shell-escape-always"), ], ); check_names_raw( &[continuation, ascii, continuation], &[ (b"?_?", "literal"), (b"\xA7_\xA7", "literal-show"), (b"\\247_\\247", "escape"), (b"\"\\247_\\247\"", "c"), (b"?_?", "shell"), (b"\xA7_\xA7", "shell-show"), (b"'?_?'", "shell-always"), (b"'\xA7_\xA7'", "shell-always-show"), (b"''$'\\247''_'$'\\247'", "shell-escape"), (b"''$'\\247''_'$'\\247'", "shell-escape-always"), ], ); // contiguous invalid bytes check_names_raw( &[ ascii, invalid, ascii, continuation, continuation, ascii, continuation, continuation, continuation, ascii, continuation, continuation, continuation, continuation, ascii, ], &[ (b"_?_??_???_????_", "literal"), ( b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_", "literal-show", ), ( 
b"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_", "escape", ), ( b"\"_\\300_\\247\\247_\\247\\247\\247_\\247\\247\\247\\247_\"", "c", ), (b"_?_??_???_????_", "shell"), ( b"_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_", "shell-show", ), (b"'_?_??_???_????_'", "shell-always"), ( b"'_\xC0_\xA7\xA7_\xA7\xA7\xA7_\xA7\xA7\xA7\xA7_'", "shell-always-show", ), ( b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'", "shell-escape", ), ( b"'_'$'\\300''_'$'\\247\\247''_'$'\\247\\247\\247''_'$'\\247\\247\\247\\247''_'", "shell-escape-always", ), ], ); // invalid multi-byte sequences that start valid check_names_raw( &[first2byte, ascii], &[ (b"?_", "literal"), (b"\xC2_", "literal-show"), (b"\\302_", "escape"), (b"\"\\302_\"", "c"), (b"?_", "shell"), (b"\xC2_", "shell-show"), (b"'?_'", "shell-always"), (b"'\xC2_'", "shell-always-show"), (b"''$'\\302''_'", "shell-escape"), (b"''$'\\302''_'", "shell-escape-always"), ], ); check_names_raw( &[first2byte, first2byte, continuation], &[ (b"?\xC2\xA7", "literal"), (b"\xC2\xC2\xA7", "literal-show"), (b"\\302\xC2\xA7", "escape"), (b"\"\\302\xC2\xA7\"", "c"), (b"?\xC2\xA7", "shell"), (b"\xC2\xC2\xA7", "shell-show"), (b"'?\xC2\xA7'", "shell-always"), (b"'\xC2\xC2\xA7'", "shell-always-show"), (b"''$'\\302''\xC2\xA7'", "shell-escape"), (b"''$'\\302''\xC2\xA7'", "shell-escape-always"), ], ); check_names_raw( &[first3byte, continuation, ascii], &[ (b"??_", "literal"), (b"\xE0\xA7_", "literal-show"), (b"\\340\\247_", "escape"), (b"\"\\340\\247_\"", "c"), (b"??_", "shell"), (b"\xE0\xA7_", "shell-show"), (b"'??_'", "shell-always"), (b"'\xE0\xA7_'", "shell-always-show"), (b"''$'\\340\\247''_'", "shell-escape"), (b"''$'\\340\\247''_'", "shell-escape-always"), ], ); check_names_raw( &[first4byte, continuation, continuation, ascii], &[ (b"???_", "literal"), (b"\xF0\xA7\xA7_", "literal-show"), (b"\\360\\247\\247_", "escape"), (b"\"\\360\\247\\247_\"", "c"), (b"???_", "shell"), (b"\xF0\xA7\xA7_", "shell-show"), 
(b"'???_'", "shell-always"), (b"'\xF0\xA7\xA7_'", "shell-always-show"), (b"''$'\\360\\247\\247''_'", "shell-escape"), (b"''$'\\360\\247\\247''_'", "shell-escape-always"), ], ); } #[test] fn test_question_mark() { // A question mark must force quotes in shell and shell-always, unless // it is in place of a control character (that case is already covered // in other tests) check_names( "one?two", &[ ("one?two", "literal"), ("one?two", "literal-show"), ("one?two", "escape"), ("\"one?two\"", "c"), ("'one?two'", "shell"), ("'one?two'", "shell-show"), ("'one?two'", "shell-always"), ("'one?two'", "shell-always-show"), ("'one?two'", "shell-escape"), ("'one?two'", "shell-escape-always"), ], ); } #[test] fn test_backslash() { // Escaped in C-style, but not in Shell-style escaping check_names( "one\\two", &[ ("one\\two", "literal"), ("one\\two", "literal-show"), ("one\\\\two", "escape"), ("\"one\\\\two\"", "c"), ("'one\\two'", "shell"), ("'one\\two'", "shell-always"), ("'one\\two'", "shell-escape"), ("'one\\two'", "shell-escape-always"), ], ); } #[test] fn test_tilde_and_hash() { check_names("~", &[("'~'", "shell"), ("'~'", "shell-escape")]); check_names( "~name", &[("'~name'", "shell"), ("'~name'", "shell-escape")], ); check_names( "some~name", &[("some~name", "shell"), ("some~name", "shell-escape")], ); check_names("name~", &[("name~", "shell"), ("name~", "shell-escape")]); check_names("#", &[("'#'", "shell"), ("'#'", "shell-escape")]); check_names( "#name", &[("'#name'", "shell"), ("'#name'", "shell-escape")], ); check_names( "some#name", &[("some#name", "shell"), ("some#name", "shell-escape")], ); check_names("name#", &[("name#", "shell"), ("name#", "shell-escape")]); } #[test] fn test_special_chars_in_double_quotes() { check_names( "can'$t", &[ ("'can'\\''$t'", "shell"), ("'can'\\''$t'", "shell-always"), ("'can'\\''$t'", "shell-escape"), ("'can'\\''$t'", "shell-escape-always"), ], ); check_names( "can'`t", &[ ("'can'\\''`t'", "shell"), ("'can'\\''`t'", "shell-always"), 
("'can'\\''`t'", "shell-escape"), ("'can'\\''`t'", "shell-escape-always"), ], ); check_names( "can'\\t", &[ ("'can'\\''\\t'", "shell"), ("'can'\\''\\t'", "shell-always"), ("'can'\\''\\t'", "shell-escape"), ("'can'\\''\\t'", "shell-escape-always"), ], ); } #[test] fn test_quoting_style_display() { let style = QuotingStyle::Shell { escape: true, always_quote: false, show_control: false, }; assert_eq!(format!("{style}"), "shell-escape"); let style = QuotingStyle::Shell { escape: false, always_quote: true, show_control: false, }; assert_eq!(format!("{style}"), "shell-always-quote"); let style = QuotingStyle::Shell { escape: false, always_quote: false, show_control: true, }; assert_eq!(format!("{style}"), "shell-show-control"); let style = QuotingStyle::C { quotes: Quotes::Double, }; assert_eq!(format!("{style}"), "C"); let style = QuotingStyle::Literal { show_control: false, }; assert_eq!(format!("{style}"), "literal"); } #[test] fn test_quotes_display() { assert_eq!(format!("{}", Quotes::None), "None"); assert_eq!(format!("{}", Quotes::Single), "Single"); assert_eq!(format!("{}", Quotes::Double), "Double"); } } uucore-0.0.30/src/lib/features/ranges.rs000064400000000000000000000172151046102023000162140ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (ToDO) inval //! A module for handling ranges of values. 
use std::cmp::max; use std::str::FromStr; use crate::display::Quotable; /// A range of values #[derive(PartialEq, Eq, PartialOrd, Ord, Debug)] pub struct Range { /// The lower bound of the range pub low: usize, /// The upper bound of the range pub high: usize, } impl FromStr for Range { type Err = &'static str; /// Parse a string of the form `a-b` into a `Range` /// /// ``` /// use std::str::FromStr; /// use uucore::ranges::Range; /// assert_eq!(Range::from_str("5"), Ok(Range { low: 5, high: 5 })); /// assert_eq!(Range::from_str("4-"), Ok(Range { low: 4, high: usize::MAX - 1 })); /// assert_eq!(Range::from_str("-4"), Ok(Range { low: 1, high: 4 })); /// assert_eq!(Range::from_str("2-4"), Ok(Range { low: 2, high: 4 })); /// assert!(Range::from_str("0-4").is_err()); /// assert!(Range::from_str("4-2").is_err()); /// assert!(Range::from_str("-").is_err()); /// assert!(Range::from_str("a").is_err()); /// assert!(Range::from_str("a-b").is_err()); /// ``` fn from_str(s: &str) -> Result { fn parse(s: &str) -> Result { match s.parse::() { Ok(0) => Err("fields and positions are numbered from 1"), // GNU fails when we are at the limit. 
Match their behavior Ok(n) if n == usize::MAX => Err("byte/character offset is too large"), Ok(n) => Ok(n), Err(_) => Err("failed to parse range"), } } Ok(match s.split_once('-') { None => { let n = parse(s)?; Self { low: n, high: n } } Some(("", "")) => return Err("invalid range with no endpoint"), Some((low, "")) => Self { low: parse(low)?, high: usize::MAX - 1, }, Some(("", high)) => Self { low: 1, high: parse(high)?, }, Some((low, high)) => { let (low, high) = (parse(low)?, parse(high)?); if low <= high { Self { low, high } } else { return Err("high end of range less than low end"); } } }) } } impl Range { /// Parse a list of ranges separated by commas and/or spaces pub fn from_list(list: &str) -> Result, String> { let mut ranges = Vec::new(); for item in list.split(&[',', ' ']) { let range_item = FromStr::from_str(item) .map_err(|e| format!("range {} was invalid: {}", item.quote(), e))?; ranges.push(range_item); } Ok(Self::merge(ranges)) } /// Merge any overlapping ranges. Adjacent ranges are *NOT* merged. /// /// Is guaranteed to return only disjoint ranges in a sorted order. fn merge(mut ranges: Vec) -> Vec { ranges.sort(); // merge overlapping ranges for i in 0..ranges.len() { let j = i + 1; while j < ranges.len() && ranges[j].low <= ranges[i].high { let j_high = ranges.remove(j).high; ranges[i].high = max(ranges[i].high, j_high); } } ranges } } /// Calculate the complement of the given ranges. pub fn complement(ranges: &[Range]) -> Vec { let mut prev_high = 0; let mut complements = Vec::with_capacity(ranges.len() + 1); for range in ranges { if range.low > prev_high + 1 { complements.push(Range { low: prev_high + 1, high: range.low - 1, }); } prev_high = range.high; } if prev_high < usize::MAX - 1 { complements.push(Range { low: prev_high + 1, high: usize::MAX - 1, }); } complements } /// Test if at least one of the given Ranges contain the supplied value. 
/// /// Examples: /// /// ``` /// let ranges = uucore::ranges::Range::from_list("11,2,6-8").unwrap(); /// /// assert!(!uucore::ranges::contain(&ranges, 0)); /// assert!(!uucore::ranges::contain(&ranges, 1)); /// assert!(!uucore::ranges::contain(&ranges, 5)); /// assert!(!uucore::ranges::contain(&ranges, 10)); /// /// assert!(uucore::ranges::contain(&ranges, 2)); /// assert!(uucore::ranges::contain(&ranges, 6)); /// assert!(uucore::ranges::contain(&ranges, 7)); /// assert!(uucore::ranges::contain(&ranges, 8)); /// assert!(uucore::ranges::contain(&ranges, 11)); /// ``` pub fn contain(ranges: &[Range], n: usize) -> bool { for range in ranges { if n >= range.low && n <= range.high { return true; } } false } #[cfg(test)] mod test { use super::{complement, Range}; use std::str::FromStr; fn m(a: Vec, b: &[Range]) { assert_eq!(Range::merge(a), b); } fn r(low: usize, high: usize) -> Range { Range { low, high } } #[test] fn merging() { // Single element m(vec![r(1, 2)], &[r(1, 2)]); // Disjoint in wrong order m(vec![r(4, 5), r(1, 2)], &[r(1, 2), r(4, 5)]); // Two elements must be merged m(vec![r(1, 3), r(2, 4), r(6, 7)], &[r(1, 4), r(6, 7)]); // Two merges and a duplicate m( vec![r(1, 3), r(6, 7), r(2, 4), r(6, 7)], &[r(1, 4), r(6, 7)], ); // One giant m( vec![ r(110, 120), r(10, 20), r(100, 200), r(130, 140), r(150, 160), ], &[r(10, 20), r(100, 200)], ); // Last one joins the previous two m(vec![r(10, 20), r(30, 40), r(20, 30)], &[r(10, 40)]); m( vec![r(10, 20), r(30, 40), r(50, 60), r(20, 30)], &[r(10, 40), r(50, 60)], ); // Don't merge adjacent ranges m(vec![r(1, 3), r(4, 6)], &[r(1, 3), r(4, 6)]); } #[test] fn complementing() { // Simple assert_eq!(complement(&[r(3, 4)]), vec![r(1, 2), r(5, usize::MAX - 1)]); // With start assert_eq!( complement(&[r(1, 3), r(6, 10)]), vec![r(4, 5), r(11, usize::MAX - 1)] ); // With end assert_eq!( complement(&[r(2, 4), r(6, usize::MAX - 1)]), vec![r(1, 1), r(5, 5)] ); // With start and end assert_eq!(complement(&[r(1, 4), r(6, usize::MAX 
- 1)]), vec![r(5, 5)]); } #[test] fn test_from_str() { assert_eq!(Range::from_str("5"), Ok(Range { low: 5, high: 5 })); assert_eq!(Range::from_str("3-5"), Ok(Range { low: 3, high: 5 })); assert_eq!( Range::from_str("5-3"), Err("high end of range less than low end") ); assert_eq!(Range::from_str("-"), Err("invalid range with no endpoint")); assert_eq!( Range::from_str("3-"), Ok(Range { low: 3, high: usize::MAX - 1 }) ); assert_eq!(Range::from_str("-5"), Ok(Range { low: 1, high: 5 })); assert_eq!( Range::from_str("0"), Err("fields and positions are numbered from 1") ); let max_value = format!("{}", usize::MAX); assert_eq!( Range::from_str(&max_value), Err("byte/character offset is too large") ); } } uucore-0.0.30/src/lib/features/ringbuffer.rs000064400000000000000000000105341046102023000170630ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! A fixed-size ring buffer. use std::collections::VecDeque; /// A fixed-size ring buffer backed by a `VecDeque`. /// /// If the ring buffer is not full, then calling the [`push_back`] /// method appends elements, as in a [`VecDeque`]. If the ring buffer /// is full, then calling [`push_back`] removes the element at the /// front of the buffer (in a first-in, first-out manner) before /// appending the new element to the back of the buffer. /// /// Use [`from_iter`] to take the last `size` elements from an /// iterator. 
/// /// # Examples /// /// After exceeding the size limit, the oldest elements are dropped in /// favor of the newest element: /// /// ```rust,ignore /// let mut buffer: RingBuffer = RingBuffer::new(2); /// buffer.push_back(0); /// buffer.push_back(1); /// buffer.push_back(2); /// assert_eq!(vec![1, 2], buffer.data); /// ``` /// /// Take the last `n` elements from an iterator: /// /// ```rust,ignore /// let iter = [0, 1, 2].iter(); /// let actual = RingBuffer::from_iter(iter, 2).data; /// let expected = VecDeque::from_iter([1, 2].iter()); /// assert_eq!(expected, actual); /// ``` /// /// [`push_back`]: struct.RingBuffer.html#method.push_back /// [`from_iter`]: struct.RingBuffer.html#method.from_iter pub struct RingBuffer { /// The data stored in the ring buffer. pub data: VecDeque, /// The maximum number of elements that the ring buffer can hold. size: usize, } impl RingBuffer { /// Create a new ring buffer with a maximum size of `size`. pub fn new(size: usize) -> Self { Self { data: VecDeque::new(), size, } } /// Create a new ring buffer from an iterator. pub fn from_iter(iter: impl Iterator, size: usize) -> Self { let mut ring_buffer = Self::new(size); for value in iter { ring_buffer.push_back(value); } ring_buffer } /// Append a value to the end of the ring buffer. /// /// If the ring buffer is not full, this method return [`None`]. If /// the ring buffer is full, appending a new element will cause the /// oldest element to be evicted. In that case this method returns /// that element, or `None`. /// /// In the special case where the size limit is zero, each call to /// this method with input `value` returns `Some(value)`, because /// the input is immediately evicted. 
/// /// # Examples /// /// Appending an element when the buffer is full returns the oldest /// element: /// /// ```rust,ignore /// let mut buf = RingBuffer::new(3); /// assert_eq!(None, buf.push_back(0)); /// assert_eq!(None, buf.push_back(1)); /// assert_eq!(None, buf.push_back(2)); /// assert_eq!(Some(0), buf.push_back(3)); /// ``` /// /// If the size limit is zero, then this method always returns the /// input value: /// /// ```rust,ignore /// let mut buf = RingBuffer::new(0); /// assert_eq!(Some(0), buf.push_back(0)); /// assert_eq!(Some(1), buf.push_back(1)); /// assert_eq!(Some(2), buf.push_back(2)); /// ``` pub fn push_back(&mut self, value: T) -> Option { if self.size == 0 { return Some(value); } let result = if self.size <= self.data.len() { self.data.pop_front() } else { None }; self.data.push_back(value); result } } #[cfg(test)] mod tests { use crate::ringbuffer::RingBuffer; use std::collections::VecDeque; #[test] fn test_size_limit_zero() { let mut buf = RingBuffer::new(0); assert_eq!(Some(0), buf.push_back(0)); assert_eq!(Some(1), buf.push_back(1)); assert_eq!(Some(2), buf.push_back(2)); } #[test] fn test_evict_oldest() { let mut buf = RingBuffer::new(2); assert_eq!(None, buf.push_back(0)); assert_eq!(None, buf.push_back(1)); assert_eq!(Some(0), buf.push_back(2)); } #[test] fn test_from_iter() { let iter = [0, 1, 2].iter(); let actual = RingBuffer::from_iter(iter, 2).data; let expected: VecDeque<&i32> = [1, 2].iter().collect(); assert_eq!(expected, actual); } } uucore-0.0.30/src/lib/features/signals.rs000064400000000000000000000420371046102023000163750ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
// spell-checker:ignore (vars/api) fcntl setrlimit setitimer rubout pollable sysconf // spell-checker:ignore (vars/signals) ABRT ALRM CHLD SEGV SIGABRT SIGALRM SIGBUS SIGCHLD SIGCONT SIGDANGER SIGEMT SIGFPE SIGHUP SIGILL SIGINFO SIGINT SIGIO SIGIOT SIGKILL SIGMIGRATE SIGMSG SIGPIPE SIGPRE SIGPROF SIGPWR SIGQUIT SIGSEGV SIGSTOP SIGSYS SIGTALRM SIGTERM SIGTRAP SIGTSTP SIGTHR SIGTTIN SIGTTOU SIGURG SIGUSR SIGVIRT SIGVTALRM SIGWINCH SIGXCPU SIGXFSZ STKFLT PWR THR TSTP TTIN TTOU VIRT VTALRM XCPU XFSZ SIGCLD SIGPOLL SIGWAITING SIGAIOCANCEL SIGLWP SIGFREEZE SIGTHAW SIGCANCEL SIGLOST SIGXRES SIGJVM SIGRTMIN SIGRT SIGRTMAX TALRM AIOCANCEL XRES RTMIN RTMAX //! This module provides a way to handle signals in a platform-independent way. //! It provides a way to convert signal names to their corresponding values and vice versa. //! It also provides a way to ignore the SIGINT signal and enable pipe errors. #[cfg(unix)] use nix::errno::Errno; #[cfg(unix)] use nix::sys::signal::{ signal, SigHandler::SigDfl, SigHandler::SigIgn, Signal::SIGINT, Signal::SIGPIPE, }; /// The default signal value. pub static DEFAULT_SIGNAL: usize = 15; /* Linux Programmer's Manual 1 HUP 2 INT 3 QUIT 4 ILL 5 TRAP 6 ABRT 7 BUS 8 FPE 9 KILL 10 USR1 11 SEGV 12 USR2 13 PIPE 14 ALRM 15 TERM 16 STKFLT 17 CHLD 18 CONT 19 STOP 20 TSTP 21 TTIN 22 TTOU 23 URG 24 XCPU 25 XFSZ 26 VTALRM 27 PROF 28 WINCH 29 POLL 30 PWR 31 SYS */ /// The list of all signals. 
#[cfg(any(target_os = "linux", target_os = "android", target_os = "redox"))] pub static ALL_SIGNALS: [&str; 32] = [ "EXIT", "HUP", "INT", "QUIT", "ILL", "TRAP", "ABRT", "BUS", "FPE", "KILL", "USR1", "SEGV", "USR2", "PIPE", "ALRM", "TERM", "STKFLT", "CHLD", "CONT", "STOP", "TSTP", "TTIN", "TTOU", "URG", "XCPU", "XFSZ", "VTALRM", "PROF", "WINCH", "POLL", "PWR", "SYS", ]; /* https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man3/signal.3.html No Name Default Action Description 1 SIGHUP terminate process terminal line hangup 2 SIGINT terminate process interrupt program 3 SIGQUIT create core image quit program 4 SIGILL create core image illegal instruction 5 SIGTRAP create core image trace trap 6 SIGABRT create core image abort program (formerly SIGIOT) 7 SIGEMT create core image emulate instruction executed 8 SIGFPE create core image floating-point exception 9 SIGKILL terminate process kill program 10 SIGBUS create core image bus error 11 SIGSEGV create core image segmentation violation 12 SIGSYS create core image non-existent system call invoked 13 SIGPIPE terminate process write on a pipe with no reader 14 SIGALRM terminate process real-time timer expired 15 SIGTERM terminate process software termination signal 16 SIGURG discard signal urgent condition present on socket 17 SIGSTOP stop process stop (cannot be caught or ignored) 18 SIGTSTP stop process stop signal generated from keyboard 19 SIGCONT discard signal continue after stop 20 SIGCHLD discard signal child status has changed 21 SIGTTIN stop process background read attempted from control terminal 22 SIGTTOU stop process background write attempted to control terminal 23 SIGIO discard signal I/O is possible on a descriptor (see fcntl(2)) 24 SIGXCPU terminate process cpu time limit exceeded (see setrlimit(2)) 25 SIGXFSZ terminate process file size limit exceeded (see setrlimit(2)) 26 SIGVTALRM terminate process virtual time alarm (see setitimer(2)) 27 SIGPROF terminate process 
profiling timer alarm (see setitimer(2)) 28 SIGWINCH discard signal Window size change 29 SIGINFO discard signal status request from keyboard 30 SIGUSR1 terminate process User defined signal 1 31 SIGUSR2 terminate process User defined signal 2 */ #[cfg(any(target_vendor = "apple", target_os = "freebsd"))] pub static ALL_SIGNALS: [&str; 32] = [ "EXIT", "HUP", "INT", "QUIT", "ILL", "TRAP", "ABRT", "EMT", "FPE", "KILL", "BUS", "SEGV", "SYS", "PIPE", "ALRM", "TERM", "URG", "STOP", "TSTP", "CONT", "CHLD", "TTIN", "TTOU", "IO", "XCPU", "XFSZ", "VTALRM", "PROF", "WINCH", "INFO", "USR1", "USR2", ]; /* The following signals are defined in NetBSD: SIGHUP 1 Hangup SIGINT 2 Interrupt SIGQUIT 3 Quit SIGILL 4 Illegal instruction SIGTRAP 5 Trace/BPT trap SIGABRT 6 Abort trap SIGEMT 7 EMT trap SIGFPE 8 Floating point exception SIGKILL 9 Killed SIGBUS 10 Bus error SIGSEGV 11 Segmentation fault SIGSYS 12 Bad system call SIGPIPE 13 Broken pipe SIGALRM 14 Alarm clock SIGTERM 15 Terminated SIGURG 16 Urgent I/O condition SIGSTOP 17 Suspended (signal) SIGTSTP 18 Suspended SIGCONT 19 Continued SIGCHLD 20 Child exited, stopped or continued SIGTTIN 21 Stopped (tty input) SIGTTOU 22 Stopped (tty output) SIGIO 23 I/O possible SIGXCPU 24 CPU time limit exceeded SIGXFSZ 25 File size limit exceeded SIGVTALRM 26 Virtual timer expired SIGPROF 27 Profiling timer expired SIGWINCH 28 Window size changed SIGINFO 29 Information request SIGUSR1 30 User defined signal 1 SIGUSR2 31 User defined signal 2 SIGPWR 32 Power fail/restart */ #[cfg(target_os = "netbsd")] pub static ALL_SIGNALS: [&str; 33] = [ "EXIT", "HUP", "INT", "QUIT", "ILL", "TRAP", "ABRT", "EMT", "FPE", "KILL", "BUS", "SEGV", "SYS", "PIPE", "ALRM", "TERM", "URG", "STOP", "TSTP", "CONT", "CHLD", "TTIN", "TTOU", "IO", "XCPU", "XFSZ", "VTALRM", "PROF", "WINCH", "INFO", "USR1", "USR2", "PWR", ]; /* The following signals are defined in OpenBSD: SIGHUP terminate process terminal line hangup SIGINT terminate process interrupt program SIGQUIT create 
core image quit program SIGILL create core image illegal instruction SIGTRAP create core image trace trap SIGABRT create core image abort(3) call (formerly SIGIOT) SIGEMT create core image emulate instruction executed SIGFPE create core image floating-point exception SIGKILL terminate process kill program (cannot be caught or ignored) SIGBUS create core image bus error SIGSEGV create core image segmentation violation SIGSYS create core image system call given invalid argument SIGPIPE terminate process write on a pipe with no reader SIGALRM terminate process real-time timer expired SIGTERM terminate process software termination signal SIGURG discard signal urgent condition present on socket SIGSTOP stop process stop (cannot be caught or ignored) SIGTSTP stop process stop signal generated from keyboard SIGCONT discard signal continue after stop SIGCHLD discard signal child status has changed SIGTTIN stop process background read attempted from control terminal SIGTTOU stop process background write attempted to control terminal SIGIO discard signal I/O is possible on a descriptor (see fcntl(2)) SIGXCPU terminate process CPU time limit exceeded (see setrlimit(2)) SIGXFSZ terminate process file size limit exceeded (see setrlimit(2)) SIGVTALRM terminate process virtual time alarm (see setitimer(2)) SIGPROF terminate process profiling timer alarm (see setitimer(2)) SIGWINCH discard signal window size change SIGINFO discard signal status request from keyboard SIGUSR1 terminate process user-defined signal 1 SIGUSR2 terminate process user-defined signal 2 SIGTHR discard signal thread AST */ #[cfg(target_os = "openbsd")] pub static ALL_SIGNALS: [&str; 33] = [ "EXIT", "HUP", "INT", "QUIT", "ILL", "TRAP", "ABRT", "EMT", "FPE", "KILL", "BUS", "SEGV", "SYS", "PIPE", "ALRM", "TERM", "URG", "STOP", "TSTP", "CONT", "CHLD", "TTIN", "TTOU", "IO", "XCPU", "XFSZ", "VTALRM", "PROF", "WINCH", "INFO", "USR1", "USR2", "THR", ]; /* The following signals are defined in Solaris and illumos; 
(the signals for illumos are the same as Solaris, but illumos still has SIGLWP as well as the alias for SIGLWP (SIGAIOCANCEL)): SIGHUP 1 hangup SIGINT 2 interrupt (rubout) SIGQUIT 3 quit (ASCII FS) SIGILL 4 illegal instruction (not reset when caught) SIGTRAP 5 trace trap (not reset when caught) SIGIOT 6 IOT instruction SIGABRT 6 used by abort, replace SIGIOT in the future SIGEMT 7 EMT instruction SIGFPE 8 floating point exception SIGKILL 9 kill (cannot be caught or ignored) SIGBUS 10 bus error SIGSEGV 11 segmentation violation SIGSYS 12 bad argument to system call SIGPIPE 13 write on a pipe with no one to read it SIGALRM 14 alarm clock SIGTERM 15 software termination signal from kill SIGUSR1 16 user defined signal 1 SIGUSR2 17 user defined signal 2 SIGCLD 18 child status change SIGCHLD 18 child status change alias (POSIX) SIGPWR 19 power-fail restart SIGWINCH 20 window size change SIGURG 21 urgent socket condition SIGPOLL 22 pollable event occurred SIGIO SIGPOLL socket I/O possible (SIGPOLL alias) SIGSTOP 23 stop (cannot be caught or ignored) SIGTSTP 24 user stop requested from tty SIGCONT 25 stopped process has been continued SIGTTIN 26 background tty read attempted SIGTTOU 27 background tty write attempted SIGVTALRM 28 virtual timer expired SIGPROF 29 profiling timer expired SIGXCPU 30 exceeded cpu limit SIGXFSZ 31 exceeded file size limit SIGWAITING 32 reserved signal no longer used by threading code SIGAIOCANCEL 33 reserved signal no longer used by threading code (formerly SIGLWP) SIGFREEZE 34 special signal used by CPR SIGTHAW 35 special signal used by CPR SIGCANCEL 36 reserved signal for thread cancellation SIGLOST 37 resource lost (eg, record-lock lost) SIGXRES 38 resource control exceeded SIGJVM1 39 reserved signal for Java Virtual Machine SIGJVM2 40 reserved signal for Java Virtual Machine SIGINFO 41 information request SIGRTMIN ((int)_sysconf(_SC_SIGRT_MIN)) first realtime signal SIGRTMAX ((int)_sysconf(_SC_SIGRT_MAX)) last realtime signal */ 
#[cfg(target_os = "solaris")] const SIGNALS_SIZE: usize = 46; #[cfg(target_os = "illumos")] const SIGNALS_SIZE: usize = 47; #[cfg(any(target_os = "solaris", target_os = "illumos"))] static ALL_SIGNALS: [&str; SIGNALS_SIZE] = [ "HUP", "INT", "QUIT", "ILL", "TRAP", "IOT", "ABRT", "EMT", "FPE", "KILL", "BUS", "SEGV", "SYS", "PIPE", "ALRM", "TERM", "USR1", "USR2", "CLD", "CHLD", "PWR", "WINCH", "URG", "POLL", "IO", "STOP", "TSTP", "CONT", "TTIN", "TTOU", "VTALRM", "PROF", "XCPU", "XFSZ", "WAITING", "AIOCANCEL", #[cfg(target_os = "illumos")] "LWP", "FREEZE", "THAW", "CANCEL", "LOST", "XRES", "JVM1", "JVM2", "INFO", "RTMIN", "RTMAX", ]; /* The following signals are defined in AIX: SIGHUP hangup, generated when terminal disconnects SIGINT interrupt, generated from terminal special char SIGQUIT quit, generated from terminal special char SIGILL illegal instruction (not reset when caught) SIGTRAP trace trap (not reset when caught) SIGABRT abort process SIGEMT EMT instruction SIGFPE floating point exception SIGKILL kill (cannot be caught or ignored) SIGBUS bus error (specification exception) SIGSEGV segmentation violation SIGSYS bad argument to system call SIGPIPE write on a pipe with no one to read it SIGALRM alarm clock timeout SIGTERM software termination signal SIGURG urgent condition on I/O channel SIGSTOP stop (cannot be caught or ignored) SIGTSTP interactive stop SIGCONT continue (cannot be caught or ignored) SIGCHLD sent to parent on child stop or exit SIGTTIN background read attempted from control terminal SIGTTOU background write attempted to control terminal SIGIO I/O possible, or completed SIGXCPU cpu time limit exceeded (see setrlimit()) SIGXFSZ file size limit exceeded (see setrlimit()) SIGMSG input data is in the ring buffer SIGWINCH window size changed SIGPWR power-fail restart SIGUSR1 user defined signal 1 SIGUSR2 user defined signal 2 SIGPROF profiling time alarm (see setitimer) SIGDANGER system crash imminent; free up some page space SIGVTALRM virtual time 
alarm (see setitimer) SIGMIGRATE migrate process SIGPRE programming exception SIGVIRT AIX virtual time alarm SIGTALRM per-thread alarm clock */ #[cfg(target_os = "aix")] pub static ALL_SIGNALS: [&str; 37] = [ "HUP", "INT", "QUIT", "ILL", "TRAP", "ABRT", "EMT", "FPE", "KILL", "BUS", "SEGV", "SYS", "PIPE", "ALRM", "TERM", "URG", "STOP", "TSTP", "CONT", "CHLD", "TTIN", "TTOU", "IO", "XCPU", "XFSZ", "MSG", "WINCH", "PWR", "USR1", "USR2", "PROF", "DANGER", "VTALRM", "MIGRATE", "PRE", "VIRT", "TALRM", ]; /// Returns the signal number for a given signal name or value. pub fn signal_by_name_or_value(signal_name_or_value: &str) -> Option { let signal_name_upcase = signal_name_or_value.to_uppercase(); if let Ok(value) = signal_name_upcase.parse() { if is_signal(value) { return Some(value); } else { return None; } } let signal_name = signal_name_upcase.trim_start_matches("SIG"); ALL_SIGNALS.iter().position(|&s| s == signal_name) } /// Returns true if the given number is a valid signal number. pub fn is_signal(num: usize) -> bool { num < ALL_SIGNALS.len() } /// Returns the signal name for a given signal value. pub fn signal_name_by_value(signal_value: usize) -> Option<&'static str> { ALL_SIGNALS.get(signal_value).copied() } /// Returns the default signal value. #[cfg(unix)] pub fn enable_pipe_errors() -> Result<(), Errno> { // We pass the error as is, the return value would just be Ok(SigDfl), so we can safely ignore it. // SAFETY: this function is safe as long as we do not use a custom SigHandler -- we use the default one. unsafe { signal(SIGPIPE, SigDfl) }.map(|_| ()) } /// Ignores the SIGINT signal. #[cfg(unix)] pub fn ignore_interrupts() -> Result<(), Errno> { // We pass the error as is, the return value would just be Ok(SigIgn), so we can safely ignore it. // SAFETY: this function is safe as long as we do not use a custom SigHandler -- we use the default one. 
unsafe { signal(SIGINT, SigIgn) }.map(|_| ()) } #[test] fn signal_by_value() { assert_eq!(signal_by_name_or_value("0"), Some(0)); for (value, _signal) in ALL_SIGNALS.iter().enumerate() { assert_eq!(signal_by_name_or_value(&value.to_string()), Some(value)); } } #[test] fn signal_by_short_name() { for (value, signal) in ALL_SIGNALS.iter().enumerate() { assert_eq!(signal_by_name_or_value(signal), Some(value)); } } #[test] fn signal_by_long_name() { for (value, signal) in ALL_SIGNALS.iter().enumerate() { assert_eq!( signal_by_name_or_value(&format!("SIG{signal}")), Some(value) ); } } #[test] fn name() { for (value, signal) in ALL_SIGNALS.iter().enumerate() { assert_eq!(signal_name_by_value(value), Some(*signal)); } } uucore-0.0.30/src/lib/features/sum.rs000064400000000000000000000346531046102023000155460ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore memmem algo //! Implementations of digest functions, like md5 and sha1. //! //! The [`Digest`] trait represents the interface for providing inputs //! to these digest functions and accessing the resulting hash. The //! [`DigestWriter`] struct provides a wrapper around [`Digest`] that //! implements the [`Write`] trait, for use in situations where calling //! [`write`] would be useful. 
use std::io::Write; use hex::encode; #[cfg(windows)] use memchr::memmem; pub trait Digest { fn new() -> Self where Self: Sized; fn hash_update(&mut self, input: &[u8]); fn hash_finalize(&mut self, out: &mut [u8]); fn reset(&mut self); fn output_bits(&self) -> usize; fn output_bytes(&self) -> usize { self.output_bits().div_ceil(8) } fn result_str(&mut self) -> String { let mut buf: Vec = vec![0; self.output_bytes()]; self.hash_finalize(&mut buf); encode(buf) } } /// first element of the tuple is the blake2b state /// second is the number of output bits pub struct Blake2b(blake2b_simd::State, usize); impl Blake2b { /// Return a new Blake2b instance with a custom output bytes length pub fn with_output_bytes(output_bytes: usize) -> Self { let mut params = blake2b_simd::Params::new(); params.hash_length(output_bytes); let state = params.to_state(); Self(state, output_bytes * 8) } } impl Digest for Blake2b { fn new() -> Self { // by default, Blake2b output is 512 bits long (= 64B) Self::with_output_bytes(64) } fn hash_update(&mut self, input: &[u8]) { self.0.update(input); } fn hash_finalize(&mut self, out: &mut [u8]) { let hash_result = &self.0.finalize(); out.copy_from_slice(hash_result.as_bytes()); } fn reset(&mut self) { *self = Self::with_output_bytes(self.output_bytes()); } fn output_bits(&self) -> usize { self.1 } } pub struct Blake3(blake3::Hasher); impl Digest for Blake3 { fn new() -> Self { Self(blake3::Hasher::new()) } fn hash_update(&mut self, input: &[u8]) { self.0.update(input); } fn hash_finalize(&mut self, out: &mut [u8]) { let hash_result = &self.0.finalize(); out.copy_from_slice(hash_result.as_bytes()); } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 256 } } pub struct Sm3(sm3::Sm3); impl Digest for Sm3 { fn new() -> Self { Self(::new()) } fn hash_update(&mut self, input: &[u8]) { ::update(&mut self.0, input); } fn hash_finalize(&mut self, out: &mut [u8]) { out.copy_from_slice(&::finalize(self.0.clone())); } fn reset(&mut 
self) { *self = Self::new(); } fn output_bits(&self) -> usize { 256 } } // NOTE: CRC_TABLE_LEN *must* be <= 256 as we cast 0..CRC_TABLE_LEN to u8 const CRC_TABLE_LEN: usize = 256; pub struct CRC { state: u32, size: usize, crc_table: [u32; CRC_TABLE_LEN], } impl CRC { fn generate_crc_table() -> [u32; CRC_TABLE_LEN] { let mut table = [0; CRC_TABLE_LEN]; for (i, elt) in table.iter_mut().enumerate().take(CRC_TABLE_LEN) { *elt = Self::crc_entry(i as u8); } table } fn crc_entry(input: u8) -> u32 { let mut crc = (input as u32) << 24; let mut i = 0; while i < 8 { let if_condition = crc & 0x8000_0000; let if_body = (crc << 1) ^ 0x04c1_1db7; let else_body = crc << 1; // NOTE: i feel like this is easier to understand than emulating an if statement in bitwise // ops let condition_table = [else_body, if_body]; crc = condition_table[(if_condition != 0) as usize]; i += 1; } crc } fn update(&mut self, input: u8) { self.state = (self.state << 8) ^ self.crc_table[((self.state >> 24) as usize ^ input as usize) & 0xFF]; } } impl Digest for CRC { fn new() -> Self { Self { state: 0, size: 0, crc_table: Self::generate_crc_table(), } } fn hash_update(&mut self, input: &[u8]) { for &elt in input { self.update(elt); } self.size += input.len(); } fn hash_finalize(&mut self, out: &mut [u8]) { let mut sz = self.size; while sz != 0 { self.update(sz as u8); sz >>= 8; } self.state = !self.state; out.copy_from_slice(&self.state.to_ne_bytes()); } fn result_str(&mut self) -> String { let mut _out: Vec = vec![0; 4]; self.hash_finalize(&mut _out); format!("{}", self.state) } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 256 } } pub struct CRC32B(crc32fast::Hasher); impl Digest for CRC32B { fn new() -> Self { Self(crc32fast::Hasher::new()) } fn hash_update(&mut self, input: &[u8]) { self.0.update(input); } fn hash_finalize(&mut self, out: &mut [u8]) { let result = self.0.clone().finalize(); let slice = result.to_be_bytes(); out.copy_from_slice(&slice); } fn reset(&mut 
self) { self.0.reset(); } fn output_bits(&self) -> usize { 32 } fn result_str(&mut self) -> String { let mut out = [0; 4]; self.hash_finalize(&mut out); format!("{}", u32::from_be_bytes(out)) } } pub struct BSD { state: u16, } impl Digest for BSD { fn new() -> Self { Self { state: 0 } } fn hash_update(&mut self, input: &[u8]) { for &byte in input { self.state = (self.state >> 1) + ((self.state & 1) << 15); self.state = self.state.wrapping_add(u16::from(byte)); } } fn hash_finalize(&mut self, out: &mut [u8]) { out.copy_from_slice(&self.state.to_ne_bytes()); } fn result_str(&mut self) -> String { let mut _out: Vec = vec![0; 2]; self.hash_finalize(&mut _out); format!("{}", self.state) } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 128 } } pub struct SYSV { state: u32, } impl Digest for SYSV { fn new() -> Self { Self { state: 0 } } fn hash_update(&mut self, input: &[u8]) { for &byte in input { self.state = self.state.wrapping_add(u32::from(byte)); } } fn hash_finalize(&mut self, out: &mut [u8]) { self.state = (self.state & 0xffff) + (self.state >> 16); self.state = (self.state & 0xffff) + (self.state >> 16); out.copy_from_slice(&(self.state as u16).to_ne_bytes()); } fn result_str(&mut self) -> String { let mut _out: Vec = vec![0; 2]; self.hash_finalize(&mut _out); format!("{}", self.state) } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 512 } } // Implements the Digest trait for sha2 / sha3 algorithms with fixed output macro_rules! 
impl_digest_common { ($algo_type: ty, $size: expr) => { impl Digest for $algo_type { fn new() -> Self { Self(Default::default()) } fn hash_update(&mut self, input: &[u8]) { digest::Digest::update(&mut self.0, input); } fn hash_finalize(&mut self, out: &mut [u8]) { digest::Digest::finalize_into_reset(&mut self.0, out.into()); } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { $size } } }; } // Implements the Digest trait for sha2 / sha3 algorithms with variable output macro_rules! impl_digest_shake { ($algo_type: ty) => { impl Digest for $algo_type { fn new() -> Self { Self(Default::default()) } fn hash_update(&mut self, input: &[u8]) { digest::Update::update(&mut self.0, input); } fn hash_finalize(&mut self, out: &mut [u8]) { digest::ExtendableOutputReset::finalize_xof_reset_into(&mut self.0, out); } fn reset(&mut self) { *self = Self::new(); } fn output_bits(&self) -> usize { 0 } } }; } pub struct Md5(md5::Md5); pub struct Sha1(sha1::Sha1); pub struct Sha224(sha2::Sha224); pub struct Sha256(sha2::Sha256); pub struct Sha384(sha2::Sha384); pub struct Sha512(sha2::Sha512); impl_digest_common!(Md5, 128); impl_digest_common!(Sha1, 160); impl_digest_common!(Sha224, 224); impl_digest_common!(Sha256, 256); impl_digest_common!(Sha384, 384); impl_digest_common!(Sha512, 512); pub struct Sha3_224(sha3::Sha3_224); pub struct Sha3_256(sha3::Sha3_256); pub struct Sha3_384(sha3::Sha3_384); pub struct Sha3_512(sha3::Sha3_512); impl_digest_common!(Sha3_224, 224); impl_digest_common!(Sha3_256, 256); impl_digest_common!(Sha3_384, 384); impl_digest_common!(Sha3_512, 512); pub struct Shake128(sha3::Shake128); pub struct Shake256(sha3::Shake256); impl_digest_shake!(Shake128); impl_digest_shake!(Shake256); /// A struct that writes to a digest. /// /// This struct wraps a [`Digest`] and provides a [`Write`] /// implementation that passes input bytes directly to the /// [`Digest::hash_update`]. 
/// /// On Windows, if `binary` is `false`, then the [`write`] /// implementation replaces instances of "\r\n" with "\n" before passing /// the input bytes to the [`digest`]. pub struct DigestWriter<'a> { digest: &'a mut Box, /// Whether to write to the digest in binary mode or text mode on Windows. /// /// If this is `false`, then instances of "\r\n" are replaced with /// "\n" before passing input bytes to the [`digest`]. #[allow(dead_code)] binary: bool, /// Whether the previous #[allow(dead_code)] was_last_character_carriage_return: bool, // TODO These are dead code only on non-Windows operating systems. // It might be better to use a `#[cfg(windows)]` guard here. } impl<'a> DigestWriter<'a> { pub fn new(digest: &'a mut Box, binary: bool) -> Self { let was_last_character_carriage_return = false; DigestWriter { digest, binary, was_last_character_carriage_return, } } pub fn finalize(&mut self) -> bool { if self.was_last_character_carriage_return { self.digest.hash_update(b"\r"); true } else { false } } } impl Write for DigestWriter<'_> { #[cfg(not(windows))] fn write(&mut self, buf: &[u8]) -> std::io::Result { self.digest.hash_update(buf); Ok(buf.len()) } #[cfg(windows)] fn write(&mut self, buf: &[u8]) -> std::io::Result { if self.binary { self.digest.hash_update(buf); return Ok(buf.len()); } // The remaining code handles Windows text mode, where we must // replace each occurrence of "\r\n" with "\n". // // First, if the last character written was "\r" and the first // character in the current buffer to write is not "\n", then we // need to write the "\r" that we buffered from the previous // call to `write()`. let n = buf.len(); if self.was_last_character_carriage_return && n > 0 && buf[0] != b'\n' { self.digest.hash_update(b"\r"); } // Next, find all occurrences of "\r\n", inputting the slice // just before the "\n" in the previous instance of "\r\n" and // the beginning of this "\r\n". 
let mut i_prev = 0; for i in memmem::find_iter(buf, b"\r\n") { self.digest.hash_update(&buf[i_prev..i]); i_prev = i + 1; } // Finally, check whether the last character is "\r". If so, // buffer it until we know that the next character is not "\n", // which can only be known on the next call to `write()`. // // This all assumes that `write()` will be called on adjacent // blocks of the input. if n > 0 && buf[n - 1] == b'\r' { self.was_last_character_carriage_return = true; self.digest.hash_update(&buf[i_prev..n - 1]); } else { self.was_last_character_carriage_return = false; self.digest.hash_update(&buf[i_prev..n]); } // Even though we dropped a "\r" for each "\r\n" we found, we // still report the number of bytes written as `n`. This is // because the meaning of the returned number is supposed to be // the number of bytes consumed by the writer, so that if the // calling code were calling `write()` in a loop, it would know // where the next contiguous slice of the buffer starts. Ok(n) } fn flush(&mut self) -> std::io::Result<()> { Ok(()) } } #[cfg(test)] mod tests { /// Test for replacing a "\r\n" sequence with "\n" when the "\r" is /// at the end of one block and the "\n" is at the beginning of the /// next block, when reading in blocks. #[cfg(windows)] #[test] fn test_crlf_across_blocks() { use std::io::Write; use super::Digest; use super::DigestWriter; use super::Md5; // Writing "\r" in one call to `write()`, and then "\n" in another. let mut digest = Box::new(Md5::new()) as Box; let mut writer_crlf = DigestWriter::new(&mut digest, false); writer_crlf.write_all(b"\r").unwrap(); writer_crlf.write_all(b"\n").unwrap(); writer_crlf.finalize(); let result_crlf = digest.result_str(); // We expect "\r\n" to be replaced with "\n" in text mode on Windows. 
/// A terminal device, identified by its kind and number.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Teletype {
    /// `/dev/tty<N>`
    Tty(u64),
    /// `/dev/ttyS<N>`
    TtyS(u64),
    /// `/dev/pts/<N>`
    Pts(u64),
    /// No terminal; rendered as `?`.
    Unknown,
}

impl Display for Teletype {
    /// Writes the device path for the terminal, or `?` for [`Teletype::Unknown`].
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::Tty(id) => write!(f, "/dev/tty{}", id),
            Self::TtyS(id) => write!(f, "/dev/ttyS{}", id),
            Self::Pts(id) => write!(f, "/dev/pts/{}", id),
            Self::Unknown => write!(f, "?"),
        }
    }
}

impl TryFrom<String> for Teletype {
    type Error = ();

    /// Same as the `&str` conversion.
    fn try_from(value: String) -> Result<Self, Self::Error> {
        Self::try_from(value.as_str())
    }
}

impl TryFrom<&str> for Teletype {
    type Error = ();

    /// Parses `"?"` as [`Teletype::Unknown`]; everything else is treated as a
    /// device path.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        if value == "?" {
            return Ok(Self::Unknown);
        }

        Self::try_from(PathBuf::from(value))
    }
}

impl TryFrom<PathBuf> for Teletype {
    type Error = ();

    /// Recognizes three shapes: `**/pts/<N>`, `**/ttyS<N>` and `**/tty<N>`.
    fn try_from(value: PathBuf) -> Result<Self, Self::Error> {
        // Case 1: a `pts` component followed by the number, e.g. `/dev/pts/3`.
        let mut components = value.iter();
        if let (Some(_), Some(num)) = (components.find(|it| *it == "pts"), components.next()) {
            return num
                .to_str()
                .ok_or(())?
                .parse::<u64>()
                .map_err(|_| ())
                .map(Teletype::Pts);
        }

        let path = value.to_str().ok_or(())?;

        // Parses the number that follows `prefix` in the last path component.
        let number_after = |prefix: &str| {
            value
                .iter()
                .next_back()?
                .to_str()?
                .strip_prefix(prefix)?
                .parse::<u64>()
                .ok()
        };

        // `ttyS` must be tested first because `tty` is a prefix of it.
        if path.contains("ttyS") {
            // Case 2
            number_after("ttyS").ok_or(()).map(Teletype::TtyS)
        } else if path.contains("tty") {
            // Case 3
            number_after("tty").ok_or(()).map(Teletype::Tty)
        } else {
            Err(())
        }
    }
}
use clap::{Command, Arg, ArgMatches}; //! use uucore::update_control::{self, UpdateMode}; //! //! fn main() { //! let matches = Command::new("command") //! .arg(update_control::arguments::update()) //! .arg(update_control::arguments::update_no_args()) //! .get_matches_from(vec![ //! "command", "--update=older" //! ]); //! //! let update_mode = update_control::determine_update_mode(&matches); //! //! // handle cases //! if update_mode == UpdateMode::ReplaceIfOlder { //! // do //! } else { //! unreachable!() //! } //! } //! ``` use clap::ArgMatches; /// Available update mode #[derive(Clone, Debug, Eq, PartialEq)] pub enum UpdateMode { /// --update=`all`, `` ReplaceAll, /// --update=`none` ReplaceNone, /// --update=`older` /// -u ReplaceIfOlder, ReplaceNoneFail, } pub mod arguments { //! Pre-defined arguments for update functionality. use crate::shortcut_value_parser::ShortcutValueParser; use clap::ArgAction; /// `--update` argument pub static OPT_UPDATE: &str = "update"; /// `-u` argument pub static OPT_UPDATE_NO_ARG: &str = "u"; /// `--update` argument, defaults to `older` if no values are provided pub fn update() -> clap::Arg { clap::Arg::new(OPT_UPDATE) .long("update") .help("move only when the SOURCE file is newer than the destination file or when the destination file is missing") .value_parser(ShortcutValueParser::new(["none", "all", "older","none-fail"])) .num_args(0..=1) .default_missing_value("older") .require_equals(true) .overrides_with("update") .action(clap::ArgAction::Set) } /// `-u` argument pub fn update_no_args() -> clap::Arg { clap::Arg::new(OPT_UPDATE_NO_ARG) .short('u') .help("like --update but does not accept an argument") .action(ArgAction::SetTrue) } } /// Determine the "mode" for the update operation to perform, if any. /// /// Parses the backup options and converts them to an instance of /// `UpdateMode` for further processing. 
/// /// Takes [`clap::ArgMatches`] as argument which **must** contain the options /// from [`arguments::update()`] or [`arguments::update_no_args()`]. Otherwise /// the `ReplaceAll` mode is returned unconditionally. /// /// # Examples /// /// Here's how one would integrate the update mode determination into an /// application. /// /// ``` /// #[macro_use] /// extern crate uucore; /// use uucore::update_control::{self, UpdateMode}; /// use clap::{Command, Arg, ArgMatches}; /// /// fn main() { /// let matches = Command::new("command") /// .arg(update_control::arguments::update()) /// .arg(update_control::arguments::update_no_args()) /// .get_matches_from(vec![ /// "command", "--update=all" /// ]); /// /// let update_mode = update_control::determine_update_mode(&matches); /// assert_eq!(update_mode, UpdateMode::ReplaceAll) /// } pub fn determine_update_mode(matches: &ArgMatches) -> UpdateMode { if let Some(mode) = matches.get_one::(arguments::OPT_UPDATE) { match mode.as_str() { "all" => UpdateMode::ReplaceAll, "none" => UpdateMode::ReplaceNone, "older" => UpdateMode::ReplaceIfOlder, "none-fail" => UpdateMode::ReplaceNoneFail, _ => unreachable!("other args restricted by clap"), } } else if matches.get_flag(arguments::OPT_UPDATE_NO_ARG) { // short form of this option is equivalent to using --update=older UpdateMode::ReplaceIfOlder } else { // no option was present UpdateMode::ReplaceAll } } uucore-0.0.30/src/lib/features/uptime.rs000064400000000000000000000242411046102023000162350ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore gettime BOOTTIME clockid boottime formated nusers loadavg getloadavg //! Provides functions to get system uptime, number of users and load average. 
// The code was originally written in uu_uptime // (https://github.com/uutils/coreutils/blob/main/src/uu/uptime/src/uptime.rs) // but was eventually moved here. // See https://github.com/uutils/coreutils/pull/7289 for discussion. use crate::error::{UError, UResult}; use chrono::Local; use libc::time_t; use thiserror::Error; #[derive(Debug, Error)] pub enum UptimeError { #[error("could not retrieve system uptime")] SystemUptime, #[error("could not retrieve system load average")] SystemLoadavg, #[error("Windows does not have an equivalent to the load average on Unix-like systems")] WindowsLoadavg, #[error("boot time larger than current time")] BootTime, } impl UError for UptimeError { fn code(&self) -> i32 { 1 } } /// Returns the formatted time string, e.g. "12:34:56" pub fn get_formatted_time() -> String { Local::now().time().format("%H:%M:%S").to_string() } /// Get the system uptime /// /// # Arguments /// /// boot_time: Option - Manually specify the boot time, or None to try to get it from the system. /// /// # Returns /// /// Returns a UResult with the uptime in seconds if successful, otherwise an UptimeError. #[cfg(target_os = "openbsd")] pub fn get_uptime(_boot_time: Option) -> UResult { use libc::clock_gettime; use libc::CLOCK_BOOTTIME; use libc::c_int; use libc::timespec; let mut tp: timespec = timespec { tv_sec: 0, tv_nsec: 0, }; let raw_tp = &mut tp as *mut timespec; // OpenBSD prototype: clock_gettime(clk_id: ::clockid_t, tp: *mut ::timespec) -> ::c_int; let ret: c_int = unsafe { clock_gettime(CLOCK_BOOTTIME, raw_tp) }; if ret == 0 { #[cfg(target_pointer_width = "64")] let uptime: i64 = tp.tv_sec; #[cfg(not(target_pointer_width = "64"))] let uptime: i64 = tp.tv_sec.into(); Ok(uptime) } else { Err(UptimeError::SystemUptime) } } /// Get the system uptime /// /// # Arguments /// /// boot_time: Option - Manually specify the boot time, or None to try to get it from the system. 
///
/// # Returns
///
/// Returns a UResult with the uptime in seconds if successful, otherwise an UptimeError.
///
/// `/proc/uptime` is tried first; if it cannot be read or parsed, the uptime
/// is derived from `boot_time` or, failing that, from the `BOOT_TIME` record
/// of the login database.
#[cfg(unix)]
#[cfg(not(target_os = "openbsd"))]
pub fn get_uptime(boot_time: Option<time_t>) -> UResult<i64> {
    use crate::utmpx::Utmpx;
    use libc::BOOT_TIME;
    use std::fs::File;
    use std::io::Read;

    let mut proc_uptime_s = String::new();

    // The first whitespace-separated field is the uptime in (fractional)
    // seconds; only the integer part is kept.
    let proc_uptime = File::open("/proc/uptime")
        .ok()
        .and_then(|mut f| f.read_to_string(&mut proc_uptime_s).ok())
        .and_then(|_| proc_uptime_s.split_whitespace().next())
        .and_then(|s| s.split('.').next().unwrap_or("0").parse::<i64>().ok());

    if let Some(uptime) = proc_uptime {
        return Ok(uptime);
    }

    let boot_time = boot_time.or_else(|| {
        let records = Utmpx::iter_all_records();
        for line in records {
            match line.record_type() {
                BOOT_TIME => {
                    let dt = line.login_time();
                    if dt.unix_timestamp() > 0 {
                        return Some(dt.unix_timestamp() as time_t);
                    }
                }
                _ => continue,
            }
        }
        None
    });

    if let Some(t) = boot_time {
        let now = Local::now().timestamp();
        #[cfg(target_pointer_width = "64")]
        let boottime: i64 = t;
        #[cfg(not(target_pointer_width = "64"))]
        let boottime: i64 = t.into();
        if now < boottime {
            Err(UptimeError::BootTime)?;
        }
        return Ok(now - boottime);
    }

    Err(UptimeError::SystemUptime)?
}

/// Get the system uptime
///
/// The `_boot_time` argument is ignored on Windows.
///
/// # Returns
///
/// Returns a UResult with the uptime in seconds if successful, otherwise an UptimeError.
#[cfg(windows)]
pub fn get_uptime(_boot_time: Option<time_t>) -> UResult<i64> {
    use windows_sys::Win32::System::SystemInformation::GetTickCount64;
    // SAFETY: `GetTickCount64` takes no arguments and has no preconditions.
    let uptime_ms = unsafe { GetTickCount64() };
    // The tick count is in milliseconds while callers expect seconds. The
    // 64-bit counter also avoids the ~49.7 day wrap-around of `GetTickCount`.
    i64::try_from(uptime_ms / 1000).map_err(|_| UptimeError::SystemUptime.into())
}

/// Get the system uptime in a human-readable format
///
/// # Arguments
///
/// boot_time: Option<time_t> - Manually specify the boot time, or None to try to get it from the system.
///
/// # Returns
///
/// Returns a UResult with the uptime in a human-readable format(e.g. "1 day, 3:45") if successful, otherwise an UptimeError.
#[inline] pub fn get_formated_uptime(boot_time: Option) -> UResult { let up_secs = get_uptime(boot_time)?; if up_secs < 0 { Err(UptimeError::SystemUptime)?; } let up_days = up_secs / 86400; let up_hours = (up_secs - (up_days * 86400)) / 3600; let up_mins = (up_secs - (up_days * 86400) - (up_hours * 3600)) / 60; match up_days.cmp(&1) { std::cmp::Ordering::Equal => Ok(format!("{up_days:1} day, {up_hours:2}:{up_mins:02}")), std::cmp::Ordering::Greater => Ok(format!("{up_days:1} days {up_hours:2}:{up_mins:02}")), _ => Ok(format!("{up_hours:2}:{up_mins:02}")), } } /// Get the number of users currently logged in /// /// # Returns /// /// Returns the number of users currently logged in if successful, otherwise 0. #[cfg(unix)] #[cfg(not(target_os = "openbsd"))] // see: https://gitlab.com/procps-ng/procps/-/blob/4740a0efa79cade867cfc7b32955fe0f75bf5173/library/uptime.c#L63-L115 pub fn get_nusers() -> usize { use crate::utmpx::Utmpx; use libc::USER_PROCESS; let mut num_user = 0; Utmpx::iter_all_records().for_each(|ut| { if ut.record_type() == USER_PROCESS { num_user += 1; } }); num_user } /// Get the number of users currently logged in /// /// # Returns /// /// Returns the number of users currently logged in if successful, otherwise 0 #[cfg(target_os = "openbsd")] pub fn get_nusers(file: &str) -> usize { use utmp_classic::{parse_from_path, UtmpEntry}; let mut nusers = 0; let entries = match parse_from_path(file) { Some(e) => e, None => return 0, }; for entry in entries { if let UtmpEntry::UTMP { line: _, user, host: _, time: _, } = entry { if !user.is_empty() { nusers += 1; } } } nusers } /// Get the number of users currently logged in /// /// # Returns /// /// Returns the number of users currently logged in if successful, otherwise 0 #[cfg(target_os = "windows")] pub fn get_nusers() -> usize { use std::ptr; use windows_sys::Win32::System::RemoteDesktop::*; let mut num_user = 0; unsafe { let mut session_info_ptr = ptr::null_mut(); let mut session_count = 0; let result = 
WTSEnumerateSessionsW(
                WTS_CURRENT_SERVER_HANDLE,
                0,
                1,
                &mut session_info_ptr,
                &mut session_count,
            );
        if result == 0 {
            return 0;
        }

        let sessions = std::slice::from_raw_parts(session_info_ptr, session_count as usize);

        for session in sessions {
            let mut buffer: *mut u16 = ptr::null_mut();
            let mut bytes_returned = 0;

            // Info class 5 is `WTSUserName`.
            let result = WTSQuerySessionInformationW(
                WTS_CURRENT_SERVER_HANDLE,
                session.SessionId,
                5,
                &mut buffer,
                &mut bytes_returned,
            );
            if result == 0 || buffer.is_null() {
                continue;
            }

            // The `W` API hands back a NUL-terminated UTF-16 string, so the
            // name is non-empty exactly when its first code unit is not the
            // terminator. Reading it as a byte string would miss names whose
            // first code unit has a zero low byte.
            if bytes_returned >= 2 && *buffer != 0 {
                num_user += 1;
            }

            WTSFreeMemory(buffer as _);
        }

        WTSFreeMemory(session_info_ptr as _);
    }

    num_user
}

/// Format the number of users to a human-readable string
///
/// # Returns
///
/// e.g. "0 user", "1 user", "2 users"
#[inline]
pub fn format_nusers(nusers: usize) -> String {
    match nusers {
        0 => "0 user".to_string(),
        1 => "1 user".to_string(),
        _ => format!("{} users", nusers),
    }
}

/// Get the number of users currently logged in in a human-readable format
///
/// On OpenBSD the count is read from `/var/run/utmp`.
///
/// # Returns
///
/// e.g. "0 user", "1 user", "2 users"
#[inline]
pub fn get_formatted_nusers() -> String {
    #[cfg(not(target_os = "openbsd"))]
    return format_nusers(get_nusers());

    #[cfg(target_os = "openbsd")]
    format_nusers(get_nusers("/var/run/utmp"))
}

/// Get the system load average
///
/// # Returns
///
/// Returns a UResult with the load average if successful, otherwise an UptimeError.
/// The load average is a tuple of three floating point numbers representing the 1-minute, 5-minute, and 15-minute load averages.
#[cfg(unix)]
pub fn get_loadavg() -> UResult<(f64, f64, f64)> {
    use crate::libc::c_double;
    use libc::getloadavg;

    let mut avg: [c_double; 3] = [0.0; 3];
    // SAFETY: `avg` has room for the three samples requested.
    let loads: i32 = unsafe { getloadavg(avg.as_mut_ptr(), 3) };

    if loads == -1 {
        Err(UptimeError::SystemLoadavg)?
} else {
        Ok((avg[0], avg[1], avg[2]))
    }
}

/// Get the system load average
/// Windows does not have an equivalent to the load average on Unix-like systems.
///
/// # Returns
///
/// Returns a UResult with an UptimeError.
#[cfg(windows)]
pub fn get_loadavg() -> UResult<(f64, f64, f64)> {
    Err(UptimeError::WindowsLoadavg)?
}

/// Get the system load average in a human-readable format
///
/// # Returns
///
/// Returns a UResult with the load average in a human-readable format if successful, otherwise an UptimeError.
/// e.g. "load average: 0.00, 0.00, 0.00"
#[inline]
pub fn get_formatted_loadavg() -> UResult<String> {
    let loadavg = get_loadavg()?;
    Ok(format!(
        "load average: {:.2}, {:.2}, {:.2}",
        loadavg.0, loadavg.1, loadavg.2
    ))
}
uucore-0.0.30/src/lib/features/utmpx.rs000064400000000000000000000250071046102023000161100ustar 00000000000000// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//
//! Aims to provide platform-independent methods to obtain login records
//!
//! **ONLY** support linux, macos and freebsd for the time being
//!
//! # Examples:
//!
//! ```
//! use uucore::utmpx::Utmpx;
//! for ut in Utmpx::iter_all_records() {
//!     if ut.is_user_process() {
//!         println!("{}: {}", ut.host(), ut.user())
//!     }
//! }
//! ```
//!
//! Specifying the path to login record:
//!
//! ```
//! use uucore::utmpx::Utmpx;
//! for ut in Utmpx::iter_all_records_from("/some/where/else") {
//!     if ut.is_user_process() {
//!         println!("{}: {}", ut.host(), ut.user())
//!     }
//! }
//!
``` pub extern crate time; use std::ffi::CString; use std::io::Result as IOResult; use std::marker::PhantomData; use std::os::unix::ffi::OsStrExt; use std::path::Path; use std::ptr; use std::sync::{Mutex, MutexGuard}; pub use self::ut::*; pub use libc::endutxent; pub use libc::getutxent; pub use libc::setutxent; use libc::utmpx; #[cfg(any(target_vendor = "apple", target_os = "linux", target_os = "netbsd"))] pub use libc::utmpxname; /// # Safety /// Just fixed the clippy warning. Please add description here. #[cfg(target_os = "freebsd")] pub unsafe extern "C" fn utmpxname(_file: *const libc::c_char) -> libc::c_int { 0 } use crate::*; // import macros from `../../macros.rs` // In case the c_char array doesn't end with NULL macro_rules! chars2string { ($arr:expr) => { $arr.iter() .take_while(|i| **i > 0) .map(|&i| i as u8 as char) .collect::() }; } #[cfg(target_os = "linux")] mod ut { pub static DEFAULT_FILE: &str = "/var/run/utmp"; pub use libc::__UT_HOSTSIZE as UT_HOSTSIZE; pub use libc::__UT_LINESIZE as UT_LINESIZE; pub use libc::__UT_NAMESIZE as UT_NAMESIZE; pub const UT_IDSIZE: usize = 4; pub use libc::ACCOUNTING; pub use libc::BOOT_TIME; pub use libc::DEAD_PROCESS; pub use libc::EMPTY; pub use libc::INIT_PROCESS; pub use libc::LOGIN_PROCESS; pub use libc::NEW_TIME; pub use libc::OLD_TIME; pub use libc::RUN_LVL; pub use libc::USER_PROCESS; } #[cfg(target_vendor = "apple")] mod ut { pub static DEFAULT_FILE: &str = "/var/run/utmpx"; pub use libc::_UTX_HOSTSIZE as UT_HOSTSIZE; pub use libc::_UTX_IDSIZE as UT_IDSIZE; pub use libc::_UTX_LINESIZE as UT_LINESIZE; pub use libc::_UTX_USERSIZE as UT_NAMESIZE; pub use libc::ACCOUNTING; pub use libc::BOOT_TIME; pub use libc::DEAD_PROCESS; pub use libc::EMPTY; pub use libc::INIT_PROCESS; pub use libc::LOGIN_PROCESS; pub use libc::NEW_TIME; pub use libc::OLD_TIME; pub use libc::RUN_LVL; pub use libc::SHUTDOWN_TIME; pub use libc::SIGNATURE; pub use libc::USER_PROCESS; } #[cfg(target_os = "freebsd")] mod ut { pub static 
DEFAULT_FILE: &str = "";

    pub const UT_LINESIZE: usize = 16;
    pub const UT_NAMESIZE: usize = 32;
    pub const UT_IDSIZE: usize = 8;
    pub const UT_HOSTSIZE: usize = 128;

    pub use libc::BOOT_TIME;
    pub use libc::DEAD_PROCESS;
    pub use libc::EMPTY;
    pub use libc::INIT_PROCESS;
    pub use libc::LOGIN_PROCESS;
    pub use libc::NEW_TIME;
    pub use libc::OLD_TIME;
    pub use libc::SHUTDOWN_TIME;
    pub use libc::USER_PROCESS;
}

#[cfg(target_os = "netbsd")]
mod ut {
    pub static DEFAULT_FILE: &str = "/var/run/utmpx";

    // NOTE(review): `ACCOUNTING` is declared here and also re-exported from
    // `libc` further down in this module — confirm that both can coexist on
    // NetBSD.
    pub const ACCOUNTING: usize = 9;
    pub const SHUTDOWN_TIME: usize = 11;

    pub use libc::_UTX_HOSTSIZE as UT_HOSTSIZE;
    pub use libc::_UTX_IDSIZE as UT_IDSIZE;
    pub use libc::_UTX_LINESIZE as UT_LINESIZE;
    pub use libc::_UTX_USERSIZE as UT_NAMESIZE;

    pub use libc::ACCOUNTING;
    pub use libc::DEAD_PROCESS;
    pub use libc::EMPTY;
    pub use libc::INIT_PROCESS;
    pub use libc::LOGIN_PROCESS;
    pub use libc::NEW_TIME;
    pub use libc::OLD_TIME;
    pub use libc::RUN_LVL;
    pub use libc::SIGNATURE;
    pub use libc::USER_PROCESS;
}

/// A login record
///
/// Wraps a copy of the C `utmpx` struct and exposes its fields through
/// accessor methods.
pub struct Utmpx {
    inner: utmpx,
}

impl Utmpx {
    /// A.K.A. ut.ut_type
    pub fn record_type(&self) -> i16 {
        self.inner.ut_type
    }
    /// A.K.A. ut.ut_pid
    pub fn pid(&self) -> i32 {
        self.inner.ut_pid
    }
    /// A.K.A. ut.ut_id
    pub fn terminal_suffix(&self) -> String {
        chars2string!(self.inner.ut_id)
    }
    /// A.K.A. ut.ut_user
    pub fn user(&self) -> String {
        chars2string!(self.inner.ut_user)
    }
    /// A.K.A. ut.ut_host
    pub fn host(&self) -> String {
        chars2string!(self.inner.ut_host)
    }
    /// A.K.A. ut.ut_line
    pub fn tty_device(&self) -> String {
        chars2string!(self.inner.ut_line)
    }
    /// A.K.A.
ut.ut_tv pub fn login_time(&self) -> time::OffsetDateTime { #[allow(clippy::unnecessary_cast)] let ts_nanos: i128 = (1_000_000_000_i64 * self.inner.ut_tv.tv_sec as i64 + 1_000_i64 * self.inner.ut_tv.tv_usec as i64) .into(); let local_offset = time::OffsetDateTime::now_local().map_or_else(|_| time::UtcOffset::UTC, |v| v.offset()); time::OffsetDateTime::from_unix_timestamp_nanos(ts_nanos) .unwrap() .to_offset(local_offset) } /// A.K.A. ut.ut_exit /// /// Return (e_termination, e_exit) #[cfg(target_os = "linux")] pub fn exit_status(&self) -> (i16, i16) { (self.inner.ut_exit.e_termination, self.inner.ut_exit.e_exit) } /// A.K.A. ut.ut_exit /// /// Return (0, 0) on Non-Linux platform #[cfg(not(target_os = "linux"))] pub fn exit_status(&self) -> (i16, i16) { (0, 0) } /// Consumes the `Utmpx`, returning the underlying C struct utmpx pub fn into_inner(self) -> utmpx { self.inner } /// check if the record is a user process pub fn is_user_process(&self) -> bool { !self.user().is_empty() && self.record_type() == USER_PROCESS } /// Canonicalize host name using DNS pub fn canon_host(&self) -> IOResult { let host = self.host(); let (hostname, display) = host.split_once(':').unwrap_or((&host, "")); if !hostname.is_empty() { use dns_lookup::{getaddrinfo, AddrInfoHints}; const AI_CANONNAME: i32 = 0x2; let hints = AddrInfoHints { flags: AI_CANONNAME, ..AddrInfoHints::default() }; if let Ok(sockets) = getaddrinfo(Some(hostname), None, Some(hints)) { let sockets = sockets.collect::>>()?; for socket in sockets { if let Some(ai_canonname) = socket.canonname { return Ok(if display.is_empty() { ai_canonname } else { format!("{ai_canonname}:{display}") }); } } } else { // GNU coreutils has this behavior return Ok(hostname.to_string()); } } Ok(host.to_string()) } /// Iterate through all the utmp records. /// /// This will use the default location, or the path [`Utmpx::iter_all_records_from`] /// was most recently called with. 
/// /// Only one instance of [`UtmpxIter`] may be active at a time. This /// function will block as long as one is still active. Beware! pub fn iter_all_records() -> UtmpxIter { let iter = UtmpxIter::new(); unsafe { // This can technically fail, and it would be nice to detect that, // but it doesn't return anything so we'd have to do nasty things // with errno. setutxent(); } iter } /// Iterate through all the utmp records from a specific file. /// /// No failure is reported or detected. /// /// This function affects subsequent calls to [`Utmpx::iter_all_records`]. /// /// The same caveats as for [`Utmpx::iter_all_records`] apply. pub fn iter_all_records_from>(path: P) -> UtmpxIter { let iter = UtmpxIter::new(); let path = CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); unsafe { // In glibc, utmpxname() only fails if there's not enough memory // to copy the string. // Solaris returns 1 on success instead of 0. Supposedly there also // exist systems where it returns void. // GNU who on Debian seems to output nothing if an invalid filename // is specified, no warning or anything. // So this function is pretty crazy and we don't try to detect errors. // Not much we can do besides pray. utmpxname(path.as_ptr()); setutxent(); } iter } } // On some systems these functions are not thread-safe. On others they're // thread-local. Therefore we use a mutex to allow only one guard to exist at // a time, and make sure UtmpxIter cannot be sent across threads. // // I believe the only technical memory unsafety that could happen is a data // race while copying the data out of the pointer returned by getutxent(), but // ordinary race conditions are also very much possible. static LOCK: LazyLock> = LazyLock::new(|| Mutex::new(())); /// Iterator of login records pub struct UtmpxIter { #[allow(dead_code)] guard: MutexGuard<'static, ()>, /// Ensure UtmpxIter is !Send. Technically redundant because MutexGuard /// is also !Send. 
phantom: PhantomData<std::rc::Rc<()>>,
}

impl UtmpxIter {
    /// Acquire the global lock and wrap the guard in a new iterator.
    fn new() -> Self {
        // PoisonErrors can safely be ignored
        let guard = LOCK.lock().unwrap_or_else(|err| err.into_inner());
        Self {
            guard,
            phantom: PhantomData,
        }
    }
}

impl Iterator for UtmpxIter {
    type Item = Utmpx;
    /// Fetch the next record, or `None` once `getutxent()` returns null.
    fn next(&mut self) -> Option<Self::Item> {
        unsafe {
            let res = getutxent();
            if res.is_null() {
                None
            } else {
                // The data behind this pointer will be replaced by the next
                // call to getutxent(), so we have to read it now.
                // All the strings live inline in the struct as arrays, which
                // makes things easier.
                Some(Utmpx {
                    inner: ptr::read(res as *const _),
                })
            }
        }
    }
}

impl Drop for UtmpxIter {
    /// Close the login database; the lock guard is released afterwards when
    /// the fields are dropped.
    fn drop(&mut self) {
        unsafe {
            endutxent();
        }
    }
}
uucore-0.0.30/src/lib/features/version_cmp.rs000064400000000000000000000260111046102023000172530ustar 00000000000000// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

//! Compare two version strings.

use std::cmp::Ordering;

/// Compares the non-digit parts of a version.
/// Special cases: ~ are before everything else, even ends ("a~" < "a") /// Letters are before non-letters fn version_non_digit_cmp(a: &str, b: &str) -> Ordering { let mut a_chars = a.chars(); let mut b_chars = b.chars(); loop { match (a_chars.next(), b_chars.next()) { (Some(c1), Some(c2)) if c1 == c2 => {} (None, None) => return Ordering::Equal, (_, Some('~')) => return Ordering::Greater, (Some('~'), _) => return Ordering::Less, (None, Some(_)) => return Ordering::Less, (Some(_), None) => return Ordering::Greater, (Some(c1), Some(c2)) if c1.is_ascii_alphabetic() && !c2.is_ascii_alphabetic() => { return Ordering::Less } (Some(c1), Some(c2)) if !c1.is_ascii_alphabetic() && c2.is_ascii_alphabetic() => { return Ordering::Greater } (Some(c1), Some(c2)) => return c1.cmp(&c2), } } } /// Remove file endings matching the regex (\.[A-Za-z~][A-Za-z0-9~]*)*$ fn remove_file_ending(a: &str) -> &str { let mut ending_start = None; let mut prev_was_dot = false; for (idx, char) in a.char_indices() { if char == '.' { if ending_start.is_none() || prev_was_dot { ending_start = Some(idx); } prev_was_dot = true; } else if prev_was_dot { prev_was_dot = false; if !char.is_ascii_alphabetic() && char != '~' { ending_start = None; } } else if !char.is_ascii_alphanumeric() && char != '~' { ending_start = None; } } if prev_was_dot { ending_start = None; } if let Some(ending_start) = ending_start { &a[..ending_start] } else { a } } /// Compare two version strings. pub fn version_cmp(mut a: &str, mut b: &str) -> Ordering { let str_cmp = a.cmp(b); if str_cmp == Ordering::Equal { return str_cmp; } // Special cases: // 1. Empty strings match (a.is_empty(), b.is_empty()) { (true, false) => return Ordering::Less, (false, true) => return Ordering::Greater, (true, true) => unreachable!(), (false, false) => {} } // 2. Dots match (a == ".", b == ".") { (true, false) => return Ordering::Less, (false, true) => return Ordering::Greater, (true, true) => unreachable!(), (false, false) => {} } // 3. 
Two Dots match (a == "..", b == "..") { (true, false) => return Ordering::Less, (false, true) => return Ordering::Greater, (true, true) => unreachable!(), (false, false) => {} } // 4. Strings starting with a dot match (a.starts_with('.'), b.starts_with('.')) { (true, false) => return Ordering::Less, (false, true) => return Ordering::Greater, (true, true) => { // Strip the leading dot for later comparisons a = &a[1..]; b = &b[1..]; } _ => {} } // Try to strip file extensions let (mut a, mut b) = match (remove_file_ending(a), remove_file_ending(b)) { (a_stripped, b_stripped) if a_stripped == b_stripped => { // If both would be the same after stripping file extensions, don't strip them. (a, b) } stripped => stripped, }; // 1. Compare leading non-numerical part // 2. Compare leading numerical part // 3. Repeat while !a.is_empty() || !b.is_empty() { let a_numerical_start = a.find(|c: char| c.is_ascii_digit()).unwrap_or(a.len()); let b_numerical_start = b.find(|c: char| c.is_ascii_digit()).unwrap_or(b.len()); let a_str = &a[..a_numerical_start]; let b_str = &b[..b_numerical_start]; match version_non_digit_cmp(a_str, b_str) { Ordering::Equal => {} ord => return ord, } a = &a[a_numerical_start..]; b = &b[a_numerical_start..]; let a_numerical_end = a.find(|c: char| !c.is_ascii_digit()).unwrap_or(a.len()); let b_numerical_end = b.find(|c: char| !c.is_ascii_digit()).unwrap_or(b.len()); let a_str = a[..a_numerical_end].trim_start_matches('0'); let b_str = b[..b_numerical_end].trim_start_matches('0'); match a_str.len().cmp(&b_str.len()) { Ordering::Equal => {} ord => return ord, } match a_str.cmp(b_str) { Ordering::Equal => {} ord => return ord, } a = &a[a_numerical_end..]; b = &b[b_numerical_end..]; } Ordering::Equal } #[cfg(test)] mod tests { use crate::version_cmp::version_cmp; use std::cmp::Ordering; #[test] fn test_version_cmp() { // Identical strings assert_eq!(version_cmp("hello", "hello"), Ordering::Equal); assert_eq!(version_cmp("file12", "file12"), Ordering::Equal); 
assert_eq!( version_cmp("file12-suffix", "file12-suffix"), Ordering::Equal ); assert_eq!( version_cmp("file12-suffix24", "file12-suffix24"), Ordering::Equal ); // Shortened names assert_eq!(version_cmp("world", "wo"), Ordering::Greater,); assert_eq!(version_cmp("hello10wo", "hello10world"), Ordering::Less,); // Simple names assert_eq!(version_cmp("world", "hello"), Ordering::Greater,); assert_eq!(version_cmp("hello", "world"), Ordering::Less); assert_eq!(version_cmp("apple", "ant"), Ordering::Greater); assert_eq!(version_cmp("ant", "apple"), Ordering::Less); // Uppercase letters assert_eq!( version_cmp("Beef", "apple"), Ordering::Less, "Uppercase letters are sorted before all lowercase letters" ); assert_eq!(version_cmp("Apple", "apple"), Ordering::Less); assert_eq!(version_cmp("apple", "aPple"), Ordering::Greater); // Numbers assert_eq!( version_cmp("100", "20"), Ordering::Greater, "Greater numbers are greater even if they start with a smaller digit", ); assert_eq!( version_cmp("20", "20"), Ordering::Equal, "Equal numbers are equal" ); assert_eq!( version_cmp("15", "200"), Ordering::Less, "Small numbers are smaller" ); // Comparing numbers with other characters assert_eq!( version_cmp("1000", "apple"), Ordering::Less, "Numbers are sorted before other characters" ); assert_eq!( // spell-checker:disable-next-line version_cmp("file1000", "fileapple"), Ordering::Less, "Numbers in the middle of the name are sorted before other characters" ); // Leading zeroes assert_eq!( version_cmp("012", "12"), Ordering::Equal, "A single leading zero does not make a difference" ); assert_eq!( version_cmp("000800", "0000800"), Ordering::Equal, "Multiple leading zeros do not make a difference" ); // Numbers and other characters combined assert_eq!(version_cmp("ab10", "aa11"), Ordering::Greater); assert_eq!( version_cmp("aa10", "aa11"), Ordering::Less, "Numbers after other characters are handled correctly." 
); assert_eq!( version_cmp("aa2", "aa100"), Ordering::Less, "Numbers after alphabetical characters are handled correctly." ); assert_eq!( version_cmp("aa10bb", "aa11aa"), Ordering::Less, "Number is used even if alphabetical characters after it differ." ); assert_eq!( version_cmp("aa10aa0010", "aa11aa1"), Ordering::Less, "Second number is ignored if the first number differs." ); assert_eq!( version_cmp("aa10aa0010", "aa10aa1"), Ordering::Greater, "Second number is used if the rest is equal." ); assert_eq!( version_cmp("aa10aa0010", "aa00010aa1"), Ordering::Greater, "Second number is used if the rest is equal up to leading zeroes of the first number." ); assert_eq!( version_cmp("aa10aa0022", "aa010aa022"), Ordering::Equal, "Test multiple numeric values with leading zeros" ); assert_eq!( version_cmp("file-1.4", "file-1.13"), Ordering::Less, "Periods are handled as normal text, not as a decimal point." ); // Greater than u64::Max // u64 == 18446744073709551615 so this should be plenty: // 20000000000000000000000 assert_eq!( version_cmp("aa2000000000000000000000bb", "aa002000000000000000000001bb"), Ordering::Less, "Numbers larger than u64::MAX are handled correctly without crashing" ); assert_eq!( version_cmp("aa2000000000000000000000bb", "aa002000000000000000000000bb"), Ordering::Equal, "Leading zeroes for numbers larger than u64::MAX are \ handled correctly without crashing" ); assert_eq!( version_cmp(" a", "a"), Ordering::Greater, "Whitespace is after letters because letters are before non-letters" ); assert_eq!( version_cmp("a~", "ab"), Ordering::Less, "A tilde is before other letters" ); assert_eq!( version_cmp("a~", "a"), Ordering::Less, "A tilde is before the line end" ); assert_eq!( version_cmp("~", ""), Ordering::Greater, "A tilde is after the empty string" ); assert_eq!( version_cmp(".f", ".1"), Ordering::Greater, "if both start with a dot it is ignored for the comparison" ); // The following tests are incompatible with GNU as of 2021/06. 
// I think that's because of a bug in GNU, reported as https://lists.gnu.org/archive/html/bug-coreutils/2021-06/msg00045.html assert_eq!( version_cmp("a..a", "a.+"), Ordering::Less, ".a is stripped before the comparison" ); assert_eq!( version_cmp("a.", "a+"), Ordering::Greater, ". is not stripped before the comparison" ); assert_eq!( version_cmp("a\0a", "a"), Ordering::Greater, "NULL bytes are handled comparison" ); } } uucore-0.0.30/src/lib/features/wide.rs000064400000000000000000000020661046102023000156630ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. use std::ffi::{OsStr, OsString}; use std::os::windows::ffi::{OsStrExt, OsStringExt}; pub trait ToWide { fn to_wide(&self) -> Vec; fn to_wide_null(&self) -> Vec; } impl ToWide for T where T: AsRef, { fn to_wide(&self) -> Vec { self.as_ref().encode_wide().collect() } fn to_wide_null(&self) -> Vec { self.as_ref().encode_wide().chain(Some(0)).collect() } } pub trait FromWide { fn from_wide(wide: &[u16]) -> Self; fn from_wide_null(wide: &[u16]) -> Self; } impl FromWide for String { fn from_wide(wide: &[u16]) -> Self { OsString::from_wide(wide).to_string_lossy().into_owned() } fn from_wide_null(wide: &[u16]) -> Self { let len = wide.iter().take_while(|&&c| c != 0).count(); OsString::from_wide(&wide[..len]) .to_string_lossy() .into_owned() } } uucore-0.0.30/src/lib/features.rs000064400000000000000000000037431046102023000147360ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
// features ~ feature-gated modules (core/bundler file) #[cfg(feature = "backup-control")] pub mod backup_control; #[cfg(feature = "buf-copy")] pub mod buf_copy; #[cfg(feature = "checksum")] pub mod checksum; #[cfg(feature = "colors")] pub mod colors; #[cfg(feature = "custom-tz-fmt")] pub mod custom_tz_fmt; #[cfg(feature = "encoding")] pub mod encoding; #[cfg(feature = "format")] pub mod format; #[cfg(feature = "fs")] pub mod fs; #[cfg(feature = "fsext")] pub mod fsext; #[cfg(feature = "lines")] pub mod lines; #[cfg(feature = "quoting-style")] pub mod quoting_style; #[cfg(feature = "ranges")] pub mod ranges; #[cfg(feature = "ringbuffer")] pub mod ringbuffer; #[cfg(feature = "sum")] pub mod sum; #[cfg(feature = "update-control")] pub mod update_control; #[cfg(feature = "uptime")] pub mod uptime; #[cfg(feature = "version-cmp")] pub mod version_cmp; // * (platform-specific) feature-gated modules // ** non-windows (i.e. Unix + Fuchsia) #[cfg(all(not(windows), feature = "mode"))] pub mod mode; // ** unix-only #[cfg(all(unix, feature = "entries"))] pub mod entries; #[cfg(all(unix, feature = "perms"))] pub mod perms; #[cfg(all(unix, any(feature = "pipes", feature = "buf-copy")))] pub mod pipes; #[cfg(all(target_os = "linux", feature = "proc-info"))] pub mod proc_info; #[cfg(all(unix, feature = "process"))] pub mod process; #[cfg(all(target_os = "linux", feature = "tty"))] pub mod tty; #[cfg(all(unix, feature = "fsxattr"))] pub mod fsxattr; #[cfg(all(unix, not(target_os = "fuchsia"), feature = "signals"))] pub mod signals; #[cfg(all( unix, not(target_os = "android"), not(target_os = "fuchsia"), not(target_os = "openbsd"), not(target_os = "redox"), not(target_env = "musl"), feature = "utmpx" ))] pub mod utmpx; // ** windows-only #[cfg(all(windows, feature = "wide"))] pub mod wide; uucore-0.0.30/src/lib/lib.rs000064400000000000000000000365031046102023000136660ustar 00000000000000// This file is part of the uutils coreutils package. 
// // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! library ~ (core/bundler file) // #![deny(missing_docs)] //TODO: enable this // // spell-checker:ignore sigaction SIGBUS SIGSEGV // * feature-gated external crates (re-shared as public internal modules) #[cfg(feature = "libc")] pub extern crate libc; #[cfg(all(feature = "windows-sys", target_os = "windows"))] pub extern crate windows_sys; //## internal modules mod features; // feature-gated code modules mod macros; // crate macros (macro_rules-type; exported to `crate::...`) mod mods; // core cross-platform modules mod parser; // string parsing modules pub use uucore_procs::*; // * cross-platform modules pub use crate::mods::display; pub use crate::mods::error; pub use crate::mods::io; pub use crate::mods::line_ending; pub use crate::mods::os; pub use crate::mods::panic; pub use crate::mods::posix; // * string parsing modules pub use crate::parser::parse_glob; pub use crate::parser::parse_size; pub use crate::parser::parse_time; pub use crate::parser::shortcut_value_parser; // * feature-gated modules #[cfg(feature = "backup-control")] pub use crate::features::backup_control; #[cfg(feature = "buf-copy")] pub use crate::features::buf_copy; #[cfg(feature = "checksum")] pub use crate::features::checksum; #[cfg(feature = "colors")] pub use crate::features::colors; #[cfg(feature = "custom-tz-fmt")] pub use crate::features::custom_tz_fmt; #[cfg(feature = "encoding")] pub use crate::features::encoding; #[cfg(feature = "format")] pub use crate::features::format; #[cfg(feature = "fs")] pub use crate::features::fs; #[cfg(feature = "lines")] pub use crate::features::lines; #[cfg(feature = "quoting-style")] pub use crate::features::quoting_style; #[cfg(feature = "ranges")] pub use crate::features::ranges; #[cfg(feature = "ringbuffer")] pub use crate::features::ringbuffer; #[cfg(feature = "sum")] pub use crate::features::sum; #[cfg(feature = 
"update-control")] pub use crate::features::update_control; #[cfg(feature = "uptime")] pub use crate::features::uptime; #[cfg(feature = "version-cmp")] pub use crate::features::version_cmp; // * (platform-specific) feature-gated modules // ** non-windows (i.e. Unix + Fuchsia) #[cfg(all(not(windows), feature = "mode"))] pub use crate::features::mode; // ** unix-only #[cfg(all(unix, feature = "entries"))] pub use crate::features::entries; #[cfg(all(unix, feature = "perms"))] pub use crate::features::perms; #[cfg(all(unix, any(feature = "pipes", feature = "buf-copy")))] pub use crate::features::pipes; #[cfg(all(unix, feature = "process"))] pub use crate::features::process; #[cfg(all(unix, not(target_os = "fuchsia"), feature = "signals"))] pub use crate::features::signals; #[cfg(all( unix, not(target_os = "android"), not(target_os = "fuchsia"), not(target_os = "openbsd"), not(target_os = "redox"), not(target_env = "musl"), feature = "utmpx" ))] pub use crate::features::utmpx; // ** windows-only #[cfg(all(windows, feature = "wide"))] pub use crate::features::wide; #[cfg(feature = "fsext")] pub use crate::features::fsext; #[cfg(all(unix, feature = "fsxattr"))] pub use crate::features::fsxattr; //## core functions #[cfg(unix)] use nix::errno::Errno; #[cfg(unix)] use nix::sys::signal::{ sigaction, SaFlags, SigAction, SigHandler::SigDfl, SigSet, Signal::SIGBUS, Signal::SIGSEGV, }; use std::borrow::Cow; use std::ffi::{OsStr, OsString}; use std::io::{BufRead, BufReader}; use std::iter; #[cfg(unix)] use std::os::unix::ffi::{OsStrExt, OsStringExt}; use std::str; use std::sync::{atomic::Ordering, LazyLock}; /// Disables the custom signal handlers installed by Rust for stack-overflow handling. With those custom signal handlers processes ignore the first SIGBUS and SIGSEGV signal they receive. /// See for details. 
#[cfg(unix)] pub fn disable_rust_signal_handlers() -> Result<(), Errno> { unsafe { sigaction( SIGSEGV, &SigAction::new(SigDfl, SaFlags::empty(), SigSet::all()), ) }?; unsafe { sigaction( SIGBUS, &SigAction::new(SigDfl, SaFlags::empty(), SigSet::all()), ) }?; Ok(()) } /// Execute utility code for `util`. /// /// This macro expands to a main function that invokes the `uumain` function in `util` /// Exits with code returned by `uumain`. #[macro_export] macro_rules! bin { ($util:ident) => { pub fn main() { use std::io::Write; // suppress extraneous error output for SIGPIPE failures/panics uucore::panic::mute_sigpipe_panic(); // execute utility code let code = $util::uumain(uucore::args_os()); // (defensively) flush stdout for utility prior to exit; see if let Err(e) = std::io::stdout().flush() { eprintln!("Error flushing stdout: {}", e); } std::process::exit(code); } }; } /// Generate the usage string for clap. /// /// This function does two things. It indents all but the first line to align /// the lines because clap adds "Usage: " to the first line. And it replaces /// all occurrences of `{}` with the execution phrase and returns the resulting /// `String`. It does **not** support more advanced formatting features such /// as `{0}`. pub fn format_usage(s: &str) -> String { let s = s.replace('\n', &format!("\n{}", " ".repeat(7))); s.replace("{}", crate::execution_phrase()) } /// Used to check if the utility is the second argument. /// Used to check if we were called as a multicall binary (`coreutils `) pub fn get_utility_is_second_arg() -> bool { crate::macros::UTILITY_IS_SECOND_ARG.load(Ordering::SeqCst) } /// Change the value of `UTILITY_IS_SECOND_ARG` to true /// Used to specify that the utility is the second argument. pub fn set_utility_is_second_arg() { crate::macros::UTILITY_IS_SECOND_ARG.store(true, Ordering::SeqCst); } // args_os() can be expensive to call, it copies all of argv before iterating. // So if we want only the first arg or so it's overkill. 
We cache it. static ARGV: LazyLock> = LazyLock::new(|| wild::args_os().collect()); static UTIL_NAME: LazyLock = LazyLock::new(|| { let base_index = usize::from(get_utility_is_second_arg()); let is_man = usize::from(ARGV[base_index].eq("manpage")); let argv_index = base_index + is_man; ARGV[argv_index].to_string_lossy().into_owned() }); /// Derive the utility name. pub fn util_name() -> &'static str { &UTIL_NAME } static EXECUTION_PHRASE: LazyLock = LazyLock::new(|| { if get_utility_is_second_arg() { ARGV.iter() .take(2) .map(|os_str| os_str.to_string_lossy().into_owned()) .collect::>() .join(" ") } else { ARGV[0].to_string_lossy().into_owned() } }); /// Derive the complete execution phrase for "usage". pub fn execution_phrase() -> &'static str { &EXECUTION_PHRASE } /// Args contains arguments passed to the utility. /// It is a trait that extends `Iterator`. /// It provides utility functions to collect the arguments into a `Vec`. /// The collected `Vec` can be lossy or ignore invalid encoding. pub trait Args: Iterator + Sized { /// Collects the iterator into a `Vec`, lossily converting the `OsString`s to `Strings`. fn collect_lossy(self) -> Vec { self.map(|s| s.to_string_lossy().into_owned()).collect() } /// Collects the iterator into a `Vec`, removing any elements that contain invalid encoding. fn collect_ignore(self) -> Vec { self.filter_map(|s| s.into_string().ok()).collect() } } impl + Sized> Args for T {} /// Returns an iterator over the command line arguments as `OsString`s. /// args_os() can be expensive to call pub fn args_os() -> impl Iterator { ARGV.iter().cloned() } /// Read a line from stdin and check whether the first character is `'y'` or `'Y'` pub fn read_yes() -> bool { let mut s = String::new(); match std::io::stdin().read_line(&mut s) { Ok(_) => matches!(s.chars().next(), Some('y' | 'Y')), _ => false, } } /// Converts an `OsStr` to a UTF-8 `&[u8]`. 
///
/// This always succeeds on unix platforms,
/// and fails on other platforms if the string can't be coerced to UTF-8.
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
    #[cfg(unix)]
    let bytes = os_string.as_bytes();

    #[cfg(not(unix))]
    let bytes = os_string
        .to_str()
        .ok_or_else(|| {
            mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
        })?
        .as_bytes();

    Ok(bytes)
}

/// Performs a potentially lossy conversion from `OsStr` to UTF-8 bytes.
///
/// This is always lossless on unix platforms,
/// and wraps [`OsStr::to_string_lossy`] on non-unix platforms.
pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<'_, [u8]> {
    #[cfg(unix)]
    let bytes = Cow::from(os_string.as_bytes());

    #[cfg(not(unix))]
    let bytes = match os_string.to_string_lossy() {
        Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
        Cow::Owned(owned) => Cow::from(owned.into_bytes()),
    };

    bytes
}

/// Converts a `&[u8]` to an `&OsStr`,
/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms.
///
/// This always succeeds on unix platforms,
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
    #[cfg(unix)]
    let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));

    #[cfg(not(unix))]
    let os_str = Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(|_| {
        mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr")
    })?));

    Ok(os_str)
}

/// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
///
/// This always succeeds on unix platforms,
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
pub fn os_string_from_vec(vec: Vec) -> mods::error::UResult { #[cfg(unix)] let s = OsString::from_vec(vec); #[cfg(not(unix))] let s = OsString::from(String::from_utf8(vec).map_err(|_| { mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments") })?); Ok(s) } /// Equivalent to `std::BufRead::lines` which outputs each line as a `Vec`, /// which avoids panicking on non UTF-8 input. pub fn read_byte_lines( mut buf_reader: BufReader, ) -> impl Iterator> { iter::from_fn(move || { let mut buf = Vec::with_capacity(256); let size = buf_reader.read_until(b'\n', &mut buf).ok()?; if size == 0 { return None; } // Trim (\r)\n if buf.ends_with(b"\n") { buf.pop(); if buf.ends_with(b"\r") { buf.pop(); } } Some(buf) }) } /// Equivalent to `std::BufRead::lines` which outputs each line as an `OsString` /// This won't panic on non UTF-8 characters on Unix, /// but it still will on Windows. pub fn read_os_string_lines( buf_reader: BufReader, ) -> impl Iterator { read_byte_lines(buf_reader).map(|byte_line| os_string_from_vec(byte_line).expect("UTF-8 error")) } /// Prompt the user with a formatted string and returns `true` if they reply `'y'` or `'Y'` /// /// This macro functions accepts the same syntax as `format!`. The prompt is written to /// `stderr`. A space is also printed at the end for nice spacing between the prompt and /// the user input. Any input starting with `'y'` or `'Y'` is interpreted as `yes`. /// /// # Examples /// ``` /// use uucore::prompt_yes; /// let file = "foo.rs"; /// prompt_yes!("Do you want to delete '{}'?", file); /// ``` /// will print something like below to `stderr` (with `util_name` substituted by the actual /// util name) and will wait for user input. /// ```txt /// util_name: Do you want to delete 'foo.rs'? /// ``` #[macro_export] macro_rules! 
prompt_yes( ($($args:tt)+) => ({ use std::io::Write; eprint!("{}: ", uucore::util_name()); eprint!($($args)+); eprint!(" "); let res = std::io::stderr().flush().map_err(|err| { $crate::error::USimpleError::new(1, err.to_string()) }); uucore::show_if_err!(res); uucore::read_yes() }) ); #[cfg(test)] mod tests { use super::*; use std::ffi::OsStr; fn make_os_vec(os_str: &OsStr) -> Vec { vec![ OsString::from("test"), OsString::from("สวัสดี"), // spell-checker:disable-line os_str.to_os_string(), ] } #[cfg(any(unix, target_os = "redox"))] fn test_invalid_utf8_args_lossy(os_str: &OsStr) { // assert our string is invalid utf8 assert!(os_str.to_os_string().into_string().is_err()); let test_vec = make_os_vec(os_str); let collected_to_str = test_vec.clone().into_iter().collect_lossy(); // conservation of length - when accepting lossy conversion no arguments may be dropped assert_eq!(collected_to_str.len(), test_vec.len()); // first indices identical for index in 0..2 { assert_eq!(collected_to_str[index], test_vec[index].to_str().unwrap()); } // lossy conversion for string with illegal encoding is done assert_eq!( *collected_to_str[2], os_str.to_os_string().to_string_lossy() ); } #[cfg(any(unix, target_os = "redox"))] fn test_invalid_utf8_args_ignore(os_str: &OsStr) { // assert our string is invalid utf8 assert!(os_str.to_os_string().into_string().is_err()); let test_vec = make_os_vec(os_str); let collected_to_str = test_vec.clone().into_iter().collect_ignore(); // assert that the broken entry is filtered out assert_eq!(collected_to_str.len(), test_vec.len() - 1); // assert that the unbroken indices are converted as expected for index in 0..2 { assert_eq!( collected_to_str.get(index).unwrap(), test_vec.get(index).unwrap().to_str().unwrap() ); } } #[test] fn valid_utf8_encoding_args() { // create a vector containing only correct encoding let test_vec = make_os_vec(&OsString::from("test2")); // expect complete conversion without losses, even when lossy conversion is accepted let 
_ = test_vec.into_iter().collect_lossy(); } #[cfg(any(unix, target_os = "redox"))] #[test] fn invalid_utf8_args_unix() { use std::os::unix::ffi::OsStrExt; let source = [0x66, 0x6f, 0x80, 0x6f]; let os_str = OsStr::from_bytes(&source[..]); test_invalid_utf8_args_lossy(os_str); test_invalid_utf8_args_ignore(os_str); } #[test] fn test_format_usage() { assert_eq!(format_usage("expr EXPRESSION"), "expr EXPRESSION"); assert_eq!( format_usage("expr EXPRESSION\nexpr OPTION"), "expr EXPRESSION\n expr OPTION" ); } } uucore-0.0.30/src/lib/macros.rs000064400000000000000000000117571046102023000144100ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Macros for the uucore utilities. //! //! This module bundles all macros used across the uucore utilities. These //! include macros for reporting errors in various formats, aborting program //! execution and more. //! //! To make use of all macros in this module, they must be imported like so: //! //! ```ignore //! #[macro_use] //! extern crate uucore; //! ``` //! //! Alternatively, you can import single macros by importing them through their //! fully qualified name like this: //! //! ```no_run //! use uucore::show; //! ``` //! //! Here's an overview of the macros sorted by purpose //! //! - Print errors //! - From types implementing [`crate::error::UError`]: [`crate::show!`], //! [`crate::show_if_err!`] //! - From custom messages: [`crate::show_error!`] //! - Print warnings: [`crate::show_warning!`] // spell-checker:ignore sourcepath targetpath rustdoc use std::sync::atomic::AtomicBool; // This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
/// Whether we were called as a multicall binary (`coreutils `) pub static UTILITY_IS_SECOND_ARG: AtomicBool = AtomicBool::new(false); //==== /// Display a [`crate::error::UError`] and set global exit code. /// /// Prints the error message contained in an [`crate::error::UError`] to stderr /// and sets the exit code through [`crate::error::set_exit_code`]. The printed /// error message is prepended with the calling utility's name. A call to this /// macro will not finish program execution. /// /// # Examples /// /// The following example would print a message "Some error occurred" and set /// the utility's exit code to 2. /// /// ``` /// # #[macro_use] /// # extern crate uucore; /// /// use uucore::error::{self, USimpleError}; /// /// fn main() { /// let err = USimpleError::new(2, "Some error occurred."); /// show!(err); /// assert_eq!(error::get_exit_code(), 2); /// } /// ``` /// /// If not using [`crate::error::UError`], one may achieve the same behavior /// like this: /// /// ``` /// # #[macro_use] /// # extern crate uucore; /// /// use uucore::error::set_exit_code; /// /// fn main() { /// set_exit_code(2); /// show_error!("Some error occurred."); /// } /// ``` #[macro_export] macro_rules! show( ($err:expr) => ({ #[allow(unused_imports)] use $crate::error::UError; let e = $err; $crate::error::set_exit_code(e.code()); eprintln!("{}: {}", $crate::util_name(), e); }) ); /// Display an error and set global exit code in error case. /// /// Wraps around [`crate::show!`] and takes a [`crate::error::UResult`] instead of a /// [`crate::error::UError`] type. This macro invokes [`crate::show!`] if the /// [`crate::error::UResult`] is an `Err`-variant. 
This can be invoked directly /// on the result of a function call, like in the `install` utility: /// /// ```ignore /// show_if_err!(copy(sourcepath, &targetpath, b)); /// ``` /// /// # Examples /// /// ```ignore /// # #[macro_use] /// # extern crate uucore; /// # use uucore::error::{UError, UIoError, UResult, USimpleError}; /// /// # fn main() { /// let is_ok = Ok(1); /// // This does nothing at all /// show_if_err!(is_ok); /// /// let is_err = Err(USimpleError::new(1, "I'm an error").into()); /// // Calls `show!` on the contained USimpleError /// show_if_err!(is_err); /// # } /// ``` /// /// #[macro_export] macro_rules! show_if_err( ($res:expr) => ({ if let Err(e) = $res { $crate::show!(e); } }) ); /// Show an error to stderr in a similar style to GNU coreutils. /// /// Takes a [`format!`]-like input and prints it to stderr. The output is /// prepended with the current utility's name. /// /// # Examples /// /// ``` /// # #[macro_use] /// # extern crate uucore; /// # fn main() { /// show_error!("Couldn't apply {} to {}", "foo", "bar"); /// # } /// ``` #[macro_export] macro_rules! show_error( ($($args:tt)+) => ({ eprint!("{}: ", $crate::util_name()); eprintln!($($args)+); }) ); /// Print a warning message to stderr. /// /// Takes [`format!`]-compatible input and prepends it with the current /// utility's name and "warning: " before printing to stderr. /// /// # Examples /// /// ``` /// # #[macro_use] /// # extern crate uucore; /// # fn main() { /// // outputs : warning: Couldn't apply foo to bar /// show_warning!("Couldn't apply {} to {}", "foo", "bar"); /// # } /// ``` #[macro_export] macro_rules! show_warning( ($($args:tt)+) => ({ eprint!("{}: warning: ", $crate::util_name()); eprintln!($($args)+); }) ); /// Print a warning message to stderr, prepending the utility name. #[macro_export] macro_rules! 
show_warning_caps( ($($args:tt)+) => ({ eprint!("{}: WARNING: ", $crate::util_name()); eprintln!($($args)+); }) ); uucore-0.0.30/src/lib/mods/display.rs000064400000000000000000000052271046102023000155260ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Utilities for printing paths, with special attention paid to special //! characters and invalid unicode. //! //! For displaying paths in informational messages use `Quotable::quote`. This //! will wrap quotes around the filename and add the necessary escapes to make //! it copy/paste-able into a shell. //! //! For writing raw paths to stdout when the output should not be quoted or escaped, //! use `println_verbatim`. This will preserve invalid unicode. //! //! # Examples //! ```rust //! use std::path::Path; //! use uucore::display::{Quotable, println_verbatim}; //! //! let path = Path::new("foo/bar.baz"); //! //! println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'" //! println_verbatim(path)?; // Prints "foo/bar.baz" //! # Ok::<(), std::io::Error>(()) //! ``` use std::ffi::OsStr; use std::io::{self, Write as IoWrite}; #[cfg(unix)] use std::os::unix::ffi::OsStrExt; #[cfg(target_os = "wasi")] use std::os::wasi::ffi::OsStrExt; // These used to be defined here, but they live in their own crate now. pub use os_display::{Quotable, Quoted}; /// Print a path (or `OsStr`-like object) directly to stdout, with a trailing newline, /// without losing any information if its encoding is invalid. /// /// This function is appropriate for commands where printing paths is the point and the /// output is likely to be captured, like `pwd` and `basename`. For informational output /// use `Quotable::quote`. /// /// FIXME: This is lossy on Windows. It could probably be implemented using some low-level /// API that takes UTF-16, without going through io::Write. 
This is not a big priority /// because broken filenames are much rarer on Windows than on Unix. pub fn println_verbatim>(text: S) -> io::Result<()> { let stdout = io::stdout(); let mut stdout = stdout.lock(); #[cfg(any(unix, target_os = "wasi"))] { stdout.write_all(text.as_ref().as_bytes())?; stdout.write_all(b"\n")?; } #[cfg(not(any(unix, target_os = "wasi")))] { writeln!(stdout, "{}", std::path::Path::new(text.as_ref()).display())?; } Ok(()) } /// Like `println_verbatim`, without the trailing newline. pub fn print_verbatim>(text: S) -> io::Result<()> { let mut stdout = io::stdout(); #[cfg(any(unix, target_os = "wasi"))] { stdout.write_all(text.as_ref().as_bytes()) } #[cfg(not(any(unix, target_os = "wasi")))] { write!(stdout, "{}", std::path::Path::new(text.as_ref()).display()) } } uucore-0.0.30/src/lib/mods/error.rs000064400000000000000000000562451046102023000152200ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! All utils return exit with an exit code. Usually, the following scheme is used: //! * `0`: succeeded //! * `1`: minor problems //! * `2`: major problems //! //! This module provides types to reconcile these exit codes with idiomatic Rust error //! handling. This has a couple advantages over manually using [`std::process::exit`]: //! 1. It enables the use of `?`, `map_err`, `unwrap_or`, etc. in `uumain`. //! 1. It encourages the use of [`UResult`]/[`Result`] in functions in the utils. //! 1. The error messages are largely standardized across utils. //! 1. Standardized error messages can be created from external result types //! (i.e. [`std::io::Result`] & `clap::ClapResult`). //! 1. [`set_exit_code`] takes away the burden of manually tracking exit codes for non-fatal errors. //! //! # Usage //! The signature of a typical util should be: //! ```ignore //! 
fn uumain(args: impl uucore::Args) -> UResult<()> { //! ... //! } //! ``` //! [`UResult`] is a simple wrapper around [`Result`] with a custom error trait: [`UError`]. The //! most important difference with types implementing [`std::error::Error`] is that [`UError`]s //! can specify the exit code of the program when they are returned from `uumain`: //! * When `Ok` is returned, the code set with [`set_exit_code`] is used as exit code. If //! [`set_exit_code`] was not used, then `0` is used. //! * When `Err` is returned, the code corresponding with the error is used as exit code and the //! error message is displayed. //! //! Additionally, the errors can be displayed manually with the [`crate::show`] and [`crate::show_if_err`] macros: //! ```ignore //! let res = Err(USimpleError::new(1, "Error!!")); //! show_if_err!(res); //! // or //! if let Err(e) = res { //! show!(e); //! } //! ``` //! //! **Note**: The [`crate::show`] and [`crate::show_if_err`] macros set the exit code of the program using //! [`set_exit_code`]. See the documentation on that function for more information. //! //! # Guidelines //! * Use error types from `uucore` where possible. //! * Add error types to `uucore` if an error appears in multiple utils. //! * Prefer proper custom error types over [`ExitCode`] and [`USimpleError`]. //! * [`USimpleError`] may be used in small utils with simple error handling. //! * Using [`ExitCode`] is not recommended but can be useful for converting utils to use //! [`UResult`]. // spell-checker:ignore uioerror rustdoc use std::{ error::Error, fmt::{Display, Formatter}, sync::atomic::{AtomicI32, Ordering}, }; static EXIT_CODE: AtomicI32 = AtomicI32::new(0); /// Get the last exit code set with [`set_exit_code`]. /// The default value is `0`. pub fn get_exit_code() -> i32 { EXIT_CODE.load(Ordering::SeqCst) } /// Set the exit code for the program if `uumain` returns `Ok(())`. 
/// /// This function is most useful for non-fatal errors, for example when applying an operation to /// multiple files: /// ```ignore /// use uucore::error::{UResult, set_exit_code}; /// /// fn uumain(args: impl uucore::Args) -> UResult<()> { /// ... /// for file in files { /// let res = some_operation_that_might_fail(file); /// match res { /// Ok() => {}, /// Err(_) => set_exit_code(1), /// } /// } /// Ok(()) // If any of the operations failed, 1 is returned. /// } /// ``` pub fn set_exit_code(code: i32) { EXIT_CODE.store(code, Ordering::SeqCst); } /// Result type that should be returned by all utils. pub type UResult = Result>; /// Custom errors defined by the utils and `uucore`. /// /// All errors should implement [`std::error::Error`], [`std::fmt::Display`] and /// [`std::fmt::Debug`] and have an additional `code` method that specifies the /// exit code of the program if the error is returned from `uumain`. /// /// An example of a custom error from `ls`: /// /// ``` /// use uucore::{ /// display::Quotable, /// error::{UError, UResult} /// }; /// use std::{ /// error::Error, /// fmt::{Display, Debug}, /// path::PathBuf /// }; /// /// #[derive(Debug)] /// enum LsError { /// InvalidLineWidth(String), /// NoMetadata(PathBuf), /// } /// /// impl UError for LsError { /// fn code(&self) -> i32 { /// match self { /// LsError::InvalidLineWidth(_) => 2, /// LsError::NoMetadata(_) => 1, /// } /// } /// } /// /// impl Error for LsError {} /// /// impl Display for LsError { /// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { /// match self { /// LsError::InvalidLineWidth(s) => write!(f, "invalid line width: {}", s.quote()), /// LsError::NoMetadata(p) => write!(f, "could not open file: {}", p.quote()), /// } /// } /// } /// ``` /// /// The main routine would look like this: /// /// ```ignore /// #[uucore::main] /// pub fn uumain(args: impl uucore::Args) -> UResult<()> { /// // Perform computations here ... 
/// return Err(LsError::InvalidLineWidth(String::from("test")).into()) /// } /// ``` /// /// The call to `into()` is required to convert the `LsError` to /// [`Box`]. The implementation for `From` is provided automatically. /// /// A crate like [`quick_error`](https://crates.io/crates/quick-error) might /// also be used, but will still require an `impl` for the `code` method. pub trait UError: Error + Send { /// Error code of a custom error. /// /// Set a return value for each variant of an enum-type to associate an /// error code (which is returned to the system shell) with an error /// variant. /// /// # Example /// /// ``` /// use uucore::{ /// display::Quotable, /// error::UError /// }; /// use std::{ /// error::Error, /// fmt::{Display, Debug}, /// path::PathBuf /// }; /// /// #[derive(Debug)] /// enum MyError { /// Foo(String), /// Bar(PathBuf), /// Bing(), /// } /// /// impl UError for MyError { /// fn code(&self) -> i32 { /// match self { /// MyError::Foo(_) => 2, /// // All other errors yield the same error code, there's no /// // need to list them explicitly. /// _ => 1, /// } /// } /// } /// /// impl Error for MyError {} /// /// impl Display for MyError { /// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { /// use MyError as ME; /// match self { /// ME::Foo(s) => write!(f, "Unknown Foo: {}", s.quote()), /// ME::Bar(p) => write!(f, "Couldn't find Bar: {}", p.quote()), /// ME::Bing() => write!(f, "Exterminate!"), /// } /// } /// } /// ``` fn code(&self) -> i32 { 1 } /// Print usage help to a custom error. /// /// Return true or false to control whether a short usage help is printed /// below the error message. The usage help is in the format: "Try `{name} /// --help` for more information." and printed only if `true` is returned. 
/// /// # Example /// /// ``` /// use uucore::{ /// display::Quotable, /// error::UError /// }; /// use std::{ /// error::Error, /// fmt::{Display, Debug}, /// path::PathBuf /// }; /// /// #[derive(Debug)] /// enum MyError { /// Foo(String), /// Bar(PathBuf), /// Bing(), /// } /// /// impl UError for MyError { /// fn usage(&self) -> bool { /// match self { /// // This will have a short usage help appended /// MyError::Bar(_) => true, /// // These matches won't have a short usage help appended /// _ => false, /// } /// } /// } /// /// impl Error for MyError {} /// /// impl Display for MyError { /// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { /// use MyError as ME; /// match self { /// ME::Foo(s) => write!(f, "Unknown Foo: {}", s.quote()), /// ME::Bar(p) => write!(f, "Couldn't find Bar: {}", p.quote()), /// ME::Bing() => write!(f, "Exterminate!"), /// } /// } /// } /// ``` fn usage(&self) -> bool { false } } impl From for Box where T: UError + 'static, { fn from(t: T) -> Self { Box::new(t) } } /// A simple error type with an exit code and a message that implements [`UError`]. /// /// ``` /// use uucore::error::{UResult, USimpleError}; /// let err = USimpleError { code: 1, message: "error!".into()}; /// let res: UResult<()> = Err(err.into()); /// // or using the `new` method: /// let res: UResult<()> = Err(USimpleError::new(1, "error!")); /// ``` #[derive(Debug)] pub struct USimpleError { /// Exit code of the error. pub code: i32, /// Error message. pub message: String, } impl USimpleError { /// Create a new `USimpleError` with a given exit code and message. 
#[allow(clippy::new_ret_no_self)] pub fn new>(code: i32, message: S) -> Box { Box::new(Self { code, message: message.into(), }) } } impl Error for USimpleError {} impl Display for USimpleError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { self.message.fmt(f) } } impl UError for USimpleError { fn code(&self) -> i32 { self.code } } /// Wrapper type around [`std::io::Error`]. #[derive(Debug)] pub struct UUsageError { /// Exit code of the error. pub code: i32, /// Error message. pub message: String, } impl UUsageError { #[allow(clippy::new_ret_no_self)] /// Create a new `UUsageError` with a given exit code and message. pub fn new>(code: i32, message: S) -> Box { Box::new(Self { code, message: message.into(), }) } } impl Error for UUsageError {} impl Display for UUsageError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { self.message.fmt(f) } } impl UError for UUsageError { fn code(&self) -> i32 { self.code } fn usage(&self) -> bool { true } } /// Wrapper type around [`std::io::Error`]. /// /// The messages displayed by [`UIoError`] should match the error messages displayed by GNU /// coreutils. /// /// There are two ways to construct this type: with [`UIoError::new`] or by calling the /// [`FromIo::map_err_context`] method on a [`std::io::Result`] or [`std::io::Error`]. /// ``` /// use uucore::{ /// display::Quotable, /// error::{FromIo, UResult, UIoError, UError} /// }; /// use std::fs::File; /// use std::path::Path; /// let path = Path::new("test.txt"); /// /// // Manual construction /// let e: Box = UIoError::new( /// std::io::ErrorKind::NotFound, /// format!("cannot access {}", path.quote()) /// ); /// let res: UResult<()> = Err(e.into()); /// /// // Converting from an `std::io::Error`. 
/// let res: UResult = File::open(path).map_err_context(|| format!("cannot access {}", path.quote())); /// ``` #[derive(Debug)] pub struct UIoError { context: Option, inner: std::io::Error, } impl UIoError { #[allow(clippy::new_ret_no_self)] /// Create a new `UIoError` from an [`std::io::ErrorKind`] and a context message. /// /// The context is stored as-is and the kind is converted into the wrapped [`std::io::Error`]. pub fn new>(kind: std::io::ErrorKind, context: S) -> Box { Box::new(Self { context: Some(context.into()), inner: kind.into(), }) } } impl UError for UIoError {} impl Error for UIoError {} impl Display for UIoError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { use std::io::ErrorKind::*; let message; let message = if self.inner.raw_os_error().is_some() { // These are errors that come directly from the OS. // We want to normalize their messages across systems, // and we want to strip the "(os error X)" suffix. match self.inner.kind() { NotFound => "No such file or directory", PermissionDenied => "Permission denied", ConnectionRefused => "Connection refused", ConnectionReset => "Connection reset", ConnectionAborted => "Connection aborted", NotConnected => "Not connected", AddrInUse => "Address in use", AddrNotAvailable => "Address not available", BrokenPipe => "Broken pipe", AlreadyExists => "Already exists", WouldBlock => "Would block", InvalidInput => "Invalid input", InvalidData => "Invalid data", TimedOut => "Timed out", WriteZero => "Write zero", Interrupted => "Interrupted", UnexpectedEof => "Unexpected end of file", _ => { // TODO: When the new error variants // (https://github.com/rust-lang/rust/issues/86442) // are stabilized, we should add them to the match statement. message = strip_errno(&self.inner); &message } } } else { // These messages don't need as much normalization, and the above // messages wouldn't always be a good substitute. // For example, ErrorKind::NotFound doesn't necessarily mean it was // a file that was not found. // There are also errors with entirely custom messages.
message = self.inner.to_string(); &message }; if let Some(ctx) = &self.context { write!(f, "{ctx}: {message}") } else { write!(f, "{message}") } } } /// Strip the trailing " (os error XX)" from io error strings. pub fn strip_errno(err: &std::io::Error) -> String { let mut msg = err.to_string(); if let Some(pos) = msg.find(" (os error ") { msg.truncate(pos); } msg } /// Enables the conversion from [`std::io::Error`] to [`UError`] and from [`std::io::Result`] to /// [`UResult`]. pub trait FromIo { /// Map the error context of an [`std::io::Error`] or [`std::io::Result`] to a custom error fn map_err_context(self, context: impl FnOnce() -> String) -> T; } impl FromIo> for std::io::Error { fn map_err_context(self, context: impl FnOnce() -> String) -> Box { Box::new(UIoError { context: Some((context)()), inner: self, }) } } impl FromIo> for std::io::Result { fn map_err_context(self, context: impl FnOnce() -> String) -> UResult { self.map_err(|e| e.map_err_context(context) as Box) } } impl FromIo> for std::io::ErrorKind { fn map_err_context(self, context: impl FnOnce() -> String) -> Box { Box::new(UIoError { context: Some((context)()), inner: std::io::Error::new(self, ""), }) } } impl From for UIoError { fn from(f: std::io::Error) -> Self { Self { context: None, inner: f, } } } impl From for Box { fn from(f: std::io::Error) -> Self { let u_error: UIoError = f.into(); Box::new(u_error) as Self } } /// Enables the conversion from [`Result`] to [`UResult`]. 
/// /// # Examples /// /// ``` /// use uucore::error::FromIo; /// use nix::errno::Errno; /// /// let nix_err = Err::<(), nix::Error>(Errno::EACCES); /// let uio_result = nix_err.map_err_context(|| String::from("fix me please!")); /// /// // prints "fix me please!: Permission denied" /// println!("{}", uio_result.unwrap_err()); /// ``` #[cfg(unix)] impl FromIo> for Result { fn map_err_context(self, context: impl FnOnce() -> String) -> UResult { self.map_err(|e| { Box::new(UIoError { context: Some((context)()), inner: std::io::Error::from_raw_os_error(e as i32), }) as Box }) } } #[cfg(unix)] impl FromIo> for nix::Error { fn map_err_context(self, context: impl FnOnce() -> String) -> UResult { Err(Box::new(UIoError { context: Some((context)()), inner: std::io::Error::from_raw_os_error(self as i32), }) as Box) } } #[cfg(unix)] impl From for UIoError { fn from(f: nix::Error) -> Self { Self { context: None, inner: std::io::Error::from_raw_os_error(f as i32), } } } #[cfg(unix)] impl From for Box { fn from(f: nix::Error) -> Self { let u_error: UIoError = f.into(); Box::new(u_error) as Self } } /// Shorthand to construct [`UIoError`]-instances. /// /// This macro serves as a convenience call to quickly construct instances of /// [`UIoError`]. It takes: /// /// - An instance of [`std::io::Error`] /// - A `format!`-compatible string and /// - An arbitrary number of arguments to the format string /// /// In exactly this order. It is equivalent to the more verbose code seen in the /// example. /// /// # Examples /// /// ``` /// use uucore::error::UIoError; /// use uucore::uio_error; /// /// let io_err = std::io::Error::new( /// std::io::ErrorKind::PermissionDenied, "fix me please!" 
/// ); /// /// let uio_err = UIoError::new( /// io_err.kind(), /// format!("Error code: {}", 2) /// ); /// /// let other_uio_err = uio_error!(io_err, "Error code: {}", 2); /// /// // prints the context "Error code: 2" followed by ": " and the message for the error kind; /// // the original "fix me please!" text is not printed because only `io_err.kind()` is kept /// println!("{}", uio_err); /// // prints the same text, this time built through the macro /// println!("{}", other_uio_err); /// ``` /// /// The [`std::fmt::Display`] impl of [`UIoError`] will then ensure that an /// appropriate error message relating to the actual error kind of the /// [`std::io::Error`] is appended to whatever error message is defined in /// addition (as secondary argument). /// /// If you want to show only the error message for the [`std::io::ErrorKind`] /// that's contained in [`UIoError`], pass the second argument as empty string: /// /// ``` /// use uucore::error::UIoError; /// use uucore::uio_error; /// /// let io_err = std::io::Error::new( /// std::io::ErrorKind::PermissionDenied, "fix me please!" /// ); /// /// let other_uio_err = uio_error!(io_err, ""); /// /// // prints: ": Permission denied" /// println!("{}", other_uio_err); /// ``` //#[macro_use] #[macro_export] macro_rules! uio_error( ($err:expr, $($args:tt)+) => ({ UIoError::new( $err.kind(), format!($($args)+) ) }) ); /// A special error type that does not print any message when returned from /// `uumain`. Especially useful for porting utilities to using [`UResult`]. /// /// There are two ways to construct an [`ExitCode`]: /// ``` /// use uucore::error::{ExitCode, UResult}; /// // Explicit /// let res: UResult<()> = Err(ExitCode(1).into()); /// /// // Using into on `i32`: /// let res: UResult<()> = Err(1.into()); /// ``` /// This type is especially useful for a trivial conversion from utils returning [`i32`] to /// returning [`UResult`]. #[derive(Debug)] pub struct ExitCode(pub i32); impl ExitCode { #[allow(clippy::new_ret_no_self)] /// Create a new `ExitCode` with a given exit code.
pub fn new(code: i32) -> Box { Box::new(Self(code)) } } impl Error for ExitCode {} impl Display for ExitCode { fn fmt(&self, _: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { Ok(()) } } impl UError for ExitCode { fn code(&self) -> i32 { self.0 } } impl From for Box { fn from(i: i32) -> Self { ExitCode::new(i) } } /// A wrapper for `clap::Error` that implements [`UError`] /// /// Contains a custom error code. When `Display::fmt` is called on this struct /// the [`clap::Error`] will be printed _directly to `stdout` or `stderr`_. /// This is because `clap` only supports colored output when it prints directly. /// /// [`ClapErrorWrapper`] is generally created by calling the /// [`UClapError::with_exit_code`] method on [`clap::Error`] or using the [`From`] /// implementation from [`clap::Error`] to `Box`, which constructs /// a [`ClapErrorWrapper`] with an exit code of `1`. /// /// ```rust /// use uucore::error::{ClapErrorWrapper, UError, UClapError}; /// let command = clap::Command::new("test"); /// let result: Result<_, ClapErrorWrapper> = command.try_get_matches().with_exit_code(125); /// /// let command = clap::Command::new("test"); /// let result: Result<_, Box> = command.try_get_matches().map_err(Into::into); /// ``` #[derive(Debug)] pub struct ClapErrorWrapper { code: i32, error: clap::Error, } /// Extension trait for `clap::Error` to adjust the exit code. pub trait UClapError { /// Attach `code` as the exit code to report for this error. /// /// The wrapper's [`UError::code`] still reports `0` for help and version requests.
fn with_exit_code(self, code: i32) -> T; } impl From for Box { fn from(e: clap::Error) -> Self { Box::new(ClapErrorWrapper { code: 1, error: e }) } } impl UClapError for clap::Error { fn with_exit_code(self, code: i32) -> ClapErrorWrapper { ClapErrorWrapper { code, error: self } } } impl UClapError> for Result { fn with_exit_code(self, code: i32) -> Result { self.map_err(|e| e.with_exit_code(code)) } } impl UError for ClapErrorWrapper { fn code(&self) -> i32 { // If the error is a DisplayHelp or DisplayVersion variant, // we don't want to apply the custom error code, but leave // it 0. if let clap::error::ErrorKind::DisplayHelp | clap::error::ErrorKind::DisplayVersion = self.error.kind() { 0 } else { self.code } } } impl Error for ClapErrorWrapper {} // This is abuse of the Display trait impl Display for ClapErrorWrapper { fn fmt(&self, _f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { self.error.print().unwrap(); Ok(()) } } #[cfg(test)] mod tests { #[test] #[cfg(unix)] fn test_nix_error_conversion() { use super::{FromIo, UIoError}; use nix::errno::Errno; use std::io::ErrorKind; for (nix_error, expected_error_kind) in [ (Errno::EACCES, ErrorKind::PermissionDenied), (Errno::ENOENT, ErrorKind::NotFound), (Errno::EEXIST, ErrorKind::AlreadyExists), ] { let error = UIoError::from(nix_error); assert_eq!(expected_error_kind, error.inner.kind()); } assert_eq!( "test: Permission denied", Err::<(), nix::Error>(Errno::EACCES) .map_err_context(|| String::from("test")) .unwrap_err() .to_string() ); } } uucore-0.0.30/src/lib/mods/io.rs000064400000000000000000000052461046102023000144710ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Encapsulates differences between OSs regarding the access to //! file handles / descriptors. //! This is useful when dealing with lower level stdin/stdout access. //! //! In detail: //! 
On unix like OSs, file _descriptors_ are used in this context. //! On windows OSs, file _handles_ are used. //! //! Even though they are distinct classes, they share common functionality. //! Access to this common functionality is provided in `OwnedFileDescriptorOrHandle`. #[cfg(not(windows))] use std::os::fd::{AsFd, OwnedFd}; #[cfg(windows)] use std::os::windows::io::{AsHandle, OwnedHandle}; use std::{ fs::{File, OpenOptions}, io, path::Path, process::Stdio, }; #[cfg(windows)] type NativeType = OwnedHandle; #[cfg(not(windows))] type NativeType = OwnedFd; /// abstraction wrapper for native file handle / file descriptor pub struct OwnedFileDescriptorOrHandle { fx: NativeType, } impl OwnedFileDescriptorOrHandle { /// create from underlying native type pub fn new(x: NativeType) -> Self { Self { fx: x } } /// create by opening a file pub fn open_file(options: &OpenOptions, path: &Path) -> io::Result { let f = options.open(path)?; Self::from(f) } /// conversion from borrowed native type /// /// e.g. `std::io::stdout()`, `std::fs::File`, ... #[cfg(windows)] pub fn from(t: T) -> io::Result { Ok(Self { fx: t.as_handle().try_clone_to_owned()?, }) } /// conversion from borrowed native type /// /// e.g. `std::io::stdout()`, `std::fs::File`, ... #[cfg(not(windows))] pub fn from(t: T) -> io::Result { Ok(Self { fx: t.as_fd().try_clone_to_owned()?, }) } /// instantiates a corresponding `File` pub fn into_file(self) -> File { File::from(self.fx) } /// instantiates a corresponding `Stdio` pub fn into_stdio(self) -> Stdio { Stdio::from(self.fx) } /// clones self. 
useful when needing another /// owned reference to same file pub fn try_clone(&self) -> io::Result { self.fx.try_clone().map(Self::new) } /// provides native type to be used with /// OS specific functions without abstraction pub fn as_raw(&self) -> &NativeType { &self.fx } } /// instantiates a corresponding `Stdio` impl From for Stdio { fn from(value: OwnedFileDescriptorOrHandle) -> Self { value.into_stdio() } } uucore-0.0.30/src/lib/mods/line_ending.rs000064400000000000000000000030731046102023000163310ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Provides consistent newline/zero terminator handling for `-z`/`--zero` flags. //! //! See the [`LineEnding`] struct for more information. use std::fmt::Display; /// Line ending of either `\n` or `\0` /// /// Used by various utilities that have the option to separate lines by nul /// characters instead of `\n`. Usually, this is specified with the `-z` or /// `--zero` flag. /// /// The [`Display`] implementation writes the character corresponding to the /// variant to the formatter. #[repr(u8)] #[derive(Clone, Copy, Debug, Default, PartialEq)] pub enum LineEnding { #[default] /// Newline character (`\n`) Newline = b'\n', /// Null character (`\0`) Nul = 0, } impl Display for LineEnding { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Newline => writeln!(f), Self::Nul => write!(f, "\0"), } } } impl From for u8 { fn from(line_ending: LineEnding) -> Self { line_ending as Self } } impl LineEnding { /// Create a [`LineEnding`] from a `-z`/`--zero` flag /// /// If `is_zero_terminated` is true, [`LineEnding::Nul`] is returned, /// otherwise [`LineEnding::Newline`]. 
pub fn from_zero_flag(is_zero_terminated: bool) -> Self { if is_zero_terminated { Self::Nul } else { Self::Newline } } } uucore-0.0.30/src/lib/mods/os.rs000064400000000000000000000022551046102023000145000ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (path) osrelease //! Test if the program is running under WSL //! ref: @@ /// Test if the program is running under WSL version 1 pub fn is_wsl_1() -> bool { #[cfg(target_os = "linux")] { if is_wsl_2() { return false; } if let Ok(b) = std::fs::read("/proc/sys/kernel/osrelease") { if let Ok(s) = std::str::from_utf8(&b) { let a = s.to_ascii_lowercase(); return a.contains("microsoft") || a.contains("wsl"); } } } false } /// Test if the program is running under WSL version 2 pub fn is_wsl_2() -> bool { #[cfg(target_os = "linux")] { if let Ok(b) = std::fs::read("/proc/sys/kernel/osrelease") { if let Ok(s) = std::str::from_utf8(&b) { let a = s.to_ascii_lowercase(); return a.contains("wsl2"); } } } false } uucore-0.0.30/src/lib/mods/panic.rs000064400000000000000000000027531046102023000151540ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Custom panic hooks that allow silencing certain types of errors. //! //! Use the [`mute_sigpipe_panic`] function to silence panics caused by //! broken pipe errors. This can happen when a process is still //! producing data when the consuming process terminates and closes the //! pipe. For example, //! //! ```sh //! $ seq inf | head -n 1 //! ``` //! use std::panic::{self, PanicHookInfo}; /// Decide whether a panic was caused by a broken pipe (SIGPIPE) error. 
fn is_broken_pipe(info: &PanicHookInfo) -> bool { if let Some(res) = info.payload().downcast_ref::() { if res.contains("BrokenPipe") || res.contains("Broken pipe") { return true; } } false } /// Install a panic hook that stays silent for panics caused by broken pipe errors. /// /// The previously installed hook is taken and is still called for every other panic; /// for a broken-pipe panic it is skipped, so nothing is reported by the hook. /// /// For background discussions on `SIGPIPE` handling, see /// /// * `` /// * `` /// * `` /// * `` /// * `` /// pub fn mute_sigpipe_panic() { let hook = panic::take_hook(); panic::set_hook(Box::new(move |info| { if !is_broken_pipe(info) { hook(info); } })); } uucore-0.0.30/src/lib/mods/posix.rs000064400000000000000000000035211046102023000152160ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (vars) //! Read the POSIX version requested through the `_POSIX2_VERSION` environment variable. //! //! This module provides the [`posix_version`] function, that returns //! Some(usize) if the `_POSIX2_VERSION` environment variable is defined //! and has value that can be parsed. //! Otherwise returns None, so the calling utility would assume default behavior. //! //! NOTE: GNU (as of v9.4) recognizes three distinct values for POSIX version //! //! Utilities that rely on this module: //! `sort` (TBD) //! `tail` (TBD) //! `touch` (TBD) //! `uniq` //!
use std::env; /// '199209' for POSIX 1003.2-1992, which would define Obsolete mode pub const OBSOLETE: usize = 199_209; /// '200112' for POSIX 1003.1-2001, which is the minimum version for Traditional mode pub const TRADITIONAL: usize = 200_112; /// '200809' for POSIX 1003.1-2008, which is the minimum version for Modern mode pub const MODERN: usize = 200_809; /// Returns the value of the `_POSIX2_VERSION` environment variable if it is defined pub fn posix_version() -> Option { env::var("_POSIX2_VERSION") .ok() .and_then(|v| v.parse::().ok()) } #[cfg(test)] mod tests { use crate::posix::*; #[test] fn test_posix_version() { // default assert_eq!(posix_version(), None); // set specific version env::set_var("_POSIX2_VERSION", OBSOLETE.to_string()); assert_eq!(posix_version(), Some(OBSOLETE)); env::set_var("_POSIX2_VERSION", TRADITIONAL.to_string()); assert_eq!(posix_version(), Some(TRADITIONAL)); env::set_var("_POSIX2_VERSION", MODERN.to_string()); assert_eq!(posix_version(), Some(MODERN)); } } uucore-0.0.30/src/lib/mods.rs000064400000000000000000000005321046102023000140530ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // mods ~ cross-platforms modules (core/bundler file) pub mod display; pub mod error; pub mod io; pub mod line_ending; pub mod os; pub mod panic; pub mod posix; uucore-0.0.30/src/lib/parser/parse_glob.rs000064400000000000000000000076331046102023000165330ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. //! Parsing a glob Pattern from a string. //! //! Use the [`from_str`] function to parse a [`Pattern`] from a string. 
// cSpell:words fnmatch use glob::{Pattern, PatternError}; fn fix_negation(glob: &str) -> String { let mut chars = glob.chars().collect::>(); let mut i = 0; // Add 3 to prevent out of bounds in loop while i + 3 < chars.len() { if chars[i] == '[' && chars[i + 1] == '^' { match chars[i + 3..].iter().position(|x| *x == ']') { None => { // if closing square bracket not found, stop looking for it // again break; } Some(j) => { chars[i + 1] = '!'; i += j + 4; continue; } } } i += 1; } chars.into_iter().collect::() } /// Parse a glob Pattern from a string. /// /// This function amends the input string to replace any caret or circumflex /// character (^) used to negate a set of characters with an exclamation mark /// (!), which adapts rust's glob matching to function the way the GNU utils' /// fnmatch does. /// /// # Examples /// /// ```rust /// use std::time::Duration; /// use uucore::parse_glob::from_str; /// assert!(!from_str("[^abc]").unwrap().matches("a")); /// assert!(from_str("[^abc]").unwrap().matches("x")); /// ``` pub fn from_str(glob: &str) -> Result { Pattern::new(&fix_negation(glob)) } #[cfg(test)] mod tests { use super::*; #[test] fn test_from_str() { assert_eq!(from_str("[^abc]").unwrap(), Pattern::new("[!abc]").unwrap()); } #[test] fn test_fix_negation() { // Happy/Simple case assert_eq!(fix_negation("[^abc]"), "[!abc]"); // Should fix negations in a long regex assert_eq!(fix_negation("foo[abc] bar[^def]"), "foo[abc] bar[!def]"); // Should fix multiple negations in a regex assert_eq!(fix_negation("foo[^abc]bar[^def]"), "foo[!abc]bar[!def]"); // Should fix negation of the single character ] assert_eq!(fix_negation("[^]]"), "[!]]"); // Should fix negation of the single character ^ assert_eq!(fix_negation("[^^]"), "[!^]"); // Should fix negation of the space character assert_eq!(fix_negation("[^ ]"), "[! 
]"); // Complicated patterns assert_eq!(fix_negation("[^][]"), "[!][]"); assert_eq!(fix_negation("[^[]]"), "[![]]"); // More complex patterns that should be replaced assert_eq!(fix_negation("[[]] [^a]"), "[[]] [!a]"); assert_eq!(fix_negation("[[] [^a]"), "[[] [!a]"); assert_eq!(fix_negation("[]] [^a]"), "[]] [!a]"); // test that we don't look for closing square brackets unnecessarily // Verifies issue #5584 let chars = "^[".repeat(174_571); assert_eq!(fix_negation(chars.as_str()), chars); } #[test] fn test_fix_negation_should_not_amend() { assert_eq!(fix_negation("abc"), "abc"); // Regex specifically matches either [ or ^ assert_eq!(fix_negation("[[^]"), "[[^]"); // Regex that specifically matches either space or ^ assert_eq!(fix_negation("[ ^]"), "[ ^]"); // Regex that specifically matches either [, space or ^ assert_eq!(fix_negation("[[ ^]"), "[[ ^]"); assert_eq!(fix_negation("[ [^]"), "[ [^]"); // Invalid globs (according to rust's glob implementation) will remain unamended assert_eq!(fix_negation("[^]"), "[^]"); assert_eq!(fix_negation("[^"), "[^"); assert_eq!(fix_negation("[][^]"), "[][^]"); // Issue #4479 assert_eq!(fix_negation("ààà[^"), "ààà[^"); } } uucore-0.0.30/src/lib/parser/parse_size.rs000064400000000000000000000653331046102023000165630ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore (ToDO) hdsf ghead gtail ACDBK hexdigit //! Parser for sizes in SI or IEC units (multiples of 1000 or 1024 bytes). use std::error::Error; use std::fmt; #[cfg(target_os = "linux")] use std::io::BufRead; use std::num::{IntErrorKind, ParseIntError}; use crate::display::Quotable; /// Error arising from trying to compute system memory. 
enum SystemError { IOError, ParseError, NotFound, } impl From for SystemError { fn from(_: std::io::Error) -> Self { Self::IOError } } impl From for SystemError { fn from(_: ParseIntError) -> Self { Self::ParseError } } /// Get the total number of bytes of physical memory. /// /// The information is read from the `/proc/meminfo` file. /// /// # Errors /// /// If there is a problem reading the file or finding the appropriate /// entry in the file. #[cfg(target_os = "linux")] fn total_physical_memory() -> Result { // On Linux, the `/proc/meminfo` file has a table with information // about memory usage. For example, // // MemTotal: 7811500 kB // MemFree: 1487876 kB // MemAvailable: 3857232 kB // ... // // We just need to extract the number of `MemTotal` let table = std::fs::read("/proc/meminfo")?; for line in table.lines() { let line = line?; if line.starts_with("MemTotal:") && line.ends_with("kB") { let num_kilobytes: u128 = line[9..line.len() - 2].trim().parse()?; let num_bytes = 1024 * num_kilobytes; return Ok(num_bytes); } } Err(SystemError::NotFound) } /// Get the total number of bytes of physical memory. /// /// TODO Implement this for non-Linux systems. #[cfg(not(target_os = "linux"))] fn total_physical_memory() -> Result { Err(SystemError::NotFound) } /// Parser for sizes in SI or IEC units (multiples of 1000 or 1024 bytes). /// /// The [`Parser::parse`] function performs the parse. #[derive(Default)] pub struct Parser<'parser> { /// Whether to allow empty numeric strings. pub no_empty_numeric: bool, /// Whether to treat the suffix "B" as meaning "bytes". 
pub capital_b_bytes: bool, /// Whether to treat "b" as a "byte count" instead of "block" pub b_byte_count: bool, /// Whitelist for the suffix pub allow_list: Option<&'parser [&'parser str]>, /// Default unit when no suffix is provided pub default_unit: Option<&'parser str>, } enum NumberSystem { Decimal, Octal, Hexadecimal, } impl<'parser> Parser<'parser> { /// Change allow_list of the parser - whitelist for the suffix pub fn with_allow_list(&mut self, allow_list: &'parser [&str]) -> &mut Self { self.allow_list = Some(allow_list); self } /// Change default_unit of the parser - when no suffix is provided pub fn with_default_unit(&mut self, default_unit: &'parser str) -> &mut Self { self.default_unit = Some(default_unit); self } /// Change b_byte_count of the parser - to treat "b" as a "byte count" instead of "block" pub fn with_b_byte_count(&mut self, value: bool) -> &mut Self { self.b_byte_count = value; self } /// Change no_empty_numeric of the parser - to allow empty numeric strings pub fn with_allow_empty_numeric(&mut self, value: bool) -> &mut Self { self.no_empty_numeric = value; self } /// Parse a size string into a number of bytes. /// /// A size string comprises an integer and an optional unit. The unit /// may be K, M, G, T, P, E, Z, Y, R or Q (powers of 1024), or KB, MB, /// etc. (powers of 1000), or b which is 512. /// Binary prefixes can be used, too: KiB=K, MiB=M, and so on. /// /// # Errors /// /// Will return `ParseSizeError` if it's not possible to parse this /// string into a number, e.g. if the string does not begin with a /// numeral, or if the unit is not one of the supported units described /// in the preceding section. 
/// /// # Examples /// /// ```rust /// use uucore::parse_size::Parser; /// let parser = Parser { /// default_unit: Some("M"), /// ..Default::default() /// }; /// assert_eq!(Ok(123 * 1024 * 1024), parser.parse("123M")); // M is 1024^2 /// assert_eq!(Ok(123 * 1024 * 1024), parser.parse("123")); // default unit set to "M" on parser instance /// assert_eq!(Ok(9 * 1000), parser.parse("9kB")); // kB is 1000 /// assert_eq!(Ok(2 * 1024), parser.parse("2K")); // K is 1024 /// assert_eq!(Ok(44251 * 1024), parser.parse("0xACDBK")); // 0xACDB is 44251 in decimal /// ``` pub fn parse(&self, size: &str) -> Result { if size.is_empty() { return Err(ParseSizeError::parse_failure(size)); } let number_system = Self::determine_number_system(size); // Split the size argument into numeric and unit parts // For example, if the argument is "123K", the numeric part is "123", and // the unit is "K" let numeric_string: String = match number_system { NumberSystem::Hexadecimal => size .chars() .take(2) .chain(size.chars().skip(2).take_while(char::is_ascii_hexdigit)) .collect(), _ => size.chars().take_while(char::is_ascii_digit).collect(), }; let mut unit: &str = &size[numeric_string.len()..]; if let Some(default_unit) = self.default_unit { // Check if `unit` is empty then assigns `default_unit` to `unit` if unit.is_empty() { unit = default_unit; } } // Check if `b` is a byte count and remove `b` if self.b_byte_count && unit.ends_with('b') { // If `unit` = 'b' then return error if numeric_string.is_empty() { return Err(ParseSizeError::parse_failure(size)); } unit = &unit[0..unit.len() - 1]; } if let Some(allow_list) = self.allow_list { // Check if `unit` appears in `allow_list`, if not return error if !allow_list.contains(&unit) && !unit.is_empty() { if numeric_string.is_empty() { return Err(ParseSizeError::parse_failure(size)); } return Err(ParseSizeError::invalid_suffix(size)); } } // Special case: for percentage, just compute the given fraction // of the total physical memory on the machine, 
if possible. if unit == "%" { let number: u128 = Self::parse_number(&numeric_string, 10, size)?; return match total_physical_memory() { Ok(total) => Ok((number / 100) * total), Err(_) => Err(ParseSizeError::PhysicalMem(size.to_string())), }; } // Compute the factor the unit represents. // empty string means the factor is 1. // // The lowercase "b" (used by `od`, `head`, `tail`, etc.) means // "block" and the Posix block size is 512. The uppercase "B" // means "byte". let (base, exponent): (u128, u32) = match unit { "" => (1, 0), "B" if self.capital_b_bytes => (1, 0), "b" => (512, 1), "KiB" | "kiB" | "K" | "k" => (1024, 1), "MiB" | "miB" | "M" | "m" => (1024, 2), "GiB" | "giB" | "G" | "g" => (1024, 3), "TiB" | "tiB" | "T" | "t" => (1024, 4), "PiB" | "piB" | "P" | "p" => (1024, 5), "EiB" | "eiB" | "E" | "e" => (1024, 6), "ZiB" | "ziB" | "Z" | "z" => (1024, 7), "YiB" | "yiB" | "Y" | "y" => (1024, 8), "RiB" | "riB" | "R" | "r" => (1024, 9), "QiB" | "qiB" | "Q" | "q" => (1024, 10), "KB" | "kB" => (1000, 1), "MB" | "mB" => (1000, 2), "GB" | "gB" => (1000, 3), "TB" | "tB" => (1000, 4), "PB" | "pB" => (1000, 5), "EB" | "eB" => (1000, 6), "ZB" | "zB" => (1000, 7), "YB" | "yB" => (1000, 8), "RB" | "rB" => (1000, 9), "QB" | "qB" => (1000, 10), _ if numeric_string.is_empty() => return Err(ParseSizeError::parse_failure(size)), _ => return Err(ParseSizeError::invalid_suffix(size)), }; let factor = base.pow(exponent); // parse string into u128 let number: u128 = match number_system { NumberSystem::Decimal => { if numeric_string.is_empty() && !self.no_empty_numeric { 1 } else { Self::parse_number(&numeric_string, 10, size)? } } NumberSystem::Octal => { let trimmed_string = numeric_string.trim_start_matches('0'); Self::parse_number(trimmed_string, 8, size)? } NumberSystem::Hexadecimal => { let trimmed_string = numeric_string.trim_start_matches("0x"); Self::parse_number(trimmed_string, 16, size)? 
} }; number .checked_mul(factor) .ok_or_else(|| ParseSizeError::size_too_big(size)) } /// Explicit u128 alias for `parse()` pub fn parse_u128(&self, size: &str) -> Result { self.parse(size) } /// Same as `parse()` but tries to return u64 pub fn parse_u64(&self, size: &str) -> Result { self.parse(size).and_then(|num_u128| { u64::try_from(num_u128).map_err(|_| ParseSizeError::size_too_big(size)) }) } /// Same as `parse_u64()`, except returns `u64::MAX` on overflow /// GNU lib/coreutils include similar functionality /// and GNU test suite checks this behavior for some utils (`split` for example) pub fn parse_u64_max(&self, size: &str) -> Result { let result = self.parse_u64(size); match result { Ok(_) => result, Err(error) => { if let ParseSizeError::SizeTooBig(_) = error { Ok(u64::MAX) } else { Err(error) } } } } /// Same as `parse_u64_max()`, except for u128, i.e. returns `u128::MAX` on overflow pub fn parse_u128_max(&self, size: &str) -> Result { let result = self.parse_u128(size); match result { Ok(_) => result, Err(error) => { if let ParseSizeError::SizeTooBig(_) = error { Ok(u128::MAX) } else { Err(error) } } } } fn determine_number_system(size: &str) -> NumberSystem { if size.len() <= 1 { return NumberSystem::Decimal; } if size.starts_with("0x") { return NumberSystem::Hexadecimal; } let num_digits: usize = size .chars() .take_while(char::is_ascii_digit) .collect::() .len(); let all_zeros = size.chars().all(|c| c == '0'); if size.starts_with('0') && num_digits > 1 && !all_zeros { return NumberSystem::Octal; } NumberSystem::Decimal } fn parse_number( numeric_string: &str, radix: u32, original_size: &str, ) -> Result { u128::from_str_radix(numeric_string, radix).map_err(|e| match e.kind() { IntErrorKind::PosOverflow => ParseSizeError::size_too_big(original_size), _ => ParseSizeError::ParseFailure(original_size.to_string()), }) } } /// Parse a size string into a number of bytes /// using Default Parser (no custom settings) /// /// # Examples /// /// ```rust /// use 
uucore::parse_size::parse_size_u128; /// assert_eq!(Ok(123), parse_size_u128("123")); /// assert_eq!(Ok(9 * 1000), parse_size_u128("9kB")); // kB is 1000 /// assert_eq!(Ok(2 * 1024), parse_size_u128("2K")); // K is 1024 /// assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK")); /// ``` pub fn parse_size_u128(size: &str) -> Result { Parser::default().parse(size) } /// Same as `parse_size_u128()`, but for u64 pub fn parse_size_u64(size: &str) -> Result { Parser::default().parse_u64(size) } /// Same as `parse_size_u64()` - deprecated #[deprecated = "Please use parse_size_u64(size: &str) -> Result OR parse_size_u128(size: &str) -> Result instead."] pub fn parse_size(size: &str) -> Result { parse_size_u64(size) } /// Same as `parse_size_u64()`, except returns `u64::MAX` on overflow /// GNU lib/coreutils include similar functionality /// and GNU test suite checks this behavior for some utils pub fn parse_size_u64_max(size: &str) -> Result { Parser::default().parse_u64_max(size) } /// Same as `parse_size_u128()`, except returns `u128::MAX` on overflow pub fn parse_size_u128_max(size: &str) -> Result { Parser::default().parse_u128_max(size) } /// Error type for parse_size #[derive(Debug, PartialEq, Eq)] pub enum ParseSizeError { /// The number is followed by a suffix that is unknown or not in the parser's allow list. InvalidSuffix(String), /// The string is empty or its numeric part cannot be parsed. ParseFailure(String), /// The resulting value does not fit in the requested integer type. SizeTooBig(String), /// Could not determine total physical memory size.
PhysicalMem(String), } impl Error for ParseSizeError { fn description(&self) -> &str { match *self { Self::InvalidSuffix(ref s) => s, Self::ParseFailure(ref s) => s, Self::SizeTooBig(ref s) => s, Self::PhysicalMem(ref s) => s, } } } impl fmt::Display for ParseSizeError { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { let s = match self { Self::InvalidSuffix(s) | Self::ParseFailure(s) | Self::SizeTooBig(s) | Self::PhysicalMem(s) => s, }; write!(f, "{s}") } } // FIXME: It's more idiomatic to move the formatting into the Display impl, // but there's a lot of downstream code that constructs these errors manually // that would be affected impl ParseSizeError { fn invalid_suffix(s: &str) -> Self { Self::InvalidSuffix(format!("{}", s.quote())) } fn parse_failure(s: &str) -> Self { // stderr on linux (GNU coreutils 8.32) (LC_ALL=C) // has to be handled in the respective uutils because strings differ, e.g.: // // `NUM` // head: invalid number of bytes: '1fb' // tail: invalid number of bytes: '1fb' // // `SIZE` // split: invalid number of bytes: '1fb' // truncate: Invalid number: '1fb' // // `MODE` // stdbuf: invalid mode '1fb' // // `SIZE` // sort: invalid suffix in --buffer-size argument '1fb' // sort: invalid --buffer-size argument 'fb' // // `SIZE` // du: invalid suffix in --buffer-size argument '1fb' // du: invalid suffix in --threshold argument '1fb' // du: invalid --buffer-size argument 'fb' // du: invalid --threshold argument 'fb' // // `BYTES` // od: invalid suffix in --read-bytes argument '1fb' // od: invalid --read-bytes argument argument 'fb' // --skip-bytes // --width // --strings // etc. 
Self::ParseFailure(format!("{}", s.quote())) } fn size_too_big(s: &str) -> Self { // stderr on linux (GNU coreutils 8.32) (LC_ALL=C) // has to be handled in the respective uutils because strings differ, e.g.: // // head: invalid number of bytes: '1Y': Value too large for defined data type // tail: invalid number of bytes: '1Y': Value too large for defined data type // split: invalid number of bytes: '1Y': Value too large for defined data type // truncate: Invalid number: '1Y': Value too large for defined data type // stdbuf: invalid mode '1Y': Value too large for defined data type // sort: -S argument '1Y' too large // du: -B argument '1Y' too large // od: -N argument '1Y' too large // etc. // // stderr on macos (brew - GNU coreutils 8.32) also differs for the same version, e.g.: // ghead: invalid number of bytes: '1Y': Value too large to be stored in data type // gtail: invalid number of bytes: '1Y': Value too large to be stored in data type Self::SizeTooBig(format!( "{}: Value too large for defined data type", s.quote() )) } } #[cfg(test)] mod tests { use super::*; fn variant_eq(a: &ParseSizeError, b: &ParseSizeError) -> bool { std::mem::discriminant(a) == std::mem::discriminant(b) } #[test] fn all_suffixes() { // Units are K,M,G,T,P,E,Z,Y,R,Q (powers of 1024) or KB,MB,... (powers of 1000). // Binary prefixes can be used, too: KiB=K, MiB=M, and so on. 
#[cfg(test)]
mod tests {
    use super::*;

    /// Compare two errors by variant only, ignoring the message payload.
    fn variant_eq(a: &ParseSizeError, b: &ParseSizeError) -> bool {
        std::mem::discriminant(a) == std::mem::discriminant(b)
    }

    #[test]
    fn all_suffixes() {
        // Units are K,M,G,T,P,E,Z,Y,R,Q (powers of 1024) or KB,MB,... (powers of 1000).
        // Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
        let suffixes = [
            ('K', 1u32),
            ('M', 2u32),
            ('G', 3u32),
            ('T', 4u32),
            ('P', 5u32),
            ('E', 6u32),
            ('Z', 7u32),
            ('Y', 8u32),
            ('R', 9u32),
            ('Q', 10u32),
        ];

        for &(c, exp) in &suffixes {
            let s = format!("2{c}B"); // KB
            assert_eq!(Ok(2 * (1000_u128).pow(exp)), parse_size_u128(&s));
            let s = format!("2{c}"); // K
            assert_eq!(Ok(2 * (1024_u128).pow(exp)), parse_size_u128(&s));
            let s = format!("2{c}iB"); // KiB
            assert_eq!(Ok(2 * (1024_u128).pow(exp)), parse_size_u128(&s));
            let s = format!("2{}iB", c.to_lowercase()); // kiB
            assert_eq!(Ok(2 * (1024_u128).pow(exp)), parse_size_u128(&s));

            // suffix only
            let s = format!("{c}B"); // KB
            assert_eq!(Ok((1000_u128).pow(exp)), parse_size_u128(&s));
            let s = format!("{c}"); // K
            assert_eq!(Ok((1024_u128).pow(exp)), parse_size_u128(&s));
            let s = format!("{c}iB"); // KiB
            assert_eq!(Ok((1024_u128).pow(exp)), parse_size_u128(&s));
            let s = format!("{}iB", c.to_lowercase()); // kiB
            assert_eq!(Ok((1024_u128).pow(exp)), parse_size_u128(&s));
        }
    }

    #[test]
    fn overflow_x64() {
        assert!(parse_size_u64("10000000000000000000000").is_err());
        assert!(parse_size_u64("1000000000T").is_err());
        assert!(parse_size_u64("100000P").is_err());
        assert!(parse_size_u64("100E").is_err());
        assert!(parse_size_u64("1Z").is_err());
        assert!(parse_size_u64("1Y").is_err());
        assert!(parse_size_u64("1R").is_err());
        assert!(parse_size_u64("1Q").is_err());

        assert!(variant_eq(
            &parse_size_u64("1Z").unwrap_err(),
            &ParseSizeError::SizeTooBig(String::new())
        ));

        assert_eq!(
            ParseSizeError::SizeTooBig("'1Y': Value too large for defined data type".to_string()),
            parse_size_u64("1Y").unwrap_err()
        );
        assert_eq!(
            ParseSizeError::SizeTooBig("'1R': Value too large for defined data type".to_string()),
            parse_size_u64("1R").unwrap_err()
        );
        assert_eq!(
            ParseSizeError::SizeTooBig("'1Q': Value too large for defined data type".to_string()),
            parse_size_u64("1Q").unwrap_err()
        );
    }

    #[test]
    fn overflow_to_max_u64() {
        assert_eq!(Ok(1_099_511_627_776), parse_size_u64_max("1T"));
        assert_eq!(Ok(1_125_899_906_842_624), parse_size_u64_max("1P"));
        assert_eq!(Ok(u64::MAX), parse_size_u64_max("18446744073709551616"));
        assert_eq!(Ok(u64::MAX), parse_size_u64_max("10000000000000000000000"));
        assert_eq!(Ok(u64::MAX), parse_size_u64_max("1Y"));
        assert_eq!(Ok(u64::MAX), parse_size_u64_max("1R"));
        assert_eq!(Ok(u64::MAX), parse_size_u64_max("1Q"));
    }

    #[test]
    fn overflow_to_max_u128() {
        assert_eq!(
            Ok(12_379_400_392_853_802_748_991_242_240),
            parse_size_u128_max("10R")
        );
        assert_eq!(
            Ok(12_676_506_002_282_294_014_967_032_053_760),
            parse_size_u128_max("10Q")
        );
        assert_eq!(Ok(u128::MAX), parse_size_u128_max("1000000000000R"));
        assert_eq!(Ok(u128::MAX), parse_size_u128_max("1000000000Q"));
    }

    #[test]
    fn invalid_suffix() {
        let test_strings = ["5mib", "1eb", "1H"];
        for &test_string in &test_strings {
            assert_eq!(
                parse_size_u64(test_string).unwrap_err(),
                ParseSizeError::InvalidSuffix(test_string.quote().to_string())
            );
        }
    }

    #[test]
    fn invalid_syntax() {
        let test_strings = ["biB", "-", "+", "", "-1", "∞"];
        for &test_string in &test_strings {
            assert_eq!(
                parse_size_u64(test_string).unwrap_err(),
                ParseSizeError::ParseFailure(test_string.quote().to_string())
            );
        }
    }

    #[test]
    fn b_suffix() {
        assert_eq!(Ok(3 * 512), parse_size_u64("3b")); // b is 512
    }

    #[test]
    fn no_suffix() {
        assert_eq!(Ok(1234), parse_size_u64("1234"));
        assert_eq!(Ok(0), parse_size_u64("0"));
        assert_eq!(Ok(5), parse_size_u64("5"));
        assert_eq!(Ok(999), parse_size_u64("999"));
    }

    #[test]
    fn kilobytes_suffix() {
        assert_eq!(Ok(123 * 1000), parse_size_u64("123KB")); // KB is 1000
        assert_eq!(Ok(9 * 1000), parse_size_u64("9kB")); // kB is 1000
        assert_eq!(Ok(2 * 1024), parse_size_u64("2K")); // K is 1024
        assert_eq!(Ok(0), parse_size_u64("0K"));
        assert_eq!(Ok(0), parse_size_u64("0KB"));
        assert_eq!(Ok(1000), parse_size_u64("KB"));
        assert_eq!(Ok(1024), parse_size_u64("K"));
        assert_eq!(Ok(2000), parse_size_u64("2kB"));
        assert_eq!(Ok(4000), parse_size_u64("4KB"));
    }

    #[test]
    fn megabytes_suffix() {
        assert_eq!(Ok(123 * 1024 * 1024), parse_size_u64("123M"));
        assert_eq!(Ok(123 * 1000 * 1000), parse_size_u64("123MB"));
        assert_eq!(Ok(1024 * 1024), parse_size_u64("M"));
        assert_eq!(Ok(1000 * 1000), parse_size_u64("MB"));
        assert_eq!(Ok(2 * 1_048_576), parse_size_u64("2m"));
        assert_eq!(Ok(4 * 1_048_576), parse_size_u64("4M"));
        assert_eq!(Ok(2_000_000), parse_size_u64("2mB"));
        assert_eq!(Ok(4_000_000), parse_size_u64("4MB"));
    }

    #[test]
    fn gigabytes_suffix() {
        assert_eq!(Ok(1_073_741_824), parse_size_u64("1G"));
        assert_eq!(Ok(2_000_000_000), parse_size_u64("2GB"));
    }

    // No `target_pointer_width` gate: the functions under test return `u64` /
    // `u128` on every target, so these values are representable everywhere.
    #[test]
    fn x64() {
        assert_eq!(Ok(1_099_511_627_776), parse_size_u64("1T"));
        assert_eq!(Ok(1_125_899_906_842_624), parse_size_u64("1P"));
        assert_eq!(Ok(1_152_921_504_606_846_976), parse_size_u64("1E"));

        assert_eq!(Ok(1_180_591_620_717_411_303_424), parse_size_u128("1Z"));
        assert_eq!(Ok(1_208_925_819_614_629_174_706_176), parse_size_u128("1Y"));
        assert_eq!(
            Ok(1_237_940_039_285_380_274_899_124_224),
            parse_size_u128("1R")
        );
        assert_eq!(
            Ok(1_267_650_600_228_229_401_496_703_205_376),
            parse_size_u128("1Q")
        );

        assert_eq!(Ok(2_000_000_000_000), parse_size_u64("2TB"));
        assert_eq!(Ok(2_000_000_000_000_000), parse_size_u64("2PB"));
        assert_eq!(Ok(2_000_000_000_000_000_000), parse_size_u64("2EB"));

        assert_eq!(Ok(2_000_000_000_000_000_000_000), parse_size_u128("2ZB"));
        assert_eq!(
            Ok(2_000_000_000_000_000_000_000_000),
            parse_size_u128("2YB")
        );
        assert_eq!(
            Ok(2_000_000_000_000_000_000_000_000_000),
            parse_size_u128("2RB")
        );
        assert_eq!(
            Ok(2_000_000_000_000_000_000_000_000_000_000),
            parse_size_u128("2QB")
        );
    }

    #[test]
    fn parse_size_options() {
        let mut parser = Parser::default();

        parser
            .with_allow_list(&["k", "K", "G", "MB", "M"])
            .with_default_unit("K");

        assert_eq!(Ok(1024), parser.parse("1"));
        assert_eq!(Ok(2 * 1024), parser.parse("2"));
        assert_eq!(Ok(1000 * 1000), parser.parse("1MB"));
        assert_eq!(Ok(1024 * 1024), parser.parse("1M"));
        assert_eq!(Ok(1024 * 1024 * 1024), parser.parse("1G"));

        assert!(parser.parse("1T").is_err());
        assert!(parser.parse("1P").is_err());
        assert!(parser.parse("1E").is_err());

        parser
            .with_allow_list(&[
                "b", "k", "K", "m", "M", "MB", "g", "G", "t", "T", "P", "E", "Z", "Y", "R", "Q",
            ])
            .with_default_unit("K")
            .with_b_byte_count(true);

        assert_eq!(Ok(1024), parser.parse("1"));
        assert_eq!(Ok(2 * 1024), parser.parse("2"));
        assert_eq!(Ok(1000 * 1000), parser.parse("1MB"));
        assert_eq!(Ok(1024 * 1024), parser.parse("1M"));
        assert_eq!(Ok(1024 * 1024 * 1024), parser.parse("1G"));
        assert_eq!(
            Ok(1_237_940_039_285_380_274_899_124_224),
            parser.parse_u128("1R")
        );
        assert_eq!(
            Ok(1_267_650_600_228_229_401_496_703_205_376),
            parser.parse_u128("1Q")
        );

        assert_eq!(Ok(1), parser.parse("1b"));
        assert_eq!(Ok(1024), parser.parse("1024b"));
        assert_eq!(Ok(1024 * 1024 * 1024), parser.parse("1024Mb"));

        assert!(parser.parse("b").is_err());
        assert!(parser.parse("1B").is_err());
        assert!(parser.parse("B").is_err());
    }

    #[test]
    fn parse_octal_size() {
        assert_eq!(Ok(63), parse_size_u64("077"));
        assert_eq!(Ok(528), parse_size_u64("01020"));
        assert_eq!(Ok(668 * 1024), parse_size_u128("01234K"));
    }

    #[test]
    fn parse_hex_size() {
        assert_eq!(Ok(10), parse_size_u64("0xA"));
        assert_eq!(Ok(94722), parse_size_u64("0x17202"));
        assert_eq!(Ok(44251 * 1024), parse_size_u128("0xACDBK"));
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn parse_percent() {
        assert!(parse_size_u64("0%").is_ok());
        assert!(parse_size_u64("50%").is_ok());
        assert!(parse_size_u64("100%").is_ok());
        assert!(parse_size_u64("100000%").is_ok());
        assert!(parse_size_u64("-1%").is_err());
        assert!(parse_size_u64("1.0%").is_err());
        assert!(parse_size_u64("0x1%").is_err());
    }
}
// spell-checker:ignore (vars) NANOS numstr //! Parsing a duration from a string. //! //! Use the [`from_str`] function to parse a [`Duration`] from a string. use std::time::Duration; use crate::display::Quotable; /// Parse a duration from a string. /// /// The string may contain only a number, like "123" or "4.5", or it /// may contain a number with a unit specifier, like "123s" meaning /// one hundred twenty three seconds or "4.5d" meaning four and a half /// days. If no unit is specified, the unit is assumed to be seconds. /// /// The only allowed suffixes are /// /// * "s" for seconds, /// * "m" for minutes, /// * "h" for hours, /// * "d" for days. /// /// This function uses [`Duration::saturating_mul`] to compute the /// number of seconds, so it does not overflow. If overflow would have /// occurred, [`Duration::MAX`] is returned instead. /// /// # Errors /// /// This function returns an error if the input string is empty, the /// input is not a valid number, or the unit specifier is invalid or /// unknown. /// /// # Examples /// /// ```rust /// use std::time::Duration; /// use uucore::parse_time::from_str; /// assert_eq!(from_str("123"), Ok(Duration::from_secs(123))); /// assert_eq!(from_str("2d"), Ok(Duration::from_secs(60 * 60 * 24 * 2))); /// ``` pub fn from_str(string: &str) -> Result { let len = string.len(); if len == 0 { return Err("empty string".to_owned()); } let Some(slice) = string.get(..len - 1) else { return Err(format!("invalid time interval {}", string.quote())); }; let (numstr, times) = match string.chars().next_back().unwrap() { 's' => (slice, 1), 'm' => (slice, 60), 'h' => (slice, 60 * 60), 'd' => (slice, 60 * 60 * 24), val if !val.is_alphabetic() => (string, 1), _ => { if string == "inf" || string == "infinity" { ("inf", 1) } else { return Err(format!("invalid time interval {}", string.quote())); } } }; let num = numstr .parse::() .map_err(|e| format!("invalid time interval {}: {}", string.quote(), e))?; if num < 0. 
{ return Err(format!("invalid time interval {}", string.quote())); } const NANOS_PER_SEC: u32 = 1_000_000_000; let whole_secs = num.trunc(); let nanos = (num.fract() * (NANOS_PER_SEC as f64)).trunc(); let duration = Duration::new(whole_secs as u64, nanos as u32); Ok(duration.saturating_mul(times)) } #[cfg(test)] mod tests { use crate::parse_time::from_str; use std::time::Duration; #[test] fn test_no_units() { assert_eq!(from_str("123"), Ok(Duration::from_secs(123))); } #[test] fn test_units() { assert_eq!(from_str("2d"), Ok(Duration::from_secs(60 * 60 * 24 * 2))); } #[test] fn test_saturating_mul() { assert_eq!(from_str("9223372036854775808d"), Ok(Duration::MAX)); } #[test] fn test_error_empty() { assert!(from_str("").is_err()); } #[test] fn test_error_invalid_unit() { assert!(from_str("123X").is_err()); } #[test] fn test_error_multi_bytes_characters() { assert!(from_str("10€").is_err()); } #[test] fn test_error_invalid_magnitude() { assert!(from_str("12abc3s").is_err()); } #[test] fn test_negative() { assert!(from_str("-1").is_err()); } /// Test that capital letters are not allowed in suffixes. #[test] fn test_no_capital_letters() { assert!(from_str("1S").is_err()); assert!(from_str("1M").is_err()); assert!(from_str("1H").is_err()); assert!(from_str("1D").is_err()); } } uucore-0.0.30/src/lib/parser/shortcut_value_parser.rs000064400000000000000000000173161046102023000210400ustar 00000000000000// This file is part of the uutils coreutils package. // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore abcdefgh abef Strs //! A parser that accepts shortcuts for values. //! `ShortcutValueParser` is similar to clap's `PossibleValuesParser` use clap::{ builder::{PossibleValue, TypedValueParser}, error::{ContextKind, ContextValue, ErrorKind}, }; /// A parser that accepts shortcuts for values. 
#[derive(Clone)] pub struct ShortcutValueParser(Vec); /// `ShortcutValueParser` is similar to clap's `PossibleValuesParser`: it verifies that the value is /// from an enumerated set of `PossibleValue`. /// /// Whereas `PossibleValuesParser` only accepts exact matches, `ShortcutValueParser` also accepts /// shortcuts as long as they are unambiguous. impl ShortcutValueParser { /// Create a new `ShortcutValueParser` from a list of `PossibleValue`. pub fn new(values: impl Into) -> Self { values.into() } fn generate_clap_error( &self, cmd: &clap::Command, arg: Option<&clap::Arg>, value: &str, possible_values: &[&PossibleValue], ) -> clap::Error { let mut err = clap::Error::new(ErrorKind::InvalidValue).with_cmd(cmd); if let Some(arg) = arg { err.insert( ContextKind::InvalidArg, ContextValue::String(arg.to_string()), ); } err.insert( ContextKind::InvalidValue, ContextValue::String(value.to_string()), ); err.insert( ContextKind::ValidValue, ContextValue::Strings(self.0.iter().map(|x| x.get_name().to_string()).collect()), ); // if `possible_values` is not empty then that means this error is because of an ambiguous value. if !possible_values.is_empty() { add_ambiguous_value_tip(possible_values, &mut err, value); } err } } /// Adds a suggestion when error is because of ambiguous values based on the provided possible values. fn add_ambiguous_value_tip( possible_values: &[&PossibleValue], err: &mut clap::error::Error, value: &str, ) { let mut formatted_possible_values = String::new(); for (i, s) in possible_values.iter().enumerate() { formatted_possible_values.push_str(&format!("'{}'", s.get_name())); if i < possible_values.len() - 2 { formatted_possible_values.push_str(", "); } else if i < possible_values.len() - 1 { formatted_possible_values.push_str(" or "); } } err.insert( ContextKind::Suggested, ContextValue::StyledStrs(vec![format!( "It looks like '{value}' could match several values. Did you mean {formatted_possible_values}?" 
) .into()]), ); } impl TypedValueParser for ShortcutValueParser { type Value = String; fn parse_ref( &self, cmd: &clap::Command, arg: Option<&clap::Arg>, value: &std::ffi::OsStr, ) -> Result { let value = value .to_str() .ok_or(clap::Error::new(ErrorKind::InvalidUtf8))?; let matched_values: Vec<_> = self .0 .iter() .filter(|x| x.get_name_and_aliases().any(|name| name.starts_with(value))) .collect(); match matched_values.len() { 0 => Err(self.generate_clap_error(cmd, arg, value, &[])), 1 => Ok(matched_values[0].get_name().to_string()), _ => { if let Some(direct_match) = matched_values.iter().find(|x| x.get_name() == value) { Ok(direct_match.get_name().to_string()) } else { Err(self.generate_clap_error(cmd, arg, value, &matched_values)) } } } } fn possible_values(&self) -> Option + '_>> { Some(Box::new(self.0.iter().cloned())) } } impl From for ShortcutValueParser where I: IntoIterator, T: Into, { fn from(values: I) -> Self { Self(values.into_iter().map(|t| t.into()).collect()) } } #[cfg(test)] mod tests { use std::ffi::OsStr; use clap::{builder::PossibleValue, builder::TypedValueParser, error::ErrorKind, Command}; use super::ShortcutValueParser; #[test] fn test_parse_ref() { let cmd = Command::new("cmd"); let parser = ShortcutValueParser::new(["abcd"]); let values = ["a", "ab", "abc", "abcd"]; for value in values { let result = parser.parse_ref(&cmd, None, OsStr::new(value)); assert_eq!("abcd", result.unwrap()); } } #[test] fn test_parse_ref_with_invalid_value() { let cmd = Command::new("cmd"); let parser = ShortcutValueParser::new(["abcd"]); let invalid_values = ["e", "abe", "abcde"]; for invalid_value in invalid_values { let result = parser.parse_ref(&cmd, None, OsStr::new(invalid_value)); assert_eq!(ErrorKind::InvalidValue, result.unwrap_err().kind()); } } #[test] fn test_parse_ref_with_ambiguous_value() { let cmd = Command::new("cmd"); let parser = ShortcutValueParser::new(["abcd", "abef"]); let ambiguous_values = ["a", "ab"]; for ambiguous_value in 
#[cfg(test)]
mod tests {
    use std::ffi::OsStr;

    use clap::{builder::PossibleValue, builder::TypedValueParser, error::ErrorKind, Command};

    use super::ShortcutValueParser;

    #[test]
    fn test_parse_ref() {
        let cmd = Command::new("cmd");
        let parser = ShortcutValueParser::new(["abcd"]);
        let values = ["a", "ab", "abc", "abcd"];

        for value in values {
            let result = parser.parse_ref(&cmd, None, OsStr::new(value));
            assert_eq!("abcd", result.unwrap());
        }
    }

    #[test]
    fn test_parse_ref_with_invalid_value() {
        let cmd = Command::new("cmd");
        let parser = ShortcutValueParser::new(["abcd"]);
        let invalid_values = ["e", "abe", "abcde"];

        for invalid_value in invalid_values {
            let result = parser.parse_ref(&cmd, None, OsStr::new(invalid_value));
            assert_eq!(ErrorKind::InvalidValue, result.unwrap_err().kind());
        }
    }

    #[test]
    fn test_parse_ref_with_ambiguous_value() {
        let cmd = Command::new("cmd");
        let parser = ShortcutValueParser::new(["abcd", "abef"]);
        let ambiguous_values = ["a", "ab"];

        for ambiguous_value in ambiguous_values {
            let result = parser.parse_ref(&cmd, None, OsStr::new(ambiguous_value));
            assert_eq!(ErrorKind::InvalidValue, result.as_ref().unwrap_err().kind());
            assert!(result.unwrap_err().to_string().contains(&format!(
                "It looks like '{ambiguous_value}' could match several values. Did you mean 'abcd' or 'abef'?"
            )));
        }

        let result = parser.parse_ref(&cmd, None, OsStr::new("abc"));
        assert_eq!("abcd", result.unwrap());

        let result = parser.parse_ref(&cmd, None, OsStr::new("abe"));
        assert_eq!("abef", result.unwrap());
    }

    #[test]
    fn test_parse_ref_with_ambiguous_value_that_is_a_possible_value() {
        let cmd = Command::new("cmd");
        let parser = ShortcutValueParser::new(["abcd", "abcdefgh"]);
        let result = parser.parse_ref(&cmd, None, OsStr::new("abcd"));
        assert_eq!("abcd", result.unwrap());
    }

    #[test]
    #[cfg(unix)]
    fn test_parse_ref_with_invalid_utf8() {
        use std::os::unix::prelude::OsStrExt;

        let parser = ShortcutValueParser::new(["abcd"]);
        let cmd = Command::new("cmd");

        let result = parser.parse_ref(&cmd, None, OsStr::from_bytes(&[0xc3, 0x28]));
        assert_eq!(ErrorKind::InvalidUtf8, result.unwrap_err().kind());
    }

    #[test]
    fn test_ambiguous_word_same_meaning() {
        let cmd = Command::new("cmd");
        let parser = ShortcutValueParser::new([
            PossibleValue::new("atime").alias("access"),
            "status".into(),
        ]);
        // Even though "a" is ambiguous (it might mean "atime" or "access"),
        // the meaning is uniquely defined, therefore accept it.
        let atime_values = [
            // spell-checker:disable-next-line
            "atime", "atim", "at", "a", "access", "acces", "acce", "acc", "ac",
        ];
        // The list ends with the one-letter shortcut "s" (previously a second
        // "st"), so every prefix length of "status" is exercised.
        // spell-checker:disable-next-line
        let status_values = ["status", "statu", "stat", "sta", "st", "s"];

        for value in atime_values {
            let result = parser.parse_ref(&cmd, None, OsStr::new(value));
            assert_eq!("atime", result.unwrap());
        }
        for value in status_values {
            let result = parser.parse_ref(&cmd, None, OsStr::new(value));
            assert_eq!("status", result.unwrap());
        }
    }
}