os_display-0.1.3/.cargo_vcs_info.json0000644000000001360000000000100132030ustar { "git": { "sha1": "ad75962fade411cf00890cc2a11a1f16c6e388ca" }, "path_in_vcs": "" }os_display-0.1.3/CHANGELOG.md000064400000000000000000000007420072674642500136370ustar 00000000000000## v0.1.3 (2022-01-22) - Add `Quoted::external()` to escape double quotes for native commands on Windows. - Quote `U+2800 BRAILLE PATTERN BLANK` for clarity. ## v0.1.2 (2021-11-08) - Escape dangerous control codes for bidirectional text. See also: [CVE-2021-42574](https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html). ## v0.1.1 (2021-10-14) - Escape unicode control characters like `U+0085 NEXT LINE` and `U+2028 LINE SEPARATOR`. ## v0.1.0 (2021-10-13) - Initial release. os_display-0.1.3/Cargo.lock0000644000000010170000000000100111550ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. [[package]] name = "os_display" version = "0.1.3" dependencies = [ "unicode-width 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "unicode-width" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" [metadata] "checksum unicode-width 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" os_display-0.1.3/Cargo.toml0000644000000022110000000000100111750ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" name = "os_display" version = "0.1.3" authors = ["Jan Verbeek "] exclude = ["fuzz", ".gitignore", ".github", "scripts"] description = "Display strings in a safe platform-appropriate way" documentation = "https://docs.rs/os_display" readme = "README.md" keywords = ["shell", "cli", "terminal", "text", "no_std"] categories = ["command-line-interface", "text-processing"] license = "MIT" repository = "https://github.com/blyxxyz/os_display" [package.metadata.docs.rs] all-features = true [dependencies.unicode-width] version = "0.1.9" [features] alloc = [] default = ["native", "alloc", "std"] native = [] std = ["alloc"] unix = [] windows = [] os_display-0.1.3/Cargo.toml.orig000064400000000000000000000015530072674642500147160ustar 00000000000000[package] name = "os_display" version = "0.1.3" authors = ["Jan Verbeek "] description = "Display strings in a safe platform-appropriate way" keywords = ["shell", "cli", "terminal", "text", "no_std"] categories = ["command-line-interface", "text-processing"] repository = "https://github.com/blyxxyz/os_display" documentation = "https://docs.rs/os_display" license = "MIT" readme = "README.md" edition = "2018" exclude = ["fuzz", ".gitignore", ".github", "scripts"] [dependencies] unicode-width = "0.1.9" [features] default = ["native", "alloc", "std"] # Enable quoting for OsStr and in the style of the current platform native = [] # Enable bash/ksh-style quoting unix = [] # Enable PowerShell-style quoting windows = [] # Use the relevant parts of the standard library alloc = [] std = ["alloc"] [package.metadata.docs.rs] all-features = true os_display-0.1.3/LICENSE000064400000000000000000000020370072674642500130320ustar 00000000000000Copyright (c) 2021 Jan Verbeek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. os_display-0.1.3/README.md000064400000000000000000000171270072674642500133120ustar 00000000000000# `os_display` [![Crates.io](https://img.shields.io/crates/v/os_display.svg)](https://crates.io/crates/os_display) [![API reference](https://docs.rs/os_display/badge.svg)](https://docs.rs/os_display/) [![MSRV](https://img.shields.io/badge/MSRV-1.31-blue)](https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html) [![CI](https://img.shields.io/github/workflow/status/blyxxyz/os_display/CI/master)](https://github.com/blyxxyz/os_display/actions) Printing strings can be tricky. They may contain control codes that mess up the message or the whole terminal. On Unix even filenames can contain characters like that. Filenames may also contain invalid unicode, which is not preserved by [`Path::display`](https://doc.rust-lang.org/std/path/struct.Path.html#method.display). Finally, they can contain special characters that aren't safe to use in a command without quoting or escaping. This library lets you add quoting to filenames (and other strings) to display them more safely and usefully. The goal is to render them in such a way that they can be copied and pasted back into a shell without information loss. 
On Unix (and other platforms) values are quoted using bash/ksh syntax, while on Windows PowerShell syntax is used. ## When should I use this? This library is best suited for command line programs that deal with arbitrary filenames or other "dirty" text. `mv` for example is the very tool you use to rename files with problematic names, so it's nice if its messages handle them well. Programs that aren't expected to deal with weird data don't get as much benefit. The output is made for shells, so displaying it in e.g. a GUI may not make sense. Most programs get along fine without this. You likely don't strictly need it, but you may find it a nice improvement. ## Usage Import the `Quotable` trait: ```rust use os_display::Quotable; ``` This adds two methods to the common string types (including `OsStr`): `.quote()` and `.maybe_quote()`. They return `Quoted`, a wrapper with a custom `Display` implementation. `.quote()` always puts quotes around the text: ```rust // Found file 'filename' println!("Found file {}", "filename".quote()); // Found file "foo'bar" println!("Found file {}", "foo'bar".quote()); // Unix: Found file $'foo\nbar' // Windows: Found file "foo`nbar" println!("Found file {}", "foo\nbar".quote()); ``` `.maybe_quote()` only adds them if necessary because of whitespace or special characters: ```rust // filename: Not found println!("{}: Not found", "filename".maybe_quote()); // 'foo bar': Not found println!("{}: Not found", "foo bar".maybe_quote()); // '*?$': Not found println!("{}: Not found", "*?$".maybe_quote()); ``` `.quote()` is best used inside longer sentences while `.maybe_quote()` can be used for text that's already separated some other way (like by a colon). ## Limitations - Unicode may be quoted but only control characters are escaped. The printed text can still look weird, and a few (buggy) terminals drop certain characters. - This library should **not** be used to interpolate text into shell scripts. 
It's designed for readability, not absolute safety. Consider using the [`shell-escape`](https://crates.io/crates/shell-escape) crate instead (or ideally, passing in the values in some other way). - The output is not compatible with every single shell. - [PowerShell treats quotes differently in arguments to external commands](https://stackoverflow.com/questions/6714165). This library defaults to quoting for internal commandlets, which may not be what you want. The `Quoted::external()` method toggles this. - I'm not a Unicode expert. The first release of this crate had multiple oversights and there may be more. ## Invalid unicode On Unix: ```rust use std::ffi::OsStr; use std::os::unix::ffi::OsStrExt; // \xFF makes this invalid UTF-8, so to_str() would fail let bad_string = OsStr::from_bytes(&[b'x', 0xFF, b'y']); assert_eq!(bad_string.quote().to_string(), r#"$'x\xFFy'"#); ``` On Windows: ```rust use std::ffi::OsString; use std::os::windows::ffi::OsStringExt; // 0xD800 is an unpaired surrogate, making this invalid UTF-16 let bad_string = OsString::from_wide(&[b'a' as u16, 0xD800, b'b' as u16]); assert_eq!(bad_string.quote().to_string(), r#""a`u{D800}b""#); ``` ## Zero-width unicode Some codepoints are zero-width. They can make a string invisible, or they can make it hard to select. GNU tools struggle with this: ```console $ wc $'\u200b' wc: ​: No such file or directory ``` `os_display` adds quotes in such cases: ```rust assert_eq!("\u{200B}".maybe_quote().to_string(), "'\u{200B}'"); ``` It still misleadingly looks like `''` when printed, but it's possible to copy and paste it and get the right result. ## Bidirectional unicode A carefully-crafted string can move part of itself to the end of the line: ```console $ wc $'filename\u202E\u2066 [This does not belong!]\u2069\u2066' wc: 'filename': No such file or directory [This does not belong!] ``` This is known as a [*Trojan Source*](https://trojansource.codes/) attack. It uses control codes for bidirectional text. 
`os_display` escapes those control codes if they're not properly terminated. ## Feature flags By default you can only use the current platform's quoting style. That's appropriate most of the time. ### `windows`/`unix` The `windows` and `unix` optional features can be enabled to add constructors to `Quoted`. `Quoted::unix("some string")` will quote with bash/ksh syntax no matter the platform, and `Quoted::windows("etc")` uses PowerShell syntax. `Quoted::unix_raw` and `Quoted::windows_raw` take `&[u8]` (for malformed UTF-8) and `&[u16]` (for malformed UTF-16), respectively. ### `native` The `native` feature (enabled by default) is required for the `Quotable` trait and the `Quoted::native(&str)` and `Quoted::native_raw(&OsStr)` constructors. If it's not enabled then the quoting style has to be chosen explicitly. ### `alloc`/`std` This crate is `no_std`-compatible if the `alloc` and/or `std` features are disabled. The `std` feature is required to quote `OsStr`s. The `alloc` feature is required for `Quoted::windows_raw`. ## Alternative constructors `Quoted` has constructors for specific styles as well as `Quoted::native()` and `Quoted::native_raw()`. These can be used as an alternative to the `Quotable` trait if you prefer boring functions. By default quotes are always added. To get behavior like `.maybe_quote()` use the `.force()` method: ```rust println!("{}", Quoted::native(x).force(false)); ``` ## Testing The Unix implementation has been [fuzzed](https://github.com/rust-fuzz/cargo-fuzz) against bash, zsh, mksh, ksh93 and busybox to ensure all output is interpreted back as the original string. It has been fuzzed to a more limited extent against fish, dash, tcsh, posh, and yash (which don't support all of the required syntax). The PowerShell implementation has been fuzzed against PowerShell Core 7.1.4 running on Linux. Both implementations have been fuzzed to test their protection against Trojan Source attacks. 
## Acknowledgments This library is modeled after the quoting done by [Gnulib](https://www.gnu.org/software/gnulib/) as seen in the GNU coreutils. The behavior is not identical, however: - GNU uses octal escapes, like `\377` instead of `\xFF`. - GNU eagerly switches quoting style midway through, like `''$'\n''xyz'` instead of `$'\nxyz'`. `os_display` avoids this unless necessary. - GNU escapes unassigned codepoints instead of leaving their handling up to the terminal. - GNU doesn't handle zero-width codepoints specially. The first version of this code was written for the [uutils project](https://github.com/uutils/coreutils). The feedback and the opportunity to use it in a large codebase were helpful. os_display-0.1.3/clippy.toml000064400000000000000000000000160072674642500142150ustar 00000000000000msrv = "1.31" os_display-0.1.3/examples/echo.rs000064400000000000000000000010770072674642500151320ustar 00000000000000use os_display::Quotable; fn main() { for arg in std::env::args_os().skip(1) { println!("Native: {}", arg.maybe_quote()); #[cfg(all(windows, feature = "unix"))] { if let Some(arg) = arg.to_str() { println!("Unix: {}", os_display::Quoted::unix(arg).force(false)); } } #[cfg(all(not(windows), feature = "windows"))] { if let Some(arg) = arg.to_str() { println!("Windows: {}", os_display::Quoted::windows(arg).force(false)); } } } } os_display-0.1.3/src/lib.rs000064400000000000000000000535620072674642500137410ustar 00000000000000//! Formatters for printing filenames and other strings in a terminal, with //! attention paid to special characters and invalid unicode. //! //! They will wrap quotes around them and add the necessary escapes to make //! them copy/paste-able into a shell. //! //! The [`Quotable`] trait adds `quote` and `maybe_quote` methods to string //! types. The [`Quoted`] type has constructors for more explicit control. //! //! # Examples //! ``` //! use std::path::Path; //! use os_display::Quotable; //! //! let path = Path::new("foo/bar.baz"); //! 
//! // Found file 'foo/bar.baz' //! println!("Found file {}", path.quote()); //! // foo/bar.baz: Not found //! println!("{}: Not found", path.maybe_quote()); //! ``` //! //! If the `windows`/`unix` features are enabled: //! //! ``` //! use os_display::Quoted; //! //! // "foo`nbar" //! # #[cfg(feature = "windows")] //! println!("{}", Quoted::windows("foo\nbar")); //! // $'foo\nbar' //! # #[cfg(feature = "unix")] //! println!("{}", Quoted::unix("foo\nbar")); //! ``` #![no_std] #![forbid(unsafe_code)] #![warn(missing_docs)] use core::fmt::{self, Display, Formatter}; #[cfg(feature = "std")] extern crate std; // alloc was unstable in 1.31, so do some shuffling to avoid it unless necessary. // 1.31 works with no features and with all features. // 1.36 is the minimum version that supports alloc without std. #[cfg(all(feature = "alloc", not(feature = "std")))] extern crate alloc; #[cfg(feature = "windows")] #[cfg(feature = "std")] use std as alloc; #[cfg(feature = "native")] #[cfg(feature = "std")] use std::{ffi::OsStr, path::Path}; #[cfg(any(feature = "unix", all(feature = "native", not(windows))))] mod unix; #[cfg(any(feature = "windows", all(feature = "native", windows)))] mod windows; /// A wrapper around string types for displaying with quoting and escaping applied. 
#[derive(Debug, Copy, Clone)] pub struct Quoted<'a> { source: Kind<'a>, force_quote: bool, #[cfg(any(feature = "windows", all(feature = "native", windows)))] external: bool, } #[derive(Debug, Copy, Clone)] enum Kind<'a> { #[cfg(any(feature = "unix", all(feature = "native", not(windows))))] Unix(&'a str), #[cfg(feature = "unix")] UnixRaw(&'a [u8]), #[cfg(any(feature = "windows", all(feature = "native", windows)))] Windows(&'a str), #[cfg(feature = "windows")] #[cfg(feature = "alloc")] WindowsRaw(&'a [u16]), #[cfg(feature = "native")] #[cfg(feature = "std")] NativeRaw(&'a std::ffi::OsStr), } impl<'a> Quoted<'a> { fn new(source: Kind<'a>) -> Self { Quoted { source, force_quote: true, #[cfg(any(feature = "windows", all(feature = "native", windows)))] external: false, } } /// Quote a string with the default style for the platform. /// /// On Windows this is PowerShell syntax, on all other platforms this is /// bash/ksh syntax. #[cfg(feature = "native")] pub fn native(text: &'a str) -> Self { #[cfg(windows)] return Quoted::new(Kind::Windows(text)); #[cfg(not(windows))] return Quoted::new(Kind::Unix(text)); } /// Quote an `OsStr` with the default style for the platform. /// /// On platforms other than Windows, Unix and WASI, if the encoding is /// invalid, the `Debug` representation will be used. #[cfg(feature = "native")] #[cfg(feature = "std")] pub fn native_raw(text: &'a OsStr) -> Self { Quoted::new(Kind::NativeRaw(text)) } /// Quote a string using bash/ksh syntax. /// /// # Optional /// This requires the optional `unix` feature. #[cfg(feature = "unix")] pub fn unix(text: &'a str) -> Self { Quoted::new(Kind::Unix(text)) } /// Quote possibly invalid UTF-8 using bash/ksh syntax. /// /// # Optional /// This requires the optional `unix` feature. #[cfg(feature = "unix")] pub fn unix_raw(bytes: &'a [u8]) -> Self { Quoted::new(Kind::UnixRaw(bytes)) } /// Quote a string using PowerShell syntax. /// /// # Optional /// This requires the optional `windows` feature. 
#[cfg(feature = "windows")] pub fn windows(text: &'a str) -> Self { Quoted::new(Kind::Windows(text)) } /// Quote possibly invalid UTF-16 using PowerShell syntax. /// /// # Optional /// This requires the optional `windows` feature and the (default) `alloc` feature. #[cfg(feature = "windows")] #[cfg(feature = "alloc")] pub fn windows_raw(units: &'a [u16]) -> Self { Quoted::new(Kind::WindowsRaw(units)) } /// Toggle forced quoting. If `true`, quotes are added even if no special /// characters are present. /// /// Defaults to `true`. pub fn force(mut self, force: bool) -> Self { self.force_quote = force; self } /// When quoting for PowerShell, toggle whether to quote for external programs. /// /// If enabled, double quotes (and sometimes backslashes) will be escaped so /// that they can be passed to external programs. /// /// If disabled, quoting will suit internal commandlets and .NET functions. /// Strings that look like options or numbers will be quoted. /// /// It is sadly impossible to quote a string such that it's suitable for both /// external and internal commands. /// /// The experimental `PSNativeCommandArgumentPassing` feature in PowerShell 7.2 /// disables the stripping of double quotes and backslashes. If it's enabled /// then this setting should be disabled. /// /// Defaults to `false`. This could change in a future (breaking) release. /// /// # Optional /// This requires either the `windows` or the `native` feature. It has no effect /// on Unix-style quoting. 
#[cfg(any(feature = "windows", feature = "native"))] #[allow(unused_mut, unused_variables)] pub fn external(mut self, external: bool) -> Self { #[cfg(any(feature = "windows", windows))] { self.external = external; } self } } impl<'a> Display for Quoted<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self.source { #[cfg(feature = "native")] #[cfg(feature = "std")] Kind::NativeRaw(text) => { #[cfg(unix)] use std::os::unix::ffi::OsStrExt; #[cfg(target_os = "wasi")] use std::os::wasi::ffi::OsStrExt; #[cfg(windows)] use std::os::windows::ffi::OsStrExt; #[cfg(windows)] match text.to_str() { Some(text) => windows::write(f, text, self.force_quote, self.external), None => { windows::write_escaped(f, decode_utf16(text.encode_wide()), self.external) } } #[cfg(any(unix, target_os = "wasi"))] match text.to_str() { Some(text) => unix::write(f, text, self.force_quote), None => unix::write_escaped(f, text.as_bytes()), } #[cfg(not(any(windows, unix, target_os = "wasi")))] match text.to_str() { Some(text) => unix::write(f, text, self.force_quote), // Debug is our best shot for not losing information. // But you probably can't paste it into a shell. None => write!(f, "{:?}", text), } } #[cfg(any(feature = "unix", all(feature = "native", not(windows))))] Kind::Unix(text) => unix::write(f, text, self.force_quote), #[cfg(feature = "unix")] Kind::UnixRaw(bytes) => match core::str::from_utf8(bytes) { Ok(text) => unix::write(f, text, self.force_quote), Err(_) => unix::write_escaped(f, bytes), }, #[cfg(any(feature = "windows", all(feature = "native", windows)))] Kind::Windows(text) => windows::write(f, text, self.force_quote, self.external), #[cfg(feature = "windows")] #[cfg(feature = "alloc")] // Avoiding this allocation is possible in theory, but it'd require either // complicating or slowing down the common case. // Perhaps we could offer a non-allocating API for known-invalid UTF-16 strings // that we pass straight to write_escaped(), but it seems a bit awkward. 
// Please open an issue if you have a need for this. Kind::WindowsRaw(units) => match alloc::string::String::from_utf16(units) { Ok(text) => windows::write(f, &text, self.force_quote, self.external), Err(_) => { windows::write_escaped(f, decode_utf16(units.iter().cloned()), self.external) } }, } } } #[cfg(any(feature = "windows", all(feature = "native", feature = "std", windows)))] #[cfg(feature = "alloc")] fn decode_utf16(units: impl IntoIterator) -> impl Iterator> { core::char::decode_utf16(units).map(|res| res.map_err(|err| err.unpaired_surrogate())) } /// Characters that may not be safe to print in a terminal. /// /// This includes all the ASCII control characters. fn requires_escape(ch: char) -> bool { ch.is_control() || is_separator(ch) } /// U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR are currently the only /// in their categories. The terminals I tried don't treat them very specially, /// but gedit does. fn is_separator(ch: char) -> bool { ch == '\u{2028}' || ch == '\u{2029}' } /// These two ranges in PropList.txt: /// LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE /// LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE fn is_bidi(ch: char) -> bool { match ch { '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}' => true, _ => false, } } /// Check whether text uses bidi in a potentially problematic way. /// /// See https://trojansource.codes/ and /// https://www.unicode.org/reports/tr9/tr9-42.html. /// /// If text fails this check then it's handled by write_escaped(), which /// escapes these bidi control characters no matter what. /// /// We can safely assume that there are no newlines (or unicode separators) /// in the text because those would get it sent to write_escaped() earlier. /// In unicode terms, this is all a single paragraph. 
#[inline(never)]
fn is_suspicious_bidi(text: &str) -> bool {
    // Which family of bidi control opened the current nesting level.
    // Embeddings/overrides and isolates have different terminators.
    #[derive(Clone, Copy, PartialEq)]
    enum Kind {
        Formatting,
        Isolate,
    }
    const STACK_SIZE: usize = 16;
    // Can't use a Vec because of no_std
    let mut stack: [Option<Kind>; STACK_SIZE] = [None; STACK_SIZE];
    // Number of currently open (unterminated) controls, i.e. the stack height.
    let mut pos = 0;
    for ch in text.chars() {
        match ch {
            // LRE, RLE, LRO, RLO: open an embedding/override.
            '\u{202A}' | '\u{202B}' | '\u{202D}' | '\u{202E}' => {
                if pos >= STACK_SIZE {
                    // Suspicious amount of nesting.
                    return true;
                }
                stack[pos] = Some(Kind::Formatting);
                pos += 1;
            }
            // PDF: must close an embedding/override.
            '\u{202C}' => {
                if pos == 0 {
                    // Unpaired terminator.
                    // Not necessarily dangerous, but suspicious and
                    // could disrupt preceding text.
                    return true;
                }
                pos -= 1;
                if stack[pos] != Some(Kind::Formatting) {
                    // Terminator doesn't match.
                    // UAX #9 says to pop the stack until we find a match.
                    // But we'll keep things simple and cautious.
                    return true;
                }
            }
            // LRI, RLI, FSI: open an isolate.
            '\u{2066}' | '\u{2067}' | '\u{2068}' => {
                if pos >= STACK_SIZE {
                    return true;
                }
                stack[pos] = Some(Kind::Isolate);
                pos += 1;
            }
            // PDI: must close an isolate.
            '\u{2069}' => {
                if pos == 0 {
                    return true;
                }
                pos -= 1;
                if stack[pos] != Some(Kind::Isolate) {
                    return true;
                }
            }
            _ => (),
        }
    }
    // Anything still open at the end of the text is unterminated.
    pos != 0
}

#[cfg(feature = "native")]
mod native {
    use super::*;

    /// An extension trait to apply quoting to strings.
    ///
    /// This is implemented on [`str`], [`OsStr`] and [`Path`].
    ///
    /// For finer control, see the constructors on [`Quoted`].
    pub trait Quotable {
        /// Returns an object that implements [`Display`] for printing strings with
        /// proper quoting and escaping for the platform.
        ///
        /// On Unix this corresponds to bash/ksh syntax, on Windows PowerShell syntax
        /// is used.
        ///
        /// # Examples
        ///
        /// ```
        /// use std::path::Path;
        /// use os_display::Quotable;
        ///
        /// let path = Path::new("foo/bar.baz");
        ///
        /// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'"
        /// ```
        fn quote(&self) -> Quoted<'_>;

        /// Like `quote()`, but don't actually add quotes unless necessary because of
        /// whitespace or special characters.
/// /// # Examples /// /// ``` /// use std::path::Path; /// use os_display::Quotable; /// /// let foo = Path::new("foo/bar.baz"); /// let bar = "foo bar"; /// /// println!("{}: Not found", foo.maybe_quote()); // Prints "foo/bar.baz: Not found" /// println!("{}: Not found", bar.maybe_quote()); // Prints "'foo bar': Not found" /// ``` fn maybe_quote(&self) -> Quoted<'_> { let mut quoted = self.quote(); quoted.force_quote = false; quoted } } impl Quotable for str { fn quote(&self) -> Quoted<'_> { Quoted::native(self) } } #[cfg(feature = "std")] impl Quotable for OsStr { fn quote(&self) -> Quoted<'_> { Quoted::native_raw(self) } } #[cfg(feature = "std")] impl Quotable for Path { fn quote(&self) -> Quoted<'_> { Quoted::native_raw(self.as_ref()) } } impl<'a, T: Quotable + ?Sized> From<&'a T> for Quoted<'a> { fn from(val: &'a T) -> Self { val.quote() } } } #[cfg(feature = "native")] pub use crate::native::Quotable; #[cfg(feature = "std")] #[cfg(test)] mod tests { #![allow(unused)] use super::*; use std::string::{String, ToString}; const BOTH_ALWAYS: &[(&str, &str)] = &[ ("foo", "'foo'"), ("foo/bar.baz", "'foo/bar.baz'"), ("can't", r#""can't""#), ]; const BOTH_MAYBE: &[(&str, &str)] = &[ ("foo", "foo"), ("foo bar", "'foo bar'"), ("$foo", "'$foo'"), ("-", "-"), ("a#b", "a#b"), ("#ab", "'#ab'"), ("a~b", "a~b"), ("!", "'!'"), ("}", ("'}'")), ("\u{200B}", "'\u{200B}'"), ("\u{200B}a", "'\u{200B}a'"), ("a\u{200B}", "a\u{200B}"), ("\u{2000}", "'\u{2000}'"), ("\u{2800}", "'\u{2800}'"), // Odd but safe bidi ( "\u{2067}\u{2066}abc\u{2069}\u{2066}def\u{2069}\u{2069}", "'\u{2067}\u{2066}abc\u{2069}\u{2066}def\u{2069}\u{2069}'", ), ]; const UNIX_ALWAYS: &[(&str, &str)] = &[ ("", "''"), (r#"can'"t"#, r#"'can'\''"t'"#), (r#"can'$t"#, r#"'can'\''$t'"#), ("foo\nb\ta\r\\\0`r", r#"$'foo\nb\ta\r\\\x00`r'"#), ("foo\x02", r#"$'foo\x02'"#), (r#"'$''"#, r#"\''$'\'\'"#), ]; const UNIX_MAYBE: &[(&str, &str)] = &[ ("", "''"), ("-x", "-x"), ("a,b", "a,b"), ("a\\b", "'a\\b'"), ("\x02AB", 
"$'\\x02'$'AB'"), ("\x02GH", "$'\\x02GH'"), ("\t", r#"$'\t'"#), ("\r", r#"$'\r'"#), ("\u{85}", r#"$'\xC2\x85'"#), ("\u{85}a", r#"$'\xC2\x85'$'a'"#), ("\u{2028}", r#"$'\xE2\x80\xA8'"#), // Dangerous bidi ( "user\u{202E} \u{2066}// Check if admin\u{2069} \u{2066}", r#"$'user\xE2\x80\xAE \xE2\x81\xA6// Check if admin\xE2\x81\xA9 \xE2\x81\xA6'"#, ), ]; const UNIX_RAW: &[(&[u8], &str)] = &[ (b"foo\xFF", r#"$'foo\xFF'"#), (b"foo\xFFbar", r#"$'foo\xFF'$'bar'"#), ]; #[cfg(feature = "unix")] #[test] fn unix() { for &(orig, expected) in UNIX_ALWAYS.iter().chain(BOTH_ALWAYS) { assert_eq!(Quoted::unix(orig).to_string(), expected); } for &(orig, expected) in UNIX_MAYBE.iter().chain(BOTH_MAYBE) { assert_eq!(Quoted::unix(orig).force(false).to_string(), expected); } for &(orig, expected) in UNIX_RAW { assert_eq!(Quoted::unix_raw(orig).to_string(), expected); } let bidi_ok = nest_bidi(16); assert_eq!( Quoted::unix(&bidi_ok).to_string(), "'".to_string() + &bidi_ok + "'" ); let bidi_too_deep = nest_bidi(17); assert!(Quoted::unix(&bidi_too_deep).to_string().starts_with('$')); } const WINDOWS_ALWAYS: &[(&str, &str)] = &[ (r#"foo\bar"#, r#"'foo\bar'"#), (r#"can'"t"#, r#"'can''"t'"#), (r#"can'$t"#, r#"'can''$t'"#), ("foo\nb\ta\r\\\0`r", r#""foo`nb`ta`r\`0``r""#), ("foo\x02", r#""foo`u{02}""#), (r#"'$''"#, r#"'''$'''''"#), ]; const WINDOWS_MAYBE: &[(&str, &str)] = &[ ("--%", "'--%'"), ("--ok", "--ok"), ("—x", "'—x'"), ("a,b", "'a,b'"), ("a\\b", "a\\b"), ("‘", r#""‘""#), (r#"‘""#, r#"''‘"'"#), ("„\0", r#""`„`0""#), ("\t", r#""`t""#), ("\r", r#""`r""#), ("\u{85}", r#""`u{85}""#), ("\u{2028}", r#""`u{2028}""#), ( "user\u{202E} \u{2066}// Check if admin\u{2069} \u{2066}", r#""user`u{202E} `u{2066}// Check if admin`u{2069} `u{2066}""#, ), ]; const WINDOWS_RAW: &[(&[u16], &str)] = &[(&[b'x' as u16, 0xD800], r#""x`u{D800}""#)]; const WINDOWS_EXTERNAL: &[(&str, &str)] = &[ ("", r#"'""'"#), (r#"\""#, r#"'\\\"'"#), (r#"\\""#, r#"'\\\\\"'"#), (r#"\x\""#, r#"'\x\\\"'"#), (r#"\x\"'""#, 
r#"'\x\\\"''\"'"#), ("\n\\\"", r#""`n\\\`"""#), ("\n\\\\\"", r#""`n\\\\\`"""#), ("\n\\x\\\"", r#""`n\x\\\`"""#), ("\n\\x\\\"'\"", r#""`n\x\\\`"'\`"""#), ("-x:", "'-x:'"), ("-x.x", "'-x.x'"), ("--%", r#"'"--%"'"#), ("--ok", "--ok"), ]; const WINDOWS_INTERNAL: &[(&str, &str)] = &[ ("", "''"), (r#"can'"t"#, r#"'can''"t'"#), ("-x", "'-x'"), ("—x", "'—x'"), ("‘\"", r#"''‘"'"#), ("--%", "'--%'"), ("--ok", "--ok"), ]; #[cfg(feature = "windows")] #[test] fn windows() { for &(orig, expected) in WINDOWS_ALWAYS.iter().chain(BOTH_ALWAYS) { assert_eq!(Quoted::windows(orig).to_string(), expected); } for &(orig, expected) in WINDOWS_MAYBE.iter().chain(BOTH_MAYBE) { assert_eq!(Quoted::windows(orig).force(false).to_string(), expected); } for &(orig, expected) in WINDOWS_RAW { assert_eq!(Quoted::windows_raw(orig).to_string(), expected); } for &(orig, expected) in WINDOWS_EXTERNAL { assert_eq!( Quoted::windows(orig) .force(false) .external(true) .to_string(), expected ); } for &(orig, expected) in WINDOWS_INTERNAL { assert_eq!( Quoted::windows(orig) .force(false) .external(false) .to_string(), expected ); } let bidi_ok = nest_bidi(16); assert_eq!( Quoted::windows(&bidi_ok).to_string(), "'".to_string() + &bidi_ok + "'" ); let bidi_too_deep = nest_bidi(17); assert!(Quoted::windows(&bidi_too_deep).to_string().contains('`')); } #[cfg(feature = "native")] #[cfg(windows)] #[test] fn native() { use std::ffi::OsString; use std::os::windows::ffi::OsStringExt; assert_eq!("'\"".quote().to_string(), r#"'''"'"#); assert_eq!("x\0".quote().to_string(), r#""x`0""#); assert_eq!( OsString::from_wide(&[b'x' as u16, 0xD800]) .quote() .to_string(), r#""x`u{D800}""# ); } #[cfg(feature = "native")] #[cfg(any(unix, target_os = "wasi"))] #[test] fn native() { #[cfg(unix)] use std::os::unix::ffi::OsStrExt; #[cfg(target_os = "wasi")] use std::os::wasi::ffi::OsStrExt; assert_eq!("'\"".quote().to_string(), r#"\''"'"#); assert_eq!("x\0".quote().to_string(), r#"$'x\x00'"#); assert_eq!( 
OsStr::from_bytes(b"x\xFF").quote().to_string(), r#"$'x\xFF'"# ); } #[cfg(feature = "native")] #[cfg(not(any(windows, unix, target_os = "wasi")))] #[test] fn native() { assert_eq!("'\"".quote().to_string(), r#"\''"'"#); assert_eq!("x\0".quote().to_string(), r#"$'x\x00'"#); } #[cfg(feature = "native")] #[test] fn can_quote_types() { use std::borrow::{Cow, ToOwned}; "foo".quote(); "foo".to_owned().quote(); Cow::Borrowed("foo").quote(); OsStr::new("foo").quote(); OsStr::new("foo").to_owned().quote(); Cow::Borrowed(OsStr::new("foo")).quote(); Path::new("foo").quote(); Path::new("foo").to_owned().quote(); Cow::Borrowed(Path::new("foo")).quote(); } fn nest_bidi(n: usize) -> String { let mut out = String::new(); for _ in 0..n { out.push('\u{2066}'); } out.push('a'); for _ in 0..n { out.push('\u{2069}'); } out } } os_display-0.1.3/src/unix.rs000064400000000000000000000230400072674642500141420ustar 00000000000000use core::fmt::{self, Formatter, Write}; use core::str::from_utf8; use unicode_width::UnicodeWidthChar; /// Characters with special meaning outside quotes. /// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02 /// I don't know why % is in there. GNU doesn't quote it either. /// zsh and fish have trouble with standalone {}. /// ^ was used for piping in old shells and GNU quotes it. const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\\\"'*?[]=^{} "; /// Characters with a special meaning at the beginning of a name. /// ~ expands a home directory. /// # starts a comment. /// ! is a common extension for expanding the shell history. const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '!']; /// Characters that are interpreted specially in a double-quoted string. 
const DOUBLE_UNSAFE: &[u8] = &[b'"', b'`', b'$', b'\\']; pub(crate) fn write(f: &mut Formatter<'_>, text: &str, force_quote: bool) -> fmt::Result { let mut is_single_safe = true; let mut is_double_safe = true; let mut requires_quote = force_quote; let mut is_bidi = false; if !requires_quote { if let Some(first) = text.chars().next() { if SPECIAL_SHELL_CHARS_START.contains(&first) { requires_quote = true; } // gnome-terminal (VTE), xterm, urxvt, tmux, screen, and VS Code's // builtin terminal all include zero-width characters at the end of the // selection but not at the start. // terminology and st seem to have trouble displaying them at all. // So if there's a zero-width character at the start we need quotes, but // if it's at the end we don't need to bother. // (This also ensures non-empty zero-width strings end up quoted.) if !requires_quote && first.width().unwrap_or(0) == 0 { // .width() returns Some(1) for unassigned codepoints. // This means we can't pre-emptively quote unknown codepoints in // case they become zero-width in the future. // (None is only returned for certain ASCII characters.) requires_quote = true; } } else { // Empty string requires_quote = true; } } for ch in text.chars() { if ch.is_ascii() { let ch = ch as u8; if ch == b'\'' { is_single_safe = false; } if is_double_safe && DOUBLE_UNSAFE.contains(&ch) { is_double_safe = false; } if !requires_quote && SPECIAL_SHELL_CHARS.contains(&ch) { requires_quote = true; } if ch.is_ascii_control() { return write_escaped(f, text.as_bytes()); } } else { if !requires_quote && (ch.is_whitespace() || ch == '\u{2800}') { // yash splits on unicode whitespace. // fish ignores unicode whitespace at the start of a bare string. // Therefore we quote unicode whitespace. // U+2800 BRAILLE PATTERN BLANK is not technically whitespace but we // quote it too. // This check goes stale when new whitespace codepoints are assigned. 
requires_quote = true; } if crate::is_bidi(ch) { is_bidi = true; } if crate::requires_escape(ch) { return write_escaped(f, text.as_bytes()); } } } if is_bidi && crate::is_suspicious_bidi(text) { return write_escaped(f, text.as_bytes()); } if !requires_quote { f.write_str(text) } else if is_single_safe { write_simple(f, text, '\'') } else if is_double_safe { write_simple(f, text, '\"') } else { write_single_escaped(f, text) } } fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result { f.write_char(quote)?; f.write_str(text)?; f.write_char(quote)?; Ok(()) } fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result { let mut iter = text.split('\''); if let Some(chunk) = iter.next() { if !chunk.is_empty() { write_simple(f, chunk, '\'')?; } } for chunk in iter { f.write_str("\\'")?; if !chunk.is_empty() { write_simple(f, chunk, '\'')?; } } Ok(()) } /// Write using the syntax described here: /// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html /// /// Supported by these shells: /// - bash /// - zsh /// - busybox sh /// - mksh /// - ksh93 /// /// Not supported by these: /// - fish /// - dash /// - tcsh /// /// There's a proposal to add it to POSIX: /// https://www.austingroupbugs.net/view.php?id=249 pub(crate) fn write_escaped(f: &mut Formatter<'_>, text: &[u8]) -> fmt::Result { f.write_str("$'")?; // ksh variants accept more than two digits for a \x escape code, // e.g. \xA691. We have to take care to not accidentally output // something like that. If necessary we interrupt the quoting with // `'$'`. let mut in_escape = false; for chunk in from_utf8_iter(text) { match chunk { Ok(chunk) => { for ch in chunk.chars() { let was_escape = in_escape; in_escape = false; match ch { '\n' => f.write_str("\\n")?, '\t' => f.write_str("\\t")?, '\r' => f.write_str("\\r")?, // We could do \a, \b, \f, \v, but those are // rare enough to be confusing. 
/// Splits `bytes` into runs of valid UTF-8 and individual undecodable bytes.
///
/// The returned iterator yields, in input order:
/// - `Ok(&str)` for each maximal run of valid UTF-8, and
/// - `Err(byte)` for each byte at which decoding fails. Invalid and truncated
///   sequences are reported one byte at a time.
///
/// Empty input yields nothing, and no `Ok("")` item is ever produced.
fn from_utf8_iter(bytes: &[u8]) -> impl Iterator<Item = Result<&str, u8>> {
    struct Iter<'a> {
        bytes: &'a [u8],
    }

    impl<'a> Iterator for Iter<'a> {
        type Item = Result<&'a str, u8>;

        fn next(&mut self) -> Option<Self::Item> {
            // Nothing left: the iterator is exhausted.
            let (&first, after_first) = self.bytes.split_first()?;

            let valid_len = match from_utf8(self.bytes) {
                Ok(text) => {
                    // The whole remainder is valid; hand it out in one piece.
                    self.bytes = &[];
                    return Some(Ok(text));
                }
                Err(err) => err.valid_up_to(),
            };

            if valid_len == 0 {
                // The very first byte is bad: report it and move past it.
                self.bytes = after_first;
                return Some(Err(first));
            }

            // Emit the valid prefix now; the offending byte is reported by the
            // next call.
            let (valid, remaining) = self.bytes.split_at(valid_len);
            self.bytes = remaining;
            Some(Ok(from_utf8(valid)
                .expect("bytes before Utf8Error::valid_up_to() are valid UTF-8")))
        }
    }

    Iter { bytes }
}
(b"\xFF\xC2hello", &[Err(b'\xFF'), Err(b'\xC2'), Ok("hello")]), (b"foo\xFFbar", &[Ok("foo"), Err(b'\xFF'), Ok("bar")]), ( b"foo\xF4\x8Fbar", &[Ok("foo"), Err(b'\xF4'), Err(b'\x8F'), Ok("bar")], ), ( b"foo\xFF\xC2bar", &[Ok("foo"), Err(b'\xFF'), Err(b'\xC2'), Ok("bar")], ), ]; for &(case, expected) in CASES { assert_eq!( from_utf8_iter(case).collect::>().as_slice(), expected ); } } } os_display-0.1.3/src/windows.rs000064400000000000000000000332420072674642500146560ustar 00000000000000use core::fmt::{self, Formatter, Write}; use unicode_width::UnicodeWidthChar; // Much of this code is similar to the Unix version. // Not all comments are repeated, so read that first. /// I'm not too familiar with PowerShell, much of this is based on /// experimentation rather than documentation or deep understanding. /// I have noticed that ~?*[] only get expanded in some contexts, so watch /// out for that if doing your own tests. /// Get-ChildItem seems unwilling to quote anything so it doesn't help. /// The omission of \ is important because it's used in file paths. const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\"'*?[]=,{} "; /// A single stand-alone exclamation mark seems to have some special meaning. /// Tildes are unclear: In Powershell on Linux, quoting a tilde keeps it from /// expanding if passed to an external program, but not if passed to Get-ChildItem. const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '@', '!']; const DOUBLE_UNSAFE: &[u8] = &[b'"', b'`', b'$']; pub(crate) fn write( f: &mut Formatter<'_>, text: &str, force_quote: bool, external: bool, ) -> fmt::Result { match text { "" if external => { // If we write '' PowerShell will swallow the argument. return f.write_str(r#"'""'"#); } "--%" if external => { // PowerShell doesn't want to pass this to external commands at all. // This does work: return f.write_str(r#"'"--%"'"#); // It does not work if PSNativeCommandArgumentPassing is enabled. } "--%" => { // This one's still special if used internally. 
return f.write_str("'--%'"); } _ => (), } let mut is_single_safe = true; let mut is_double_safe = true; let mut has_ascii_double = false; let mut requires_quote = force_quote; let mut is_bidi = false; if !requires_quote { let mut chars = text.chars(); if let Some(first) = chars.next() { let second = chars.next(); if SPECIAL_SHELL_CHARS_START.contains(&first) { requires_quote = true; } if !external { // PowerShell may parse bare strings as numbers in some contexts. // `echo 1d` just outputs "1d", but `Set-Variable s 1d` assigns // the number 1 to s. if !requires_quote && first.is_ascii_digit() { requires_quote = true; } // Annoyingly, .0d is another example. // And filenames start with . commonly enough that we shouldn't quote // too eagerly. if !requires_quote && first == '.' { if let Some(second) = second { if second.is_ascii_digit() { requires_quote = true; } } } } // Unlike in Unix, quoting an argument may stop it // from being recognized as an option. I like that very much. if !requires_quote && unicode::is_dash(first) { if second == None || second == Some('-') { // "-" is a common special argument we don't want to quote. // Something with multiple leading dashes won't be recognized // as an option. (Except for --% sorta.) // So do nothing. } else if external { // External commands aren't picky, no need to bother quoting. // However... if text.find(&[':', '.'][..]).is_some() { // A : must be followed by a value. Strictly speaking // we only have to check that it's followed by a character // that's not in SPECIAL_SHELL_CHARS_START, but let's // be cautious. // A . starts a new argument for some reason. requires_quote = true; } } else { // This looks like an option, so stop commandlets from // recognizing it as one. requires_quote = true; } } if !requires_quote && first.width().unwrap_or(0) == 0 { requires_quote = true; } } else { // Empty string. 
requires_quote = true; } } for ch in text.chars() { if ch.is_ascii() { let ch = ch as u8; if ch == b'\'' { is_single_safe = false; } if ch == b'"' { has_ascii_double = true; } if is_double_safe && DOUBLE_UNSAFE.contains(&ch) { is_double_safe = false; } if !requires_quote && SPECIAL_SHELL_CHARS.contains(&ch) { requires_quote = true; } if ch.is_ascii_control() { return write_escaped(f, text.chars().map(Ok), external); } } else { if !requires_quote && unicode::is_whitespace(ch) { requires_quote = true; } if (!requires_quote || is_double_safe) && unicode::is_double_quote(ch) { is_double_safe = false; requires_quote = true; } if (!requires_quote || is_single_safe) && unicode::is_single_quote(ch) { is_single_safe = false; requires_quote = true; } if crate::is_bidi(ch) { is_bidi = true; } if crate::requires_escape(ch) { return write_escaped(f, text.chars().map(Ok), external); } } } if is_bidi && crate::is_suspicious_bidi(text) { return write_escaped(f, text.chars().map(Ok), external); } if !requires_quote { f.write_str(text) } else if external && has_ascii_double { write_external_escaped(f, text) } else if is_single_safe { write_simple(f, text, '\'') } else if is_double_safe { write_simple(f, text, '\"') } else { write_single_escaped(f, text) } } fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result { f.write_char(quote)?; f.write_str(text)?; f.write_char(quote)?; Ok(()) } fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result { // Quotes in PowerShell are escaped by doubling them. // The second quote is used, so '‘ becomes ‘. // Therefore we insert a ' before every quote we find. // If we think something is a single quote and quote it but the PowerShell // version doesn't (e.g. because it's old) then things go wrong. I don't // know of a way to solve this. A ` (backtick) escape only works between // double quotes or in a bare string. 
We can't unquote, use a bare string, // then requote, as we would in Unix: PowerShell sees that as multiple // arguments. f.write_char('\'')?; let mut pos = 0; for (index, _) in text.match_indices(unicode::is_single_quote) { f.write_str(&text[pos..index])?; f.write_char('\'')?; pos = index; } f.write_str(&text[pos..])?; f.write_char('\'')?; Ok(()) } fn write_external_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result { f.write_char('\'')?; let mut pos = 0; for (index, quote) in text.match_indices(|ch: char| ch == '"' || unicode::is_single_quote(ch)) { f.write_str(&text[pos..index])?; if quote == "\"" { // Let n be the number of backslashes before the quote. // We need to turn that into 2n + 1 backslahes. // Therefore we need to write n + 1 more. // The logic behind this is that double quotes must be escaped with // backslashes, and backslashes must be escaped only if they precede // a double quote. let backslashes = text[..index] .chars() .rev() .take_while(|&ch| ch == '\\') .count() + 1; for _ in 0..backslashes { f.write_char('\\')?; } } else { f.write_char('\'')?; } pos = index; } f.write_str(&text[pos..])?; f.write_char('\'')?; Ok(()) } pub(crate) fn write_escaped( f: &mut Formatter<'_>, text: impl Iterator>, external: bool, ) -> fmt::Result { // ` takes the role of \ since \ is already used as the path separator. // Things are UTF-16-oriented, so we escape bad code units as "`u{1234}". f.write_char('"')?; let mut backslashes: u32 = 0; for ch in text { match ch { Ok(ch) => { match ch { '\0' => f.write_str("`0")?, '\r' => f.write_str("`r")?, '\n' => f.write_str("`n")?, '\t' => f.write_str("`t")?, // Code unit escapes are only supported in PowerShell Core, // so we're more willing to use weird escapes here than on Unix. // There's also `e, for \x1B, but that one's Core-exclusive. 
/// Character classes as PowerShell sees them.
///
/// PowerShell makes liberal use of Unicode, treating a number of non-ASCII
/// characters as whitespace, dashes and quotes. These tables may have to be
/// updated in the future.
mod unicode {
    /// Characters treated as whitespace in addition to the separator
    /// categories: space, horizontal tab, vertical tab, form feed, no-break
    /// space and next line, plus U+2800 BRAILLE PATTERN BLANK, which is not
    /// strictly whitespace but invisible enough to quote.
    const EXTRA_WHITESPACE: &[char] = &[
        ' ', '\t', '\x0B', '\x0C', '\u{00A0}', '\u{0085}', '\u{2800}',
    ];

    /// Characters that can start an option.
    const DASHES: &[char] = &['-', '\u{2013}', '\u{2014}', '\u{2015}'];

    /// Characters that act as a single quote.
    const SINGLE_QUOTES: &[char] = &['\'', '\u{2018}', '\u{2019}', '\u{201A}', '\u{201B}'];

    /// Characters that act as a double quote.
    const DOUBLE_QUOTES: &[char] = &['"', '\u{201C}', '\u{201D}', '\u{201E}'];

    /// Returns whether `ch` counts as whitespace here.
    ///
    /// That is everything in `EXTRA_WHITESPACE` plus everything in the
    /// {space, line, paragraph} separator categories (what
    /// `System.Char.IsSeparator` accepts). This overlaps with, but is not
    /// identical to, `char::is_whitespace()`.
    ///
    /// Callers only pass non-ASCII characters, so the ASCII entries and the
    /// repeated U+00A0 are redundant but harmless.
    pub(crate) fn is_whitespace(ch: char) -> bool {
        EXTRA_WHITESPACE.contains(&ch) || is_separator(ch)
    }

    /// Hard-coded check for the Unicode categories Zs, Zl and Zp, to avoid
    /// pulling in a dependency just for this.
    ///
    /// The list can be regenerated with:
    ///
    /// curl -s https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt \
    ///   | grep -e Zl -e Zp -e Zs | cut -d ';' -f 1
    fn is_separator(ch: char) -> bool {
        match ch {
            '\u{0020}' | '\u{00A0}' | '\u{1680}' => true,
            '\u{2000}'..='\u{200A}' => true,
            '\u{2028}' | '\u{2029}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => true,
            _ => false,
        }
    }

    /// Returns whether `ch` can be used to start an option.
    ///
    /// Other dash-like characters exist, but PowerShell doesn't care about
    /// them.
    pub(crate) fn is_dash(ch: char) -> bool {
        DASHES.contains(&ch)
    }

    /// Returns whether `ch` is the ASCII apostrophe or one of the
    /// typographic single quotes U+2018..U+201B.
    pub(crate) fn is_single_quote(ch: char) -> bool {
        SINGLE_QUOTES.contains(&ch)
    }

    /// Returns whether `ch` is the ASCII double quote or one of the
    /// typographic double quotes U+201C..U+201E.
    pub(crate) fn is_double_quote(ch: char) -> bool {
        DOUBLE_QUOTES.contains(&ch)
    }
}