wild-2.2.1/.cargo_vcs_info.json0000644000000001360000000000100117750ustar { "git": { "sha1": "fb5b2a9facb12da0413d0dcf819b73bce1ad0c01" }, "path_in_vcs": "" }wild-2.2.1/Cargo.toml0000644000000030010000000000100077650ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "wild" version = "2.2.1" authors = ["Kornel "] include = [ "/src/*.rs", "/Cargo.toml", "/LICENSE", "/README.md", ] description = "Glob (wildcard) expanded command-line arguments on Windows" homepage = "https://lib.rs/crates/wild" documentation = "https://docs.rs/wild" readme = "README.md" keywords = [ "wildcards", "glob", "windows", "shell", "CommandLineToArgvW", ] categories = [ "command-line-interface", "os::windows-apis", ] license = "Apache-2.0 OR MIT" repository = "https://gitlab.com/kornelski/wild" [package.metadata.docs.rs] all-features = true rustdoc-args = [ "--cfg", "docsrs", "--generate-link-to-definition", ] targets = ["x86_64-unknown-linux-gnu"] [dev-dependencies.glob] version = "0.3.1" [features] glob-quoted-on-windows = [] [target."cfg(windows)".dependencies.glob] version = "0.3.1" [badges.appveyor] repository = "pornel/wild" [badges.gitlab] repository = "kornelski/wild" [badges.maintenance] status = "passively-maintained" wild-2.2.1/Cargo.toml.orig000064400000000000000000000027331046102023000134610ustar 00000000000000[package] authors = ["Kornel "] categories = ["command-line-interface", "os::windows-apis"] description = "Glob (wildcard) expanded command-line arguments on Windows" documentation = "https://docs.rs/wild" homepage = "https://lib.rs/crates/wild" keywords = ["wildcards", "glob", "windows", "shell", "CommandLineToArgvW"] license = "Apache-2.0 OR MIT" name = "wild" readme = "README.md" repository = "https://gitlab.com/kornelski/wild" version = "2.2.1" edition = "2021" include = ["/src/*.rs", "/Cargo.toml", "/LICENSE", "/README.md"] [badges] gitlab = { repository = "kornelski/wild" } appveyor = { repository = "pornel/wild" } maintenance = { status = "passively-maintained" } [target.'cfg(windows)'.dependencies] glob = "0.3.1" [dev-dependencies] glob = "0.3.1" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] all-features = true rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"] [features] # Give up on trying to accurately emulate Unix-like argument quoting semantics, # and always interpret `*` (etc.) as file path patterns, even in quoted strings. # # This affects only Windows. # # This creates possibility of non-file arguments that contain `*` to be interpreted as file paths. # OTOH it makes it easier for users to use globs on paths with spaces, and to call executables via tools/APIs that always quote args. # # Don't enable this feature from libraries. This decision should be left to binaries. glob-quoted-on-windows = [] wild-2.2.1/LICENSE000064400000000000000000000020401046102023000115660ustar 00000000000000Copyright 2018 Kornel Lesiński Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. wild-2.2.1/README.md000064400000000000000000000030441046102023000120450ustar 00000000000000# [`Wild::args`](https://lib.rs/crates/wild) for [Rust](https://www.rust-lang.org) Allows Rust applications support wildcard arguments (`*foo*`, `file.???`, `*.log.[0-9]`, etc.) on command-line, uniformly on all platforms, including Windows. Unix shells automatically interpret wildcard arguments and pass them expanded (already converted to file names) to applications, but Windows' `cmd.exe` doesn't do that. For consistent cross-platform behavior, this crate emulates Unix-like expansion on Windows. You only need to use `wild::args()` instead of `std::env::args()`. It is more robust than using [`glob()`](https://lib.rs/crates/glob) on values from `std::env::args()`, because this crate is aware of argument quoting, and special characteres in quotes (`"*"`) are intentionally not expanded. The glob syntax on Windows is limited to `*`, `?`, and `[a-z]`/`[!a-z]` ranges, as supported by the glob crate. Parsing of quoted arguments precisely follows Windows' native syntax ([`CommandLineToArgvW`][1], specifically). [1]: https://docs.microsoft.com/en-us/windows/desktop/api/shellapi/nf-shellapi-commandlinetoargvw ## Usage `wild::args()` is a drop-in replacement for `std::env::args()`. ```toml [dependencies] wild = "2" ``` ```rust fn main() { let args = wild::args(); println!("The args are: {:?}", args.collect::>()); } ``` ## Usage with [Clap](https://lib.rs/crates/clap) ```rust let matches = clap::App::new("your_app") .arg(…) .arg(…) .arg(…) // .get_matches(); change to: .get_matches_from(wild::args()); ``` wild-2.2.1/src/argsiter.rs000064400000000000000000000101021046102023000135340ustar 00000000000000use crate::globiter::GlobArgs; use std::ffi::OsString; use std::fmt; /// Windows replacement for `std::env::ArgsOs` #[cfg_attr(test, allow(dead_code))] pub struct ArgsOs { pub(crate) args: GlobArgs<'static>, pub(crate) current_arg_globs: Option, } impl ArgsOs { /// Expects result of `GetCommandLineW` #[inline] pub(crate) fn from_raw_command_line(cmd: &'static [u16]) -> Self { Self { args: GlobArgs::new(cmd), current_arg_globs: None, } } } /// Windows replacement for `std::env::Args` pub struct Args { pub(crate) iter: ArgsOs, } fn first_non_error(iter: &mut I) -> Option where I: Iterator> { loop { match iter.next() { Some(Ok(item)) => return Some(item), None => return None, Some(Err(_)) => {}, } } } impl Iterator for Args { type Item = String; fn next(&mut self) -> Option { self.iter.next().map(|s| s.to_string_lossy().to_string()) } } impl Iterator for ArgsOs { type Item = OsString; fn next(&mut self) -> Option { if let Some(path) = self.current_arg_globs.as_mut().and_then(first_non_error) { return Some(path.into_os_string()); } let arg = self.args.next()?; // if None — end of args let glob_opts = glob::MatchOptions { case_sensitive: false, ..Default::default() }; if let Some(Ok(mut glob_iter)) = arg.pattern.as_ref().map(move |pat| glob::glob_with(pat, glob_opts)) { let first_glob = first_non_error(&mut glob_iter); self.current_arg_globs = Some(glob_iter); match first_glob { Some(path) => Some(path.into_os_string()), None => { // non-matching patterns are passed as regular strings self.current_arg_globs = None; Some(arg.text) }, } // Invalid patterns are passed as regular strings } else { // valid, but non-wildcard args passed as is, in order to avoid normalizing slashes Some(arg.text) } } } impl fmt::Debug for Args { #[cold] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.iter.fmt(f) } } impl fmt::Debug for ArgsOs { #[cold] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.args.fmt(f) } } #[test] fn finds_cargo_toml() { let cmd = "foo.exe _not_?a?_[f]ilename_ \"_not_?a?_[p]attern_\" Cargo.tom?".chars().map(|c| c as u16).collect::>(); let args = ArgsOs::from_raw_command_line(Box::leak(cmd.into_boxed_slice())); let iter = Args { iter: args }; assert_eq!("\"foo.exe _not_?a?_[f]ilename_ \\\"_not_?a?_[p]attern_\\\" Cargo.tom?\"", format!("{:?}", iter)); let args: Vec<_> = iter.collect(); assert_eq!(4, args.len()); assert_eq!("foo.exe", &args[0]); assert_eq!("_not_?a?_[f]ilename_", &args[1]); assert_eq!("_not_?a?_[p]attern_", &args[2]); assert_eq!("Cargo.toml", &args[3]); } #[test] fn unquoted_slashes_unchanged() { let cmd = r#"foo.exe //// .. ./ \\\\"#.chars().map(|c| c as u16).collect::>(); let args = ArgsOs::from_raw_command_line(Box::leak(cmd.into_boxed_slice())); let iter = Args { iter: args }; let args: Vec<_> = iter.collect(); assert_eq!(5, args.len()); assert_eq!("foo.exe", &args[0]); assert_eq!("////", &args[1]); assert_eq!("..", &args[2]); assert_eq!("./", &args[3]); assert_eq!(r#"\\\\"#, &args[4]); } #[test] fn finds_readme_case_insensitive() { let cmd = "foo.exe _not_?a?_[f]ilename_ \"_not_?a?_[p]attern_\" read*.MD".chars().map(|c| c as u16).collect::>(); let iter = ArgsOs::from_raw_command_line(Box::leak(cmd.into_boxed_slice())); let args: Vec<_> = iter.map(|c| c.to_string_lossy().to_string()).collect(); assert_eq!(4, args.len()); assert_eq!("foo.exe", &args[0]); assert_eq!("_not_?a?_[f]ilename_", &args[1]); assert_eq!("_not_?a?_[p]attern_", &args[2]); assert_eq!("README.md", &args[3]); } wild-2.2.1/src/globiter.rs000064400000000000000000000063131046102023000135340ustar 00000000000000use crate::parser::CommandLineWParser; use crate::parser::CharCode; use std::ffi::OsString; use std::fmt; pub(crate) struct ArgOs { /// `Some` if contains a glob /// /// Pattern is a string, because https://github.com/rust-lang-nursery/glob/issues/23 pub pattern: Option, pub text: OsString, } /// Iterator retuning glob-escaped arguments. Call `args()` to obtain it. #[must_use] pub(crate) struct GlobArgs<'argsline> { parser: CommandLineWParser<'argsline>, } impl<'a> fmt::Debug for GlobArgs<'a> { #[cold] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.parser.fmt(f) } } #[cfg(windows)] use std::os::windows::ffi::OsStringExt; /// This is used only in tests on non-Windows #[cfg(not(windows))] trait LossyOsStringExt { fn from_wide(wide: &[u16]) -> OsString { OsString::from(String::from_utf16_lossy(wide)) } } #[cfg(not(windows))] impl LossyOsStringExt for OsString {} impl<'a> Iterator for GlobArgs<'a> { type Item = ArgOs; fn next(&mut self) -> Option { let mut pattern: Option> = None; let mut text = vec![]; let everything_as_unquoted = cfg!(feature = "glob-quoted-on-windows"); let has_arg = self.parser.accumulate_next(|c| { let (quoted, c) = match c { CharCode::Quoted(c) => (!everything_as_unquoted, c), CharCode::Unquoted(c) => (false, c), }; const Q: u16 = b'?' as u16; const A: u16 = b'*' as u16; const L: u16 = b'[' as u16; const R: u16 = b']' as u16; match c { Q | A | L | R => { if quoted { if let Some(pattern) = &mut pattern { pattern.extend([L, c, R]); } } else { let p = pattern.get_or_insert_with(|| { text.iter().flat_map(|&c| match c { // type inference picks a slice here, sometimes! Q | A | L | R => <[u16; 3] as IntoIterator>::into_iter([L, c, R]).take(3), _ => <[u16; 3] as IntoIterator>::into_iter([c, 0, 0]).take(1), }).collect() }); p.push(c); } }, _ => if let Some(p) = &mut pattern { p.push(c) }, }; text.push(c); }); if has_arg { Some(ArgOs { pattern: pattern.map(|pattern| { char::decode_utf16(pattern) .map(|r| r.unwrap_or('?')) .collect::() }), text: OsString::from_wide(&text), }) } else { None } } } impl<'argsline> GlobArgs<'argsline> { /// UTF-16/UCS2 string from `GetCommandLineW` #[allow(dead_code)] #[inline] pub(crate) fn new(command_line_args_ucs2: &'argsline [u16]) -> Self { Self { parser: CommandLineWParser::new(command_line_args_ucs2), } } } wild-2.2.1/src/lib.rs000064400000000000000000000176501046102023000125010ustar 00000000000000#![cfg_attr(docsrs, feature(doc_cfg))] //! Emulates glob (wildcard) argument expansion on Windows. No-op on other platforms. //! //! Unix shells expand command-line arguments like `a*`, `file.???` and pass them expanded to applications. //! On Windows `cmd.exe` doesn't do that, so this crate emulates the expansion there. //! Instead of `std::env::args()` use `wild::args()`. //! //! The glob syntax on Windows is limited to `*`, `?`, and `[a-z]`/`[!a-z]` ranges. //! Glob characters in quotes (`"*"`) are not expanded. //! //! Parsing of quoted arguments precisely follows Windows native syntax (`CommandLineToArgvW`, specifically) //! with all its weirdness. //! //! ## Usage //! //! Use `wild::args()` instead of `std::env::args()` (or `wild::args_os()` instead of `std::env::args_os()`). //! //! If you use [Clap](https://lib.rs/crates/clap), use `.get_matches_from(wild::args_os())` instead of `.get_matches()`. /// An optional, experimental low-level interface for parsing command-line strings from other sources. In most cases [`args`] and [`args_os`] are more appropriate. #[cfg(any(test, windows))] pub mod parser; #[cfg(any(test, windows))] mod globiter; #[cfg(any(test, windows))] mod argsiter; #[cfg(windows)] pub use crate::argsiter::*; /// Iterator of arguments. Equivalent to `std::env::Args`. See [`args`] for details. /// /// On unix it's an alias for `std::env::Args`. /// On Windows it's a custom iterator that implements glog expansion. #[cfg(not(windows))] pub type Args = std::env::Args; /// Same as [`Args`], but keeps invalid Unicode intact. #[cfg(not(windows))] pub type ArgsOs = std::env::ArgsOs; /// Returns an iterator of glob-expanded command-line arguments. Equivalent of `std::env::args()`. /// /// On non-Windows platforms it returns `env::args()` as-is, /// assuming expansion has already been done by the shell. /// /// On Windows it emulates the glob expansion itself. /// The iterator will parse arguments incrementally and access /// the file system as it parses. This allows reading potentially huge lists of /// filenames, but it's not an atomic snapshot (use `.collect()` if you need that). #[cfg(not(windows))] #[must_use] pub fn args() -> Args { std::env::args() } #[cfg(not(windows))] #[must_use] pub fn args_os() -> ArgsOs { std::env::args_os() } /// Returns an iterator of glob-expanded command-line arguments. Equivalent of `std::env::args()`. /// /// On Windows it emulates the glob expansion itself. /// The iterator will parse arguments incermentally and access /// the file system as it parses. This allows reading potentially huge lists of /// filenames, but it's not an atomic snapshot (use `.collect()` if you need that). /// /// On non-Windows platforms it returns `env::args()` as-is, /// assuming expansion has already been done by the shell. #[cfg(windows)] #[must_use] pub fn args() -> Args { Args { iter: args_os() } } /// Same as [`args`], but returns `OsString` #[cfg(windows)] #[must_use] pub fn args_os() -> ArgsOs { ArgsOs::from_raw_command_line(raw_command_line()) } #[cfg(windows)] extern "system" { fn GetCommandLineW() -> *const u16; } #[cfg(windows)] fn raw_command_line() -> &'static [u16] { unsafe { let line_ptr = GetCommandLineW(); if line_ptr.is_null() { return &[]; } let mut len = 0; while *line_ptr.add(len) != 0 { len += 1; } std::slice::from_raw_parts(line_ptr, len) } } #[cfg(test)] fn parsed(s: &str) -> String { let t: Vec<_> = s.encode_utf16().collect(); let args: Vec<_> = globiter::GlobArgs::new(&t) .map(|s| s.pattern.map(|p| format!("")).unwrap_or(s.text.to_string_lossy().into_owned())) .collect(); args.join(";") } #[cfg(test)] fn unquoted(s: &str) -> String { let t: Vec<_> = s.encode_utf16().collect(); let args: Vec<_> = globiter::GlobArgs::new(&t) .map(|s| s.text.to_string_lossy().to_string()) .collect(); args.join(";") } #[test] fn test_actual_args() { assert!(args_os().count() >= 1); } #[test] fn test_parse_1() { assert_eq!(r#"漢字"#, parsed("漢字")); assert_eq!(r#"漢字"#, parsed("\"漢字\"")); assert_eq!(r#"漢\字"#, parsed("\"漢\\字\"")); assert_eq!(r#"unquoted"#, parsed("unquoted")); assert_eq!(r#""#, parsed("*")); assert_eq!(r#""#, parsed("?")); assert_eq!(r#"quoted"#, parsed("\"quoted\"")); assert_eq!(r#"quoted"#, unquoted("\"quoted\"")); assert_eq!(r#"*"#, unquoted("\"*\"")); assert_eq!(r#"?"#, unquoted("\"?\"")); assert_eq!(r#"]"#, unquoted("\"]\"")); assert_eq!(r#"quo"ted"#, parsed(r#" "quo\"ted" "#)); // backslash can escape quotes assert_eq!(r#""#, parsed(r#" "quo""ted?" "#)); // and quote can escape quotes assert_eq!(r#"unquo"ted"#, parsed(r#" unquo\"ted "#)); // backslash can escape quotes, even outside quotes assert_eq!(r#""#, parsed(r#" unquo""ted? "#)); // quote escaping does not work outside quotes assert_eq!(r#"""#, parsed(r#""""""#)); // quote escapes quote in quoted string assert_eq!(r#"""#, parsed(r#"""""""#)); assert_eq!(r#""""#, parsed(r#""""""""#)); assert_eq!(r#""""#, parsed(r#"""""""""#)); // """ == "X", """""" = "X""X" assert_eq!(r#""""#, parsed(r#""""""""""#)); assert_eq!(r#"""""#, parsed(r#"""""""""""#)); assert_eq!(r#"\\server\share\path with spaces"#, parsed(r#""\\server\share\path with spaces""#)); // lone double backslash is not special assert_eq!("aba", parsed(r#""a"b"a""#)); // quotes can go in and out assert_eq!("abac", parsed(r#""a"b"a"c"#)); // quotes can go in and out assert_eq!(r#"\\"#, parsed(r#"\\\\""#)); assert_eq!(r#""#, parsed(r#"?\\\\"?"#)); // unpaired quote is interpreted like an end quote assert_eq!(r#"\""#, parsed(r#"\\\""#)); assert_eq!(r#""#, parsed(r#"\\\"[a-z]"#)); assert_eq!(" ", parsed(r#"" "#)); // unterminated quotes are OK assert_eq!("", parsed(r#""""#)); assert_eq!(r#""#, parsed(r#"[a-c]""[d-z]"#)); assert_eq!("", parsed(r#"""#)); assert_eq!("x", parsed(r#"x""#)); assert_eq!(r#"\;x;y"#, parsed(r"\ x y")); assert_eq!(r#"\\;x;y"#, parsed(r"\\ x y")); assert_eq!(r#"a\\\;x;y"#, parsed(r"a\\\ x y")); assert_eq!(r#";x;y"#, parsed(r"a\\\* x y")); assert_eq!(r#"a\\\ x;y"#, parsed(r#""a\\\ x" y"#)); assert_eq!(r#"\"#, parsed(r"\")); assert_eq!(r#"\\"#, parsed(r"\\")); assert_eq!(r#"\\\"#, parsed(r"\\\")); assert_eq!(r#"\\\\"#, parsed(r"\\\\")); assert_eq!(r#"\\a"#, parsed(r#"\\\\"a"#)); assert_eq!(r#"\\a"#, parsed(r#"\\\\"a""#)); assert_eq!(r#"¥¥"#, parsed(r#"¥¥""#)); // in Unicode this isn't backslash assert_eq!(r#".\path\to\folder\;-rf"#, parsed(r#".\path\to\folder\ -rf"#)); } #[test] #[cfg(not(feature = "glob-quoted-on-windows"))] fn test_unquoted() { assert_eq!(r#"*"#, parsed("\"*\"")); assert_eq!(r#"?"#, parsed("\"?\"")); assert_eq!(r#"]"#, parsed("\"]\"")); assert_eq!("", parsed(r#"c*"a*"b*"a*"c*"#)); // quotes can go in and out assert_eq!(r#""#, parsed(r#""[a-c]""[d-z]""#)); } #[test] #[cfg(feature = "glob-quoted-on-windows")] fn test_unquoted() { assert_eq!(r#""#, parsed("\"*\"")); assert_eq!(r#""#, parsed("\"?\"")); assert_eq!(r#""#, parsed("\"]\"")); assert_eq!("", parsed(r#"c*"a*"b*"a*"c*"#)); // quotes can go in and out assert_eq!(r#""#, parsed(r#""[a-c]""[d-z]""#)); } #[test] fn test_parse_multi() { assert_eq!(r#"unquoted;quoted"#, parsed("unquoted \"quoted\"")); assert_eq!(r#"quo"ted;quo"ted "#, parsed(r#" "quo\"ted" "quo""ted" "#)); assert_eq!(r#"unquo"ted;""#, parsed(r#" unquo\"ted """"""#)); assert_eq!(r#"a;a"#, parsed(r#"a"" a"#)); assert_eq!(r#"a";a"#, parsed(r#"a""" a"#)); assert_eq!(r#"\\;\""#, parsed(r#"\\\\" \\\" "#)); assert_eq!("x; ", parsed(r#" x " "#)); } wild-2.2.1/src/parser.rs000064400000000000000000000133531046102023000132230ustar 00000000000000use std::fmt; /// An experimental, low-level access to each individual character of raw arguments. #[must_use] pub struct CommandLineWParser<'argsline> { line: std::slice::Iter<'argsline, u16>, } impl<'argsline> CommandLineWParser<'argsline> { #[inline] #[must_use] pub fn new(command_line_args_ucs2: &'argsline [u16]) -> Self { Self { line: command_line_args_ucs2.iter(), } } } impl<'a> fmt::Debug for CommandLineWParser<'a> { #[cold] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { String::from_utf16_lossy(self.line.as_slice()).fmt(f) } } #[derive(Debug)] enum State { BetweenArgs, InArg(bool), OnQuote, /// number + in quotes Backslashes(usize, bool), } /// A single code unit, which may be UCS-2 or half-broken UTF-16. Not a character. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CharCode { /// This code unit was found inside quotes (it's just text) Quoted(u16), /// This code unit was found outside quotes (you could interpret it as a glob) Unquoted(u16), } const SPACE: u16 = b' ' as u16; const TAB: u16 = b'\t' as u16; const QUOTE: u16 = b'"' as u16; const BACKSLASH: u16 = b'\\' as u16; /// Given UCS2/potentially-broken-UTF-16 string parses one argument, following /// the absolutely bizarre quoting rules of `CommandLineToArgvW`, and returns /// a bool indicating whether there's anything more left. /// /// Calling this repeatedly until it returns false will parse all arguments. /// /// The callback is expected to accumulate code units itself. /// /// This parses u16 code units, rather than code points. /// This allows supporting unpaired surrogates and ensures they won't "eat" any control characters. impl<'argsline> CommandLineWParser<'argsline> { pub fn accumulate_next(&mut self, mut push: CharacterAccumulator) -> bool where CharacterAccumulator: FnMut(CharCode) { use self::State::*; let mut state = BetweenArgs; for &cu in &mut self.line { state = match state { BetweenArgs => match cu { SPACE | TAB => BetweenArgs, QUOTE => InArg(true), BACKSLASH => Backslashes(1, false), c => { push(CharCode::Unquoted(c)); InArg(false) }, }, InArg(quoted) => match cu { BACKSLASH => Backslashes(1, quoted), QUOTE if quoted => OnQuote, QUOTE if !quoted => InArg(true), SPACE | TAB if !quoted => { return true; }, c => { push(if quoted { CharCode::Quoted(c) } else { CharCode::Unquoted(c) }); InArg(quoted) }, }, OnQuote => match cu { QUOTE => { // In quoted arg "" means literal quote and the end of the quoted string (but not arg) push(CharCode::Quoted(QUOTE)); InArg(false) }, SPACE | TAB => { return true; }, c => { push(CharCode::Unquoted(c)); InArg(false) }, }, Backslashes(count, quoted) => match cu { BACKSLASH => Backslashes(count + 1, quoted), QUOTE => { // backslashes followed by a quotation mark are treated as pairs of protected backslashes let b = if quoted { CharCode::Quoted(BACKSLASH) } else { CharCode::Unquoted(BACKSLASH) }; for _ in 0..count/2 { push(b); } if count & 1 != 0 { // An odd number of backslashes is treated as followed by a protected quotation mark. push(if quoted { CharCode::Quoted(QUOTE) } else { CharCode::Unquoted(QUOTE) }); InArg(quoted) } else if quoted { // An even number of backslashes is treated as followed by a word terminator. return true; } else { InArg(quoted) } }, c => { // A string of backslashes not followed by a quotation mark has no special meaning. let b = if quoted { CharCode::Quoted(BACKSLASH) } else { CharCode::Unquoted(BACKSLASH) }; for _ in 0..count { push(b); } match c { SPACE | TAB if !quoted => return true, c => { push(if quoted { CharCode::Quoted(c) } else { CharCode::Unquoted(c) }); InArg(quoted) }, } }, }, }; } match state { BetweenArgs => false, OnQuote | InArg(..) => true, Backslashes(count, quoted) => { // A string of backslashes not followed by a quotation mark has no special meaning. let b = if quoted { CharCode::Quoted(BACKSLASH) } else { CharCode::Unquoted(BACKSLASH) }; for _ in 0..count { push(b); } true }, } } }