ignore-0.4.18/.cargo_vcs_info.json0000644000000001120000000000100124030ustar { "git": { "sha1": "14860b0f16ce9cab3c8bb9205a8268b5be22dc30" } } ignore-0.4.18/COPYING000064400000000000000000000001760072674642500122700ustar 00000000000000This project is dual-licensed under the Unlicense and MIT licenses. You may use this code under the terms of either license. ignore-0.4.18/Cargo.lock0000644000000112570000000000100103720ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "aho-corasick" version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" dependencies = [ "memchr", ] [[package]] name = "bstr" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90682c8d613ad3373e66de8c6411e0ae2ab2571e879d2efbf73558cc66f21279" dependencies = [ "memchr", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "crossbeam-channel" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" dependencies = [ "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" dependencies = [ "cfg-if", "lazy_static", ] [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "globset" version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0fc1b9fa0e64ffb1aa5b95daa0f0f167734fd528b7c02eabc581d9d843649b1" dependencies = [ "aho-corasick", "bstr", "fnv", "log", "regex", ] [[package]] name = "ignore" version = "0.4.18" dependencies = [ "crossbeam-channel", "crossbeam-utils", "globset", "lazy_static", "log", "memchr", "regex", "same-file", "thread_local", "walkdir", "winapi-util", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "log" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ "cfg-if", ] [[package]] name = "memchr" version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" [[package]] name = "once_cell" version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3" [[package]] name = "regex" version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" [[package]] 
name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "thread_local" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd" dependencies = [ "once_cell", ] [[package]] name = "walkdir" version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" dependencies = [ "same-file", "winapi", "winapi-util", ] [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" dependencies = [ "winapi", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" ignore-0.4.18/Cargo.toml0000644000000031510000000000100104070ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. 
If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "ignore" version = "0.4.18" authors = ["Andrew Gallant "] description = "A fast library for efficiently matching ignore files such as `.gitignore`\nagainst file paths.\n" homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore" documentation = "https://docs.rs/ignore" readme = "README.md" keywords = ["glob", "ignore", "gitignore", "pattern", "file"] license = "Unlicense/MIT" repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore" [lib] name = "ignore" bench = false [dependencies.crossbeam-utils] version = "0.8.0" [dependencies.globset] version = "0.4.7" [dependencies.lazy_static] version = "1.1" [dependencies.log] version = "0.4.5" [dependencies.memchr] version = "2.1" [dependencies.regex] version = "1.1" [dependencies.same-file] version = "1.0.4" [dependencies.thread_local] version = "1" [dependencies.walkdir] version = "2.2.7" [dev-dependencies.crossbeam-channel] version = "0.5.0" [features] simd-accel = ["globset/simd-accel"] [target."cfg(windows)".dependencies.winapi-util] version = "0.1.2" ignore-0.4.18/Cargo.toml.orig000064400000000000000000000017000072674642500141160ustar 00000000000000[package] name = "ignore" version = "0.4.18" #:version authors = ["Andrew Gallant "] description = """ A fast library for efficiently matching ignore files such as `.gitignore` against file paths. """ documentation = "https://docs.rs/ignore" homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore" repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore" readme = "README.md" keywords = ["glob", "ignore", "gitignore", "pattern", "file"] license = "Unlicense/MIT" edition = "2018" [lib] name = "ignore" bench = false [dependencies] crossbeam-utils = "0.8.0" globset = { version = "0.4.7", path = "../globset" } lazy_static = "1.1" log = "0.4.5" memchr = "2.1" regex = "1.1" same-file = "1.0.4" thread_local = "1" walkdir = "2.2.7" [target.'cfg(windows)'.dependencies.winapi-util] version = "0.1.2" [dev-dependencies] crossbeam-channel = "0.5.0" [features] simd-accel = ["globset/simd-accel"] ignore-0.4.18/LICENSE-MIT000064400000000000000000000020710072674642500126650ustar 00000000000000The MIT License (MIT) Copyright (c) 2015 Andrew Gallant Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
ignore-0.4.18/README.md000064400000000000000000000031650072674642500125150ustar 00000000000000ignore ====== The ignore crate provides a fast recursive directory iterator that respects various filters such as globs, file types and `.gitignore` files. This crate also provides lower level direct access to gitignore and file type matchers. [![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions) [![](https://img.shields.io/crates/v/ignore.svg)](https://crates.io/crates/ignore) Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/). ### Documentation [https://docs.rs/ignore](https://docs.rs/ignore) ### Usage Add this to your `Cargo.toml`: ```toml [dependencies] ignore = "0.4" ``` ### Example This example shows the most basic usage of this crate. This code will recursively traverse the current directory while automatically filtering out files and directories according to ignore globs found in files like `.ignore` and `.gitignore`: ```rust,no_run use ignore::Walk; for result in Walk::new("./") { // Each item yielded by the iterator is either a directory entry or an // error, so either print the path or the error. match result { Ok(entry) => println!("{}", entry.path().display()), Err(err) => println!("ERROR: {}", err), } } ``` ### Example: advanced By default, the recursive directory iterator will ignore hidden files and directories. This can be disabled by building the iterator with `WalkBuilder`: ```rust,no_run use ignore::WalkBuilder; for result in WalkBuilder::new("./").hidden(false).build() { println!("{:?}", result); } ``` See the documentation for `WalkBuilder` for many other options. ignore-0.4.18/UNLICENSE000064400000000000000000000022730072674642500125050ustar 00000000000000This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
For more information, please refer to <http://unlicense.org/>

ignore-0.4.18/examples/walk.rs

use std::env;
use std::io::{self, Write};
use std::path::Path;
use std::thread;

use ignore::WalkBuilder;
use walkdir::WalkDir;

fn main() {
    let mut path = env::args().nth(1).unwrap();
    let mut parallel = false;
    let mut simple = false;
    let (tx, rx) = crossbeam_channel::bounded::<DirEntry>(100);
    if path == "parallel" {
        path = env::args().nth(2).unwrap();
        parallel = true;
    } else if path == "walkdir" {
        path = env::args().nth(2).unwrap();
        simple = true;
    }

    let stdout_thread = thread::spawn(move || {
        let mut stdout = io::BufWriter::new(io::stdout());
        for dent in rx {
            write_path(&mut stdout, dent.path());
        }
    });

    if parallel {
        let walker = WalkBuilder::new(path).threads(6).build_parallel();
        walker.run(|| {
            let tx = tx.clone();
            Box::new(move |result| {
                use ignore::WalkState::*;

                tx.send(DirEntry::Y(result.unwrap())).unwrap();
                Continue
            })
        });
    } else if simple {
        let walker = WalkDir::new(path);
        for result in walker {
            tx.send(DirEntry::X(result.unwrap())).unwrap();
        }
    } else {
        let walker = WalkBuilder::new(path).build();
        for result in walker {
            tx.send(DirEntry::Y(result.unwrap())).unwrap();
        }
    }
    drop(tx);
    stdout_thread.join().unwrap();
}

enum DirEntry {
    X(walkdir::DirEntry),
    Y(ignore::DirEntry),
}

impl DirEntry {
    fn path(&self) -> &Path {
        match *self {
            DirEntry::X(ref x) => x.path(),
            DirEntry::Y(ref y) => y.path(),
        }
    }
}

#[cfg(unix)]
fn write_path<W: io::Write>(mut wtr: W, path: &Path) {
    use std::os::unix::ffi::OsStrExt;
    wtr.write(path.as_os_str().as_bytes()).unwrap();
    wtr.write(b"\n").unwrap();
}

#[cfg(not(unix))]
fn write_path<W: io::Write>(mut wtr: W, path: &Path) {
    wtr.write(path.to_string_lossy().as_bytes()).unwrap();
    wtr.write(b"\n").unwrap();
}

ignore-0.4.18/src/default_types.rs

/// This list represents the default file types that ripgrep ships with. In
/// general, any file format is fair game, although it should generally be
/// limited to reasonably popular open formats. For other cases, you can add
/// types to each invocation of ripgrep with the '--type-add' flag.
///
/// If you would like to add or improve this list, please file a PR:
/// <https://github.com/BurntSushi/ripgrep>.
///
/// Please try to keep this list sorted lexicographically and wrapped to 79
/// columns (inclusive).
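///
/// As an illustration (not part of the original file), a `Types` matcher
/// built from these defaults plus one extra, hypothetical type could be
/// assembled with the public `TypesBuilder`:
///
/// ```no_run
/// use ignore::types::TypesBuilder;
///
/// let mut builder = TypesBuilder::new();
/// builder.add_defaults();               // registers the DEFAULT_TYPES list
/// builder.add("foo", "*.foo").unwrap(); // hypothetical custom type
/// builder.select("foo");
/// let types = builder.build().unwrap();
/// ```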
#[rustfmt::skip] pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[ ("agda", &["*.agda", "*.lagda"]), ("aidl", &["*.aidl"]), ("amake", &["*.mk", "*.bp"]), ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]), ("asm", &["*.asm", "*.s", "*.S"]), ("asp", &[ "*.aspx", "*.aspx.cs", "*.aspx.vb", "*.ascx", "*.ascx.cs", "*.ascx.vb", ]), ("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]), ("avro", &["*.avdl", "*.avpr", "*.avsc"]), ("awk", &["*.awk"]), ("bazel", &["*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "WORKSPACE"]), ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]), ("brotli", &["*.br"]), ("buildstream", &["*.bst"]), ("bzip2", &["*.bz2", "*.tbz2"]), ("c", &["*.[chH]", "*.[chH].in", "*.cats"]), ("cabal", &["*.cabal"]), ("cbor", &["*.cbor"]), ("ceylon", &["*.ceylon"]), ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]), ("cmake", &["*.cmake", "CMakeLists.txt"]), ("coffeescript", &["*.coffee"]), ("config", &["*.cfg", "*.conf", "*.config", "*.ini"]), ("coq", &["*.v"]), ("cpp", &[ "*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl", "*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in", ]), ("creole", &["*.creole"]), ("crystal", &["Projectfile", "*.cr"]), ("cs", &["*.cs"]), ("csharp", &["*.cs"]), ("cshtml", &["*.cshtml"]), ("css", &["*.css", "*.scss"]), ("csv", &["*.csv"]), ("cython", &["*.pyx", "*.pxi", "*.pxd"]), ("d", &["*.d"]), ("dart", &["*.dart"]), ("dhall", &["*.dhall"]), ("diff", &["*.patch", "*.diff"]), ("docker", &["*Dockerfile*"]), ("dvc", &["Dvcfile", "*.dvc"]), ("ebuild", &["*.ebuild"]), ("edn", &["*.edn"]), ("elisp", &["*.el"]), ("elixir", &["*.ex", "*.eex", "*.exs"]), ("elm", &["*.elm"]), ("erb", &["*.erb"]), ("erlang", &["*.erl", "*.hrl"]), ("fidl", &["*.fidl"]), ("fish", &["*.fish"]), ("flatbuffers", &["*.fbs"]), ("fortran", &[ "*.f", "*.F", "*.f77", "*.F77", "*.pfo", "*.f90", "*.F90", "*.f95", "*.F95", ]), ("fsharp", &["*.fs", "*.fsx", "*.fsi"]), ("fut", &[".fut"]), ("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]), ("gn", &["*.gn", "*.gni"]), ("go", &["*.go"]), ("gradle", &["*.gradle"]), ("groovy", &["*.groovy", "*.gradle"]), ("gzip", &["*.gz", "*.tgz"]), ("h", &["*.h", "*.hpp"]), ("haml", &["*.haml"]), ("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]), ("hbs", &["*.hbs"]), ("hs", &["*.hs", "*.lhs"]), ("html", &["*.htm", "*.html", "*.ejs"]), ("idris", &["*.idr", "*.lidr"]), ("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]), ("jinja", &["*.j2", "*.jinja", "*.jinja2"]), ("jl", &["*.jl"]), ("js", &["*.js", "*.jsx", "*.vue"]), ("json", &["*.json", "composer.lock"]), ("jsonl", &["*.jsonl"]), ("julia", &["*.jl"]), ("jupyter", &["*.ipynb", "*.jpynb"]), ("k", &["*.k"]), ("kotlin", &["*.kt", "*.kts"]), ("less", &["*.less"]), ("license", &[ // General "COPYING", "COPYING[.-]*", "COPYRIGHT", "COPYRIGHT[.-]*", "EULA", "EULA[.-]*", "licen[cs]e", "licen[cs]e.*", "LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*", "NOTICE", "NOTICE[.-]*", "PATENTS", "PATENTS[.-]*", "UNLICEN[CS]E", "UNLICEN[CS]E[.-]*", // GPL (gpl.txt, etc.) "agpl[.-]*", "gpl[.-]*", "lgpl[.-]*", // Other license-specific (APACHE-2.0.txt, etc.) 
"AGPL-*[0-9]*", "APACHE-*[0-9]*", "BSD-*[0-9]*", "CC-BY-*", "GFDL-*[0-9]*", "GNU-*[0-9]*", "GPL-*[0-9]*", "LGPL-*[0-9]*", "MIT-*[0-9]*", "MPL-*[0-9]*", "OFL-*[0-9]*", ]), ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), ("lock", &["*.lock", "package-lock.json"]), ("log", &["*.log"]), ("lua", &["*.lua"]), ("lz4", &["*.lz4"]), ("lzma", &["*.lzma"]), ("m4", &["*.ac", "*.m4"]), ("make", &[ "[Gg][Nn][Uu]makefile", "[Mm]akefile", "[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am", "[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in", "*.mk", "*.mak" ]), ("mako", &["*.mako", "*.mao"]), ("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]), ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]), ("matlab", &["*.m"]), ("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]), ("meson", &["meson.build", "meson_options.txt"]), ("minified", &["*.min.html", "*.min.css", "*.min.js"]), ("mint", &["*.mint"]), ("mk", &["mkfile"]), ("ml", &["*.ml"]), ("msbuild", &[ "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets", ]), ("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]), ("nix", &["*.nix"]), ("objc", &["*.h", "*.m"]), ("objcpp", &["*.h", "*.mm"]), ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]), ("org", &["*.org", "*.org_archive"]), ("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]), ("pdf", &["*.pdf"]), ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]), ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]), ("po", &["*.po"]), ("pod", &["*.pod"]), ("postscript", &["*.eps", "*.ps"]), ("protobuf", &["*.proto"]), ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]), ("puppet", &["*.erb", "*.pp", "*.rb"]), ("purs", &["*.purs"]), ("py", &["*.py"]), ("qmake", &["*.pro", "*.pri", "*.prf"]), ("qml", &["*.qml"]), ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]), ("racket", &["*.rkt"]), ("rdoc", &["*.rdoc"]), ("readme", &["README*", "*README"]), ("red", &["*.r", "*.red", "*.reds"]), ("robot", &["*.robot"]), ("rst", &["*.rst"]), ("ruby", &[ // Idiomatic files "config.ru", "Gemfile", ".irbrc", "Rakefile", // Extensions "*.gemspec", "*.rb", "*.rbw" ]), ("rust", &["*.rs"]), ("sass", &["*.sass", "*.scss"]), ("scala", &["*.scala", "*.sbt"]), ("sh", &[ // Portable/misc. 
init files ".login", ".logout", ".profile", "profile", // bash-specific init files ".bash_login", "bash_login", ".bash_logout", "bash_logout", ".bash_profile", "bash_profile", ".bashrc", "bashrc", "*.bashrc", // csh-specific init files ".cshrc", "*.cshrc", // ksh-specific init files ".kshrc", "*.kshrc", // tcsh-specific init files ".tcshrc", // zsh-specific init files ".zshenv", "zshenv", ".zlogin", "zlogin", ".zlogout", "zlogout", ".zprofile", "zprofile", ".zshrc", "zshrc", // Extensions "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh", ]), ("slim", &["*.skim", "*.slim", "*.slime"]), ("smarty", &["*.tpl"]), ("sml", &["*.sml", "*.sig"]), ("soy", &["*.soy"]), ("spark", &["*.spark"]), ("spec", &["*.spec"]), ("sql", &["*.sql", "*.psql"]), ("stylus", &["*.styl"]), ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]), ("svg", &["*.svg"]), ("swift", &["*.swift"]), ("swig", &["*.def", "*.i"]), ("systemd", &[ "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path", "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target", "*.timer", ]), ("taskpaper", &["*.taskpaper"]), ("tcl", &["*.tcl"]), ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]), ("textile", &["*.textile"]), ("tf", &["*.tf"]), ("thrift", &["*.thrift"]), ("toml", &["*.toml", "Cargo.lock"]), ("ts", &["*.ts", "*.tsx"]), ("twig", &["*.twig"]), ("txt", &["*.txt"]), ("typoscript", &["*.typoscript", "*.ts"]), ("vala", &["*.vala"]), ("vb", &["*.vb"]), ("vcl", &["*.vcl"]), ("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]), ("vhdl", &["*.vhd", "*.vhdl"]), ("vim", &["*.vim"]), ("vimscript", &["*.vim"]), ("webidl", &["*.idl", "*.webidl", "*.widl"]), ("wiki", &["*.mediawiki", "*.wiki"]), ("xml", &[ "*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb", "*.rng", "*.sch", "*.xhtml", ]), ("xz", &["*.xz", "*.txz"]), ("yacc", &["*.y"]), ("yaml", &["*.yaml", "*.yml"]), ("yang", &["*.yang"]), ("z", &["*.Z"]), ("zig", &["*.zig"]), ("zsh", &[ ".zshenv", "zshenv", ".zlogin", "zlogin", ".zlogout", "zlogout", ".zprofile", "zprofile", ".zshrc", "zshrc", "*.zsh", ]), ("zstd", &["*.zst", "*.zstd"]), ]; ignore-0.4.18/src/dir.rs000064400000000000000000001205470072674642500131550ustar 00000000000000// This module provides a data structure, `Ignore`, that connects "directory // traversal" with "ignore matchers." Specifically, it knows about gitignore // semantics and precedence, and is organized based on directory hierarchy. // Namely, every matcher logically corresponds to ignore rules from a single // directory, and points to the matcher for its corresponding parent directory. // In this sense, `Ignore` is a *persistent* data structure. // // This design was specifically chosen to make it possible to use this data // structure in a parallel directory iterator. // // My initial intention was to expose this module as part of this crate's // public API, but I think the data structure's public API is too complicated // with non-obvious failure modes. Alas, such things haven't been documented // well. 
use std::collections::HashMap; use std::ffi::{OsStr, OsString}; use std::fs::{File, FileType}; use std::io::{self, BufRead}; use std::path::{Path, PathBuf}; use std::sync::{Arc, RwLock}; use crate::gitignore::{self, Gitignore, GitignoreBuilder}; use crate::overrides::{self, Override}; use crate::pathutil::{is_hidden, strip_prefix}; use crate::types::{self, Types}; use crate::walk::DirEntry; use crate::{Error, Match, PartialErrorBuilder}; /// IgnoreMatch represents information about where a match came from when using /// the `Ignore` matcher. #[derive(Clone, Debug)] pub struct IgnoreMatch<'a>(IgnoreMatchInner<'a>); /// IgnoreMatchInner describes precisely where the match information came from. /// This is private to allow expansion to more matchers in the future. #[derive(Clone, Debug)] enum IgnoreMatchInner<'a> { Override(overrides::Glob<'a>), Gitignore(&'a gitignore::Glob), Types(types::Glob<'a>), Hidden, } impl<'a> IgnoreMatch<'a> { fn overrides(x: overrides::Glob<'a>) -> IgnoreMatch<'a> { IgnoreMatch(IgnoreMatchInner::Override(x)) } fn gitignore(x: &'a gitignore::Glob) -> IgnoreMatch<'a> { IgnoreMatch(IgnoreMatchInner::Gitignore(x)) } fn types(x: types::Glob<'a>) -> IgnoreMatch<'a> { IgnoreMatch(IgnoreMatchInner::Types(x)) } fn hidden() -> IgnoreMatch<'static> { IgnoreMatch(IgnoreMatchInner::Hidden) } } /// Options for the ignore matcher, shared between the matcher itself and the /// builder. #[derive(Clone, Copy, Debug)] struct IgnoreOptions { /// Whether to ignore hidden file paths or not. hidden: bool, /// Whether to read .ignore files. ignore: bool, /// Whether to respect any ignore files in parent directories. parents: bool, /// Whether to read git's global gitignore file. git_global: bool, /// Whether to read .gitignore files. git_ignore: bool, /// Whether to read .git/info/exclude files. git_exclude: bool, /// Whether to ignore files case insensitively ignore_case_insensitive: bool, /// Whether a git repository must be present in order to apply any /// git-related ignore rules. require_git: bool, } /// Ignore is a matcher useful for recursively walking one or more directories. #[derive(Clone, Debug)] pub struct Ignore(Arc); #[derive(Clone, Debug)] struct IgnoreInner { /// A map of all existing directories that have already been /// compiled into matchers. /// /// Note that this is never used during matching, only when adding new /// parent directory matchers. This avoids needing to rebuild glob sets for /// parent directories if many paths are being searched. compiled: Arc>>, /// The path to the directory that this matcher was built from. dir: PathBuf, /// An override matcher (default is empty). overrides: Arc, /// A file type matcher. types: Arc, /// The parent directory to match next. /// /// If this is the root directory or there are otherwise no more /// directories to match, then `parent` is `None`. parent: Option, /// Whether this is an absolute parent matcher, as added by add_parent. is_absolute_parent: bool, /// The absolute base path of this matcher. Populated only if parent /// directories are added. absolute_base: Option>, /// Explicit global ignore matchers specified by the caller. explicit_ignores: Arc>, /// Ignore files used in addition to `.ignore` custom_ignore_filenames: Arc>, /// The matcher for custom ignore files custom_ignore_matcher: Gitignore, /// The matcher for .ignore files. ignore_matcher: Gitignore, /// A global gitignore matcher, usually from $XDG_CONFIG_HOME/git/ignore. git_global_matcher: Arc, /// The matcher for .gitignore files. 
git_ignore_matcher: Gitignore, /// Special matcher for `.git/info/exclude` files. git_exclude_matcher: Gitignore, /// Whether this directory contains a .git sub-directory. has_git: bool, /// Ignore config. opts: IgnoreOptions, } impl Ignore { /// Return the directory path of this matcher. pub fn path(&self) -> &Path { &self.0.dir } /// Return true if this matcher has no parent. pub fn is_root(&self) -> bool { self.0.parent.is_none() } /// Returns true if this matcher was added via the `add_parents` method. pub fn is_absolute_parent(&self) -> bool { self.0.is_absolute_parent } /// Return this matcher's parent, if one exists. pub fn parent(&self) -> Option { self.0.parent.clone() } /// Create a new `Ignore` matcher with the parent directories of `dir`. /// /// Note that this can only be called on an `Ignore` matcher with no /// parents (i.e., `is_root` returns `true`). This will panic otherwise. pub fn add_parents>( &self, path: P, ) -> (Ignore, Option) { if !self.0.opts.parents && !self.0.opts.git_ignore && !self.0.opts.git_exclude && !self.0.opts.git_global { // If we never need info from parent directories, then don't do // anything. return (self.clone(), None); } if !self.is_root() { panic!("Ignore::add_parents called on non-root matcher"); } let absolute_base = match path.as_ref().canonicalize() { Ok(path) => Arc::new(path), Err(_) => { // There's not much we can do here, so just return our // existing matcher. We drop the error to be consistent // with our general pattern of ignoring I/O errors when // processing ignore files. return (self.clone(), None); } }; // List of parents, from child to root. let mut parents = vec![]; let mut path = &**absolute_base; while let Some(parent) = path.parent() { parents.push(parent); path = parent; } let mut errs = PartialErrorBuilder::default(); let mut ig = self.clone(); for parent in parents.into_iter().rev() { let mut compiled = self.0.compiled.write().unwrap(); if let Some(prebuilt) = compiled.get(parent.as_os_str()) { ig = prebuilt.clone(); continue; } let (mut igtmp, err) = ig.add_child_path(parent); errs.maybe_push(err); igtmp.is_absolute_parent = true; igtmp.absolute_base = Some(absolute_base.clone()); igtmp.has_git = if self.0.opts.git_ignore { parent.join(".git").exists() } else { false }; ig = Ignore(Arc::new(igtmp)); compiled.insert(parent.as_os_str().to_os_string(), ig.clone()); } (ig, errs.into_error_option()) } /// Create a new `Ignore` matcher for the given child directory. /// /// Since building the matcher may require reading from multiple /// files, it's possible that this method partially succeeds. Therefore, /// a matcher is always returned (which may match nothing) and an error is /// returned if it exists. /// /// Note that all I/O errors are completely ignored. pub fn add_child>( &self, dir: P, ) -> (Ignore, Option) { let (ig, err) = self.add_child_path(dir.as_ref()); (Ignore(Arc::new(ig)), err) } /// Like add_child, but takes a full path and returns an IgnoreInner. 
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option) { let git_type = if self.0.opts.git_ignore || self.0.opts.git_exclude { dir.join(".git").metadata().ok().map(|md| md.file_type()) } else { None }; let has_git = git_type.map(|_| true).unwrap_or(false); let mut errs = PartialErrorBuilder::default(); let custom_ig_matcher = if self.0.custom_ignore_filenames.is_empty() { Gitignore::empty() } else { let (m, err) = create_gitignore( &dir, &dir, &self.0.custom_ignore_filenames, self.0.opts.ignore_case_insensitive, ); errs.maybe_push(err); m }; let ig_matcher = if !self.0.opts.ignore { Gitignore::empty() } else { let (m, err) = create_gitignore( &dir, &dir, &[".ignore"], self.0.opts.ignore_case_insensitive, ); errs.maybe_push(err); m }; let gi_matcher = if !self.0.opts.git_ignore { Gitignore::empty() } else { let (m, err) = create_gitignore( &dir, &dir, &[".gitignore"], self.0.opts.ignore_case_insensitive, ); errs.maybe_push(err); m }; let gi_exclude_matcher = if !self.0.opts.git_exclude { Gitignore::empty() } else { match resolve_git_commondir(dir, git_type) { Ok(git_dir) => { let (m, err) = create_gitignore( &dir, &git_dir, &["info/exclude"], self.0.opts.ignore_case_insensitive, ); errs.maybe_push(err); m } Err(err) => { errs.maybe_push(err); Gitignore::empty() } } }; let ig = IgnoreInner { compiled: self.0.compiled.clone(), dir: dir.to_path_buf(), overrides: self.0.overrides.clone(), types: self.0.types.clone(), parent: Some(self.clone()), is_absolute_parent: false, absolute_base: self.0.absolute_base.clone(), explicit_ignores: self.0.explicit_ignores.clone(), custom_ignore_filenames: self.0.custom_ignore_filenames.clone(), custom_ignore_matcher: custom_ig_matcher, ignore_matcher: ig_matcher, git_global_matcher: self.0.git_global_matcher.clone(), git_ignore_matcher: gi_matcher, git_exclude_matcher: gi_exclude_matcher, has_git, opts: self.0.opts, }; (ig, errs.into_error_option()) } /// Returns true if at least one type of ignore rule should be matched. fn has_any_ignore_rules(&self) -> bool { let opts = self.0.opts; let has_custom_ignore_files = !self.0.custom_ignore_filenames.is_empty(); let has_explicit_ignores = !self.0.explicit_ignores.is_empty(); opts.ignore || opts.git_global || opts.git_ignore || opts.git_exclude || has_custom_ignore_files || has_explicit_ignores } /// Like `matched`, but works with a directory entry instead. pub fn matched_dir_entry<'a>( &'a self, dent: &DirEntry, ) -> Match> { let m = self.matched(dent.path(), dent.is_dir()); if m.is_none() && self.0.opts.hidden && is_hidden(dent) { return Match::Ignore(IgnoreMatch::hidden()); } m } /// Returns a match indicating whether the given file path should be /// ignored or not. /// /// The match contains information about its origin. fn matched<'a, P: AsRef>( &'a self, path: P, is_dir: bool, ) -> Match> { // We need to be careful with our path. If it has a leading ./, then // strip it because it causes nothing but trouble. let mut path = path.as_ref(); if let Some(p) = strip_prefix("./", path) { path = p; } // Match against the override patterns. If an override matches // regardless of whether it's whitelist/ignore, then we quit and // return that result immediately. Overrides have the highest // precedence. 
if !self.0.overrides.is_empty() { let mat = self .0 .overrides .matched(path, is_dir) .map(IgnoreMatch::overrides); if !mat.is_none() { return mat; } } let mut whitelisted = Match::None; if self.has_any_ignore_rules() { let mat = self.matched_ignore(path, is_dir); if mat.is_ignore() { return mat; } else if mat.is_whitelist() { whitelisted = mat; } } if !self.0.types.is_empty() { let mat = self.0.types.matched(path, is_dir).map(IgnoreMatch::types); if mat.is_ignore() { return mat; } else if mat.is_whitelist() { whitelisted = mat; } } whitelisted } /// Performs matching only on the ignore files for this directory and /// all parent directories. fn matched_ignore<'a>( &'a self, path: &Path, is_dir: bool, ) -> Match> { let ( mut m_custom_ignore, mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit, ) = (Match::None, Match::None, Match::None, Match::None, Match::None); let any_git = !self.0.opts.require_git || self.parents().any(|ig| ig.0.has_git); let mut saw_git = false; for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) { if m_custom_ignore.is_none() { m_custom_ignore = ig.0.custom_ignore_matcher .matched(path, is_dir) .map(IgnoreMatch::gitignore); } if m_ignore.is_none() { m_ignore = ig.0.ignore_matcher .matched(path, is_dir) .map(IgnoreMatch::gitignore); } if any_git && !saw_git && m_gi.is_none() { m_gi = ig.0.git_ignore_matcher .matched(path, is_dir) .map(IgnoreMatch::gitignore); } if any_git && !saw_git && m_gi_exclude.is_none() { m_gi_exclude = ig.0.git_exclude_matcher .matched(path, is_dir) .map(IgnoreMatch::gitignore); } saw_git = saw_git || ig.0.has_git; } if self.0.opts.parents { if let Some(abs_parent_path) = self.absolute_base() { let path = abs_parent_path.join(path); for ig in self.parents().skip_while(|ig| !ig.0.is_absolute_parent) { if m_custom_ignore.is_none() { m_custom_ignore = ig.0.custom_ignore_matcher .matched(&path, is_dir) .map(IgnoreMatch::gitignore); } if m_ignore.is_none() { m_ignore = ig.0.ignore_matcher .matched(&path, is_dir) .map(IgnoreMatch::gitignore); } if any_git && !saw_git && m_gi.is_none() { m_gi = ig.0.git_ignore_matcher .matched(&path, is_dir) .map(IgnoreMatch::gitignore); } if any_git && !saw_git && m_gi_exclude.is_none() { m_gi_exclude = ig.0.git_exclude_matcher .matched(&path, is_dir) .map(IgnoreMatch::gitignore); } saw_git = saw_git || ig.0.has_git; } } } for gi in self.0.explicit_ignores.iter().rev() { if !m_explicit.is_none() { break; } m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore); } let m_global = if any_git { self.0 .git_global_matcher .matched(&path, is_dir) .map(IgnoreMatch::gitignore) } else { Match::None }; m_custom_ignore .or(m_ignore) .or(m_gi) .or(m_gi_exclude) .or(m_global) .or(m_explicit) } /// Returns an iterator over parent ignore matchers, including this one. pub fn parents(&self) -> Parents<'_> { Parents(Some(self)) } /// Returns the first absolute path of the first absolute parent, if /// one exists. fn absolute_base(&self) -> Option<&Path> { self.0.absolute_base.as_ref().map(|p| &***p) } } /// An iterator over all parents of an ignore matcher, including itself. /// /// The lifetime `'a` refers to the lifetime of the initial `Ignore` matcher. pub struct Parents<'a>(Option<&'a Ignore>); impl<'a> Iterator for Parents<'a> { type Item = &'a Ignore; fn next(&mut self) -> Option<&'a Ignore> { match self.0.take() { None => None, Some(ig) => { self.0 = ig.0.parent.as_ref(); Some(ig) } } } } /// A builder for creating an Ignore matcher. 
#[derive(Clone, Debug)] pub struct IgnoreBuilder { /// The root directory path for this ignore matcher. dir: PathBuf, /// An override matcher (default is empty). overrides: Arc, /// A type matcher (default is empty). types: Arc, /// Explicit global ignore matchers. explicit_ignores: Vec, /// Ignore files in addition to .ignore. custom_ignore_filenames: Vec, /// Ignore config. opts: IgnoreOptions, } impl IgnoreBuilder { /// Create a new builder for an `Ignore` matcher. /// /// All relative file paths are resolved with respect to the current /// working directory. pub fn new() -> IgnoreBuilder { IgnoreBuilder { dir: Path::new("").to_path_buf(), overrides: Arc::new(Override::empty()), types: Arc::new(Types::empty()), explicit_ignores: vec![], custom_ignore_filenames: vec![], opts: IgnoreOptions { hidden: true, ignore: true, parents: true, git_global: true, git_ignore: true, git_exclude: true, ignore_case_insensitive: false, require_git: true, }, } } /// Builds a new `Ignore` matcher. /// /// The matcher returned won't match anything until ignore rules from /// directories are added to it. pub fn build(&self) -> Ignore { let git_global_matcher = if !self.opts.git_global { Gitignore::empty() } else { let mut builder = GitignoreBuilder::new(""); builder .case_insensitive(self.opts.ignore_case_insensitive) .unwrap(); let (gi, err) = builder.build_global(); if let Some(err) = err { log::debug!("{}", err); } gi }; Ignore(Arc::new(IgnoreInner { compiled: Arc::new(RwLock::new(HashMap::new())), dir: self.dir.clone(), overrides: self.overrides.clone(), types: self.types.clone(), parent: None, is_absolute_parent: true, absolute_base: None, explicit_ignores: Arc::new(self.explicit_ignores.clone()), custom_ignore_filenames: Arc::new( self.custom_ignore_filenames.clone(), ), custom_ignore_matcher: Gitignore::empty(), ignore_matcher: Gitignore::empty(), git_global_matcher: Arc::new(git_global_matcher), git_ignore_matcher: Gitignore::empty(), git_exclude_matcher: Gitignore::empty(), has_git: false, opts: self.opts, })) } /// Add an override matcher. /// /// By default, no override matcher is used. /// /// This overrides any previous setting. pub fn overrides(&mut self, overrides: Override) -> &mut IgnoreBuilder { self.overrides = Arc::new(overrides); self } /// Add a file type matcher. /// /// By default, no file type matcher is used. /// /// This overrides any previous setting. pub fn types(&mut self, types: Types) -> &mut IgnoreBuilder { self.types = Arc::new(types); self } /// Adds a new global ignore matcher from the ignore file path given. pub fn add_ignore(&mut self, ig: Gitignore) -> &mut IgnoreBuilder { self.explicit_ignores.push(ig); self } /// Add a custom ignore file name /// /// These ignore files have higher precedence than all other ignore files. /// /// When specifying multiple names, earlier names have lower precedence than /// later names. pub fn add_custom_ignore_filename>( &mut self, file_name: S, ) -> &mut IgnoreBuilder { self.custom_ignore_filenames.push(file_name.as_ref().to_os_string()); self } /// Enables ignoring hidden files. /// /// This is enabled by default. pub fn hidden(&mut self, yes: bool) -> &mut IgnoreBuilder { self.opts.hidden = yes; self } /// Enables reading `.ignore` files. /// /// `.ignore` files have the same semantics as `gitignore` files and are /// supported by search tools such as ripgrep and The Silver Searcher. /// /// This is enabled by default. 
pub fn ignore(&mut self, yes: bool) -> &mut IgnoreBuilder { self.opts.ignore = yes; self } /// Enables reading ignore files from parent directories. /// /// If this is enabled, then .gitignore files in parent directories of each /// file path given are respected. Otherwise, they are ignored. /// /// This is enabled by default. pub fn parents(&mut self, yes: bool) -> &mut IgnoreBuilder { self.opts.parents = yes; self } /// Add a global gitignore matcher. /// /// Its precedence is lower than both normal `.gitignore` files and /// `.git/info/exclude` files. /// /// This overwrites any previous global gitignore setting. /// /// This is enabled by default. pub fn git_global(&mut self, yes: bool) -> &mut IgnoreBuilder { self.opts.git_global = yes; self } /// Enables reading `.gitignore` files. /// /// `.gitignore` files have match semantics as described in the `gitignore` /// man page. /// /// This is enabled by default. pub fn git_ignore(&mut self, yes: bool) -> &mut IgnoreBuilder { self.opts.git_ignore = yes; self } /// Enables reading `.git/info/exclude` files. /// /// `.git/info/exclude` files have match semantics as described in the /// `gitignore` man page. /// /// This is enabled by default. pub fn git_exclude(&mut self, yes: bool) -> &mut IgnoreBuilder { self.opts.git_exclude = yes; self } /// Whether a git repository is required to apply git-related ignore /// rules (global rules, .gitignore and local exclude rules). /// /// When disabled, git-related ignore rules are applied even when searching /// outside a git repository. pub fn require_git(&mut self, yes: bool) -> &mut IgnoreBuilder { self.opts.require_git = yes; self } /// Process ignore files case insensitively /// /// This is disabled by default. pub fn ignore_case_insensitive( &mut self, yes: bool, ) -> &mut IgnoreBuilder { self.opts.ignore_case_insensitive = yes; self } } /// Creates a new gitignore matcher for the directory given. /// /// The matcher is meant to match files below `dir`. /// Ignore globs are extracted from each of the file names relative to /// `dir_for_ignorefile` in the order given (earlier names have lower /// precedence than later names). /// /// I/O errors are ignored. pub fn create_gitignore>( dir: &Path, dir_for_ignorefile: &Path, names: &[T], case_insensitive: bool, ) -> (Gitignore, Option) { let mut builder = GitignoreBuilder::new(dir); let mut errs = PartialErrorBuilder::default(); builder.case_insensitive(case_insensitive).unwrap(); for name in names { let gipath = dir_for_ignorefile.join(name.as_ref()); // This check is not necessary, but is added for performance. Namely, // a simple stat call checking for existence can often be just a bit // quicker than actually trying to open a file. Since the number of // directories without ignore files likely greatly exceeds the number // with ignore files, this check generally makes sense. // // However, until demonstrated otherwise, we speculatively do not do // this on Windows since Windows is notorious for having slow file // system operations. Namely, it's not clear whether this analysis // makes sense on Windows. // // For more details: https://github.com/BurntSushi/ripgrep/pull/1381 if cfg!(windows) || gipath.exists() { errs.maybe_push_ignore_io(builder.add(gipath)); } } let gi = match builder.build() { Ok(gi) => gi, Err(err) => { errs.push(err); GitignoreBuilder::new(dir).build().unwrap() } }; (gi, errs.into_error_option()) } /// Find the GIT_COMMON_DIR for the given git worktree. 
/// /// This is the directory that may contain a private ignore file /// "info/exclude". Unlike git, this function does *not* read environment /// variables GIT_DIR and GIT_COMMON_DIR, because it is not clear how to use /// them when multiple repositories are searched. /// /// Some I/O errors are ignored. fn resolve_git_commondir( dir: &Path, git_type: Option, ) -> Result> { let git_dir_path = || dir.join(".git"); let git_dir = git_dir_path(); if !git_type.map_or(false, |ft| ft.is_file()) { return Ok(git_dir); } let file = match File::open(git_dir) { Ok(file) => io::BufReader::new(file), Err(err) => { return Err(Some(Error::Io(err).with_path(git_dir_path()))); } }; let dot_git_line = match file.lines().next() { Some(Ok(line)) => line, Some(Err(err)) => { return Err(Some(Error::Io(err).with_path(git_dir_path()))); } None => return Err(None), }; if !dot_git_line.starts_with("gitdir: ") { return Err(None); } let real_git_dir = PathBuf::from(&dot_git_line["gitdir: ".len()..]); let git_commondir_file = || real_git_dir.join("commondir"); let file = match File::open(git_commondir_file()) { Ok(file) => io::BufReader::new(file), Err(_) => return Err(None), }; let commondir_line = match file.lines().next() { Some(Ok(line)) => line, Some(Err(err)) => { return Err(Some(Error::Io(err).with_path(git_commondir_file()))); } None => return Err(None), }; let commondir_abs = if commondir_line.starts_with(".") { real_git_dir.join(commondir_line) // relative commondir } else { PathBuf::from(commondir_line) }; Ok(commondir_abs) } #[cfg(test)] mod tests { use std::fs::{self, File}; use std::io::Write; use std::path::Path; use crate::dir::IgnoreBuilder; use crate::gitignore::Gitignore; use crate::tests::TempDir; use crate::Error; fn wfile>(path: P, contents: &str) { let mut file = File::create(path).unwrap(); file.write_all(contents.as_bytes()).unwrap(); } fn mkdirp>(path: P) { fs::create_dir_all(path).unwrap(); } fn partial(err: Error) -> Vec { match err { Error::Partial(errs) => errs, _ => panic!("expected partial error but got {:?}", err), } } fn tmpdir() -> TempDir { TempDir::new().unwrap() } #[test] fn explicit_ignore() { let td = tmpdir(); wfile(td.path().join("not-an-ignore"), "foo\n!bar"); let (gi, err) = Gitignore::new(td.path().join("not-an-ignore")); assert!(err.is_none()); let (ig, err) = IgnoreBuilder::new().add_ignore(gi).build().add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("foo", false).is_ignore()); assert!(ig.matched("bar", false).is_whitelist()); assert!(ig.matched("baz", false).is_none()); } #[test] fn git_exclude() { let td = tmpdir(); mkdirp(td.path().join(".git/info")); wfile(td.path().join(".git/info/exclude"), "foo\n!bar"); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("foo", false).is_ignore()); assert!(ig.matched("bar", false).is_whitelist()); assert!(ig.matched("baz", false).is_none()); } #[test] fn gitignore() { let td = tmpdir(); mkdirp(td.path().join(".git")); wfile(td.path().join(".gitignore"), "foo\n!bar"); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("foo", false).is_ignore()); assert!(ig.matched("bar", false).is_whitelist()); assert!(ig.matched("baz", false).is_none()); } #[test] fn gitignore_no_git() { let td = tmpdir(); wfile(td.path().join(".gitignore"), "foo\n!bar"); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("foo", false).is_none()); assert!(ig.matched("bar", 
false).is_none()); assert!(ig.matched("baz", false).is_none()); } #[test] fn gitignore_allowed_no_git() { let td = tmpdir(); wfile(td.path().join(".gitignore"), "foo\n!bar"); let (ig, err) = IgnoreBuilder::new() .require_git(false) .build() .add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("foo", false).is_ignore()); assert!(ig.matched("bar", false).is_whitelist()); assert!(ig.matched("baz", false).is_none()); } #[test] fn ignore() { let td = tmpdir(); wfile(td.path().join(".ignore"), "foo\n!bar"); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("foo", false).is_ignore()); assert!(ig.matched("bar", false).is_whitelist()); assert!(ig.matched("baz", false).is_none()); } #[test] fn custom_ignore() { let td = tmpdir(); let custom_ignore = ".customignore"; wfile(td.path().join(custom_ignore), "foo\n!bar"); let (ig, err) = IgnoreBuilder::new() .add_custom_ignore_filename(custom_ignore) .build() .add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("foo", false).is_ignore()); assert!(ig.matched("bar", false).is_whitelist()); assert!(ig.matched("baz", false).is_none()); } // Tests that a custom ignore file will override an .ignore. #[test] fn custom_ignore_over_ignore() { let td = tmpdir(); let custom_ignore = ".customignore"; wfile(td.path().join(".ignore"), "foo"); wfile(td.path().join(custom_ignore), "!foo"); let (ig, err) = IgnoreBuilder::new() .add_custom_ignore_filename(custom_ignore) .build() .add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("foo", false).is_whitelist()); } // Tests that earlier custom ignore files have lower precedence than later. #[test] fn custom_ignore_precedence() { let td = tmpdir(); let custom_ignore1 = ".customignore1"; let custom_ignore2 = ".customignore2"; wfile(td.path().join(custom_ignore1), "foo"); wfile(td.path().join(custom_ignore2), "!foo"); let (ig, err) = IgnoreBuilder::new() .add_custom_ignore_filename(custom_ignore1) .add_custom_ignore_filename(custom_ignore2) .build() .add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("foo", false).is_whitelist()); } // Tests that an .ignore will override a .gitignore. #[test] fn ignore_over_gitignore() { let td = tmpdir(); wfile(td.path().join(".gitignore"), "foo"); wfile(td.path().join(".ignore"), "!foo"); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("foo", false).is_whitelist()); } // Tests that exclude has lower precedent than both .ignore and .gitignore. 
#[test] fn exclude_lowest() { let td = tmpdir(); wfile(td.path().join(".gitignore"), "!foo"); wfile(td.path().join(".ignore"), "!bar"); mkdirp(td.path().join(".git/info")); wfile(td.path().join(".git/info/exclude"), "foo\nbar\nbaz"); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); assert!(err.is_none()); assert!(ig.matched("baz", false).is_ignore()); assert!(ig.matched("foo", false).is_whitelist()); assert!(ig.matched("bar", false).is_whitelist()); } #[test] fn errored() { let td = tmpdir(); wfile(td.path().join(".gitignore"), "{foo"); let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); assert!(err.is_some()); } #[test] fn errored_both() { let td = tmpdir(); wfile(td.path().join(".gitignore"), "{foo"); wfile(td.path().join(".ignore"), "{bar"); let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); assert_eq!(2, partial(err.expect("an error")).len()); } #[test] fn errored_partial() { let td = tmpdir(); mkdirp(td.path().join(".git")); wfile(td.path().join(".gitignore"), "{foo\nbar"); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); assert!(err.is_some()); assert!(ig.matched("bar", false).is_ignore()); } #[test] fn errored_partial_and_ignore() { let td = tmpdir(); wfile(td.path().join(".gitignore"), "{foo\nbar"); wfile(td.path().join(".ignore"), "!bar"); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); assert!(err.is_some()); assert!(ig.matched("bar", false).is_whitelist()); } #[test] fn not_present_empty() { let td = tmpdir(); let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); assert!(err.is_none()); } #[test] fn stops_at_git_dir() { // This tests that .gitignore files beyond a .git barrier aren't // matched, but .ignore files are. let td = tmpdir(); mkdirp(td.path().join(".git")); mkdirp(td.path().join("foo/.git")); wfile(td.path().join(".gitignore"), "foo"); wfile(td.path().join(".ignore"), "bar"); let ig0 = IgnoreBuilder::new().build(); let (ig1, err) = ig0.add_child(td.path()); assert!(err.is_none()); let (ig2, err) = ig1.add_child(ig1.path().join("foo")); assert!(err.is_none()); assert!(ig1.matched("foo", false).is_ignore()); assert!(ig2.matched("foo", false).is_none()); assert!(ig1.matched("bar", false).is_ignore()); assert!(ig2.matched("bar", false).is_ignore()); } #[test] fn absolute_parent() { let td = tmpdir(); mkdirp(td.path().join(".git")); mkdirp(td.path().join("foo")); wfile(td.path().join(".gitignore"), "bar"); // First, check that the parent gitignore file isn't detected if the // parent isn't added. This establishes a baseline. let ig0 = IgnoreBuilder::new().build(); let (ig1, err) = ig0.add_child(td.path().join("foo")); assert!(err.is_none()); assert!(ig1.matched("bar", false).is_none()); // Second, check that adding a parent directory actually works. 
let ig0 = IgnoreBuilder::new().build(); let (ig1, err) = ig0.add_parents(td.path().join("foo")); assert!(err.is_none()); let (ig2, err) = ig1.add_child(td.path().join("foo")); assert!(err.is_none()); assert!(ig2.matched("bar", false).is_ignore()); } #[test] fn absolute_parent_anchored() { let td = tmpdir(); mkdirp(td.path().join(".git")); mkdirp(td.path().join("src/llvm")); wfile(td.path().join(".gitignore"), "/llvm/\nfoo"); let ig0 = IgnoreBuilder::new().build(); let (ig1, err) = ig0.add_parents(td.path().join("src")); assert!(err.is_none()); let (ig2, err) = ig1.add_child("src"); assert!(err.is_none()); assert!(ig1.matched("llvm", true).is_none()); assert!(ig2.matched("llvm", true).is_none()); assert!(ig2.matched("src/llvm", true).is_none()); assert!(ig2.matched("foo", false).is_ignore()); assert!(ig2.matched("src/foo", false).is_ignore()); } #[test] fn git_info_exclude_in_linked_worktree() { let td = tmpdir(); let git_dir = td.path().join(".git"); mkdirp(git_dir.join("info")); wfile(git_dir.join("info/exclude"), "ignore_me"); mkdirp(git_dir.join("worktrees/linked-worktree")); let commondir_path = || git_dir.join("worktrees/linked-worktree/commondir"); mkdirp(td.path().join("linked-worktree")); let worktree_git_dir_abs = format!( "gitdir: {}", git_dir.join("worktrees/linked-worktree").to_str().unwrap(), ); wfile(td.path().join("linked-worktree/.git"), &worktree_git_dir_abs); // relative commondir wfile(commondir_path(), "../.."); let ib = IgnoreBuilder::new().build(); let (ignore, err) = ib.add_child(td.path().join("linked-worktree")); assert!(err.is_none()); assert!(ignore.matched("ignore_me", false).is_ignore()); // absolute commondir wfile(commondir_path(), git_dir.to_str().unwrap()); let (ignore, err) = ib.add_child(td.path().join("linked-worktree")); assert!(err.is_none()); assert!(ignore.matched("ignore_me", false).is_ignore()); // missing commondir file assert!(fs::remove_file(commondir_path()).is_ok()); let (_, err) = ib.add_child(td.path().join("linked-worktree")); // We squash the error in this case, because it occurs in repositories // that are not linked worktrees but have submodules. assert!(err.is_none()); wfile(td.path().join("linked-worktree/.git"), "garbage"); let (_, err) = ib.add_child(td.path().join("linked-worktree")); assert!(err.is_none()); wfile(td.path().join("linked-worktree/.git"), "gitdir: garbage"); let (_, err) = ib.add_child(td.path().join("linked-worktree")); assert!(err.is_none()); } } ignore-0.4.18/src/gitignore.rs000064400000000000000000000674500072674642500143710ustar 00000000000000/*! The gitignore module provides a way to match globs from a gitignore file against file paths. Note that this module implements the specification as described in the `gitignore` man page from scratch. That is, this module does *not* shell out to the `git` command line tool. */ use std::cell::RefCell; use std::env; use std::fs::File; use std::io::{self, BufRead, Read}; use std::path::{Path, PathBuf}; use std::str; use std::sync::Arc; use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder}; use regex::bytes::Regex; use thread_local::ThreadLocal; use crate::pathutil::{is_file_name, strip_prefix}; use crate::{Error, Match, PartialErrorBuilder}; /// Glob represents a single glob in a gitignore file. /// /// This is used to report information about the highest precedent glob that /// matched in one or more gitignore files. #[derive(Clone, Debug)] pub struct Glob { /// The file path that this glob was extracted from. from: Option, /// The original glob string. 
original: String, /// The actual glob string used to convert to a regex. actual: String, /// Whether this is a whitelisted glob or not. is_whitelist: bool, /// Whether this glob should only match directories or not. is_only_dir: bool, } impl Glob { /// Returns the file path that defined this glob. pub fn from(&self) -> Option<&Path> { self.from.as_ref().map(|p| &**p) } /// The original glob as it was defined in a gitignore file. pub fn original(&self) -> &str { &self.original } /// The actual glob that was compiled to respect gitignore /// semantics. pub fn actual(&self) -> &str { &self.actual } /// Whether this was a whitelisted glob or not. pub fn is_whitelist(&self) -> bool { self.is_whitelist } /// Whether this glob must match a directory or not. pub fn is_only_dir(&self) -> bool { self.is_only_dir } /// Returns true if and only if this glob has a `**/` prefix. fn has_doublestar_prefix(&self) -> bool { self.actual.starts_with("**/") || self.actual == "**" } } /// Gitignore is a matcher for the globs in one or more gitignore files /// in the same directory. #[derive(Clone, Debug)] pub struct Gitignore { set: GlobSet, root: PathBuf, globs: Vec, num_ignores: u64, num_whitelists: u64, matches: Option>>>>, } impl Gitignore { /// Creates a new gitignore matcher from the gitignore file path given. /// /// If it's desirable to include multiple gitignore files in a single /// matcher, or read gitignore globs from a different source, then /// use `GitignoreBuilder`. /// /// This always returns a valid matcher, even if it's empty. In particular, /// a Gitignore file can be partially valid, e.g., when one glob is invalid /// but the rest aren't. /// /// Note that I/O errors are ignored. For more granular control over /// errors, use `GitignoreBuilder`. pub fn new>( gitignore_path: P, ) -> (Gitignore, Option) { let path = gitignore_path.as_ref(); let parent = path.parent().unwrap_or(Path::new("/")); let mut builder = GitignoreBuilder::new(parent); let mut errs = PartialErrorBuilder::default(); errs.maybe_push_ignore_io(builder.add(path)); match builder.build() { Ok(gi) => (gi, errs.into_error_option()), Err(err) => { errs.push(err); (Gitignore::empty(), errs.into_error_option()) } } } /// Creates a new gitignore matcher from the global ignore file, if one /// exists. /// /// The global config file path is specified by git's `core.excludesFile` /// config option. /// /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` /// does not exist or does not specify `core.excludesFile`, then /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not /// set or is empty, then `$HOME/.config/git/ignore` is used instead. pub fn global() -> (Gitignore, Option) { GitignoreBuilder::new("").build_global() } /// Creates a new empty gitignore matcher that never matches anything. /// /// Its path is empty. pub fn empty() -> Gitignore { Gitignore { set: GlobSet::empty(), root: PathBuf::from(""), globs: vec![], num_ignores: 0, num_whitelists: 0, matches: None, } } /// Returns the directory containing this gitignore matcher. /// /// All matches are done relative to this path. pub fn path(&self) -> &Path { &*self.root } /// Returns true if and only if this gitignore has zero globs, and /// therefore never matches any file path. pub fn is_empty(&self) -> bool { self.set.is_empty() } /// Returns the total number of globs, which should be equivalent to /// `num_ignores + num_whitelists`. pub fn len(&self) -> usize { self.set.len() } /// Returns the total number of ignore globs. 
pub fn num_ignores(&self) -> u64 { self.num_ignores } /// Returns the total number of whitelisted globs. pub fn num_whitelists(&self) -> u64 { self.num_whitelists } /// Returns whether the given path (file or directory) matched a pattern in /// this gitignore matcher. /// /// `is_dir` should be true if the path refers to a directory and false /// otherwise. /// /// The given path is matched relative to the path given when building /// the matcher. Specifically, before matching `path`, its prefix (as /// determined by a common suffix of the directory containing this /// gitignore) is stripped. If there is no common suffix/prefix overlap, /// then `path` is assumed to be relative to this matcher. pub fn matched>( &self, path: P, is_dir: bool, ) -> Match<&Glob> { if self.is_empty() { return Match::None; } self.matched_stripped(self.strip(path.as_ref()), is_dir) } /// Returns whether the given path (file or directory, and expected to be /// under the root) or any of its parent directories (up to the root) /// matched a pattern in this gitignore matcher. /// /// NOTE: This method is more expensive than walking the directory hierarchy /// top-to-bottom and matching the entries. But, is easier to use in cases /// when a list of paths are available without a hierarchy. /// /// `is_dir` should be true if the path refers to a directory and false /// otherwise. /// /// The given path is matched relative to the path given when building /// the matcher. Specifically, before matching `path`, its prefix (as /// determined by a common suffix of the directory containing this /// gitignore) is stripped. If there is no common suffix/prefix overlap, /// then `path` is assumed to be relative to this matcher. /// /// # Panics /// /// This method panics if the given file path is not under the root path /// of this matcher. pub fn matched_path_or_any_parents>( &self, path: P, is_dir: bool, ) -> Match<&Glob> { if self.is_empty() { return Match::None; } let mut path = self.strip(path.as_ref()); assert!(!path.has_root(), "path is expected to be under the root"); match self.matched_stripped(path, is_dir) { Match::None => (), // walk up a_match => return a_match, } while let Some(parent) = path.parent() { match self.matched_stripped(parent, /* is_dir */ true) { Match::None => path = parent, // walk up a_match => return a_match, } } Match::None } /// Like matched, but takes a path that has already been stripped. fn matched_stripped>( &self, path: P, is_dir: bool, ) -> Match<&Glob> { if self.is_empty() { return Match::None; } let path = path.as_ref(); let _matches = self.matches.as_ref().unwrap().get_or_default(); let mut matches = _matches.borrow_mut(); let candidate = Candidate::new(path); self.set.matches_candidate_into(&candidate, &mut *matches); for &i in matches.iter().rev() { let glob = &self.globs[i]; if !glob.is_only_dir() || is_dir { return if glob.is_whitelist() { Match::Whitelist(glob) } else { Match::Ignore(glob) }; } } Match::None } /// Strips the given path such that it's suitable for matching with this /// gitignore matcher. fn strip<'a, P: 'a + AsRef + ?Sized>( &'a self, path: &'a P, ) -> &'a Path { let mut path = path.as_ref(); // A leading ./ is completely superfluous. We also strip it from // our gitignore root path, so we need to strip it from our candidate // path too. if let Some(p) = strip_prefix("./", path) { path = p; } // Strip any common prefix between the candidate path and the root // of the gitignore, to make sure we get relative matching right. 
// BUT, a file name might not have any directory components to it, // in which case, we don't want to accidentally strip any part of the // file name. // // As an additional special case, if the root is just `.`, then we // shouldn't try to strip anything, e.g., when path begins with a `.`. if self.root != Path::new(".") && !is_file_name(path) { if let Some(p) = strip_prefix(&self.root, path) { path = p; // If we're left with a leading slash, get rid of it. if let Some(p) = strip_prefix("/", path) { path = p; } } } path } } /// Builds a matcher for a single set of globs from a .gitignore file. #[derive(Clone, Debug)] pub struct GitignoreBuilder { builder: GlobSetBuilder, root: PathBuf, globs: Vec, case_insensitive: bool, } impl GitignoreBuilder { /// Create a new builder for a gitignore file. /// /// The path given should be the path at which the globs for this gitignore /// file should be matched. Note that paths are always matched relative /// to the root path given here. Generally, the root path should correspond /// to the *directory* containing a `.gitignore` file. pub fn new>(root: P) -> GitignoreBuilder { let root = root.as_ref(); GitignoreBuilder { builder: GlobSetBuilder::new(), root: strip_prefix("./", root).unwrap_or(root).to_path_buf(), globs: vec![], case_insensitive: false, } } /// Builds a new matcher from the globs added so far. /// /// Once a matcher is built, no new globs can be added to it. pub fn build(&self) -> Result { let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count(); let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count(); let set = self .builder .build() .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?; Ok(Gitignore { set: set, root: self.root.clone(), globs: self.globs.clone(), num_ignores: nignore as u64, num_whitelists: nwhite as u64, matches: Some(Arc::new(ThreadLocal::default())), }) } /// Build a global gitignore matcher using the configuration in this /// builder. /// /// This consumes ownership of the builder unlike `build` because it /// must mutate the builder to add the global gitignore globs. /// /// Note that this ignores the path given to this builder's constructor /// and instead derives the path automatically from git's global /// configuration. pub fn build_global(mut self) -> (Gitignore, Option) { match gitconfig_excludes_path() { None => (Gitignore::empty(), None), Some(path) => { if !path.is_file() { (Gitignore::empty(), None) } else { let mut errs = PartialErrorBuilder::default(); errs.maybe_push_ignore_io(self.add(path)); match self.build() { Ok(gi) => (gi, errs.into_error_option()), Err(err) => { errs.push(err); (Gitignore::empty(), errs.into_error_option()) } } } } } } /// Add each glob from the file path given. /// /// The file given should be formatted as a `gitignore` file. /// /// Note that partial errors can be returned. For example, if there was /// a problem adding one glob, an error for that will be returned, but /// all other valid globs will still be added. 
pub fn add>(&mut self, path: P) -> Option { let path = path.as_ref(); let file = match File::open(path) { Err(err) => return Some(Error::Io(err).with_path(path)), Ok(file) => file, }; let rdr = io::BufReader::new(file); let mut errs = PartialErrorBuilder::default(); for (i, line) in rdr.lines().enumerate() { let lineno = (i + 1) as u64; let line = match line { Ok(line) => line, Err(err) => { errs.push(Error::Io(err).tagged(path, lineno)); break; } }; if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) { errs.push(err.tagged(path, lineno)); } } errs.into_error_option() } /// Add each glob line from the string given. /// /// If this string came from a particular `gitignore` file, then its path /// should be provided here. /// /// The string given should be formatted as a `gitignore` file. #[cfg(test)] fn add_str( &mut self, from: Option, gitignore: &str, ) -> Result<&mut GitignoreBuilder, Error> { for line in gitignore.lines() { self.add_line(from.clone(), line)?; } Ok(self) } /// Add a line from a gitignore file to this builder. /// /// If this line came from a particular `gitignore` file, then its path /// should be provided here. /// /// If the line could not be parsed as a glob, then an error is returned. pub fn add_line( &mut self, from: Option, mut line: &str, ) -> Result<&mut GitignoreBuilder, Error> { #![allow(deprecated)] if line.starts_with("#") { return Ok(self); } if !line.ends_with("\\ ") { line = line.trim_right(); } if line.is_empty() { return Ok(self); } let mut glob = Glob { from: from, original: line.to_string(), actual: String::new(), is_whitelist: false, is_only_dir: false, }; let mut is_absolute = false; if line.starts_with("\\!") || line.starts_with("\\#") { line = &line[1..]; is_absolute = line.chars().nth(0) == Some('/'); } else { if line.starts_with("!") { glob.is_whitelist = true; line = &line[1..]; } if line.starts_with("/") { // `man gitignore` says that if a glob starts with a slash, // then the glob can only match the beginning of a path // (relative to the location of gitignore). We achieve this by // simply banning wildcards from matching /. line = &line[1..]; is_absolute = true; } } // If it ends with a slash, then this should only match directories, // but the slash should otherwise not be used while globbing. if let Some((i, c)) = line.char_indices().rev().nth(0) { if c == '/' { glob.is_only_dir = true; line = &line[..i]; } } glob.actual = line.to_string(); // If there is a literal slash, then this is a glob that must match the // entire path name. Otherwise, we should let it match anywhere, so use // a **/ prefix. if !is_absolute && !line.chars().any(|c| c == '/') { // ... but only if we don't already have a **/ prefix. if !glob.has_doublestar_prefix() { glob.actual = format!("**/{}", glob.actual); } } // If the glob ends with `/**`, then we should only match everything // inside a directory, but not the directory itself. Standard globs // will match the directory. So we add `/*` to force the issue. if glob.actual.ends_with("/**") { glob.actual = format!("{}/*", glob.actual); } let parsed = GlobBuilder::new(&glob.actual) .literal_separator(true) .case_insensitive(self.case_insensitive) .backslash_escape(true) .build() .map_err(|err| Error::Glob { glob: Some(glob.original.clone()), err: err.kind().to_string(), })?; self.builder.add(parsed); self.globs.push(glob); Ok(self) } /// Toggle whether the globs should be matched case insensitively or not. /// /// When this option is changed, only globs added after the change will be /// affected. 
/// /// This is disabled by default. pub fn case_insensitive( &mut self, yes: bool, ) -> Result<&mut GitignoreBuilder, Error> { // TODO: This should not return a `Result`. Fix this in the next semver // release. self.case_insensitive = yes; Ok(self) } } /// Return the file path of the current environment's global gitignore file. /// /// Note that the file path returned may not exist. fn gitconfig_excludes_path() -> Option { // git supports $HOME/.gitconfig and $XDG_CONFIG_HOME/git/config. Notably, // both can be active at the same time, where $HOME/.gitconfig takes // precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then // we're done. match gitconfig_home_contents().and_then(|x| parse_excludes_file(&x)) { Some(path) => return Some(path), None => {} } match gitconfig_xdg_contents().and_then(|x| parse_excludes_file(&x)) { Some(path) => return Some(path), None => {} } excludes_file_default() } /// Returns the file contents of git's global config file, if one exists, in /// the user's home directory. fn gitconfig_home_contents() -> Option> { let home = match home_dir() { None => return None, Some(home) => home, }; let mut file = match File::open(home.join(".gitconfig")) { Err(_) => return None, Ok(file) => io::BufReader::new(file), }; let mut contents = vec![]; file.read_to_end(&mut contents).ok().map(|_| contents) } /// Returns the file contents of git's global config file, if one exists, in /// the user's XDG_CONFIG_HOME directory. fn gitconfig_xdg_contents() -> Option> { let path = env::var_os("XDG_CONFIG_HOME") .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) }) .or_else(|| home_dir().map(|p| p.join(".config"))) .map(|x| x.join("git/config")); let mut file = match path.and_then(|p| File::open(p).ok()) { None => return None, Some(file) => io::BufReader::new(file), }; let mut contents = vec![]; file.read_to_end(&mut contents).ok().map(|_| contents) } /// Returns the default file path for a global .gitignore file. /// /// Specifically, this respects XDG_CONFIG_HOME. fn excludes_file_default() -> Option { env::var_os("XDG_CONFIG_HOME") .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) }) .or_else(|| home_dir().map(|p| p.join(".config"))) .map(|x| x.join("git/ignore")) } /// Extract git's `core.excludesfile` config setting from the raw file contents /// given. fn parse_excludes_file(data: &[u8]) -> Option { // N.B. This is the lazy approach, and isn't technically correct, but // probably works in more circumstances. I guess we would ideally have // a full INI parser. Yuck. lazy_static::lazy_static! { static ref RE: Regex = Regex::new(r"(?im)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap(); }; let caps = match RE.captures(data) { None => return None, Some(caps) => caps, }; str::from_utf8(&caps[1]).ok().map(|s| PathBuf::from(expand_tilde(s))) } /// Expands ~ in file paths to the value of $HOME. fn expand_tilde(path: &str) -> String { let home = match home_dir() { None => return path.to_string(), Some(home) => home.to_string_lossy().into_owned(), }; path.replace("~", &home) } /// Returns the location of the user's home directory. fn home_dir() -> Option { // We're fine with using env::home_dir for now. Its bugs are, IMO, pretty // minor corner cases. We should still probably eventually migrate to // the `dirs` crate to get a proper implementation. 
#![allow(deprecated)] env::home_dir() } #[cfg(test)] mod tests { use super::{Gitignore, GitignoreBuilder}; use std::path::Path; fn gi_from_str>(root: P, s: &str) -> Gitignore { let mut builder = GitignoreBuilder::new(root); builder.add_str(None, s).unwrap(); builder.build().unwrap() } macro_rules! ignored { ($name:ident, $root:expr, $gi:expr, $path:expr) => { ignored!($name, $root, $gi, $path, false); }; ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { #[test] fn $name() { let gi = gi_from_str($root, $gi); assert!(gi.matched($path, $is_dir).is_ignore()); } }; } macro_rules! not_ignored { ($name:ident, $root:expr, $gi:expr, $path:expr) => { not_ignored!($name, $root, $gi, $path, false); }; ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { #[test] fn $name() { let gi = gi_from_str($root, $gi); assert!(!gi.matched($path, $is_dir).is_ignore()); } }; } const ROOT: &'static str = "/home/foobar/rust/rg"; ignored!(ig1, ROOT, "months", "months"); ignored!(ig2, ROOT, "*.lock", "Cargo.lock"); ignored!(ig3, ROOT, "*.rs", "src/main.rs"); ignored!(ig4, ROOT, "src/*.rs", "src/main.rs"); ignored!(ig5, ROOT, "/*.c", "cat-file.c"); ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs"); ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs"); ignored!(ig8, ROOT, "foo/", "foo", true); ignored!(ig9, ROOT, "**/foo", "foo"); ignored!(ig10, ROOT, "**/foo", "src/foo"); ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar"); ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz"); ignored!(ig13, ROOT, "**/foo/bar", "foo/bar"); ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar"); ignored!(ig15, ROOT, "abc/**", "abc/x"); ignored!(ig16, ROOT, "abc/**", "abc/x/y"); ignored!(ig17, ROOT, "abc/**", "abc/x/y/z"); ignored!(ig18, ROOT, "a/**/b", "a/b"); ignored!(ig19, ROOT, "a/**/b", "a/x/b"); ignored!(ig20, ROOT, "a/**/b", "a/x/y/b"); ignored!(ig21, ROOT, r"\!xy", "!xy"); ignored!(ig22, ROOT, r"\#foo", "#foo"); ignored!(ig23, ROOT, "foo", "./foo"); ignored!(ig24, ROOT, "target", "grep/target"); ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock"); ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz"); ignored!(ig27, ROOT, "foo/", "xyz/foo", true); ignored!(ig28, "./src", "/llvm/", "./src/llvm", true); ignored!(ig29, ROOT, "node_modules/ ", "node_modules", true); ignored!(ig30, ROOT, "**/", "foo/bar", true); ignored!(ig31, ROOT, "path1/*", "path1/foo"); ignored!(ig32, ROOT, ".a/b", ".a/b"); ignored!(ig33, "./", ".a/b", ".a/b"); ignored!(ig34, ".", ".a/b", ".a/b"); ignored!(ig35, "./.", ".a/b", ".a/b"); ignored!(ig36, "././", ".a/b", ".a/b"); ignored!(ig37, "././.", ".a/b", ".a/b"); ignored!(ig38, ROOT, "\\[", "["); ignored!(ig39, ROOT, "\\?", "?"); ignored!(ig40, ROOT, "\\*", "*"); ignored!(ig41, ROOT, "\\a", "a"); ignored!(ig42, ROOT, "s*.rs", "sfoo.rs"); ignored!(ig43, ROOT, "**", "foo.rs"); ignored!(ig44, ROOT, "**/**/*", "a/foo.rs"); not_ignored!(ignot1, ROOT, "amonths", "months"); not_ignored!(ignot2, ROOT, "monthsa", "months"); not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs"); not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c"); not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs"); not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs"); not_ignored!(ignot7, ROOT, "foo/", "foo", false); not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz"); not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz"); not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar"); not_ignored!(ignot11, ROOT, "#foo", "#foo"); not_ignored!(ignot12, ROOT, 
"\n\n\n", "foo"); not_ignored!(ignot13, ROOT, "foo/**", "foo", true); not_ignored!( ignot14, "./third_party/protobuf", "m4/ltoptions.m4", "./third_party/protobuf/csharp/src/packages/repositories.config" ); not_ignored!(ignot15, ROOT, "!/bar", "foo/bar"); not_ignored!(ignot16, ROOT, "*\n!**/", "foo", true); not_ignored!(ignot17, ROOT, "src/*.rs", "src/grep/src/main.rs"); not_ignored!(ignot18, ROOT, "path1/*", "path2/path1/foo"); not_ignored!(ignot19, ROOT, "s*.rs", "src/foo.rs"); fn bytes(s: &str) -> Vec { s.to_string().into_bytes() } fn path_string>(path: P) -> String { path.as_ref().to_str().unwrap().to_string() } #[test] fn parse_excludes_file1() { let data = bytes("[core]\nexcludesFile = /foo/bar"); let got = super::parse_excludes_file(&data).unwrap(); assert_eq!(path_string(got), "/foo/bar"); } #[test] fn parse_excludes_file2() { let data = bytes("[core]\nexcludesFile = ~/foo/bar"); let got = super::parse_excludes_file(&data).unwrap(); assert_eq!(path_string(got), super::expand_tilde("~/foo/bar")); } #[test] fn parse_excludes_file3() { let data = bytes("[core]\nexcludeFile = /foo/bar"); assert!(super::parse_excludes_file(&data).is_none()); } // See: https://github.com/BurntSushi/ripgrep/issues/106 #[test] fn regression_106() { gi_from_str("/", " "); } #[test] fn case_insensitive() { let gi = GitignoreBuilder::new(ROOT) .case_insensitive(true) .unwrap() .add_str(None, "*.html") .unwrap() .build() .unwrap(); assert!(gi.matched("foo.html", false).is_ignore()); assert!(gi.matched("foo.HTML", false).is_ignore()); assert!(!gi.matched("foo.htm", false).is_ignore()); assert!(!gi.matched("foo.HTM", false).is_ignore()); } ignored!(cs1, ROOT, "*.html", "foo.html"); not_ignored!(cs2, ROOT, "*.html", "foo.HTML"); not_ignored!(cs3, ROOT, "*.html", "foo.htm"); not_ignored!(cs4, ROOT, "*.html", "foo.HTM"); } ignore-0.4.18/src/lib.rs000064400000000000000000000440620072674642500131420ustar 00000000000000/*! The ignore crate provides a fast recursive directory iterator that respects various filters such as globs, file types and `.gitignore` files. The precise matching rules and precedence is explained in the documentation for `WalkBuilder`. Secondarily, this crate exposes gitignore and file type matchers for use cases that demand more fine-grained control. # Example This example shows the most basic usage of this crate. This code will recursively traverse the current directory while automatically filtering out files and directories according to ignore globs found in files like `.ignore` and `.gitignore`: ```rust,no_run use ignore::Walk; for result in Walk::new("./") { // Each item yielded by the iterator is either a directory entry or an // error, so either print the path or the error. match result { Ok(entry) => println!("{}", entry.path().display()), Err(err) => println!("ERROR: {}", err), } } ``` # Example: advanced By default, the recursive directory iterator will ignore hidden files and directories. This can be disabled by building the iterator with `WalkBuilder`: ```rust,no_run use ignore::WalkBuilder; for result in WalkBuilder::new("./").hidden(false).build() { println!("{:?}", result); } ``` See the documentation for `WalkBuilder` for many other options. 
*/ #![deny(missing_docs)] use std::error; use std::fmt; use std::io; use std::path::{Path, PathBuf}; pub use crate::walk::{ DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder, WalkParallel, WalkState, }; mod default_types; mod dir; pub mod gitignore; pub mod overrides; mod pathutil; pub mod types; mod walk; /// Represents an error that can occur when parsing a gitignore file. #[derive(Debug)] pub enum Error { /// A collection of "soft" errors. These occur when adding an ignore /// file partially succeeded. Partial(Vec), /// An error associated with a specific line number. WithLineNumber { /// The line number. line: u64, /// The underlying error. err: Box, }, /// An error associated with a particular file path. WithPath { /// The file path. path: PathBuf, /// The underlying error. err: Box, }, /// An error associated with a particular directory depth when recursively /// walking a directory. WithDepth { /// The directory depth. depth: usize, /// The underlying error. err: Box, }, /// An error that occurs when a file loop is detected when traversing /// symbolic links. Loop { /// The ancestor file path in the loop. ancestor: PathBuf, /// The child file path in the loop. child: PathBuf, }, /// An error that occurs when doing I/O, such as reading an ignore file. Io(io::Error), /// An error that occurs when trying to parse a glob. Glob { /// The original glob that caused this error. This glob, when /// available, always corresponds to the glob provided by an end user. /// e.g., It is the glob as written in a `.gitignore` file. /// /// (This glob may be distinct from the glob that is actually /// compiled, after accounting for `gitignore` semantics.) glob: Option, /// The underlying glob error as a string. err: String, }, /// A type selection for a file type that is not defined. UnrecognizedFileType(String), /// A user specified file type definition could not be parsed. InvalidDefinition, } impl Clone for Error { fn clone(&self) -> Error { match *self { Error::Partial(ref errs) => Error::Partial(errs.clone()), Error::WithLineNumber { line, ref err } => { Error::WithLineNumber { line: line, err: err.clone() } } Error::WithPath { ref path, ref err } => { Error::WithPath { path: path.clone(), err: err.clone() } } Error::WithDepth { depth, ref err } => { Error::WithDepth { depth: depth, err: err.clone() } } Error::Loop { ref ancestor, ref child } => Error::Loop { ancestor: ancestor.clone(), child: child.clone(), }, Error::Io(ref err) => match err.raw_os_error() { Some(e) => Error::Io(io::Error::from_raw_os_error(e)), None => Error::Io(io::Error::new(err.kind(), err.to_string())), }, Error::Glob { ref glob, ref err } => { Error::Glob { glob: glob.clone(), err: err.clone() } } Error::UnrecognizedFileType(ref err) => { Error::UnrecognizedFileType(err.clone()) } Error::InvalidDefinition => Error::InvalidDefinition, } } } impl Error { /// Returns true if this is a partial error. /// /// A partial error occurs when only some operations failed while others /// may have succeeded. For example, an ignore file may contain an invalid /// glob among otherwise valid globs. pub fn is_partial(&self) -> bool { match *self { Error::Partial(_) => true, Error::WithLineNumber { ref err, .. } => err.is_partial(), Error::WithPath { ref err, .. } => err.is_partial(), Error::WithDepth { ref err, .. } => err.is_partial(), _ => false, } } /// Returns true if this error is exclusively an I/O error. 
pub fn is_io(&self) -> bool { match *self { Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(), Error::WithLineNumber { ref err, .. } => err.is_io(), Error::WithPath { ref err, .. } => err.is_io(), Error::WithDepth { ref err, .. } => err.is_io(), Error::Loop { .. } => false, Error::Io(_) => true, Error::Glob { .. } => false, Error::UnrecognizedFileType(_) => false, Error::InvalidDefinition => false, } } /// Inspect the original [`io::Error`] if there is one. /// /// [`None`] is returned if the [`Error`] doesn't correspond to an /// [`io::Error`]. This might happen, for example, when the error was /// produced because a cycle was found in the directory tree while /// following symbolic links. /// /// This method returns a borrowed value that is bound to the lifetime of the [`Error`]. To /// obtain an owned value, the [`into_io_error`] can be used instead. /// /// > This is the original [`io::Error`] and is _not_ the same as /// > [`impl From for std::io::Error`][impl] which contains additional context about the /// error. /// /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html /// [`Error`]: struct.Error.html /// [`into_io_error`]: struct.Error.html#method.into_io_error /// [impl]: struct.Error.html#impl-From%3CError%3E pub fn io_error(&self) -> Option<&std::io::Error> { match *self { Error::Partial(ref errs) => { if errs.len() == 1 { errs[0].io_error() } else { None } } Error::WithLineNumber { ref err, .. } => err.io_error(), Error::WithPath { ref err, .. } => err.io_error(), Error::WithDepth { ref err, .. } => err.io_error(), Error::Loop { .. } => None, Error::Io(ref err) => Some(err), Error::Glob { .. } => None, Error::UnrecognizedFileType(_) => None, Error::InvalidDefinition => None, } } /// Similar to [`io_error`] except consumes self to convert to the original /// [`io::Error`] if one exists. /// /// [`io_error`]: struct.Error.html#method.io_error /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html pub fn into_io_error(self) -> Option { match self { Error::Partial(mut errs) => { if errs.len() == 1 { errs.remove(0).into_io_error() } else { None } } Error::WithLineNumber { err, .. } => err.into_io_error(), Error::WithPath { err, .. } => err.into_io_error(), Error::WithDepth { err, .. } => err.into_io_error(), Error::Loop { .. } => None, Error::Io(err) => Some(err), Error::Glob { .. } => None, Error::UnrecognizedFileType(_) => None, Error::InvalidDefinition => None, } } /// Returns a depth associated with recursively walking a directory (if /// this error was generated from a recursive directory iterator). pub fn depth(&self) -> Option { match *self { Error::WithPath { ref err, .. } => err.depth(), Error::WithDepth { depth, .. } => Some(depth), _ => None, } } /// Turn an error into a tagged error with the given file path. fn with_path>(self, path: P) -> Error { Error::WithPath { path: path.as_ref().to_path_buf(), err: Box::new(self), } } /// Turn an error into a tagged error with the given depth. fn with_depth(self, depth: usize) -> Error { Error::WithDepth { depth: depth, err: Box::new(self) } } /// Turn an error into a tagged error with the given file path and line /// number. If path is empty, then it is omitted from the error. 
fn tagged>(self, path: P, lineno: u64) -> Error { let errline = Error::WithLineNumber { line: lineno, err: Box::new(self) }; if path.as_ref().as_os_str().is_empty() { return errline; } errline.with_path(path) } /// Build an error from a walkdir error. fn from_walkdir(err: walkdir::Error) -> Error { let depth = err.depth(); if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) { return Error::WithDepth { depth: depth, err: Box::new(Error::Loop { ancestor: anc.to_path_buf(), child: child.to_path_buf(), }), }; } let path = err.path().map(|p| p.to_path_buf()); let mut ig_err = Error::Io(io::Error::from(err)); if let Some(path) = path { ig_err = Error::WithPath { path: path, err: Box::new(ig_err) }; } ig_err } } impl error::Error for Error { #[allow(deprecated)] fn description(&self) -> &str { match *self { Error::Partial(_) => "partial error", Error::WithLineNumber { ref err, .. } => err.description(), Error::WithPath { ref err, .. } => err.description(), Error::WithDepth { ref err, .. } => err.description(), Error::Loop { .. } => "file system loop found", Error::Io(ref err) => err.description(), Error::Glob { ref err, .. } => err, Error::UnrecognizedFileType(_) => "unrecognized file type", Error::InvalidDefinition => "invalid definition", } } } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Error::Partial(ref errs) => { let msgs: Vec = errs.iter().map(|err| err.to_string()).collect(); write!(f, "{}", msgs.join("\n")) } Error::WithLineNumber { line, ref err } => { write!(f, "line {}: {}", line, err) } Error::WithPath { ref path, ref err } => { write!(f, "{}: {}", path.display(), err) } Error::WithDepth { ref err, .. } => err.fmt(f), Error::Loop { ref ancestor, ref child } => write!( f, "File system loop found: \ {} points to an ancestor {}", child.display(), ancestor.display() ), Error::Io(ref err) => err.fmt(f), Error::Glob { glob: None, ref err } => write!(f, "{}", err), Error::Glob { glob: Some(ref glob), ref err } => { write!(f, "error parsing glob '{}': {}", glob, err) } Error::UnrecognizedFileType(ref ty) => { write!(f, "unrecognized file type: {}", ty) } Error::InvalidDefinition => write!( f, "invalid definition (format is type:glob, e.g., \ html:*.html)" ), } } } impl From for Error { fn from(err: io::Error) -> Error { Error::Io(err) } } #[derive(Debug, Default)] struct PartialErrorBuilder(Vec); impl PartialErrorBuilder { fn push(&mut self, err: Error) { self.0.push(err); } fn push_ignore_io(&mut self, err: Error) { if !err.is_io() { self.push(err); } } fn maybe_push(&mut self, err: Option) { if let Some(err) = err { self.push(err); } } fn maybe_push_ignore_io(&mut self, err: Option) { if let Some(err) = err { self.push_ignore_io(err); } } fn into_error_option(mut self) -> Option { if self.0.is_empty() { None } else if self.0.len() == 1 { Some(self.0.pop().unwrap()) } else { Some(Error::Partial(self.0)) } } } /// The result of a glob match. /// /// The type parameter `T` typically refers to a type that provides more /// information about a particular match. For example, it might identify /// the specific gitignore file and the specific glob pattern that caused /// the match. #[derive(Clone, Debug)] pub enum Match { /// The path didn't match any glob. None, /// The highest precedent glob matched indicates the path should be /// ignored. Ignore(T), /// The highest precedent glob matched indicates the path should be /// whitelisted. 
Whitelist(T), } impl Match { /// Returns true if the match result didn't match any globs. pub fn is_none(&self) -> bool { match *self { Match::None => true, Match::Ignore(_) | Match::Whitelist(_) => false, } } /// Returns true if the match result implies the path should be ignored. pub fn is_ignore(&self) -> bool { match *self { Match::Ignore(_) => true, Match::None | Match::Whitelist(_) => false, } } /// Returns true if the match result implies the path should be /// whitelisted. pub fn is_whitelist(&self) -> bool { match *self { Match::Whitelist(_) => true, Match::None | Match::Ignore(_) => false, } } /// Inverts the match so that `Ignore` becomes `Whitelist` and /// `Whitelist` becomes `Ignore`. A non-match remains the same. pub fn invert(self) -> Match { match self { Match::None => Match::None, Match::Ignore(t) => Match::Whitelist(t), Match::Whitelist(t) => Match::Ignore(t), } } /// Return the value inside this match if it exists. pub fn inner(&self) -> Option<&T> { match *self { Match::None => None, Match::Ignore(ref t) => Some(t), Match::Whitelist(ref t) => Some(t), } } /// Apply the given function to the value inside this match. /// /// If the match has no value, then return the match unchanged. pub fn map U>(self, f: F) -> Match { match self { Match::None => Match::None, Match::Ignore(t) => Match::Ignore(f(t)), Match::Whitelist(t) => Match::Whitelist(f(t)), } } /// Return the match if it is not none. Otherwise, return other. pub fn or(self, other: Self) -> Self { if self.is_none() { other } else { self } } } #[cfg(test)] mod tests { use std::env; use std::error; use std::fs; use std::path::{Path, PathBuf}; use std::result; /// A convenient result type alias. pub type Result = result::Result>; macro_rules! err { ($($tt:tt)*) => { Box::::from(format!($($tt)*)) } } /// A simple wrapper for creating a temporary directory that is /// automatically deleted when it's dropped. /// /// We use this in lieu of tempfile because tempfile brings in too many /// dependencies. #[derive(Debug)] pub struct TempDir(PathBuf); impl Drop for TempDir { fn drop(&mut self) { fs::remove_dir_all(&self.0).unwrap(); } } impl TempDir { /// Create a new empty temporary directory under the system's configured /// temporary directory. pub fn new() -> Result { use std::sync::atomic::{AtomicUsize, Ordering}; static TRIES: usize = 100; static COUNTER: AtomicUsize = AtomicUsize::new(0); let tmpdir = env::temp_dir(); for _ in 0..TRIES { let count = COUNTER.fetch_add(1, Ordering::SeqCst); let path = tmpdir.join("rust-ignore").join(count.to_string()); if path.is_dir() { continue; } fs::create_dir_all(&path).map_err(|e| { err!("failed to create {}: {}", path.display(), e) })?; return Ok(TempDir(path)); } Err(err!("failed to create temp dir after {} tries", TRIES)) } /// Return the underlying path to this temporary directory. pub fn path(&self) -> &Path { &self.0 } } } ignore-0.4.18/src/overrides.rs000064400000000000000000000211200072674642500143640ustar 00000000000000/*! The overrides module provides a way to specify a set of override globs. This provides functionality similar to `--include` or `--exclude` in command line tools. */ use std::path::Path; use crate::gitignore::{self, Gitignore, GitignoreBuilder}; use crate::{Error, Match}; /// Glob represents a single glob in an override matcher. /// /// This is used to report information about the highest precedent glob /// that matched. /// /// Note that not all matches necessarily correspond to a specific glob. 
For /// example, if there are one or more whitelist globs and a file path doesn't /// match any glob in the set, then the file path is considered to be ignored. /// /// The lifetime `'a` refers to the lifetime of the matcher that produced /// this glob. #[derive(Clone, Debug)] pub struct Glob<'a>(GlobInner<'a>); #[derive(Clone, Debug)] enum GlobInner<'a> { /// No glob matched, but the file path should still be ignored. UnmatchedIgnore, /// A glob matched. Matched(&'a gitignore::Glob), } impl<'a> Glob<'a> { fn unmatched() -> Glob<'a> { Glob(GlobInner::UnmatchedIgnore) } } /// Manages a set of overrides provided explicitly by the end user. #[derive(Clone, Debug)] pub struct Override(Gitignore); impl Override { /// Returns an empty matcher that never matches any file path. pub fn empty() -> Override { Override(Gitignore::empty()) } /// Returns the directory of this override set. /// /// All matches are done relative to this path. pub fn path(&self) -> &Path { self.0.path() } /// Returns true if and only if this matcher is empty. /// /// When a matcher is empty, it will never match any file path. pub fn is_empty(&self) -> bool { self.0.is_empty() } /// Returns the total number of ignore globs. pub fn num_ignores(&self) -> u64 { self.0.num_whitelists() } /// Returns the total number of whitelisted globs. pub fn num_whitelists(&self) -> u64 { self.0.num_ignores() } /// Returns whether the given file path matched a pattern in this override /// matcher. /// /// `is_dir` should be true if the path refers to a directory and false /// otherwise. /// /// If there are no overrides, then this always returns `Match::None`. /// /// If there is at least one whitelist override and `is_dir` is false, then /// this never returns `Match::None`, since non-matches are interpreted as /// ignored. /// /// The given path is matched to the globs relative to the path given /// when building the override matcher. Specifically, before matching /// `path`, its prefix (as determined by a common suffix of the directory /// given) is stripped. If there is no common suffix/prefix overlap, then /// `path` is assumed to reside in the same directory as the root path for /// this set of overrides. pub fn matched<'a, P: AsRef>( &'a self, path: P, is_dir: bool, ) -> Match> { if self.is_empty() { return Match::None; } let mat = self.0.matched(path, is_dir).invert(); if mat.is_none() && self.num_whitelists() > 0 && !is_dir { return Match::Ignore(Glob::unmatched()); } mat.map(move |giglob| Glob(GlobInner::Matched(giglob))) } } /// Builds a matcher for a set of glob overrides. pub struct OverrideBuilder { builder: GitignoreBuilder, } impl OverrideBuilder { /// Create a new override builder. /// /// Matching is done relative to the directory path provided. pub fn new>(path: P) -> OverrideBuilder { OverrideBuilder { builder: GitignoreBuilder::new(path) } } /// Builds a new override matcher from the globs added so far. /// /// Once a matcher is built, no new globs can be added to it. pub fn build(&self) -> Result { Ok(Override(self.builder.build()?)) } /// Add a glob to the set of overrides. /// /// Globs provided here have precisely the same semantics as a single /// line in a `gitignore` file, where the meaning of `!` is inverted: /// namely, `!` at the beginning of a glob will ignore a file. Without `!`, /// all matches of the glob provided are treated as whitelist matches. 
pub fn add(&mut self, glob: &str) -> Result<&mut OverrideBuilder, Error> { self.builder.add_line(None, glob)?; Ok(self) } /// Toggle whether the globs should be matched case insensitively or not. /// /// When this option is changed, only globs added after the change will be affected. /// /// This is disabled by default. pub fn case_insensitive( &mut self, yes: bool, ) -> Result<&mut OverrideBuilder, Error> { // TODO: This should not return a `Result`. Fix this in the next semver // release. self.builder.case_insensitive(yes)?; Ok(self) } } #[cfg(test)] mod tests { use super::{Override, OverrideBuilder}; const ROOT: &'static str = "/home/andrew/foo"; fn ov(globs: &[&str]) -> Override { let mut builder = OverrideBuilder::new(ROOT); for glob in globs { builder.add(glob).unwrap(); } builder.build().unwrap() } #[test] fn empty() { let ov = ov(&[]); assert!(ov.matched("a.foo", false).is_none()); assert!(ov.matched("a", false).is_none()); assert!(ov.matched("", false).is_none()); } #[test] fn simple() { let ov = ov(&["*.foo", "!*.bar"]); assert!(ov.matched("a.foo", false).is_whitelist()); assert!(ov.matched("a.foo", true).is_whitelist()); assert!(ov.matched("a.rs", false).is_ignore()); assert!(ov.matched("a.rs", true).is_none()); assert!(ov.matched("a.bar", false).is_ignore()); assert!(ov.matched("a.bar", true).is_ignore()); } #[test] fn only_ignores() { let ov = ov(&["!*.bar"]); assert!(ov.matched("a.rs", false).is_none()); assert!(ov.matched("a.rs", true).is_none()); assert!(ov.matched("a.bar", false).is_ignore()); assert!(ov.matched("a.bar", true).is_ignore()); } #[test] fn precedence() { let ov = ov(&["*.foo", "!*.bar.foo"]); assert!(ov.matched("a.foo", false).is_whitelist()); assert!(ov.matched("a.baz", false).is_ignore()); assert!(ov.matched("a.bar.foo", false).is_ignore()); } #[test] fn gitignore() { let ov = ov(&["/foo", "bar/*.rs", "baz/**"]); assert!(ov.matched("bar/lib.rs", false).is_whitelist()); assert!(ov.matched("bar/wat/lib.rs", false).is_ignore()); assert!(ov.matched("wat/bar/lib.rs", false).is_ignore()); assert!(ov.matched("foo", false).is_whitelist()); assert!(ov.matched("wat/foo", false).is_ignore()); assert!(ov.matched("baz", false).is_ignore()); assert!(ov.matched("baz/a", false).is_whitelist()); assert!(ov.matched("baz/a/b", false).is_whitelist()); } #[test] fn allow_directories() { // This tests that directories are NOT ignored when they are unmatched. 
let ov = ov(&["*.rs"]); assert!(ov.matched("foo.rs", false).is_whitelist()); assert!(ov.matched("foo.c", false).is_ignore()); assert!(ov.matched("foo", false).is_ignore()); assert!(ov.matched("foo", true).is_none()); assert!(ov.matched("src/foo.rs", false).is_whitelist()); assert!(ov.matched("src/foo.c", false).is_ignore()); assert!(ov.matched("src/foo", false).is_ignore()); assert!(ov.matched("src/foo", true).is_none()); } #[test] fn absolute_path() { let ov = ov(&["!/bar"]); assert!(ov.matched("./foo/bar", false).is_none()); } #[test] fn case_insensitive() { let ov = OverrideBuilder::new(ROOT) .case_insensitive(true) .unwrap() .add("*.html") .unwrap() .build() .unwrap(); assert!(ov.matched("foo.html", false).is_whitelist()); assert!(ov.matched("foo.HTML", false).is_whitelist()); assert!(ov.matched("foo.htm", false).is_ignore()); assert!(ov.matched("foo.HTM", false).is_ignore()); } #[test] fn default_case_sensitive() { let ov = OverrideBuilder::new(ROOT).add("*.html").unwrap().build().unwrap(); assert!(ov.matched("foo.html", false).is_whitelist()); assert!(ov.matched("foo.HTML", false).is_ignore()); assert!(ov.matched("foo.htm", false).is_ignore()); assert!(ov.matched("foo.HTM", false).is_ignore()); } } ignore-0.4.18/src/pathutil.rs000064400000000000000000000103520072674642500142210ustar 00000000000000use std::ffi::OsStr; use std::path::Path; use crate::walk::DirEntry; /// Returns true if and only if this entry is considered to be hidden. /// /// This only returns true if the base name of the path starts with a `.`. /// /// On Unix, this implements a more optimized check. #[cfg(unix)] pub fn is_hidden(dent: &DirEntry) -> bool { use std::os::unix::ffi::OsStrExt; if let Some(name) = file_name(dent.path()) { name.as_bytes().get(0) == Some(&b'.') } else { false } } /// Returns true if and only if this entry is considered to be hidden. /// /// On Windows, this returns true if one of the following is true: /// /// * The base name of the path starts with a `.`. /// * The file attributes have the `HIDDEN` property set. #[cfg(windows)] pub fn is_hidden(dent: &DirEntry) -> bool { use std::os::windows::fs::MetadataExt; use winapi_util::file; // This looks like we're doing an extra stat call, but on Windows, the // directory traverser reuses the metadata retrieved from each directory // entry and stores it on the DirEntry itself. So this is "free." if let Ok(md) = dent.metadata() { if file::is_hidden(md.file_attributes() as u64) { return true; } } if let Some(name) = file_name(dent.path()) { name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) } else { false } } /// Returns true if and only if this entry is considered to be hidden. /// /// This only returns true if the base name of the path starts with a `.`. #[cfg(not(any(unix, windows)))] pub fn is_hidden(dent: &DirEntry) -> bool { if let Some(name) = file_name(dent.path()) { name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) } else { false } } /// Strip `prefix` from the `path` and return the remainder. /// /// If `path` doesn't have a prefix `prefix`, then return `None`. #[cfg(unix)] pub fn strip_prefix<'a, P: AsRef + ?Sized>( prefix: &'a P, path: &'a Path, ) -> Option<&'a Path> { use std::os::unix::ffi::OsStrExt; let prefix = prefix.as_ref().as_os_str().as_bytes(); let path = path.as_os_str().as_bytes(); if prefix.len() > path.len() || prefix != &path[0..prefix.len()] { None } else { Some(&Path::new(OsStr::from_bytes(&path[prefix.len()..]))) } } /// Strip `prefix` from the `path` and return the remainder. 
/// /// If `path` doesn't have a prefix `prefix`, then return `None`. #[cfg(not(unix))] pub fn strip_prefix<'a, P: AsRef + ?Sized>( prefix: &'a P, path: &'a Path, ) -> Option<&'a Path> { path.strip_prefix(prefix).ok() } /// Returns true if this file path is just a file name. i.e., Its parent is /// the empty string. #[cfg(unix)] pub fn is_file_name>(path: P) -> bool { use memchr::memchr; use std::os::unix::ffi::OsStrExt; let path = path.as_ref().as_os_str().as_bytes(); memchr(b'/', path).is_none() } /// Returns true if this file path is just a file name. i.e., Its parent is /// the empty string. #[cfg(not(unix))] pub fn is_file_name>(path: P) -> bool { path.as_ref().parent().map(|p| p.as_os_str().is_empty()).unwrap_or(false) } /// The final component of the path, if it is a normal file. /// /// If the path terminates in ., .., or consists solely of a root of prefix, /// file_name will return None. #[cfg(unix)] pub fn file_name<'a, P: AsRef + ?Sized>( path: &'a P, ) -> Option<&'a OsStr> { use memchr::memrchr; use std::os::unix::ffi::OsStrExt; let path = path.as_ref().as_os_str().as_bytes(); if path.is_empty() { return None; } else if path.len() == 1 && path[0] == b'.' { return None; } else if path.last() == Some(&b'.') { return None; } else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] { return None; } let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0); Some(OsStr::from_bytes(&path[last_slash..])) } /// The final component of the path, if it is a normal file. /// /// If the path terminates in ., .., or consists solely of a root of prefix, /// file_name will return None. #[cfg(not(unix))] pub fn file_name<'a, P: AsRef + ?Sized>( path: &'a P, ) -> Option<&'a OsStr> { path.as_ref().file_name() } ignore-0.4.18/src/types.rs000064400000000000000000000466350072674642500135500ustar 00000000000000/*! The types module provides a way of associating globs on file names to file types. This can be used to match specific types of files. For example, among the default file types provided, the Rust file type is defined to be `*.rs` with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with name `c`. Note that the set of default types may change over time. # Example This shows how to create and use a simple file type matcher using the default file types defined in this crate. ``` use ignore::types::TypesBuilder; let mut builder = TypesBuilder::new(); builder.add_defaults(); builder.select("rust"); let matcher = builder.build().unwrap(); assert!(matcher.matched("foo.rs", false).is_whitelist()); assert!(matcher.matched("foo.c", false).is_ignore()); ``` # Example: negation This is like the previous example, but shows how negating a file type works. That is, this will let us match file paths that *don't* correspond to a particular file type. ``` use ignore::types::TypesBuilder; let mut builder = TypesBuilder::new(); builder.add_defaults(); builder.negate("c"); let matcher = builder.build().unwrap(); assert!(matcher.matched("foo.rs", false).is_none()); assert!(matcher.matched("foo.c", false).is_ignore()); ``` # Example: custom file type definitions This shows how to extend this library default file type definitions with your own. ``` use ignore::types::TypesBuilder; let mut builder = TypesBuilder::new(); builder.add_defaults(); builder.add("foo", "*.foo"); // Another way of adding a file type definition. // This is useful when accepting input from an end user. builder.add_def("bar:*.bar"); // Note: we only select `foo`, not `bar`. 
builder.select("foo"); let matcher = builder.build().unwrap(); assert!(matcher.matched("x.foo", false).is_whitelist()); // This is ignored because we only selected the `foo` file type. assert!(matcher.matched("x.bar", false).is_ignore()); ``` We can also add file type definitions based on other definitions. ``` use ignore::types::TypesBuilder; let mut builder = TypesBuilder::new(); builder.add_defaults(); builder.add("foo", "*.foo"); builder.add_def("bar:include:foo,cpp"); builder.select("bar"); let matcher = builder.build().unwrap(); assert!(matcher.matched("x.foo", false).is_whitelist()); assert!(matcher.matched("y.cpp", false).is_whitelist()); ``` */ use std::cell::RefCell; use std::collections::HashMap; use std::path::Path; use std::sync::Arc; use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; use regex::Regex; use thread_local::ThreadLocal; use crate::default_types::DEFAULT_TYPES; use crate::pathutil::file_name; use crate::{Error, Match}; /// Glob represents a single glob in a set of file type definitions. /// /// There may be more than one glob for a particular file type. /// /// This is used to report information about the highest precedent glob /// that matched. /// /// Note that not all matches necessarily correspond to a specific glob. /// For example, if there are one or more selections and a file path doesn't /// match any of those selections, then the file path is considered to be /// ignored. /// /// The lifetime `'a` refers to the lifetime of the underlying file type /// definition, which corresponds to the lifetime of the file type matcher. #[derive(Clone, Debug)] pub struct Glob<'a>(GlobInner<'a>); #[derive(Clone, Debug)] enum GlobInner<'a> { /// No glob matched, but the file path should still be ignored. UnmatchedIgnore, /// A glob matched. Matched { /// The file type definition which provided the glob. def: &'a FileTypeDef, /// The index of the glob that matched inside the file type definition. which: usize, /// Whether the selection was negated or not. negated: bool, }, } impl<'a> Glob<'a> { fn unmatched() -> Glob<'a> { Glob(GlobInner::UnmatchedIgnore) } /// Return the file type definition that matched, if one exists. A file type /// definition always exists when a specific definition matches a file /// path. pub fn file_type_def(&self) -> Option<&FileTypeDef> { match self { Glob(GlobInner::UnmatchedIgnore) => None, Glob(GlobInner::Matched { def, .. }) => Some(def), } } } /// A single file type definition. /// /// File type definitions can be retrieved in aggregate from a file type /// matcher. File type definitions are also reported when its responsible /// for a match. #[derive(Clone, Debug, Eq, PartialEq)] pub struct FileTypeDef { name: String, globs: Vec, } impl FileTypeDef { /// Return the name of this file type. pub fn name(&self) -> &str { &self.name } /// Return the globs used to recognize this file type. pub fn globs(&self) -> &[String] { &self.globs } } /// Types is a file type matcher. #[derive(Clone, Debug)] pub struct Types { /// All of the file type definitions, sorted lexicographically by name. defs: Vec, /// All of the selections made by the user. selections: Vec>, /// Whether there is at least one Selection::Select in our selections. /// When this is true, a Match::None is converted to Match::Ignore. has_selected: bool, /// A mapping from glob index in the set to two indices. The first is an /// index into `selections` and the second is an index into the /// corresponding file type definition's list of globs. 
glob_to_selection: Vec<(usize, usize)>, /// The set of all glob selections, used for actual matching. set: GlobSet, /// Temporary storage for globs that match. matches: Arc>>>, } /// Indicates the type of a selection for a particular file type. #[derive(Clone, Debug)] enum Selection { Select(String, T), Negate(String, T), } impl Selection { fn is_negated(&self) -> bool { match *self { Selection::Select(..) => false, Selection::Negate(..) => true, } } fn name(&self) -> &str { match *self { Selection::Select(ref name, _) => name, Selection::Negate(ref name, _) => name, } } fn map U>(self, f: F) -> Selection { match self { Selection::Select(name, inner) => { Selection::Select(name, f(inner)) } Selection::Negate(name, inner) => { Selection::Negate(name, f(inner)) } } } fn inner(&self) -> &T { match *self { Selection::Select(_, ref inner) => inner, Selection::Negate(_, ref inner) => inner, } } } impl Types { /// Creates a new file type matcher that never matches any path and /// contains no file type definitions. pub fn empty() -> Types { Types { defs: vec![], selections: vec![], has_selected: false, glob_to_selection: vec![], set: GlobSetBuilder::new().build().unwrap(), matches: Arc::new(ThreadLocal::default()), } } /// Returns true if and only if this matcher has zero selections. pub fn is_empty(&self) -> bool { self.selections.is_empty() } /// Returns the number of selections used in this matcher. pub fn len(&self) -> usize { self.selections.len() } /// Return the set of current file type definitions. /// /// Definitions and globs are sorted. pub fn definitions(&self) -> &[FileTypeDef] { &self.defs } /// Returns a match for the given path against this file type matcher. /// /// The path is considered whitelisted if it matches a selected file type. /// The path is considered ignored if it matches a negated file type. /// If at least one file type is selected and `path` doesn't match, then /// the path is also considered ignored. pub fn matched<'a, P: AsRef>( &'a self, path: P, is_dir: bool, ) -> Match> { // File types don't apply to directories, and we can't do anything // if our glob set is empty. if is_dir || self.set.is_empty() { return Match::None; } // We only want to match against the file name, so extract it. // If one doesn't exist, then we can't match it. let name = match file_name(path.as_ref()) { Some(name) => name, None if self.has_selected => { return Match::Ignore(Glob::unmatched()); } None => { return Match::None; } }; let mut matches = self.matches.get_or_default().borrow_mut(); self.set.matches_into(name, &mut *matches); // The highest precedent match is the last one. if let Some(&i) = matches.last() { let (isel, iglob) = self.glob_to_selection[i]; let sel = &self.selections[isel]; let glob = Glob(GlobInner::Matched { def: sel.inner(), which: iglob, negated: sel.is_negated(), }); return if sel.is_negated() { Match::Ignore(glob) } else { Match::Whitelist(glob) }; } if self.has_selected { Match::Ignore(Glob::unmatched()) } else { Match::None } } } /// TypesBuilder builds a type matcher from a set of file type definitions and /// a set of file type selections. pub struct TypesBuilder { types: HashMap, selections: Vec>, } impl TypesBuilder { /// Create a new builder for a file type matcher. /// /// The builder contains *no* type definitions to start with. A set /// of default type definitions can be added with `add_defaults`, and /// additional type definitions can be added with `select` and `negate`. 
pub fn new() -> TypesBuilder { TypesBuilder { types: HashMap::new(), selections: vec![] } } /// Build the current set of file type definitions *and* selections into /// a file type matcher. pub fn build(&self) -> Result { let defs = self.definitions(); let has_selected = self.selections.iter().any(|s| !s.is_negated()); let mut selections = vec![]; let mut glob_to_selection = vec![]; let mut build_set = GlobSetBuilder::new(); for (isel, selection) in self.selections.iter().enumerate() { let def = match self.types.get(selection.name()) { Some(def) => def.clone(), None => { let name = selection.name().to_string(); return Err(Error::UnrecognizedFileType(name)); } }; for (iglob, glob) in def.globs.iter().enumerate() { build_set.add( GlobBuilder::new(glob) .literal_separator(true) .build() .map_err(|err| Error::Glob { glob: Some(glob.to_string()), err: err.kind().to_string(), })?, ); glob_to_selection.push((isel, iglob)); } selections.push(selection.clone().map(move |_| def)); } let set = build_set .build() .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?; Ok(Types { defs: defs, selections: selections, has_selected: has_selected, glob_to_selection: glob_to_selection, set: set, matches: Arc::new(ThreadLocal::default()), }) } /// Return the set of current file type definitions. /// /// Definitions and globs are sorted. pub fn definitions(&self) -> Vec { let mut defs = vec![]; for def in self.types.values() { let mut def = def.clone(); def.globs.sort(); defs.push(def); } defs.sort_by(|def1, def2| def1.name().cmp(def2.name())); defs } /// Select the file type given by `name`. /// /// If `name` is `all`, then all file types currently defined are selected. pub fn select(&mut self, name: &str) -> &mut TypesBuilder { if name == "all" { for name in self.types.keys() { self.selections.push(Selection::Select(name.to_string(), ())); } } else { self.selections.push(Selection::Select(name.to_string(), ())); } self } /// Ignore the file type given by `name`. /// /// If `name` is `all`, then all file types currently defined are negated. pub fn negate(&mut self, name: &str) -> &mut TypesBuilder { if name == "all" { for name in self.types.keys() { self.selections.push(Selection::Negate(name.to_string(), ())); } } else { self.selections.push(Selection::Negate(name.to_string(), ())); } self } /// Clear any file type definitions for the type name given. pub fn clear(&mut self, name: &str) -> &mut TypesBuilder { self.types.remove(name); self } /// Add a new file type definition. `name` can be arbitrary and `pat` /// should be a glob recognizing file paths belonging to the `name` type. /// /// If `name` is `all` or otherwise contains any character that is not a /// Unicode letter or number, then an error is returned. pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> { lazy_static::lazy_static! { static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap(); }; if name == "all" || !RE.is_match(name) { return Err(Error::InvalidDefinition); } let (key, glob) = (name.to_string(), glob.to_string()); self.types .entry(key) .or_insert_with(|| FileTypeDef { name: name.to_string(), globs: vec![], }) .globs .push(glob); Ok(()) } /// Add a new file type definition specified in string form. There are two /// valid formats: /// 1. `{name}:{glob}`. This defines a 'root' definition that associates the /// given name with the given glob. /// 2. `{name}:include:{comma-separated list of already defined names}. 
/// This defines an 'include' definition that associates the given name /// with the definitions of the given existing types. /// Names may not include any characters that are not /// Unicode letters or numbers. pub fn add_def(&mut self, def: &str) -> Result<(), Error> { let parts: Vec<&str> = def.split(':').collect(); match parts.len() { 2 => { let name = parts[0]; let glob = parts[1]; if name.is_empty() || glob.is_empty() { return Err(Error::InvalidDefinition); } self.add(name, glob) } 3 => { let name = parts[0]; let types_string = parts[2]; if name.is_empty() || parts[1] != "include" || types_string.is_empty() { return Err(Error::InvalidDefinition); } let types = types_string.split(','); // Check ahead of time to ensure that all types specified are // present and fail fast if not. if types.clone().any(|t| !self.types.contains_key(t)) { return Err(Error::InvalidDefinition); } for type_name in types { let globs = self.types.get(type_name).unwrap().globs.clone(); for glob in globs { self.add(name, &glob)?; } } Ok(()) } _ => Err(Error::InvalidDefinition), } } /// Add a set of default file type definitions. pub fn add_defaults(&mut self) -> &mut TypesBuilder { static MSG: &'static str = "adding a default type should never fail"; for &(name, exts) in DEFAULT_TYPES { for ext in exts { self.add(name, ext).expect(MSG); } } self } } #[cfg(test)] mod tests { use super::TypesBuilder; macro_rules! matched { ($name:ident, $types:expr, $sel:expr, $selnot:expr, $path:expr) => { matched!($name, $types, $sel, $selnot, $path, true); }; (not, $name:ident, $types:expr, $sel:expr, $selnot:expr, $path:expr) => { matched!($name, $types, $sel, $selnot, $path, false); }; ($name:ident, $types:expr, $sel:expr, $selnot:expr, $path:expr, $matched:expr) => { #[test] fn $name() { let mut btypes = TypesBuilder::new(); for tydef in $types { btypes.add_def(tydef).unwrap(); } for sel in $sel { btypes.select(sel); } for selnot in $selnot { btypes.negate(selnot); } let types = btypes.build().unwrap(); let mat = types.matched($path, false); assert_eq!($matched, !mat.is_ignore()); } }; } fn types() -> Vec<&'static str> { vec![ "html:*.html", "html:*.htm", "rust:*.rs", "js:*.js", "foo:*.{rs,foo}", "combo:include:html,rust", ] } matched!(match1, types(), vec!["rust"], vec![], "lib.rs"); matched!(match2, types(), vec!["html"], vec![], "index.html"); matched!(match3, types(), vec!["html"], vec![], "index.htm"); matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs"); matched!(match5, types(), vec![], vec![], "index.html"); matched!(match6, types(), vec![], vec!["rust"], "index.html"); matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo"); matched!(match8, types(), vec!["combo"], vec![], "index.html"); matched!(match9, types(), vec!["combo"], vec![], "lib.rs"); matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html"); matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs"); matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs"); matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs"); matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo"); matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js"); #[test] fn test_invalid_defs() { let mut btypes = TypesBuilder::new(); for tydef in types() { btypes.add_def(tydef).unwrap(); } // Preserve the original definitions for later comparison. 
let original_defs = btypes.definitions(); let bad_defs = vec![ // Reference to type that does not exist "combo:include:html,python", // Bad format "combo:foobar:html,rust", "", ]; for def in bad_defs { assert!(btypes.add_def(def).is_err()); // Ensure that nothing changed, even if some of the includes were valid. assert_eq!(btypes.definitions(), original_defs); } } } ignore-0.4.18/src/walk.rs000064400000000000000000002247210072674642500133340ustar 00000000000000use std::cmp; use std::ffi::OsStr; use std::fmt; use std::fs::{self, FileType, Metadata}; use std::io; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; use std::thread; use std::time::Duration; use std::vec; use same_file::Handle; use walkdir::{self, WalkDir}; use crate::dir::{Ignore, IgnoreBuilder}; use crate::gitignore::GitignoreBuilder; use crate::overrides::Override; use crate::types::Types; use crate::{Error, PartialErrorBuilder}; /// A directory entry with a possible error attached. /// /// The error typically refers to a problem parsing ignore files in a /// particular directory. #[derive(Clone, Debug)] pub struct DirEntry { dent: DirEntryInner, err: Option, } impl DirEntry { /// The full path that this entry represents. pub fn path(&self) -> &Path { self.dent.path() } /// The full path that this entry represents. /// Analogous to [`path`], but moves ownership of the path. /// /// [`path`]: struct.DirEntry.html#method.path pub fn into_path(self) -> PathBuf { self.dent.into_path() } /// Whether this entry corresponds to a symbolic link or not. pub fn path_is_symlink(&self) -> bool { self.dent.path_is_symlink() } /// Returns true if and only if this entry corresponds to stdin. /// /// i.e., The entry has depth 0 and its file name is `-`. pub fn is_stdin(&self) -> bool { self.dent.is_stdin() } /// Return the metadata for the file that this entry points to. pub fn metadata(&self) -> Result { self.dent.metadata() } /// Return the file type for the file that this entry points to. /// /// This entry doesn't have a file type if it corresponds to stdin. pub fn file_type(&self) -> Option { self.dent.file_type() } /// Return the file name of this entry. /// /// If this entry has no file name (e.g., `/`), then the full path is /// returned. pub fn file_name(&self) -> &OsStr { self.dent.file_name() } /// Returns the depth at which this entry was created relative to the root. pub fn depth(&self) -> usize { self.dent.depth() } /// Returns the underlying inode number if one exists. /// /// If this entry doesn't have an inode number, then `None` is returned. #[cfg(unix)] pub fn ino(&self) -> Option { self.dent.ino() } /// Returns an error, if one exists, associated with processing this entry. /// /// An example of an error is one that occurred while parsing an ignore /// file. Errors related to traversing a directory tree itself are reported /// as part of yielding the directory entry, and not with this method. pub fn error(&self) -> Option<&Error> { self.err.as_ref() } /// Returns true if and only if this entry points to a directory. pub(crate) fn is_dir(&self) -> bool { self.dent.is_dir() } fn new_stdin() -> DirEntry { DirEntry { dent: DirEntryInner::Stdin, err: None } } fn new_walkdir(dent: walkdir::DirEntry, err: Option) -> DirEntry { DirEntry { dent: DirEntryInner::Walkdir(dent), err: err } } fn new_raw(dent: DirEntryRaw, err: Option) -> DirEntry { DirEntry { dent: DirEntryInner::Raw(dent), err: err } } } /// DirEntryInner is the implementation of DirEntry. 
/// /// It specifically represents three distinct sources of directory entries: /// /// 1. From the walkdir crate. /// 2. Special entries that represent things like stdin. /// 3. From a path. /// /// Specifically, (3) has to essentially re-create the DirEntry implementation /// from WalkDir. #[derive(Clone, Debug)] enum DirEntryInner { Stdin, Walkdir(walkdir::DirEntry), Raw(DirEntryRaw), } impl DirEntryInner { fn path(&self) -> &Path { use self::DirEntryInner::*; match *self { Stdin => Path::new(""), Walkdir(ref x) => x.path(), Raw(ref x) => x.path(), } } fn into_path(self) -> PathBuf { use self::DirEntryInner::*; match self { Stdin => PathBuf::from(""), Walkdir(x) => x.into_path(), Raw(x) => x.into_path(), } } fn path_is_symlink(&self) -> bool { use self::DirEntryInner::*; match *self { Stdin => false, Walkdir(ref x) => x.path_is_symlink(), Raw(ref x) => x.path_is_symlink(), } } fn is_stdin(&self) -> bool { match *self { DirEntryInner::Stdin => true, _ => false, } } fn metadata(&self) -> Result { use self::DirEntryInner::*; match *self { Stdin => { let err = Error::Io(io::Error::new( io::ErrorKind::Other, " has no metadata", )); Err(err.with_path("")) } Walkdir(ref x) => x.metadata().map_err(|err| { Error::Io(io::Error::from(err)).with_path(x.path()) }), Raw(ref x) => x.metadata(), } } fn file_type(&self) -> Option { use self::DirEntryInner::*; match *self { Stdin => None, Walkdir(ref x) => Some(x.file_type()), Raw(ref x) => Some(x.file_type()), } } fn file_name(&self) -> &OsStr { use self::DirEntryInner::*; match *self { Stdin => OsStr::new(""), Walkdir(ref x) => x.file_name(), Raw(ref x) => x.file_name(), } } fn depth(&self) -> usize { use self::DirEntryInner::*; match *self { Stdin => 0, Walkdir(ref x) => x.depth(), Raw(ref x) => x.depth(), } } #[cfg(unix)] fn ino(&self) -> Option { use self::DirEntryInner::*; use walkdir::DirEntryExt; match *self { Stdin => None, Walkdir(ref x) => Some(x.ino()), Raw(ref x) => Some(x.ino()), } } /// Returns true if and only if this entry points to a directory. fn is_dir(&self) -> bool { self.file_type().map(|ft| ft.is_dir()).unwrap_or(false) } } /// DirEntryRaw is essentially copied from the walkdir crate so that we can /// build `DirEntry`s from whole cloth in the parallel iterator. #[derive(Clone)] struct DirEntryRaw { /// The path as reported by the `fs::ReadDir` iterator (even if it's a /// symbolic link). path: PathBuf, /// The file type. Necessary for recursive iteration, so store it. ty: FileType, /// Is set when this entry was created from a symbolic link and the user /// expects the iterator to follow symbolic links. follow_link: bool, /// The depth at which this entry was generated relative to the root. depth: usize, /// The underlying inode number (Unix only). #[cfg(unix)] ino: u64, /// The underlying metadata (Windows only). We store this on Windows /// because this comes for free while reading a directory. #[cfg(windows)] metadata: fs::Metadata, } impl fmt::Debug for DirEntryRaw { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // Leaving out FileType because it doesn't have a debug impl // in Rust 1.9. We could add it if we really wanted to by manually // querying each possibly file type. Meh. 
---AG f.debug_struct("DirEntryRaw") .field("path", &self.path) .field("follow_link", &self.follow_link) .field("depth", &self.depth) .finish() } } impl DirEntryRaw { fn path(&self) -> &Path { &self.path } fn into_path(self) -> PathBuf { self.path } fn path_is_symlink(&self) -> bool { self.ty.is_symlink() || self.follow_link } fn metadata(&self) -> Result { self.metadata_internal() } #[cfg(windows)] fn metadata_internal(&self) -> Result { if self.follow_link { fs::metadata(&self.path) } else { Ok(self.metadata.clone()) } .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path)) } #[cfg(not(windows))] fn metadata_internal(&self) -> Result { if self.follow_link { fs::metadata(&self.path) } else { fs::symlink_metadata(&self.path) } .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path)) } fn file_type(&self) -> FileType { self.ty } fn file_name(&self) -> &OsStr { self.path.file_name().unwrap_or_else(|| self.path.as_os_str()) } fn depth(&self) -> usize { self.depth } #[cfg(unix)] fn ino(&self) -> u64 { self.ino } fn from_entry( depth: usize, ent: &fs::DirEntry, ) -> Result { let ty = ent.file_type().map_err(|err| { let err = Error::Io(io::Error::from(err)).with_path(ent.path()); Error::WithDepth { depth: depth, err: Box::new(err) } })?; DirEntryRaw::from_entry_os(depth, ent, ty) } #[cfg(windows)] fn from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result { let md = ent.metadata().map_err(|err| { let err = Error::Io(io::Error::from(err)).with_path(ent.path()); Error::WithDepth { depth: depth, err: Box::new(err) } })?; Ok(DirEntryRaw { path: ent.path(), ty: ty, follow_link: false, depth: depth, metadata: md, }) } #[cfg(unix)] fn from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result { use std::os::unix::fs::DirEntryExt; Ok(DirEntryRaw { path: ent.path(), ty: ty, follow_link: false, depth: depth, ino: ent.ino(), }) } // Placeholder implementation to allow compiling on non-standard platforms // (e.g. wasm32). #[cfg(not(any(windows, unix)))] fn from_entry_os( depth: usize, ent: &fs::DirEntry, ty: fs::FileType, ) -> Result { Err(Error::Io(io::Error::new( io::ErrorKind::Other, "unsupported platform", ))) } #[cfg(windows)] fn from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result { let md = fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?; Ok(DirEntryRaw { path: pb, ty: md.file_type(), follow_link: link, depth: depth, metadata: md, }) } #[cfg(unix)] fn from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result { use std::os::unix::fs::MetadataExt; let md = fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?; Ok(DirEntryRaw { path: pb, ty: md.file_type(), follow_link: link, depth: depth, ino: md.ino(), }) } // Placeholder implementation to allow compiling on non-standard platforms // (e.g. wasm32). #[cfg(not(any(windows, unix)))] fn from_path( depth: usize, pb: PathBuf, link: bool, ) -> Result { Err(Error::Io(io::Error::new( io::ErrorKind::Other, "unsupported platform", ))) } } /// WalkBuilder builds a recursive directory iterator. /// /// The builder supports a large number of configurable options. This includes /// specific glob overrides, file type matching, toggling whether hidden /// files are ignored or not, and of course, support for respecting gitignore /// files. /// /// By default, all ignore files found are respected. This includes `.ignore`, /// `.gitignore`, `.git/info/exclude` and even your global gitignore /// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`. 
/// /// Some standard recursive directory options are also supported, such as /// limiting the recursive depth or whether to follow symbolic links (disabled /// by default). /// /// # Ignore rules /// /// There are many rules that influence whether a particular file or directory /// is skipped by this iterator. Those rules are documented here. Note that /// the rules assume a default configuration. /// /// * First, glob overrides are checked. If a path matches a glob override, /// then matching stops. The path is then only skipped if the glob that matched /// the path is an ignore glob. (An override glob is a whitelist glob unless it /// starts with a `!`, in which case it is an ignore glob.) /// * Second, ignore files are checked. Ignore files currently only come from /// git ignore files (`.gitignore`, `.git/info/exclude` and the configured /// global gitignore file), plain `.ignore` files, which have the same format /// as gitignore files, or explicitly added ignore files. The precedence order /// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and /// finally explicitly added ignore files. Note that precedence between /// different types of ignore files is not impacted by the directory hierarchy; /// any `.ignore` file overrides all `.gitignore` files. Within each precedence /// level, more nested ignore files have a higher precedence than less nested /// ignore files. /// * Third, if the previous step yields an ignore match, then all matching /// is stopped and the path is skipped. If it yields a whitelist match, then /// matching continues. A whitelist match can be overridden by a later matcher. /// * Fourth, unless the path is a directory, the file type matcher is run on /// the path. As above, if it yields an ignore match, then all matching is /// stopped and the path is skipped. If it yields a whitelist match, then /// matching continues. /// * Fifth, if the path hasn't been whitelisted and it is hidden, then the /// path is skipped. /// * Sixth, unless the path is a directory, the size of the file is compared /// against the max filesize limit. If it exceeds the limit, it is skipped. /// * Seventh, if the path has made it this far then it is yielded in the /// iterator. #[derive(Clone)] pub struct WalkBuilder { paths: Vec, ig_builder: IgnoreBuilder, max_depth: Option, max_filesize: Option, follow_links: bool, same_file_system: bool, sorter: Option, threads: usize, skip: Option>, filter: Option, } #[derive(Clone)] enum Sorter { ByName( Arc cmp::Ordering + Send + Sync + 'static>, ), ByPath(Arc cmp::Ordering + Send + Sync + 'static>), } #[derive(Clone)] struct Filter(Arc bool + Send + Sync + 'static>); impl fmt::Debug for WalkBuilder { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("WalkBuilder") .field("paths", &self.paths) .field("ig_builder", &self.ig_builder) .field("max_depth", &self.max_depth) .field("max_filesize", &self.max_filesize) .field("follow_links", &self.follow_links) .field("threads", &self.threads) .field("skip", &self.skip) .finish() } } impl WalkBuilder { /// Create a new builder for a recursive directory iterator for the /// directory given. /// /// Note that if you want to traverse multiple different directories, it /// is better to call `add` on this builder than to create multiple /// `Walk` values. 
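    /// # Example
    ///
    /// A minimal sketch that walks a directory tree while respecting the
    /// ignore rules described above; `"./"` is just an illustrative root.
    ///
    /// ```no_run
    /// use ignore::WalkBuilder;
    ///
    /// for result in WalkBuilder::new("./").build() {
    ///     // Each item is a `Result<DirEntry, Error>`; traversal errors are
    ///     // yielded inline instead of aborting the walk.
    ///     match result {
    ///         Ok(entry) => println!("{}", entry.path().display()),
    ///         Err(err) => eprintln!("ERROR: {}", err),
    ///     }
    /// }
    /// ```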
pub fn new>(path: P) -> WalkBuilder { WalkBuilder { paths: vec![path.as_ref().to_path_buf()], ig_builder: IgnoreBuilder::new(), max_depth: None, max_filesize: None, follow_links: false, same_file_system: false, sorter: None, threads: 0, skip: None, filter: None, } } /// Build a new `Walk` iterator. pub fn build(&self) -> Walk { let follow_links = self.follow_links; let max_depth = self.max_depth; let sorter = self.sorter.clone(); let its = self .paths .iter() .map(move |p| { if p == Path::new("-") { (p.to_path_buf(), None) } else { let mut wd = WalkDir::new(p); wd = wd.follow_links(follow_links || p.is_file()); wd = wd.same_file_system(self.same_file_system); if let Some(max_depth) = max_depth { wd = wd.max_depth(max_depth); } if let Some(ref sorter) = sorter { match sorter.clone() { Sorter::ByName(cmp) => { wd = wd.sort_by(move |a, b| { cmp(a.file_name(), b.file_name()) }); } Sorter::ByPath(cmp) => { wd = wd.sort_by(move |a, b| { cmp(a.path(), b.path()) }); } } } (p.to_path_buf(), Some(WalkEventIter::from(wd))) } }) .collect::>() .into_iter(); let ig_root = self.ig_builder.build(); Walk { its: its, it: None, ig_root: ig_root.clone(), ig: ig_root.clone(), max_filesize: self.max_filesize, skip: self.skip.clone(), filter: self.filter.clone(), } } /// Build a new `WalkParallel` iterator. /// /// Note that this *doesn't* return something that implements `Iterator`. /// Instead, the returned value must be run with a closure. e.g., /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`. pub fn build_parallel(&self) -> WalkParallel { WalkParallel { paths: self.paths.clone().into_iter(), ig_root: self.ig_builder.build(), max_depth: self.max_depth, max_filesize: self.max_filesize, follow_links: self.follow_links, same_file_system: self.same_file_system, threads: self.threads, skip: self.skip.clone(), filter: self.filter.clone(), } } /// Add a file path to the iterator. /// /// Each additional file path added is traversed recursively. This should /// be preferred over building multiple `Walk` iterators since this /// enables reusing resources across iteration. pub fn add>(&mut self, path: P) -> &mut WalkBuilder { self.paths.push(path.as_ref().to_path_buf()); self } /// The maximum depth to recurse. /// /// The default, `None`, imposes no depth restriction. pub fn max_depth(&mut self, depth: Option) -> &mut WalkBuilder { self.max_depth = depth; self } /// Whether to follow symbolic links or not. pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder { self.follow_links = yes; self } /// Whether to ignore files above the specified limit. pub fn max_filesize(&mut self, filesize: Option) -> &mut WalkBuilder { self.max_filesize = filesize; self } /// The number of threads to use for traversal. /// /// Note that this only has an effect when using `build_parallel`. /// /// The default setting is `0`, which chooses the number of threads /// automatically using heuristics. pub fn threads(&mut self, n: usize) -> &mut WalkBuilder { self.threads = n; self } /// Add a global ignore file to the matcher. /// /// This has lower precedence than all other sources of ignore rules. /// /// If there was a problem adding the ignore file, then an error is /// returned. Note that the error may indicate *partial* failure. For /// example, if an ignore file contains an invalid glob, all other globs /// are still applied. 
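    /// # Example
    ///
    /// A sketch of adding one extra ignore file; `.myignore` is a
    /// hypothetical path used only for illustration.
    ///
    /// ```no_run
    /// use ignore::WalkBuilder;
    ///
    /// let mut builder = WalkBuilder::new("./");
    /// // `None` means the file was added without a problem; `Some(err)` may
    /// // describe a partial failure, e.g. a single invalid glob.
    /// if let Some(err) = builder.add_ignore(".myignore") {
    ///     eprintln!("problem reading ignore file: {}", err);
    /// }
    /// for result in builder.build() {
    ///     println!("{:?}", result.map(|entry| entry.into_path()));
    /// }
    /// ```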
pub fn add_ignore>(&mut self, path: P) -> Option { let mut builder = GitignoreBuilder::new(""); let mut errs = PartialErrorBuilder::default(); errs.maybe_push(builder.add(path)); match builder.build() { Ok(gi) => { self.ig_builder.add_ignore(gi); } Err(err) => { errs.push(err); } } errs.into_error_option() } /// Add a custom ignore file name /// /// These ignore files have higher precedence than all other ignore files. /// /// When specifying multiple names, earlier names have lower precedence than /// later names. pub fn add_custom_ignore_filename>( &mut self, file_name: S, ) -> &mut WalkBuilder { self.ig_builder.add_custom_ignore_filename(file_name); self } /// Add an override matcher. /// /// By default, no override matcher is used. /// /// This overrides any previous setting. pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder { self.ig_builder.overrides(overrides); self } /// Add a file type matcher. /// /// By default, no file type matcher is used. /// /// This overrides any previous setting. pub fn types(&mut self, types: Types) -> &mut WalkBuilder { self.ig_builder.types(types); self } /// Enables all the standard ignore filters. /// /// This toggles, as a group, all the filters that are enabled by default: /// /// - [hidden()](#method.hidden) /// - [parents()](#method.parents) /// - [ignore()](#method.ignore) /// - [git_ignore()](#method.git_ignore) /// - [git_global()](#method.git_global) /// - [git_exclude()](#method.git_exclude) /// /// They may still be toggled individually after calling this function. /// /// This is (by definition) enabled by default. pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder { self.hidden(yes) .parents(yes) .ignore(yes) .git_ignore(yes) .git_global(yes) .git_exclude(yes) } /// Enables ignoring hidden files. /// /// This is enabled by default. pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder { self.ig_builder.hidden(yes); self } /// Enables reading ignore files from parent directories. /// /// If this is enabled, then .gitignore files in parent directories of each /// file path given are respected. Otherwise, they are ignored. /// /// This is enabled by default. pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder { self.ig_builder.parents(yes); self } /// Enables reading `.ignore` files. /// /// `.ignore` files have the same semantics as `gitignore` files and are /// supported by search tools such as ripgrep and The Silver Searcher. /// /// This is enabled by default. pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder { self.ig_builder.ignore(yes); self } /// Enables reading a global gitignore file, whose path is specified in /// git's `core.excludesFile` config option. /// /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` /// does not exist or does not specify `core.excludesFile`, then /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not /// set or is empty, then `$HOME/.config/git/ignore` is used instead. /// /// This is enabled by default. pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder { self.ig_builder.git_global(yes); self } /// Enables reading `.gitignore` files. /// /// `.gitignore` files have match semantics as described in the `gitignore` /// man page. /// /// This is enabled by default. pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder { self.ig_builder.git_ignore(yes); self } /// Enables reading `.git/info/exclude` files. /// /// `.git/info/exclude` files have match semantics as described in the /// `gitignore` man page. 
/// /// This is enabled by default. pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder { self.ig_builder.git_exclude(yes); self } /// Whether a git repository is required to apply git-related ignore /// rules (global rules, .gitignore and local exclude rules). /// /// When disabled, git-related ignore rules are applied even when searching /// outside a git repository. pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder { self.ig_builder.require_git(yes); self } /// Process ignore files case insensitively /// /// This is disabled by default. pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder { self.ig_builder.ignore_case_insensitive(yes); self } /// Set a function for sorting directory entries by their path. /// /// If a compare function is set, the resulting iterator will return all /// paths in sorted order. The compare function will be called to compare /// entries from the same directory. /// /// This is like `sort_by_file_name`, except the comparator accepts /// a `&Path` instead of the base file name, which permits it to sort by /// more criteria. /// /// This method will override any previous sorter set by this method or /// by `sort_by_file_name`. /// /// Note that this is not used in the parallel iterator. pub fn sort_by_file_path(&mut self, cmp: F) -> &mut WalkBuilder where F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static, { self.sorter = Some(Sorter::ByPath(Arc::new(cmp))); self } /// Set a function for sorting directory entries by file name. /// /// If a compare function is set, the resulting iterator will return all /// paths in sorted order. The compare function will be called to compare /// names from entries from the same directory using only the name of the /// entry. /// /// This method will override any previous sorter set by this method or /// by `sort_by_file_path`. /// /// Note that this is not used in the parallel iterator. pub fn sort_by_file_name(&mut self, cmp: F) -> &mut WalkBuilder where F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static, { self.sorter = Some(Sorter::ByName(Arc::new(cmp))); self } /// Do not cross file system boundaries. /// /// When this option is enabled, directory traversal will not descend into /// directories that are on a different file system from the root path. /// /// Currently, this option is only supported on Unix and Windows. If this /// option is used on an unsupported platform, then directory traversal /// will immediately return an error and will not yield any entries. pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder { self.same_file_system = yes; self } /// Do not yield directory entries that are believed to correspond to /// stdout. /// /// This is useful when a command is invoked via shell redirection to a /// file that is also being read. For example, `grep -r foo ./ > results` /// might end up trying to search `results` even though it is also writing /// to it, which could cause an unbounded feedback loop. Setting this /// option prevents this from happening by skipping over the `results` /// file. /// /// This is disabled by default. pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder { if yes { self.skip = stdout_handle().map(Arc::new); } else { self.skip = None; } self } /// Yields only entries which satisfy the given predicate and skips /// descending into directories that do not satisfy the given predicate. /// /// The predicate is applied to all entries. If the predicate is /// true, iteration carries on as normal. 
If the predicate is false, the /// entry is ignored and if it is a directory, it is not descended into. /// /// Note that the errors for reading entries that may not satisfy the /// predicate will still be yielded. pub fn filter_entry<P>
(&mut self, filter: P) -> &mut WalkBuilder where P: Fn(&DirEntry) -> bool + Send + Sync + 'static, { self.filter = Some(Filter(Arc::new(filter))); self } } /// Walk is a recursive directory iterator over file paths in one or more /// directories. /// /// Only file and directory paths matching the rules are returned. By default, /// ignore files like `.gitignore` are respected. The precise matching rules /// and precedence is explained in the documentation for `WalkBuilder`. pub struct Walk { its: vec::IntoIter<(PathBuf, Option)>, it: Option, ig_root: Ignore, ig: Ignore, max_filesize: Option, skip: Option>, filter: Option, } impl Walk { /// Creates a new recursive directory iterator for the file path given. /// /// Note that this uses default settings, which include respecting /// `.gitignore` files. To configure the iterator, use `WalkBuilder` /// instead. pub fn new>(path: P) -> Walk { WalkBuilder::new(path).build() } fn skip_entry(&self, ent: &DirEntry) -> Result { if ent.depth() == 0 { return Ok(false); } // We ensure that trivial skipping is done before any other potentially // expensive operations (stat, filesystem other) are done. This seems // like an obvious optimization but becomes critical when filesystem // operations even as simple as stat can result in significant // overheads; an example of this was a bespoke filesystem layer in // Windows that hosted files remotely and would download them on-demand // when particular filesystem operations occurred. Users of this system // who ensured correct file-type fileters were being used could still // get unnecessary file access resulting in large downloads. if should_skip_entry(&self.ig, ent) { return Ok(true); } if let Some(ref stdout) = self.skip { if path_equals(ent, stdout)? { return Ok(true); } } if self.max_filesize.is_some() && !ent.is_dir() { return Ok(skip_filesize( self.max_filesize.unwrap(), ent.path(), &ent.metadata().ok(), )); } if let Some(Filter(filter)) = &self.filter { if !filter(ent) { return Ok(true); } } Ok(false) } } impl Iterator for Walk { type Item = Result; #[inline(always)] fn next(&mut self) -> Option> { loop { let ev = match self.it.as_mut().and_then(|it| it.next()) { Some(ev) => ev, None => { match self.its.next() { None => return None, Some((_, None)) => { return Some(Ok(DirEntry::new_stdin())); } Some((path, Some(it))) => { self.it = Some(it); if path.is_dir() { let (ig, err) = self.ig_root.add_parents(path); self.ig = ig; if let Some(err) = err { return Some(Err(err)); } } else { self.ig = self.ig_root.clone(); } } } continue; } }; match ev { Err(err) => { return Some(Err(Error::from_walkdir(err))); } Ok(WalkEvent::Exit) => { self.ig = self.ig.parent().unwrap(); } Ok(WalkEvent::Dir(ent)) => { let mut ent = DirEntry::new_walkdir(ent, None); let should_skip = match self.skip_entry(&ent) { Err(err) => return Some(Err(err)), Ok(should_skip) => should_skip, }; if should_skip { self.it.as_mut().unwrap().it.skip_current_dir(); // Still need to push this on the stack because // we'll get a WalkEvent::Exit event for this dir. // We don't care if it errors though. 
let (igtmp, _) = self.ig.add_child(ent.path()); self.ig = igtmp; continue; } let (igtmp, err) = self.ig.add_child(ent.path()); self.ig = igtmp; ent.err = err; return Some(Ok(ent)); } Ok(WalkEvent::File(ent)) => { let ent = DirEntry::new_walkdir(ent, None); let should_skip = match self.skip_entry(&ent) { Err(err) => return Some(Err(err)), Ok(should_skip) => should_skip, }; if should_skip { continue; } return Some(Ok(ent)); } } } } } /// WalkEventIter transforms a WalkDir iterator into an iterator that more /// accurately describes the directory tree. Namely, it emits events that are /// one of three types: directory, file or "exit." An "exit" event means that /// the entire contents of a directory have been enumerated. struct WalkEventIter { depth: usize, it: walkdir::IntoIter, next: Option>, } #[derive(Debug)] enum WalkEvent { Dir(walkdir::DirEntry), File(walkdir::DirEntry), Exit, } impl From for WalkEventIter { fn from(it: WalkDir) -> WalkEventIter { WalkEventIter { depth: 0, it: it.into_iter(), next: None } } } impl Iterator for WalkEventIter { type Item = walkdir::Result; #[inline(always)] fn next(&mut self) -> Option> { let dent = self.next.take().or_else(|| self.it.next()); let depth = match dent { None => 0, Some(Ok(ref dent)) => dent.depth(), Some(Err(ref err)) => err.depth(), }; if depth < self.depth { self.depth -= 1; self.next = dent; return Some(Ok(WalkEvent::Exit)); } self.depth = depth; match dent { None => None, Some(Err(err)) => Some(Err(err)), Some(Ok(dent)) => { if walkdir_is_dir(&dent) { self.depth += 1; Some(Ok(WalkEvent::Dir(dent))) } else { Some(Ok(WalkEvent::File(dent))) } } } } } /// WalkState is used in the parallel recursive directory iterator to indicate /// whether walking should continue as normal, skip descending into a /// particular directory or quit the walk entirely. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum WalkState { /// Continue walking as normal. Continue, /// If the directory entry given is a directory, don't descend into it. /// In all other cases, this has no effect. Skip, /// Quit the entire iterator as soon as possible. /// /// Note that this is an inherently asynchronous action. It is possible /// for more entries to be yielded even after instructing the iterator /// to quit. Quit, } impl WalkState { fn is_continue(&self) -> bool { *self == WalkState::Continue } fn is_quit(&self) -> bool { *self == WalkState::Quit } } /// A builder for constructing a visitor when using /// [`WalkParallel::visit`](struct.WalkParallel.html#method.visit). The builder /// will be called for each thread started by `WalkParallel`. The visitor /// returned from each builder is then called for every directory entry. pub trait ParallelVisitorBuilder<'s> { /// Create per-thread `ParallelVisitor`s for `WalkParallel`. fn build(&mut self) -> Box; } impl<'a, 's, P: ParallelVisitorBuilder<'s>> ParallelVisitorBuilder<'s> for &'a mut P { fn build(&mut self) -> Box { (**self).build() } } /// Receives files and directories for the current thread. /// /// Setup for the traversal can be implemented as part of /// [`ParallelVisitorBuilder::build`](trait.ParallelVisitorBuilder.html#tymethod.build). /// Teardown when traversal finishes can be implemented by implementing the /// `Drop` trait on your traversal type. pub trait ParallelVisitor: Send { /// Receives files and directories for the current thread. This is called /// once for every directory entry visited by traversal. 
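    /// # Example
    ///
    /// A sketch of a per-thread visitor that counts the entries it sees.
    /// `Counter` and `CounterBuilder` are illustrative names, not part of
    /// this crate.
    ///
    /// ```no_run
    /// use ignore::{
    ///     DirEntry, Error, ParallelVisitor, ParallelVisitorBuilder,
    ///     WalkBuilder, WalkState,
    /// };
    ///
    /// struct Counter(u64);
    ///
    /// impl ParallelVisitor for Counter {
    ///     fn visit(&mut self, entry: Result<DirEntry, Error>) -> WalkState {
    ///         if entry.is_ok() {
    ///             self.0 += 1;
    ///         }
    ///         WalkState::Continue
    ///     }
    /// }
    ///
    /// struct CounterBuilder;
    ///
    /// impl<'s> ParallelVisitorBuilder<'s> for CounterBuilder {
    ///     // One fresh visitor per worker thread.
    ///     fn build(&mut self) -> Box<dyn ParallelVisitor + 's> {
    ///         Box::new(Counter(0))
    ///     }
    /// }
    ///
    /// let mut builder = CounterBuilder;
    /// WalkBuilder::new("./").build_parallel().visit(&mut builder);
    /// ```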
fn visit(&mut self, entry: Result) -> WalkState; } struct FnBuilder { builder: F, } impl<'s, F: FnMut() -> FnVisitor<'s>> ParallelVisitorBuilder<'s> for FnBuilder { fn build(&mut self) -> Box { let visitor = (self.builder)(); Box::new(FnVisitorImp { visitor }) } } type FnVisitor<'s> = Box) -> WalkState + Send + 's>; struct FnVisitorImp<'s> { visitor: FnVisitor<'s>, } impl<'s> ParallelVisitor for FnVisitorImp<'s> { fn visit(&mut self, entry: Result) -> WalkState { (self.visitor)(entry) } } /// WalkParallel is a parallel recursive directory iterator over files paths /// in one or more directories. /// /// Only file and directory paths matching the rules are returned. By default, /// ignore files like `.gitignore` are respected. The precise matching rules /// and precedence is explained in the documentation for `WalkBuilder`. /// /// Unlike `Walk`, this uses multiple threads for traversing a directory. pub struct WalkParallel { paths: vec::IntoIter, ig_root: Ignore, max_filesize: Option, max_depth: Option, follow_links: bool, same_file_system: bool, threads: usize, skip: Option>, filter: Option, } impl WalkParallel { /// Execute the parallel recursive directory iterator. `mkf` is called /// for each thread used for iteration. The function produced by `mkf` /// is then in turn called for each visited file path. pub fn run<'s, F>(self, mkf: F) where F: FnMut() -> FnVisitor<'s>, { self.visit(&mut FnBuilder { builder: mkf }) } /// Execute the parallel recursive directory iterator using a custom /// visitor. /// /// The builder given is used to construct a visitor for every thread /// used by this traversal. The visitor returned from each builder is then /// called for every directory entry seen by that thread. /// /// Typically, creating a custom visitor is useful if you need to perform /// some kind of cleanup once traversal is finished. This can be achieved /// by implementing `Drop` for your builder (or for your visitor, if you /// want to execute cleanup for every thread that is launched). /// /// For example, each visitor might build up a data structure of results /// corresponding to the directory entries seen for each thread. Since each /// visitor runs on only one thread, this build-up can be done without /// synchronization. Then, once traversal is complete, all of the results /// can be merged together into a single data structure. pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) { let threads = self.threads(); let stack = Arc::new(Mutex::new(vec![])); { let mut stack = stack.lock().unwrap(); let mut visitor = builder.build(); let mut paths = Vec::new().into_iter(); std::mem::swap(&mut paths, &mut self.paths); // Send the initial set of root paths to the pool of workers. Note // that we only send directories. For files, we send to them the // callback directly. for path in paths { let (dent, root_device) = if path == Path::new("-") { (DirEntry::new_stdin(), None) } else { let root_device = if !self.same_file_system { None } else { match device_num(&path) { Ok(root_device) => Some(root_device), Err(err) => { let err = Error::Io(err).with_path(path); if visitor.visit(Err(err)).is_quit() { return; } continue; } } }; match DirEntryRaw::from_path(0, path, false) { Ok(dent) => { (DirEntry::new_raw(dent, None), root_device) } Err(err) => { if visitor.visit(Err(err)).is_quit() { return; } continue; } } }; stack.push(Message::Work(Work { dent: dent, ignore: self.ig_root.clone(), root_device: root_device, })); } // ... 
but there's no need to start workers if we don't need them. if stack.is_empty() { return; } } // Create the workers and then wait for them to finish. let quit_now = Arc::new(AtomicBool::new(false)); let num_pending = Arc::new(AtomicUsize::new(stack.lock().unwrap().len())); crossbeam_utils::thread::scope(|s| { let mut handles = vec![]; for _ in 0..threads { let worker = Worker { visitor: builder.build(), stack: stack.clone(), quit_now: quit_now.clone(), num_pending: num_pending.clone(), max_depth: self.max_depth, max_filesize: self.max_filesize, follow_links: self.follow_links, skip: self.skip.clone(), filter: self.filter.clone(), }; handles.push(s.spawn(|_| worker.run())); } for handle in handles { handle.join().unwrap(); } }) .unwrap(); // Pass along panics from threads } fn threads(&self) -> usize { if self.threads == 0 { 2 } else { self.threads } } } /// Message is the set of instructions that a worker knows how to process. enum Message { /// A work item corresponds to a directory that should be descended into. /// Work items for entries that should be skipped or ignored should not /// be produced. Work(Work), /// This instruction indicates that the worker should quit. Quit, } /// A unit of work for each worker to process. /// /// Each unit of work corresponds to a directory that should be descended /// into. struct Work { /// The directory entry. dent: DirEntry, /// Any ignore matchers that have been built for this directory's parents. ignore: Ignore, /// The root device number. When present, only files with the same device /// number should be considered. root_device: Option, } impl Work { /// Returns true if and only if this work item is a directory. fn is_dir(&self) -> bool { self.dent.is_dir() } /// Returns true if and only if this work item is a symlink. fn is_symlink(&self) -> bool { self.dent.file_type().map_or(false, |ft| ft.is_symlink()) } /// Adds ignore rules for parent directories. /// /// Note that this only applies to entries at depth 0. On all other /// entries, this is a no-op. fn add_parents(&mut self) -> Option { if self.dent.depth() > 0 { return None; } // At depth 0, the path of this entry is a root path, so we can // use it directly to add parent ignore rules. let (ig, err) = self.ignore.add_parents(self.dent.path()); self.ignore = ig; err } /// Reads the directory contents of this work item and adds ignore /// rules for this directory. /// /// If there was a problem with reading the directory contents, then /// an error is returned. If there was a problem reading the ignore /// rules for this directory, then the error is attached to this /// work item's directory entry. fn read_dir(&mut self) -> Result { let readdir = match fs::read_dir(self.dent.path()) { Ok(readdir) => readdir, Err(err) => { let err = Error::from(err) .with_path(self.dent.path()) .with_depth(self.dent.depth()); return Err(err); } }; let (ig, err) = self.ignore.add_child(self.dent.path()); self.ignore = ig; self.dent.err = err; Ok(readdir) } } /// A worker is responsible for descending into directories, updating the /// ignore matchers, producing new work and invoking the caller's callback. /// /// Note that a worker is *both* a producer and a consumer. struct Worker<'s> { /// The caller's callback. visitor: Box, /// A stack of work to do. /// /// We use a stack instead of a channel because a stack lets us visit /// directories in depth first order. This can substantially reduce peak /// memory usage by keeping both the number of files path and gitignore /// matchers in memory lower. 
stack: Arc>>, /// Whether all workers should terminate at the next opportunity. Note /// that we need this because we don't want other `Work` to be done after /// we quit. We wouldn't need this if have a priority channel. quit_now: Arc, /// The number of outstanding work items. num_pending: Arc, /// The maximum depth of directories to descend. A value of `0` means no /// descension at all. max_depth: Option, /// The maximum size a searched file can be (in bytes). If a file exceeds /// this size it will be skipped. max_filesize: Option, /// Whether to follow symbolic links or not. When this is enabled, loop /// detection is performed. follow_links: bool, /// A file handle to skip, currently is either `None` or stdout, if it's /// a file and it has been requested to skip files identical to stdout. skip: Option>, /// A predicate applied to dir entries. If true, the entry and all /// children will be skipped. filter: Option, } impl<'s> Worker<'s> { /// Runs this worker until there is no more work left to do. /// /// The worker will call the caller's callback for all entries that aren't /// skipped by the ignore matcher. fn run(mut self) { while let Some(work) = self.get_work() { if let WalkState::Quit = self.run_one(work) { self.quit_now(); } self.work_done(); } } fn run_one(&mut self, mut work: Work) -> WalkState { // If the work is not a directory, then we can just execute the // caller's callback immediately and move on. if work.is_symlink() || !work.is_dir() { return self.visitor.visit(Ok(work.dent)); } if let Some(err) = work.add_parents() { let state = self.visitor.visit(Err(err)); if state.is_quit() { return state; } } let descend = if let Some(root_device) = work.root_device { match is_same_file_system(root_device, work.dent.path()) { Ok(true) => true, Ok(false) => false, Err(err) => { let state = self.visitor.visit(Err(err)); if state.is_quit() { return state; } false } } } else { true }; // Try to read the directory first before we transfer ownership // to the provided closure. Do not unwrap it immediately, though, // as we may receive an `Err` value e.g. in the case when we do not // have sufficient read permissions to list the directory. // In that case we still want to provide the closure with a valid // entry before passing the error value. let readdir = work.read_dir(); let depth = work.dent.depth(); let state = self.visitor.visit(Ok(work.dent)); if !state.is_continue() { return state; } if !descend { return WalkState::Skip; } let readdir = match readdir { Ok(readdir) => readdir, Err(err) => { return self.visitor.visit(Err(err)); } }; if self.max_depth.map_or(false, |max| depth >= max) { return WalkState::Skip; } for result in readdir { let state = self.generate_work( &work.ignore, depth + 1, work.root_device, result, ); if state.is_quit() { return state; } } WalkState::Continue } /// Decides whether to submit the given directory entry as a file to /// search. /// /// If the entry is a path that should be ignored, then this is a no-op. /// Otherwise, the entry is pushed on to the queue. (The actual execution /// of the callback happens in `run_one`.) /// /// If an error occurs while reading the entry, then it is sent to the /// caller's callback. /// /// `ig` is the `Ignore` matcher for the parent directory. `depth` should /// be the depth of this entry. `result` should be the item yielded by /// a directory iterator. 
fn generate_work( &mut self, ig: &Ignore, depth: usize, root_device: Option, result: Result, ) -> WalkState { let fs_dent = match result { Ok(fs_dent) => fs_dent, Err(err) => { return self .visitor .visit(Err(Error::from(err).with_depth(depth))); } }; let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) { Ok(dent) => DirEntry::new_raw(dent, None), Err(err) => { return self.visitor.visit(Err(err)); } }; let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink()); if self.follow_links && is_symlink { let path = dent.path().to_path_buf(); dent = match DirEntryRaw::from_path(depth, path, true) { Ok(dent) => DirEntry::new_raw(dent, None), Err(err) => { return self.visitor.visit(Err(err)); } }; if dent.is_dir() { if let Err(err) = check_symlink_loop(ig, dent.path(), depth) { return self.visitor.visit(Err(err)); } } } // N.B. See analogous call in the single-threaded implementation about // why it's important for this to come before the checks below. if should_skip_entry(ig, &dent) { return WalkState::Continue; } if let Some(ref stdout) = self.skip { let is_stdout = match path_equals(&dent, stdout) { Ok(is_stdout) => is_stdout, Err(err) => return self.visitor.visit(Err(err)), }; if is_stdout { return WalkState::Continue; } } let should_skip_filesize = if self.max_filesize.is_some() && !dent.is_dir() { skip_filesize( self.max_filesize.unwrap(), dent.path(), &dent.metadata().ok(), ) } else { false }; let should_skip_filtered = if let Some(Filter(predicate)) = &self.filter { !predicate(&dent) } else { false }; if !should_skip_filesize && !should_skip_filtered { self.send(Work { dent, ignore: ig.clone(), root_device }); } WalkState::Continue } /// Returns the next directory to descend into. /// /// If all work has been exhausted, then this returns None. The worker /// should then subsequently quit. fn get_work(&mut self) -> Option { let mut value = self.recv(); loop { // Simulate a priority channel: If quit_now flag is set, we can // receive only quit messages. if self.is_quit_now() { value = Some(Message::Quit) } match value { Some(Message::Work(work)) => { return Some(work); } Some(Message::Quit) => { // Repeat quit message to wake up sleeping threads, if // any. The domino effect will ensure that every thread // will quit. self.send_quit(); return None; } None => { // Once num_pending reaches 0, it is impossible for it to // ever increase again. Namely, it only reaches 0 once // all jobs have run such that no jobs have produced more // work. We have this guarantee because num_pending is // always incremented before each job is submitted and only // decremented once each job is completely finished. // Therefore, if this reaches zero, then there can be no // other job running. if self.num_pending() == 0 { // Every other thread is blocked at the next recv(). // Send the initial quit message and quit. self.send_quit(); return None; } // Wait for next `Work` or `Quit` message. loop { if let Some(v) = self.recv() { value = Some(v); break; } // Our stack isn't blocking. Instead of burning the // CPU waiting, we let the thread sleep for a bit. In // general, this tends to only occur once the search is // approaching termination. thread::sleep(Duration::from_millis(1)); } } } } } /// Indicates that all workers should quit immediately. fn quit_now(&self) { self.quit_now.store(true, Ordering::SeqCst); } /// Returns true if this worker should quit immediately. fn is_quit_now(&self) -> bool { self.quit_now.load(Ordering::SeqCst) } /// Returns the number of pending jobs. 
fn num_pending(&self) -> usize { self.num_pending.load(Ordering::SeqCst) } /// Send work. fn send(&self, work: Work) { self.num_pending.fetch_add(1, Ordering::SeqCst); let mut stack = self.stack.lock().unwrap(); stack.push(Message::Work(work)); } /// Send a quit message. fn send_quit(&self) { let mut stack = self.stack.lock().unwrap(); stack.push(Message::Quit); } /// Receive work. fn recv(&self) -> Option { let mut stack = self.stack.lock().unwrap(); stack.pop() } /// Signal that work has been received. fn work_done(&self) { self.num_pending.fetch_sub(1, Ordering::SeqCst); } } fn check_symlink_loop( ig_parent: &Ignore, child_path: &Path, child_depth: usize, ) -> Result<(), Error> { let hchild = Handle::from_path(child_path).map_err(|err| { Error::from(err).with_path(child_path).with_depth(child_depth) })?; for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) { let h = Handle::from_path(ig.path()).map_err(|err| { Error::from(err).with_path(child_path).with_depth(child_depth) })?; if hchild == h { return Err(Error::Loop { ancestor: ig.path().to_path_buf(), child: child_path.to_path_buf(), } .with_depth(child_depth)); } } Ok(()) } // Before calling this function, make sure that you ensure that is really // necessary as the arguments imply a file stat. fn skip_filesize( max_filesize: u64, path: &Path, ent: &Option, ) -> bool { let filesize = match *ent { Some(ref md) => Some(md.len()), None => None, }; if let Some(fs) = filesize { if fs > max_filesize { log::debug!("ignoring {}: {} bytes", path.display(), fs); true } else { false } } else { false } } fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool { let m = ig.matched_dir_entry(dent); if m.is_ignore() { log::debug!("ignoring {}: {:?}", dent.path().display(), m); true } else if m.is_whitelist() { log::debug!("whitelisting {}: {:?}", dent.path().display(), m); false } else { false } } /// Returns a handle to stdout for filtering search. /// /// A handle is returned if and only if stdout is being redirected to a file. /// The handle returned corresponds to that file. /// /// This can be used to ensure that we do not attempt to search a file that we /// may also be writing to. fn stdout_handle() -> Option { let h = match Handle::stdout() { Err(_) => return None, Ok(h) => h, }; let md = match h.as_file().metadata() { Err(_) => return None, Ok(md) => md, }; if !md.is_file() { return None; } Some(h) } /// Returns true if and only if the given directory entry is believed to be /// equivalent to the given handle. If there was a problem querying the path /// for information to determine equality, then that error is returned. fn path_equals(dent: &DirEntry, handle: &Handle) -> Result { #[cfg(unix)] fn never_equal(dent: &DirEntry, handle: &Handle) -> bool { dent.ino() != Some(handle.ino()) } #[cfg(not(unix))] fn never_equal(_: &DirEntry, _: &Handle) -> bool { false } // If we know for sure that these two things aren't equal, then avoid // the costly extra stat call to determine equality. if dent.is_stdin() || never_equal(dent, handle) { return Ok(false); } Handle::from_path(dent.path()) .map(|h| &h == handle) .map_err(|err| Error::Io(err).with_path(dent.path())) } /// Returns true if the given walkdir entry corresponds to a directory. /// /// This is normally just `dent.file_type().is_dir()`, but when we aren't /// following symlinks, the root directory entry may be a symlink to a /// directory that we *do* follow---by virtue of it being specified by the user /// explicitly. 
In that case, we need to follow the symlink and query whether /// it's a directory or not. But we only do this for root entries to avoid an /// additional stat check in most cases. fn walkdir_is_dir(dent: &walkdir::DirEntry) -> bool { if dent.file_type().is_dir() { return true; } if !dent.file_type().is_symlink() || dent.depth() > 0 { return false; } dent.path().metadata().ok().map_or(false, |md| md.file_type().is_dir()) } /// Returns true if and only if the given path is on the same device as the /// given root device. fn is_same_file_system(root_device: u64, path: &Path) -> Result { let dent_device = device_num(path).map_err(|err| Error::Io(err).with_path(path))?; Ok(root_device == dent_device) } #[cfg(unix)] fn device_num>(path: P) -> io::Result { use std::os::unix::fs::MetadataExt; path.as_ref().metadata().map(|md| md.dev()) } #[cfg(windows)] fn device_num>(path: P) -> io::Result { use winapi_util::{file, Handle}; let h = Handle::from_path_any(path)?; file::information(h).map(|info| info.volume_serial_number()) } #[cfg(not(any(unix, windows)))] fn device_num>(_: P) -> io::Result { Err(io::Error::new( io::ErrorKind::Other, "walkdir: same_file_system option not supported on this platform", )) } #[cfg(test)] mod tests { use std::ffi::OsStr; use std::fs::{self, File}; use std::io::Write; use std::path::Path; use std::sync::{Arc, Mutex}; use super::{DirEntry, WalkBuilder, WalkState}; use crate::tests::TempDir; fn wfile>(path: P, contents: &str) { let mut file = File::create(path).unwrap(); file.write_all(contents.as_bytes()).unwrap(); } fn wfile_size>(path: P, size: u64) { let file = File::create(path).unwrap(); file.set_len(size).unwrap(); } #[cfg(unix)] fn symlink, Q: AsRef>(src: P, dst: Q) { use std::os::unix::fs::symlink; symlink(src, dst).unwrap(); } fn mkdirp>(path: P) { fs::create_dir_all(path).unwrap(); } fn normal_path(unix: &str) -> String { if cfg!(windows) { unix.replace("\\", "/") } else { unix.to_string() } } fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec { let mut paths = vec![]; for result in builder.build() { let dent = match result { Err(_) => continue, Ok(dent) => dent, }; let path = dent.path().strip_prefix(prefix).unwrap(); if path.as_os_str().is_empty() { continue; } paths.push(normal_path(path.to_str().unwrap())); } paths.sort(); paths } fn walk_collect_parallel( prefix: &Path, builder: &WalkBuilder, ) -> Vec { let mut paths = vec![]; for dent in walk_collect_entries_parallel(builder) { let path = dent.path().strip_prefix(prefix).unwrap(); if path.as_os_str().is_empty() { continue; } paths.push(normal_path(path.to_str().unwrap())); } paths.sort(); paths } fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec { let dents = Arc::new(Mutex::new(vec![])); builder.build_parallel().run(|| { let dents = dents.clone(); Box::new(move |result| { if let Ok(dent) = result { dents.lock().unwrap().push(dent); } WalkState::Continue }) }); let dents = dents.lock().unwrap(); dents.to_vec() } fn mkpaths(paths: &[&str]) -> Vec { let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect(); paths.sort(); paths } fn tmpdir() -> TempDir { TempDir::new().unwrap() } fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) { let got = walk_collect(prefix, builder); assert_eq!(got, mkpaths(expected), "single threaded"); let got = walk_collect_parallel(prefix, builder); assert_eq!(got, mkpaths(expected), "parallel"); } #[test] fn no_ignores() { let td = tmpdir(); mkdirp(td.path().join("a/b/c")); mkdirp(td.path().join("x/y")); 
wfile(td.path().join("a/b/foo"), ""); wfile(td.path().join("x/y/foo"), ""); assert_paths( td.path(), &WalkBuilder::new(td.path()), &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"], ); } #[test] fn custom_ignore() { let td = tmpdir(); let custom_ignore = ".customignore"; mkdirp(td.path().join("a")); wfile(td.path().join(custom_ignore), "foo"); wfile(td.path().join("foo"), ""); wfile(td.path().join("a/foo"), ""); wfile(td.path().join("bar"), ""); wfile(td.path().join("a/bar"), ""); let mut builder = WalkBuilder::new(td.path()); builder.add_custom_ignore_filename(&custom_ignore); assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]); } #[test] fn custom_ignore_exclusive_use() { let td = tmpdir(); let custom_ignore = ".customignore"; mkdirp(td.path().join("a")); wfile(td.path().join(custom_ignore), "foo"); wfile(td.path().join("foo"), ""); wfile(td.path().join("a/foo"), ""); wfile(td.path().join("bar"), ""); wfile(td.path().join("a/bar"), ""); let mut builder = WalkBuilder::new(td.path()); builder.ignore(false); builder.git_ignore(false); builder.git_global(false); builder.git_exclude(false); builder.add_custom_ignore_filename(&custom_ignore); assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]); } #[test] fn gitignore() { let td = tmpdir(); mkdirp(td.path().join(".git")); mkdirp(td.path().join("a")); wfile(td.path().join(".gitignore"), "foo"); wfile(td.path().join("foo"), ""); wfile(td.path().join("a/foo"), ""); wfile(td.path().join("bar"), ""); wfile(td.path().join("a/bar"), ""); assert_paths( td.path(), &WalkBuilder::new(td.path()), &["bar", "a", "a/bar"], ); } #[test] fn explicit_ignore() { let td = tmpdir(); let igpath = td.path().join(".not-an-ignore"); mkdirp(td.path().join("a")); wfile(&igpath, "foo"); wfile(td.path().join("foo"), ""); wfile(td.path().join("a/foo"), ""); wfile(td.path().join("bar"), ""); wfile(td.path().join("a/bar"), ""); let mut builder = WalkBuilder::new(td.path()); assert!(builder.add_ignore(&igpath).is_none()); assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]); } #[test] fn explicit_ignore_exclusive_use() { let td = tmpdir(); let igpath = td.path().join(".not-an-ignore"); mkdirp(td.path().join("a")); wfile(&igpath, "foo"); wfile(td.path().join("foo"), ""); wfile(td.path().join("a/foo"), ""); wfile(td.path().join("bar"), ""); wfile(td.path().join("a/bar"), ""); let mut builder = WalkBuilder::new(td.path()); builder.standard_filters(false); assert!(builder.add_ignore(&igpath).is_none()); assert_paths( td.path(), &builder, &[".not-an-ignore", "bar", "a", "a/bar"], ); } #[test] fn gitignore_parent() { let td = tmpdir(); mkdirp(td.path().join(".git")); mkdirp(td.path().join("a")); wfile(td.path().join(".gitignore"), "foo"); wfile(td.path().join("a/foo"), ""); wfile(td.path().join("a/bar"), ""); let root = td.path().join("a"); assert_paths(&root, &WalkBuilder::new(&root), &["bar"]); } #[test] fn max_depth() { let td = tmpdir(); mkdirp(td.path().join("a/b/c")); wfile(td.path().join("foo"), ""); wfile(td.path().join("a/foo"), ""); wfile(td.path().join("a/b/foo"), ""); wfile(td.path().join("a/b/c/foo"), ""); let mut builder = WalkBuilder::new(td.path()); assert_paths( td.path(), &builder, &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"], ); assert_paths(td.path(), builder.max_depth(Some(0)), &[]); assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]); assert_paths( td.path(), builder.max_depth(Some(2)), &["a", "a/b", "foo", "a/foo"], ); } #[test] fn max_filesize() { let td = tmpdir(); mkdirp(td.path().join("a/b")); 
wfile_size(td.path().join("foo"), 0); wfile_size(td.path().join("bar"), 400); wfile_size(td.path().join("baz"), 600); wfile_size(td.path().join("a/foo"), 600); wfile_size(td.path().join("a/bar"), 500); wfile_size(td.path().join("a/baz"), 200); let mut builder = WalkBuilder::new(td.path()); assert_paths( td.path(), &builder, &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"], ); assert_paths( td.path(), builder.max_filesize(Some(0)), &["a", "a/b", "foo"], ); assert_paths( td.path(), builder.max_filesize(Some(500)), &["a", "a/b", "foo", "bar", "a/bar", "a/baz"], ); assert_paths( td.path(), builder.max_filesize(Some(50000)), &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"], ); } #[cfg(unix)] // because symlinks on windows are weird #[test] fn symlinks() { let td = tmpdir(); mkdirp(td.path().join("a/b")); symlink(td.path().join("a/b"), td.path().join("z")); wfile(td.path().join("a/b/foo"), ""); let mut builder = WalkBuilder::new(td.path()); assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]); assert_paths( td.path(), &builder.follow_links(true), &["a", "a/b", "a/b/foo", "z", "z/foo"], ); } #[cfg(unix)] // because symlinks on windows are weird #[test] fn first_path_not_symlink() { let td = tmpdir(); mkdirp(td.path().join("foo")); let dents = WalkBuilder::new(td.path().join("foo")) .build() .into_iter() .collect::<Result<Vec<_>, _>>() .unwrap(); assert_eq!(1, dents.len()); assert!(!dents[0].path_is_symlink()); let dents = walk_collect_entries_parallel(&WalkBuilder::new( td.path().join("foo"), )); assert_eq!(1, dents.len()); assert!(!dents[0].path_is_symlink()); } #[cfg(unix)] // because symlinks on windows are weird #[test] fn symlink_loop() { let td = tmpdir(); mkdirp(td.path().join("a/b")); symlink(td.path().join("a"), td.path().join("a/b/c")); let mut builder = WalkBuilder::new(td.path()); assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]); assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]); } // It's a little tricky to test the 'same_file_system' option since // we need an environment with more than one file system. We adopt a // heuristic where /sys is typically a distinct volume on Linux and roll // with that. #[test] #[cfg(target_os = "linux")] fn same_file_system() { use super::device_num; // If for some reason /sys doesn't exist or isn't a directory, just // skip this test. if !Path::new("/sys").is_dir() { return; } // If our test directory actually isn't a different volume from /sys, // then this test is meaningless and we shouldn't run it. let td = tmpdir(); if device_num(td.path()).unwrap() == device_num("/sys").unwrap() { return; } mkdirp(td.path().join("same_file")); symlink("/sys", td.path().join("same_file").join("alink")); // Create a symlink to sys and enable following symlinks. If the // same_file_system option doesn't work, then this probably will hit a // permission error. Otherwise, it should just skip over the symlink // completely. let mut builder = WalkBuilder::new(td.path()); builder.follow_links(true).same_file_system(true); assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]); } #[cfg(target_os = "linux")] #[test] fn no_read_permissions() { let dir_path = Path::new("/root"); // There's no /root directory, skip the test. if !dir_path.is_dir() { return; } // We're running as root, so the test won't check what we want it to. if fs::read_dir(&dir_path).is_ok() { return; } // Check that we can't descend but get an entry for the parent dir.
let builder = WalkBuilder::new(&dir_path); assert_paths(dir_path.parent().unwrap(), &builder, &["root"]); } #[test] fn filter() { let td = tmpdir(); mkdirp(td.path().join("a/b/c")); mkdirp(td.path().join("x/y")); wfile(td.path().join("a/b/foo"), ""); wfile(td.path().join("x/y/foo"), ""); assert_paths( td.path(), &WalkBuilder::new(td.path()), &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"], ); assert_paths( td.path(), &WalkBuilder::new(td.path()) .filter_entry(|entry| entry.file_name() != OsStr::new("a")), &["x", "x/y", "x/y/foo"], ); } } ignore-0.4.18/tests/gitignore_matched_path_or_any_parents_tests.gitignore000064400000000000000000000044610072674642500253260ustar 00000000000000# Based on https://github.com/behnam/gitignore-test/blob/master/.gitignore ### file in root # MATCH /file_root_1 file_root_00 # NO_MATCH file_root_01/ # NO_MATCH file_root_02/* # NO_MATCH file_root_03/** # MATCH /file_root_10 /file_root_10 # NO_MATCH /file_root_11/ # NO_MATCH /file_root_12/* # NO_MATCH /file_root_13/** # NO_MATCH */file_root_20 # NO_MATCH */file_root_21/ # NO_MATCH */file_root_22/* # NO_MATCH */file_root_23/** # MATCH /file_root_30 **/file_root_30 # NO_MATCH **/file_root_31/ # NO_MATCH **/file_root_32/* # NO_MATCH **/file_root_33/** ### file in sub-dir # MATCH /parent_dir/file_deep_1 file_deep_00 # NO_MATCH file_deep_01/ # NO_MATCH file_deep_02/* # NO_MATCH file_deep_03/** # NO_MATCH /file_deep_10 # NO_MATCH /file_deep_11/ # NO_MATCH /file_deep_12/* # NO_MATCH /file_deep_13/** # MATCH /parent_dir/file_deep_20 */file_deep_20 # NO_MATCH */file_deep_21/ # NO_MATCH */file_deep_22/* # NO_MATCH */file_deep_23/** # MATCH /parent_dir/file_deep_30 **/file_deep_30 # NO_MATCH **/file_deep_31/ # NO_MATCH **/file_deep_32/* # NO_MATCH **/file_deep_33/** ### dir in root # MATCH /dir_root_00 dir_root_00 # MATCH /dir_root_01 dir_root_01/ # MATCH /dir_root_02 dir_root_02/* # MATCH /dir_root_03 dir_root_03/** # MATCH /dir_root_10 /dir_root_10 # MATCH /dir_root_11 /dir_root_11/ # MATCH /dir_root_12 /dir_root_12/* # MATCH /dir_root_13 /dir_root_13/** # NO_MATCH */dir_root_20 # NO_MATCH */dir_root_21/ # NO_MATCH */dir_root_22/* # NO_MATCH */dir_root_23/** # MATCH /dir_root_30 **/dir_root_30 # MATCH /dir_root_31 **/dir_root_31/ # MATCH /dir_root_32 **/dir_root_32/* # MATCH /dir_root_33 **/dir_root_33/** ### dir in sub-dir # MATCH /parent_dir/dir_deep_00 dir_deep_00 # MATCH /parent_dir/dir_deep_01 dir_deep_01/ # NO_MATCH dir_deep_02/* # NO_MATCH dir_deep_03/** # NO_MATCH /dir_deep_10 # NO_MATCH /dir_deep_11/ # NO_MATCH /dir_deep_12/* # NO_MATCH /dir_deep_13/** # MATCH /parent_dir/dir_deep_20 */dir_deep_20 # MATCH /parent_dir/dir_deep_21 */dir_deep_21/ # MATCH /parent_dir/dir_deep_22 */dir_deep_22/* # MATCH /parent_dir/dir_deep_23 */dir_deep_23/** # MATCH /parent_dir/dir_deep_30 **/dir_deep_30 # MATCH /parent_dir/dir_deep_31 **/dir_deep_31/ # MATCH /parent_dir/dir_deep_32 **/dir_deep_32/* # MATCH /parent_dir/dir_deep_33 **/dir_deep_33/** ignore-0.4.18/tests/gitignore_matched_path_or_any_parents_tests.rs000064400000000000000000000301040072674642500237540ustar 00000000000000use std::path::Path; use ignore::gitignore::{Gitignore, GitignoreBuilder}; const IGNORE_FILE: &'static str = "tests/gitignore_matched_path_or_any_parents_tests.gitignore"; fn get_gitignore() -> Gitignore { let mut builder = GitignoreBuilder::new("ROOT"); let error = builder.add(IGNORE_FILE); assert!(error.is_none(), "failed to open gitignore file"); builder.build().unwrap() } #[test] #[should_panic(expected = "path is expected to be under the 
root")] fn test_path_should_be_under_root() { let gitignore = get_gitignore(); let path = "/tmp/some_file"; gitignore.matched_path_or_any_parents(Path::new(path), false); assert!(false); } #[test] fn test_files_in_root() { let gitignore = get_gitignore(); let m = |path: &str| { gitignore.matched_path_or_any_parents(Path::new(path), false) }; // 0x assert!(m("ROOT/file_root_00").is_ignore()); assert!(m("ROOT/file_root_01").is_none()); assert!(m("ROOT/file_root_02").is_none()); assert!(m("ROOT/file_root_03").is_none()); // 1x assert!(m("ROOT/file_root_10").is_ignore()); assert!(m("ROOT/file_root_11").is_none()); assert!(m("ROOT/file_root_12").is_none()); assert!(m("ROOT/file_root_13").is_none()); // 2x assert!(m("ROOT/file_root_20").is_none()); assert!(m("ROOT/file_root_21").is_none()); assert!(m("ROOT/file_root_22").is_none()); assert!(m("ROOT/file_root_23").is_none()); // 3x assert!(m("ROOT/file_root_30").is_ignore()); assert!(m("ROOT/file_root_31").is_none()); assert!(m("ROOT/file_root_32").is_none()); assert!(m("ROOT/file_root_33").is_none()); } #[test] fn test_files_in_deep() { let gitignore = get_gitignore(); let m = |path: &str| { gitignore.matched_path_or_any_parents(Path::new(path), false) }; // 0x assert!(m("ROOT/parent_dir/file_deep_00").is_ignore()); assert!(m("ROOT/parent_dir/file_deep_01").is_none()); assert!(m("ROOT/parent_dir/file_deep_02").is_none()); assert!(m("ROOT/parent_dir/file_deep_03").is_none()); // 1x assert!(m("ROOT/parent_dir/file_deep_10").is_none()); assert!(m("ROOT/parent_dir/file_deep_11").is_none()); assert!(m("ROOT/parent_dir/file_deep_12").is_none()); assert!(m("ROOT/parent_dir/file_deep_13").is_none()); // 2x assert!(m("ROOT/parent_dir/file_deep_20").is_ignore()); assert!(m("ROOT/parent_dir/file_deep_21").is_none()); assert!(m("ROOT/parent_dir/file_deep_22").is_none()); assert!(m("ROOT/parent_dir/file_deep_23").is_none()); // 3x assert!(m("ROOT/parent_dir/file_deep_30").is_ignore()); assert!(m("ROOT/parent_dir/file_deep_31").is_none()); assert!(m("ROOT/parent_dir/file_deep_32").is_none()); assert!(m("ROOT/parent_dir/file_deep_33").is_none()); } #[test] fn test_dirs_in_root() { let gitignore = get_gitignore(); let m = |path: &str, is_dir: bool| { gitignore.matched_path_or_any_parents(Path::new(path), is_dir) }; // 00 assert!(m("ROOT/dir_root_00", true).is_ignore()); assert!(m("ROOT/dir_root_00/file", false).is_ignore()); assert!(m("ROOT/dir_root_00/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_00/child_dir/file", false).is_ignore()); // 01 assert!(m("ROOT/dir_root_01", true).is_ignore()); assert!(m("ROOT/dir_root_01/file", false).is_ignore()); assert!(m("ROOT/dir_root_01/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_01/child_dir/file", false).is_ignore()); // 02 assert!(m("ROOT/dir_root_02", true).is_none()); // dir itself doesn't match assert!(m("ROOT/dir_root_02/file", false).is_ignore()); assert!(m("ROOT/dir_root_02/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_02/child_dir/file", false).is_ignore()); // 03 assert!(m("ROOT/dir_root_03", true).is_none()); // dir itself doesn't match assert!(m("ROOT/dir_root_03/file", false).is_ignore()); assert!(m("ROOT/dir_root_03/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_03/child_dir/file", false).is_ignore()); // 10 assert!(m("ROOT/dir_root_10", true).is_ignore()); assert!(m("ROOT/dir_root_10/file", false).is_ignore()); assert!(m("ROOT/dir_root_10/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_10/child_dir/file", false).is_ignore()); // 11 
assert!(m("ROOT/dir_root_11", true).is_ignore()); assert!(m("ROOT/dir_root_11/file", false).is_ignore()); assert!(m("ROOT/dir_root_11/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_11/child_dir/file", false).is_ignore()); // 12 assert!(m("ROOT/dir_root_12", true).is_none()); // dir itself doesn't match assert!(m("ROOT/dir_root_12/file", false).is_ignore()); assert!(m("ROOT/dir_root_12/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_12/child_dir/file", false).is_ignore()); // 13 assert!(m("ROOT/dir_root_13", true).is_none()); assert!(m("ROOT/dir_root_13/file", false).is_ignore()); assert!(m("ROOT/dir_root_13/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_13/child_dir/file", false).is_ignore()); // 20 assert!(m("ROOT/dir_root_20", true).is_none()); assert!(m("ROOT/dir_root_20/file", false).is_none()); assert!(m("ROOT/dir_root_20/child_dir", true).is_none()); assert!(m("ROOT/dir_root_20/child_dir/file", false).is_none()); // 21 assert!(m("ROOT/dir_root_21", true).is_none()); assert!(m("ROOT/dir_root_21/file", false).is_none()); assert!(m("ROOT/dir_root_21/child_dir", true).is_none()); assert!(m("ROOT/dir_root_21/child_dir/file", false).is_none()); // 22 assert!(m("ROOT/dir_root_22", true).is_none()); assert!(m("ROOT/dir_root_22/file", false).is_none()); assert!(m("ROOT/dir_root_22/child_dir", true).is_none()); assert!(m("ROOT/dir_root_22/child_dir/file", false).is_none()); // 23 assert!(m("ROOT/dir_root_23", true).is_none()); assert!(m("ROOT/dir_root_23/file", false).is_none()); assert!(m("ROOT/dir_root_23/child_dir", true).is_none()); assert!(m("ROOT/dir_root_23/child_dir/file", false).is_none()); // 30 assert!(m("ROOT/dir_root_30", true).is_ignore()); assert!(m("ROOT/dir_root_30/file", false).is_ignore()); assert!(m("ROOT/dir_root_30/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_30/child_dir/file", false).is_ignore()); // 31 assert!(m("ROOT/dir_root_31", true).is_ignore()); assert!(m("ROOT/dir_root_31/file", false).is_ignore()); assert!(m("ROOT/dir_root_31/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_31/child_dir/file", false).is_ignore()); // 32 assert!(m("ROOT/dir_root_32", true).is_none()); // dir itself doesn't match assert!(m("ROOT/dir_root_32/file", false).is_ignore()); assert!(m("ROOT/dir_root_32/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_32/child_dir/file", false).is_ignore()); // 33 assert!(m("ROOT/dir_root_33", true).is_none()); // dir itself doesn't match assert!(m("ROOT/dir_root_33/file", false).is_ignore()); assert!(m("ROOT/dir_root_33/child_dir", true).is_ignore()); assert!(m("ROOT/dir_root_33/child_dir/file", false).is_ignore()); } #[test] fn test_dirs_in_deep() { let gitignore = get_gitignore(); let m = |path: &str, is_dir: bool| { gitignore.matched_path_or_any_parents(Path::new(path), is_dir) }; // 00 assert!(m("ROOT/parent_dir/dir_deep_00", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_00/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_00/child_dir", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_00/child_dir/file", false).is_ignore()); // 01 assert!(m("ROOT/parent_dir/dir_deep_01", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_01/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_01/child_dir", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_01/child_dir/file", false).is_ignore()); // 02 assert!(m("ROOT/parent_dir/dir_deep_02", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_02/file", false).is_none()); 
assert!(m("ROOT/parent_dir/dir_deep_02/child_dir", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_02/child_dir/file", false).is_none()); // 03 assert!(m("ROOT/parent_dir/dir_deep_03", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_03/file", false).is_none()); assert!(m("ROOT/parent_dir/dir_deep_03/child_dir", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_03/child_dir/file", false).is_none()); // 10 assert!(m("ROOT/parent_dir/dir_deep_10", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_10/file", false).is_none()); assert!(m("ROOT/parent_dir/dir_deep_10/child_dir", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_10/child_dir/file", false).is_none()); // 11 assert!(m("ROOT/parent_dir/dir_deep_11", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_11/file", false).is_none()); assert!(m("ROOT/parent_dir/dir_deep_11/child_dir", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_11/child_dir/file", false).is_none()); // 12 assert!(m("ROOT/parent_dir/dir_deep_12", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_12/file", false).is_none()); assert!(m("ROOT/parent_dir/dir_deep_12/child_dir", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_12/child_dir/file", false).is_none()); // 13 assert!(m("ROOT/parent_dir/dir_deep_13", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_13/file", false).is_none()); assert!(m("ROOT/parent_dir/dir_deep_13/child_dir", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_13/child_dir/file", false).is_none()); // 20 assert!(m("ROOT/parent_dir/dir_deep_20", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_20/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_20/child_dir", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_20/child_dir/file", false).is_ignore()); // 21 assert!(m("ROOT/parent_dir/dir_deep_21", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_21/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_21/child_dir", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_21/child_dir/file", false).is_ignore()); // 22 // dir itself doesn't match assert!(m("ROOT/parent_dir/dir_deep_22", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_22/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_22/child_dir", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_22/child_dir/file", false).is_ignore()); // 23 // dir itself doesn't match assert!(m("ROOT/parent_dir/dir_deep_23", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_23/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_23/child_dir", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_23/child_dir/file", false).is_ignore()); // 30 assert!(m("ROOT/parent_dir/dir_deep_30", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_30/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_30/child_dir", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_30/child_dir/file", false).is_ignore()); // 31 assert!(m("ROOT/parent_dir/dir_deep_31", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_31/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_31/child_dir", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_31/child_dir/file", false).is_ignore()); // 32 // dir itself doesn't match assert!(m("ROOT/parent_dir/dir_deep_32", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_32/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_32/child_dir", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_32/child_dir/file", 
false).is_ignore()); // 33 // dir itself doesn't match assert!(m("ROOT/parent_dir/dir_deep_33", true).is_none()); assert!(m("ROOT/parent_dir/dir_deep_33/file", false).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_33/child_dir", true).is_ignore()); assert!(m("ROOT/parent_dir/dir_deep_33/child_dir/file", false).is_ignore()); }
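For reference, here is a minimal sketch (an editorial illustration, not part of the crate or its test suite) of how a caller might drive the same API these fixture tests exercise: build a matcher with GitignoreBuilder, then ask matched_path_or_any_parents whether a path or any of its parent directories is ignored. The root "ROOT", the pattern, and the queried path below are hypothetical.

use ignore::gitignore::GitignoreBuilder;
use std::path::Path;

fn example_matched_path_or_any_parents() {
    let mut builder = GitignoreBuilder::new("ROOT");
    // Add one pattern directly rather than reading a .gitignore file;
    // the trailing slash makes the pattern directory-only.
    builder.add_line(None, "target/").unwrap();
    let gitignore = builder.build().unwrap();

    // The file itself does not match "target/", but its ancestor
    // ROOT/target does, so the lookup reports the path as ignored.
    let m = gitignore.matched_path_or_any_parents(
        Path::new("ROOT/target/debug/foo.o"),
        false, // the queried path is not a directory
    );
    assert!(m.is_ignore());
}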