backdown-1.1.2/.cargo_vcs_info.json0000644000000001360000000000100126250ustar { "git": { "sha1": "3b8702894a144e27dacd0806fcdbaebf260a0c71" }, "path_in_vcs": "" }backdown-1.1.2/.github/FUNDING.yml000064400000000000000000000000201046102023000145620ustar 00000000000000github: [Canop] backdown-1.1.2/.github/workflows/rust.yml000064400000000000000000000005001046102023000165250ustar 00000000000000name: Rust on: push: branches: [ "master" ] pull_request: branches: [ "master" ] env: CARGO_TERM_COLOR: always jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Build run: cargo build --verbose - name: Run tests run: cargo test --verbose backdown-1.1.2/.gitignore000064400000000000000000000000531046102023000134030ustar 00000000000000/target /build /pub /releases /trav* *.zip backdown-1.1.2/CHANGELOG.md000064400000000000000000000020121046102023000132210ustar 00000000000000 ### v1.1.2 - 2024-09-10 - sort files by name - Thanks @nc7s ### v1.1.1 - 2023-07-01 - updated dependencies - stripped binary (smaller) ### v1.1.0 - 2021-12-05 - option to replace staged files with symlinks (unix only) - Fix #2 ### v1.0.1 - 2021-12-05 - option to write the report in a JSON file after staging phase - Fix #3 ### v1.0.0 - 2021-10-02 No reason not to call this a 1.0 ### v0.2.1 - 2021-07-14 - backdown logs a few things. To have log generated launch backdown with `BACKDOWN_LOG=debug backdown your/dir` - change hash algorithm from SHA-256 to BLAKE3, which is slightly faster with same guarantees ### v0.2.0 - 2021-07-12 - backdown proposes to remove in 1 question all duplicates with name like "thing (2).AVI" or "thing (3rd copy).png" when they're in the same directory than the "source" ### v0.1.0 - 2021-07-11 - first public release backdown-1.1.2/Cargo.lock0000644000000624000000000000100106020ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "anyhow" version = "1.0.49" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a03e93e97a28fbc9f42fbc5ba0886a3c67eb637b476dbee711f80a6ffe8223d" [[package]] name = "argh" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e7317a549bc17c5278d9e72bb6e62c6aa801ac2567048e39ebc1c194249323e" dependencies = [ "argh_derive", "argh_shared", ] [[package]] name = "argh_derive" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60949c42375351e9442e354434b0cba2ac402c1237edf673cac3a4bf983b8d3c" dependencies = [ "argh_shared", "heck", "proc-macro2", "quote", "syn 1.0.73", ] [[package]] name = "argh_shared" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a61eb019cb8f415d162cb9f12130ee6bbe9168b7d953c17f4ad049e4051ca00" [[package]] name = "arrayref" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" [[package]] name = "arrayvec" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "autocfg" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backdown" version = "1.1.2" dependencies = [ "anyhow", "argh", "blake3", "chrono", "cli-log", "crossbeam", "file-size", "fnv", "lazy-regex", "phf", "rayon", "serde", "serde_json", "termimad", ] [[package]] name = "bitflags" version = "2.6.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "blake3" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "729b71f35bd3fa1a4c86b85d32c8b9069ea7fe14f7a53cfabb65f62d4265b888" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", "digest", ] [[package]] name = "block-buffer" version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" dependencies = [ "generic-array", ] [[package]] name = "cc" version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" dependencies = [ "libc", "num-integer", "num-traits", "time", "winapi", ] [[package]] name = "cli-log" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d2ab00dc4c82ec28af25ac085aecc11ffeabf353755715a3113a7aa044ca5cc" dependencies = [ "chrono", "file-size", "log", "proc-status", ] [[package]] name = "constant_time_eq" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21a53c0a4d288377e7415b53dcfc3c04da5cdc2cc95c8d5ac178b58f0b861ad6" [[package]] name = "coolor" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "691defa50318376447a73ced869862baecfab35f6aabaa91a4cd726b315bfe1a" dependencies = [ "crossterm", ] [[package]] name = "crokey" version = 
"1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "520e83558f4c008ac06fa6a86e5c1d4357be6f994cce7434463ebcdaadf47bb1" dependencies = [ "crokey-proc_macros", "crossterm", "once_cell", "serde", "strict", ] [[package]] name = "crokey-proc_macros" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "370956e708a1ce65fe4ac5bb7185791e0ece7485087f17736d54a23a0895049f" dependencies = [ "crossterm", "proc-macro2", "quote", "strict", "syn 1.0.73", ] [[package]] name = "crossbeam" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845" dependencies = [ "cfg-if", "crossbeam-channel", "crossbeam-deque", "crossbeam-epoch", "crossbeam-queue", "crossbeam-utils", ] [[package]] name = "crossbeam-channel" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" dependencies = [ "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-deque" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" dependencies = [ "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" dependencies = [ "cfg-if", "crossbeam-utils", "lazy_static", "memoffset", "scopeguard", ] [[package]] name = "crossbeam-queue" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b10ddc024425c88c2ad148c1b0fd53f4c6d38db9697c9f1588381212fa657c9" dependencies = [ "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" dependencies = [ "cfg-if", "lazy_static", ] [[package]] name = "crossterm" version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" dependencies = [ "bitflags", "crossterm_winapi", "mio", "parking_lot", "rustix", "signal-hook", "signal-hook-mio", "winapi", ] [[package]] name = "crossterm_winapi" version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" dependencies = [ "winapi", ] [[package]] name = "crypto-common" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ "generic-array", "typenum", ] [[package]] name = "digest" version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", "subtle", ] [[package]] name = "either" version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] name = "errno" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", "windows-sys", ] [[package]] name = "file-size" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9544f10105d33957765016b8a9baea7e689bf1f0f2f32c2fa2f568770c38d2b3" [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "generic-array" version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" dependencies = [ "typenum", "version_check", ] [[package]] name = "heck" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" dependencies = [ "unicode-segmentation", ] [[package]] name = "hermit-abi" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "hermit-abi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "itoa" version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "lazy-regex" version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d8e41c97e6bc7ecb552016274b99fbb5d035e8de288c582d9b933af6677bfda" dependencies = [ "lazy-regex-proc_macros", "once_cell", "regex", ] [[package]] name = "lazy-regex-proc_macros" version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76e1d8b05d672c53cb9c7b920bbba8783845ae4f0b076e02a3db1d02c81b4163" dependencies = [ "proc-macro2", "quote", "regex", "syn 2.0.22", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "linux-raw-sys" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "lock_api" version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ "autocfg", "scopeguard", ] [[package]] name = "log" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ "cfg-if", ] [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memoffset" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9" dependencies = [ "autocfg", ] [[package]] name = "minimad" version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9c5d708226d186590a7b6d4a9780e2bdda5f689e0d58cd17012a298efd745d2" dependencies = [ "once_cell", ] [[package]] name = "mio" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ "hermit-abi 0.3.9", "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys", ] [[package]] name = "num-integer" version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" dependencies = [ "autocfg", "num-traits", ] [[package]] name = "num-traits" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" dependencies = [ "hermit-abi 0.1.19", "libc", ] [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "parking_lot" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", "parking_lot_core", ] [[package]] name = "parking_lot_core" version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", "windows-targets", ] [[package]] name = "phf" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ "phf_macros", "phf_shared", ] [[package]] name = "phf_generator" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" dependencies = [ "phf_shared", "rand", ] [[package]] name = "phf_macros" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" dependencies = [ "phf_generator", "phf_shared", "proc-macro2", "quote", "syn 2.0.22", ] [[package]] name = "phf_shared" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ "siphasher", ] [[package]] name = "proc-macro2" version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" dependencies = [ "unicode-ident", ] [[package]] name = "proc-status" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0e0c0ac915e7b76b47850ba4ffc377abde6c6ff9eeace61d0a89623db449712" dependencies = [ "thiserror", ] [[package]] name = "quote" version = "1.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" dependencies = [ "proc-macro2", ] [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "rand_core", ] [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" [[package]] name = "rayon" version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" dependencies = [ "autocfg", "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", "lazy_static", "num_cpus", ] [[package]] name = "redox_syscall" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" dependencies = [ "bitflags", ] [[package]] name = 
"regex" version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "rustix" version = "0.38.36" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", "windows-sys", ] [[package]] name = "ryu" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c9613b5a66ab9ba26415184cfc41156594925a9cf3a2057e57f31ff145f6568" [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "serde" version = "1.0.130" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.130" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" dependencies = [ "proc-macro2", "quote", "syn 1.0.73", ] [[package]] name = "serde_json" version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d0ffa0837f2dfa6fb90868c2b5468cad482e175f7dad97e7421951e663f2b527" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "signal-hook" version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" dependencies = [ "libc", "signal-hook-registry", ] [[package]] name = "signal-hook-mio" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd" dependencies = [ "libc", "mio", "signal-hook", ] [[package]] name = "signal-hook-registry" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ "libc", ] [[package]] name = "siphasher" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cbce6d4507c7e4a3962091436e56e95290cb71fa302d0d270e32130b75fbff27" [[package]] name = "smallvec" version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" [[package]] name = "strict" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f42444fea5b87a39db4218d9422087e66a85d0e7a0963a439b07bcdf91804006" [[package]] name = "subtle" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7" dependencies = [ "proc-macro2", "quote", "unicode-xid", ] [[package]] name = "syn" version = "2.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2efbeae7acf4eabd6bcdcbd11c92f45231ddda7539edc7806bd1a04a03b24616" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "termimad" version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "920e7c4671e79f3d9df269da9c8edf0dbc580044fd727d3594f7bfba5eb6107a" dependencies = [ "coolor", "crokey", "crossbeam", "lazy-regex", "minimad", "serde", "thiserror", "unicode-width", ] [[package]] name = "thiserror" version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" dependencies = [ "proc-macro2", "quote", "syn 1.0.73", ] [[package]] name = "time" version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" dependencies = [ "libc", "wasi 0.10.0+wasi-snapshot-preview1", "winapi", ] [[package]] name = "typenum" version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "unicode-ident" version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" [[package]] name = "unicode-segmentation" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" [[package]] name = "unicode-width" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" [[package]] name = 
"unicode-xid" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" [[package]] name = "version_check" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", 
"windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" backdown-1.1.2/Cargo.toml0000644000000030310000000000100106200ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal 
compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.59" name = "backdown" version = "1.1.2" authors = ["Canop "] build = false autobins = false autoexamples = false autotests = false autobenches = false description = "A smart CLI for removing thousands of duplicates on your disks" readme = "README.md" license = "MIT" repository = "https://github.com/Canop/backdown" [profile.release] strip = true [lib] name = "backdown" path = "src/lib.rs" [[bin]] name = "backdown" path = "src/main.rs" [dependencies.anyhow] version = "1.0.49" [dependencies.argh] version = "0.1.4" [dependencies.blake3] version = "1.4" [dependencies.chrono] version = "0.4" [dependencies.cli-log] version = "2.0" [dependencies.crossbeam] version = "0.8" [dependencies.file-size] version = "1.0" [dependencies.fnv] version = "1.0.7" [dependencies.lazy-regex] version = "3.3" [dependencies.phf] version = "0.11" features = ["macros"] [dependencies.rayon] version = "1.3" [dependencies.serde] version = "1.0" [dependencies.serde_json] version = "1.0" [dependencies.termimad] version = "0.30" backdown-1.1.2/Cargo.toml.orig0000644000000012700000000000100115620ustar [package] name = "backdown" version = "1.1.2" authors = ["Canop "] edition = "2021" rust-version = "1.59" description = "A smart CLI for removing thousands of duplicates on your disks" repository = "https://github.com/Canop/backdown" license = "MIT" readme = "README.md" [dependencies] argh = "0.1.4" anyhow = "1.0.49" blake3 = "1.4" chrono = "0.4" cli-log = "2.0" crossbeam = "0.8" file-size = "1.0" fnv = "1.0.7" lazy-regex = "3.3" phf = { version = "0.11", features = ["macros"] } rayon = "1.3" serde ="1.0" serde_json = "1.0" termimad = "0.30" [profile.release] 
strip = true [patch.crates-io] #minimad = { path = "../minimad" } #termimad = { path = "../termimad" } backdown-1.1.2/Cargo.toml.orig000064400000000000000000000012701046102023000143040ustar 00000000000000[package] name = "backdown" version = "1.1.2" authors = ["Canop "] edition = "2021" rust-version = "1.59" description = "A smart CLI for removing thousands of duplicates on your disks" repository = "https://github.com/Canop/backdown" license = "MIT" readme = "README.md" [dependencies] argh = "0.1.4" anyhow = "1.0.49" blake3 = "1.4" chrono = "0.4" cli-log = "2.0" crossbeam = "0.8" file-size = "1.0" fnv = "1.0.7" lazy-regex = "3.3" phf = { version = "0.11", features = ["macros"] } rayon = "1.3" serde ="1.0" serde_json = "1.0" termimad = "0.30" [profile.release] strip = true [patch.crates-io] #minimad = { path = "../minimad" } #termimad = { path = "../termimad" } backdown-1.1.2/LICENSE000064400000000000000000000020461046102023000124240ustar 00000000000000MIT License Copyright (c) 2021 Canop Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. backdown-1.1.2/README.md000064400000000000000000000107551046102023000127040ustar 00000000000000# backdown [![MIT][s2]][l2] [![Latest Version][s1]][l1] [![Build][s3]][l3] [![Chat on Miaou][s4]][l4] [s1]: https://img.shields.io/crates/v/backdown.svg [l1]: https://crates.io/crates/backdown [s2]: https://img.shields.io/badge/license-MIT-blue.svg [l2]: LICENSE [s3]: https://github.com/Canop/backdown/actions/workflows/rust.yml/badge.svg [l3]: https://github.com/Canop/backdown/actions/workflows/rust.yml [s4]: https://miaou.dystroy.org/static/shields/room.svg [l4]: https://miaou.dystroy.org/3768?Rust **Backdown** helps you safely and ergonomically remove duplicate files. Its design is based upon my observation of frequent patterns regarding build-up of duplicates over time, especially images and other media files. Finding duplicates is easy. Cleaning the disk when there are thousands of them is the hard part. What Backdown brings is the easy way to select and remove the duplicates you don't want to keep. A Backdown session goes through the following phases: 1. Backdown analyzes the directory of your choice and finds sets of duplicates (files whose content is exactly the same). Backdown ignores symlinks and files or directories whose name starts with a dot. 2. Backdown asks you a few questions depending on the analysis. Nothing is removed at this point: you only stage files for removal. Backdown never lets you stage all items in a set of identical files. 3. After optionally reviewing the list of staged files, you confirm the removals. 4. 
Backdown does the removals on disk # What it looks like Analysis and first question: ![screen 1](doc/screen-1.png) Another kind of question: ![screen 2](doc/screen-2.png) Yet another one: ![screen 3](doc/screen-3.png) Yet another one: ![screen 4](doc/screen-4.png) Review and Confirm: ![screen 5](doc/screen-5.png) At this point you may also export the report as JSON, and you may decide to replace each removed file with a link to one of the kept ones. # Installation ## From the crates.io repository You must have the Rust env installed: https://rustup.rs Run ```bash cargo install --locked backdown ``` ## From Source You must have the Rust env installed: https://rustup.rs Download this repository then run ```bash cargo install --path . ``` ## Precompiled binaries Unless you're a Rust developer, I recommend you just download the precompiled binaries, as this will save a lot of space on your disk. Binaries are made available at https://dystroy.org/backdown/download/ # Usage ## Deduplicate any kind of files ```bash backdown /some/directory ``` ## Deduplicate images ```bash backdown -i /some/directory ``` ## JSON report After the staging phase, you may decide to export a report as JSON. This doesn't prevent doing also the removals. 
The JSON looks like this: ```JSON { "dup_sets": [ { "file_len": 1212746, "files": { "trav-copy/2006-05 (mai)/HPIM0530.JPG": "remove", "trav-copy/2006-06 (juin)/HPIM0530 (another copy).JPG": "remove", "trav-copy/2006-06 (juin)/HPIM0530 (copy).JPG": "remove", "trav-copy/2006-06 (juin)/HPIM0530.JPG": "keep" } }, { "file_len": 1980628, "files": { "trav-copy/2006-03 (mars)/HPIM0608.JPG": "keep", "trav-copy/2006-05 (mai)/HPIM0608.JPG": "remove", "trav-copy/2006-06 (juin)/HPIM0608.JPG": "keep" } }, { "file_len": 1124764, "files": { "trav-copy/2006-05 (mai)/HPIM0529.JPG": "remove", "trav-copy/2006-06 (juin)/HPIM0529.JPG": "keep" } }, { "file_len": 1706672, "files": { "trav-copy/2006-05 (mai)/test.jpg": "remove", "trav-copy/2006-06 (juin)/HPIM0598.JPG": "keep" } } ], "len_to_remove": 8450302 } ``` # Advice * If you launch backdown on a big directory, it may find more duplicates than you suspect there are. Don't force yourself to answer *all* questions at first: if you stage the removals of the first dozen questions you'll already gain a lot, and you can do the other ones another day * Don't launch backdown at the root of your disk because you don't want to try and deal with duplicates in system resources, programs, build artefacts, etc. Launch backdown where you store your images, videos or music * Backdown isn't designed for dev directories and doesn't respect .gitignore rules * If you launch backdown in a directory with millions of files on a slow disk, you'll have to wait a long time while the content is hashed. Try with a smaller directory first if you have an HDD * If you're only interested in images, use the -i option backdown-1.1.2/compile-all-targets.sh000075500000000000000000000023651046102023000156270ustar 00000000000000# WARNING: This script is NOT meant for normal installation, it's dedicated # to the compilation of all supported targets, from a linux machine. 
H1="\n\e[30;104;1m\e[2K\n\e[A" # style first header H2="\n\e[30;104m\e[1K\n\e[A" # style second header EH="\e[00m\n\e[2K" # end header version=$(sed 's/version = "\([0-9.]\{1,\}\(-[a-z]\+\)\?\)"/\1/;t;d' Cargo.toml | head -1) echo -e "${H1}Compilation of all targets for backdown $version${EH}" # clean previous build rm -rf build mkdir build echo " build cleaned" # build the linux version echo -e "${H2}Compiling the linux version${EH}" cargo clean cargo build --release strip target/release/backdown mkdir build/x86_64-linux/ cp target/release/backdown build/x86_64-linux/ # build a musl version echo -e "${H2}Compiling the MUSL version${EH}" cargo clean cross build --release --target x86_64-unknown-linux-musl mkdir build/x86_64-unknown-linux-musl cp target/x86_64-unknown-linux-musl/release/backdown build/x86_64-unknown-linux-musl # build the windows version # use cargo cross echo -e "${H2}Compiling the Windows version${EH}" cargo clean cross build --target x86_64-pc-windows-gnu --release mkdir build/x86_64-pc-windows-gnu cp target/x86_64-pc-windows-gnu/release/backdown.exe build/x86_64-pc-windows-gnu/ backdown-1.1.2/deploy.sh000075500000000000000000000005111046102023000132450ustar 00000000000000# build the release zip ./release.sh version=$(sed 's/version = "\([0-9.]\{1,\}\)"/\1/;t;d' Cargo.toml | head -1) # deploy on dystroy.org rm -rf ~/dev/www/dystroy/backdown/download/* cp -r build/* ~/dev/www/dystroy/backdown/download/ cp "backdown_$version.zip" ~/dev/www/dystroy/backdown/download/ ~/dev/www/dystroy/deploy.sh backdown-1.1.2/release.sh000075500000000000000000000013411046102023000133730ustar 00000000000000# build a new release of backdown # This isn't used for normal compilation but for the building of the official releases version=$(sed 's/version = "\([0-9.]\{1,\}\)"/\1/;t;d' Cargo.toml | head -1) echo "Building release $version" # make the build directory and compile for all targets ./compile-all-targets.sh # add the readme and changelog in the build directory 
echo "This is backdown. More info and installation instructions on https://github.com/Canop/backdown" > build/README.md cp CHANGELOG.md build # publish version number echo "$version" > build/version # prepare the release archive rm backdown_*.zip zip -r "backdown_$version.zip" build/* # copy it to releases folder mkdir releases cp "backdown_$version.zip" releases backdown-1.1.2/src/args.rs000064400000000000000000000010231046102023000135020ustar 00000000000000use { argh::FromArgs, std::path::PathBuf, }; #[derive(FromArgs)] /// Help you remove duplicate files from your disks /// /// /// Source and doc at https://github.com/Canop/backdown pub struct Args { /// print the version #[argh(switch, short = 'v')] pub version: bool, /// whether to only handle image files #[argh(switch, short = 'i')] pub only_images: bool, #[argh(positional)] /// where to look for duplicates (will use . if no directory is provided) pub path: Option, } backdown-1.1.2/src/ask.rs000064400000000000000000000360041046102023000133330ustar 00000000000000use { crate::*, fnv::FnvHashMap, minimad::*, termimad::*, }; const MAX_LISTED_FILES: usize = 5; pub fn ask_on_dirs<'d>( dirs_report: &'d DirsReport, dups: &'d [DupSet], skin: &MadSkin, ) -> anyhow::Result> { let mut rr = RemovalReport::default(); let mut question_idx = 0; let mut questions = dirs_report.dup_dirs.len() + dirs_report.brotherhoods.len() + dirs_report.dir_pairs.len(); let ask_about_autosolve = dirs_report.auto_solvable_brotherhoods_count > 1; if ask_about_autosolve { questions += 1; } static MD: &str = r#" I'll now ask you up to *${questions}* questions to determine what files should be removed.\ No file will be removed until you have the possibility to review them after the staging step.\ You don't have to answer all questions:\ you may end the staging phase at any time and then either do the removals or quit. 
"#; let mut expander = OwningTemplateExpander::new(); expander.set("questions", questions); skin.print_owning_expander(&expander, &TextTemplate::from(MD)); // return true if break let check = |rr: &RemovalReport| { if rr.quit { return true; } mad_print_inline!( skin, " -> currently staged: **$0** duplicate files for a removed size of **$1**\n", // two following lines used for some screenshots so that I don't redo the staging // 1042, // "5.5G", rr.staged_removals.len(), file_size::fit_4(rr.len_to_remove), ); rr.broken }; let skip_auto_solvable_brotherhoods = ask_about_autosolve && { let solved = ask_auto_solve( question_idx, questions, dirs_report, dups, skin, &mut rr, )?; if check(&rr) { return Ok(rr); } question_idx += 1; solved }; for dup_dir in &dirs_report.dup_dirs { ask_on_dup_dir( question_idx, questions, dup_dir, dups, skin, &mut rr, )?; if check(&rr) { break; } question_idx += 1; } if rr.broken || rr.quit { return Ok(rr); } for brotherhood in &dirs_report.brotherhoods { if skip_auto_solvable_brotherhoods && brotherhood.is_auto_solvable { mad_print_inline!(skin, "skipping question *$0*\n", question_idx); } else { ask_on_brotherhood( question_idx, questions, brotherhood, dups, skin, &mut rr, )?; if check(&rr) { break; } } question_idx += 1; } if rr.broken || rr.quit { return Ok(rr); } for dir_pair in &dirs_report.dir_pairs { ask_on_dir_pair( question_idx, questions, dir_pair, dups, skin, &mut rr, )?; if check(&rr) { break; } question_idx += 1; } Ok(rr) } static MD_AUTO_SOLVE: &str = r#" ## Staging Question **${num}**/${questions} You have several duplicates with "copy" names in the same directory than their identical "source" (for example *${example_1}* and *${example_2}*). I can automatically stage those **${file_count}** duplicates, which would let you gain **${size}**. If you accept, you'll skip *${skippable_questions}* questions. 
"#; /// return whether auto solvable brotherhoods are solved (we'll skip their questions then) fn ask_auto_solve<'d>( question_idx: usize, questions: usize, dirs_report: &'d DirsReport, dups: &'d [DupSet], skin: &MadSkin, rr: &mut RemovalReport<'d>, ) -> anyhow::Result { debug_assert!(question_idx == 0); let mut removable_count = 0; let mut removable_len = 0; let mut skippable_questions = 0; let mut example_names = Vec::new(); for brotherhood in dirs_report.brotherhoods.iter().filter(|b| b.is_auto_solvable) { removable_count += brotherhood.files.len() - 1; removable_len += (brotherhood.files.len() - 1) as u64 * dups[brotherhood.dup_set_idx].file_len; skippable_questions += 1; if example_names.len() < 2 { example_names.push( brotherhood.files.iter() .map(|&dup_file_idx| DupFileRef { dup_set_idx: brotherhood.dup_set_idx, dup_file_idx, }) .filter_map(|dup_file_ref| dup_file_ref.copy_name(dups)) .next() .unwrap() // SAFETY: it's not auto solvable if there's no copy named file ); } } let mut expander = OwningTemplateExpander::new(); expander .set("num", question_idx + 1) .set("questions", questions) .set("example_1", example_names[0]) .set("example_2", example_names[1]) .set("skippable_questions", skippable_questions) .set("file_count", removable_count) .set("size", file_size::fit_4(removable_len)); skin.print_owning_expander(&expander, &TextTemplate::from(MD_AUTO_SOLVE)); Ok(ask!(skin, "Do you want me to automatically stage those copies ?", ('y') { ('y', "**Y**es") => { for brotherhood in dirs_report.brotherhoods.iter().filter(|b| b.is_auto_solvable) { let dup_file_refs = brotherhood.files.iter() .map(|&dup_file_idx| DupFileRef { dup_set_idx: brotherhood.dup_set_idx, dup_file_idx, }) .filter(|dup_file_ref| dup_file_ref.is_copy_named(dups)); for dup_file_ref in dup_file_refs { rr.stage_file(dup_file_ref, dups); } } true } ('n', "**N**o") => { false } ('e', "**E**nd staging and quit") => { rr.quit = true; false } })) } static MD_DUP_DIR: &str = r#" ## Staging Question 
**${num}**/${questions} The *${directory}* directory contains **${file_count}** files which are all present elsewhere.\ You can remove the whole directory without losing anything.\ This would let you gain **${size}**.\ "#; /// ask for a dir which contains only duplicates fn ask_on_dup_dir<'d>( question_idx: usize, questions: usize, dup_dir: &'d DupDir, dups: &'d [DupSet], skin: &MadSkin, rr: &mut RemovalReport<'d>, ) -> anyhow::Result<()> { // first we must make sure the dir doesn't contain the last file(s) of a dupset let mut file_idxs_per_dupset: FnvHashMap> = FnvHashMap::default(); for file_ref in &dup_dir.files { file_idxs_per_dupset.entry(file_ref.dup_set_idx) .or_default() .push(file_ref.dup_file_idx); } for (&dup_set_idx, file_idxs) in &file_idxs_per_dupset { let dup_set = &dups[dup_set_idx]; let not_here_or_staged_count = (0..dup_set.files.len()) .filter(|&dup_file_idx| { !rr.staged_removals.contains(&DupFileRef { dup_set_idx, dup_file_idx }) && !file_idxs.contains(&dup_file_idx) }) .count(); if not_here_or_staged_count == 0 { // dup_set would be removed -> skipping return Ok(()); } } // now we know we can stage the whole directory let removable_len = dup_dir.files.iter() .map(|dup_file_ref| dups[dup_file_ref.dup_set_idx].file_len) .sum(); let mut expander = OwningTemplateExpander::new(); expander .set("num", question_idx + 1) .set("questions", questions) .set("directory", dup_dir.path.to_string_lossy()) .set("file_count", dup_dir.files.len()) .set("size", file_size::fit_4(removable_len)); skin.print_owning_expander(&expander, &TextTemplate::from(MD_DUP_DIR)); ask!(skin, "What do you want to do with this directory?", ('s') { ('r', "Stage the whole directory for **r**emoval") => { for &file_ref in &dup_dir.files { rr.stage_file(file_ref, dups); } rr.staged_dir_removals.push(dup_dir.path); } ('s', "**S**kip and go to next question") => {} ('e', "**E**nd staging phase") => { rr.broken = true; } }); Ok(()) } static MD_BROTHERHOOD: &str = r#" ## Staging Question 
**${num}**/${questions} The *${parent}* directory contains **${file_count}** identical files, each one of size **${size}**. "#; // ask for a set of identical files in the same directory fn ask_on_brotherhood( question_idx: usize, questions: usize, brotherhood: &Brotherhood, dups: &[DupSet], skin: &MadSkin, rr: &mut RemovalReport, ) -> anyhow::Result<()> { // we check nothing because questions for brotherhoods come before the other ones // FIXME we must check it's not autosolved! let dup_set = &dups[brotherhood.dup_set_idx]; let mut expander = OwningTemplateExpander::new(); expander .set("num", question_idx + 1) .set("questions", questions) .set("parent", brotherhood.parent.to_string_lossy()) .set("file_count", brotherhood.files.len()) .set("size", file_size::fit_4(dup_set.file_len)); skin.print_owning_expander(&expander, &TextTemplate::from(MD_BROTHERHOOD)); let mut q = Question::new("What do you want to do with these duplicates?"); struct F<'f> { idx: usize, name: &'f str } let mut candidates: Vec = brotherhood.files.iter() .map(|&idx| F{ idx, name: dup_set.files[idx].path.file_name().unwrap().to_str().unwrap() }) .collect(); candidates.sort_by(|a, b| a.name.cmp(b.name)); for (i, f) in candidates.iter().enumerate() { q.add_answer( i + 1, format!("keep *{}* and stage other one(s) for removal", f.name), ); } q.add_answer('s', "**S**kip and go to next question"); q.add_answer('e', "**E**nd staging phase"); q.set_default("s"); match q.ask(skin)?.as_str() { "s" => {} "e" => { rr.broken = true; } a => { if let Ok(a) = a.parse::() { if a == 0 { println!("Options start at 1 - skipping"); } else { let chosen = &candidates[a - 1]; for i in 0..brotherhood.files.len() { if i != chosen.idx { rr.stage_file(brotherhood.file_ref(i), dups); } } } } } } Ok(()) } static MD_DIR_PAIR: &str = r#" ## Staging Question **${num}**/${questions} Left and right directories have **${file_count}** common files for a total duplicate size of **${removable_len}**. 
|-:|:-:|:-:| | |left|right| |-:|:-:|:-:| |directory|*${left_path}*|*${right_path}*| ${common_files |common files|${file_count}|${file_count}| } ${removable_files |removable file #${removable_file_idx}|**${left_file_name}**|**${right_file_name}**| } |already staged for removal|${removed_left_count}|${removed_right_count}| |other files|${left_other_count}|${right_other_count}| |-: "#; /// asking the question when left dir and right dir are different fn ask_on_dir_pair( question_idx: usize, questions: usize, dir_pair: &DirPair, dups: &[DupSet], skin: &MadSkin, rr: &mut RemovalReport, ) -> anyhow::Result<()> { // we must recount now because files may have been already // staged for removals let (mut removed_left_count, mut removed_right_count) = (0, 0); let (mut removable_left_count, mut removable_right_count) = (0, 0); let mut removable_pairs: Vec = Vec::new(); let mut removable_len: u64 = 0; for file_pair in &dir_pair.file_pairs { let removed_left = rr.staged_removals.contains(&file_pair.left_ref()); let removed_right = rr.staged_removals.contains(&file_pair.right_ref()); if removed_left { removed_left_count += 1; } else { removable_left_count += 1; } if removed_right { removed_right_count += 1; } else { removable_right_count += 1; } if !removed_left && !removed_right { removable_pairs.push(*file_pair); removable_len += dups[file_pair.dup_set_idx].file_len; } } if removable_pairs.is_empty() { mad_print_inline!(skin, "*skipping question because of previously staged removals*\n"); return Ok(()); } let left_dir_count = dir_pair.key.left_dir.read_dir()?.count(); if left_dir_count < removed_left_count + removable_left_count { println!("skipping question because some files were removed on disk"); return Ok(()); } let left_other_count = left_dir_count - removed_left_count - removable_left_count; let right_dir_count = dir_pair.key.right_dir.read_dir()?.count(); if right_dir_count < removed_right_count + removable_right_count { println!("skipping question because some files 
were removed on disk"); return Ok(()); } let right_other_count = right_dir_count - removed_right_count - removable_right_count; let mut expander = OwningTemplateExpander::new(); expander .set("num", question_idx + 1) .set("questions", questions) .set("file_count", removable_pairs.len()) .set("removable_len", file_size::fit_4(removable_len)) .set("left_path", dir_pair.key.left_dir.to_string_lossy()) .set("right_path", dir_pair.key.right_dir.to_string_lossy()) .set("removed_left_count", removed_left_count) .set("removed_right_count", removed_right_count) .set("left_other_count", left_other_count) .set("right_other_count", right_other_count); if removable_pairs.len() <= MAX_LISTED_FILES { for (removable_file_idx, file_pair) in removable_pairs.iter().enumerate() { expander.sub("removable_files") .set("removable_file_idx", removable_file_idx + 1) .set("left_file_name", file_pair.left_ref().file_name(dups)) .set("right_file_name", file_pair.right_ref().file_name(dups)); } } else { expander.sub("common_files"); } skin.print_owning_expander(&expander, &TextTemplate::from(MD_DIR_PAIR)); ask!(skin, "What do you want to do here?", ('s') { ('l', "Stage **l**eft files for removal") => { for file_pair in removable_pairs { rr.stage_file(file_pair.left_ref(), dups); } } ('r', "Stage **r**ight files for removal") => { for file_pair in removable_pairs { rr.stage_file(file_pair.right_ref(), dups); } } ('s', "**S**kip and go to next question") => { println!("skipped"); } ('e', "**E**nd staging phase") => { rr.broken = true; } }); Ok(()) } backdown-1.1.2/src/dirs.rs000064400000000000000000000152451046102023000135220ustar 00000000000000use { crate::*, fnv::FnvHashMap, std::{ cmp::{Ord, Ordering, Reverse}, path::Path, }, }; #[derive(Debug)] pub struct DirsReport<'d> { pub dup_dirs: Vec>, pub brotherhoods: Vec>, pub auto_solvable_brotherhoods_count: usize, pub dir_pairs: Vec>, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct DirPairKey<'d> { pub left_dir: &'d Path, pub 
right_dir: &'d Path, } #[derive(Debug)] pub struct DirPair<'d> { pub key: DirPairKey<'d>, pub file_pairs: Vec, } /// a brotherhood gather duplicates having the same parent #[derive(Debug)] pub struct Brotherhood<'d> { pub parent: &'d Path, pub dup_set_idx: usize, /// file indexes pub files: Vec, /// when all files have names like "thing (copy).png", "thing (another copy).png", etc. /// except one file, we can propose an automated resolution. /// Note that we don't check the start of filenames are identical because we /// don't, in fact, care. pub is_auto_solvable: bool, } /// a directory which contains only duplicates #[derive(Debug)] pub struct DupDir<'d> { pub path: &'d Path, pub files: Vec, } impl<'d> Brotherhood<'d> { fn maybe_add_files(&mut self, a_idx: usize, b_idx: usize) { if !self.files.contains(&a_idx) { self.files.push(a_idx); } if !self.files.contains(&b_idx) { self.files.push(b_idx); } } pub fn file_ref(&self, i: usize) -> DupFileRef { DupFileRef { dup_set_idx: self.dup_set_idx, dup_file_idx: self.files[i], } } pub fn gain(&self, dups: &[DupSet]) -> u64 { (self.files.len() - 1) as u64 * dups[self.dup_set_idx].file_len } } impl<'d> DirPairKey<'d> { pub fn new(a: &'d Path, b: &'d Path) -> (Self, bool) { if a.cmp(b) == Ordering::Less { ( DirPairKey { left_dir: a, right_dir: b, }, false ) } else { ( DirPairKey { left_dir: b, right_dir: a, }, true ) } } } impl<'d> DirPair<'d> { pub fn new( key: DirPairKey<'d>, file_pairs: Vec, ) -> Self { Self { key, file_pairs } } } impl<'d> DirsReport<'d> { pub fn compute(dups: &'d[DupSet]) -> anyhow::Result { let mut brotherhoods = Vec::new(); let mut auto_solvable_brotherhoods_count = 0; let mut dp_map: FnvHashMap> = FnvHashMap::default(); let mut dir_map: FnvHashMap<&Path, Vec> = FnvHashMap::default(); let mut brotherhood_per_parent: FnvHashMap<&Path, Brotherhood<'d>> = FnvHashMap::default(); for (dup_set_idx, dup) in dups.iter().enumerate() { brotherhood_per_parent.clear(); for (a_file_idx, a) in 
dup.files.iter().enumerate() { let a_parent = a.path.parent().unwrap(); // adding to the dir_map dir_map.entry(a_parent) .or_default() .push(DupFileRef { dup_set_idx, dup_file_idx: a_file_idx }); // building dir pair for b_file_idx in a_file_idx+1..dup.files.len() { let b = &dup.files[b_file_idx]; let b_parent = b.path.parent().unwrap(); let (dpk, inverted) = DirPairKey::new( a_parent, b_parent, ); let (left_file_idx, right_file_idx) = if inverted { (b_file_idx, a_file_idx) } else { (a_file_idx, b_file_idx) }; if a_parent == b_parent { // brotherhood brotherhood_per_parent .entry(a_parent) .or_insert_with(|| Brotherhood { parent: a_parent, dup_set_idx, files: Vec::new(), is_auto_solvable: false, }) .maybe_add_files(left_file_idx, right_file_idx); } else { // dir_pair dp_map.entry(dpk) .or_default() .push(FilePair { dup_set_idx, left_file_idx, right_file_idx, }); } } } for (_, mut brotherhood) in brotherhood_per_parent.drain() { let copy_count = brotherhood.files .iter() .map(|&dup_file_idx| DupFileRef { dup_set_idx: brotherhood.dup_set_idx, dup_file_idx, }) .filter(|dup_file_ref| dup_file_ref.is_copy_named(dups)) .count(); if copy_count + 1 == brotherhood.files.len() { brotherhood.is_auto_solvable = true; auto_solvable_brotherhoods_count += 1; } brotherhoods.push(brotherhood); } } // we remove the parent of brotherhoods from dir_map // because we don't want them in dup_dirs for brotherhood in &brotherhoods { dir_map.remove(brotherhood.parent); } let mut dup_dirs = Vec::new(); for (path, files) in dir_map.drain() { if files.len() < 3 { // small directories aren't interesting, we'll handle // the dups by comparing dup dirs continue; } let total_child_count = path.read_dir()?.count(); if total_child_count == files.len() { dup_dirs.push(DupDir { path, files }); } } // ordering dup_dirs.sort_by_key(|dd| Reverse(dd.files.len())); brotherhoods.sort_by_key(|b| Reverse(b.gain(dups))); let mut dir_pairs: Vec<_> = dp_map .drain() .map(|(key, file_pairs)| DirPair::new(key, 
file_pairs)) .collect(); dir_pairs.sort_by_key(|dp| Reverse(dp.file_pairs.len())); Ok(Self { dup_dirs, brotherhoods, auto_solvable_brotherhoods_count, dir_pairs, }) } } backdown-1.1.2/src/dup.rs000064400000000000000000000062031046102023000133430ustar 00000000000000use { lazy_regex::*, std::{ collections::HashSet, path::{Path, PathBuf}, }, }; // TODO virer et utiliser PathBuf directement ? #[derive(Debug)] pub struct DupFile { pub path: PathBuf, // pub staged_for_removal: bool, } /// the list of files having a hash #[derive(Debug, Default)] pub struct DupSet { pub files: Vec, // identical files pub file_len: u64, } #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq,)] pub struct DupFileRef { pub dup_set_idx: usize, pub dup_file_idx: usize, } impl DupFile { pub fn new(path: PathBuf) -> Self { Self { path, //staged_for_removal: false, } } } pub fn reference_file<'a>( dup_set_idx: usize, dup_set: &'a DupSet, staged_removals: &HashSet, ) -> Option<&'a Path> { let mut best: Option<&Path> = None; for (dup_file_idx, file) in dup_set.files.iter().enumerate() { let path = &file.path; let dup_file_ref = DupFileRef { dup_set_idx, dup_file_idx }; if staged_removals.contains(&dup_file_ref) { continue; } if let Some(previous) = best { if previous.to_string_lossy().len() > path.to_string_lossy().len() { best = Some(path); } } else { best = Some(path); } } best } impl DupFileRef { pub fn path(self, dups: &[DupSet]) -> &Path { &dups[self.dup_set_idx].files[self.dup_file_idx].path } pub fn file_name(self, dups:&[DupSet]) -> String { self.path(dups) .file_name() .map_or_else( || "".to_string(), |n| n.to_string_lossy().to_string() ) } /// get the file name when the file has a name like "thing (3).jpg" /// or "thing (3rd copy).png" pub fn copy_name(self, dups:&[DupSet]) -> Option<&str> { copy_name(self.path(dups)) } /// tells whether the file has a name like "thing (3).jpg" /// or "thing (3rd copy).png" pub fn is_copy_named(self, dups:&[DupSet]) -> bool { self.copy_name(dups).is_some() } 
} /// get the name if this path is of a "copy" file, that is an usual name for a copy pub fn copy_name(path: &Path) -> Option<&str> { path .file_name() .and_then(std::ffi::OsStr::to_str) .filter(|n| regex_is_match!(r#"(?x) .+ \(( \d+ | [^)]* copy )\) (\.\w+)? $ "#, n)) } #[test] fn test_is_copy_named() { use std::path::PathBuf; let copies = &[ "/some/path/to/bla (3).jpg", "bla (3455).jpg", "uuuuu (copy).rs", "/home/dys/Images/pink hexapodes (another copy).jpeg", "~/uuuuu (copy)", "uuuuu (3rd copy)", ]; for s in copies { assert!(copy_name(&PathBuf::from(s)).is_some()); } let not_copies = &[ "copy", "copy.txt", "bla.png", "/home/dys/not a copy", "(don't copy)", ]; for s in not_copies { assert!(copy_name(&PathBuf::from(s)).is_none()); } } backdown-1.1.2/src/dup_report.rs000064400000000000000000000112571046102023000147430ustar 00000000000000use { anyhow::Result, crossbeam::channel, crate::*, fnv::FnvHashMap, minimad::*, rayon::{ prelude::ParallelIterator, iter::ParallelBridge, }, std::{ cmp::Reverse, fs, path::PathBuf, }, termimad::*, }; #[derive(Default)] pub struct DupReport { pub dups: Vec, pub seen: usize, /// number of files which could be removed /// when keeping one of each set pub duplicate_count: usize, pub duplicate_len_sum: u64, } impl DupReport { pub fn len(&self) -> usize { self.dups.len() } pub fn build( root: PathBuf, only_images: bool, ) -> Result { let (s_matching_files, r_matching_files) = channel::unbounded(); let (s_hashed_files, r_hashed_files) = channel::unbounded::<(PathBuf, FileHash)>(); let file_generator = std::thread::spawn(move||{ let mut dirs = Vec::new(); dirs.push(root); while let Some(dir) = dirs.pop() { if let Ok(entries) = fs::read_dir(&dir) { for e in entries.flatten() { let path = e.path(); let name = match path.file_name().and_then(|s| s.to_str()) { Some(s) => s, None => { continue; }, }; if name.starts_with('.') { continue; } if let Ok(md) = path.symlink_metadata() { if md.is_dir() { // we add the directory to the channel of dirs 
needing processing dirs.push(path); continue; } if md.is_file() { if only_images { let ext = match path.extension().and_then(|s| s.to_str()) { Some(s) => s, None => { continue; }, }; if !ext::is_image(ext) { continue; } } s_matching_files.send(path).unwrap(); } } } } } }); // parallel computation of the hashes r_matching_files.into_iter().par_bridge() .for_each_with(s_hashed_files, |s, path| { if let Ok(hash) = FileHash::new(&path) { s.send((path, hash)).unwrap(); } }); let mut map: FnvHashMap> = FnvHashMap::default(); let mut seen = 0; r_hashed_files.iter() .for_each(|(path, hash)| { let e = map.entry(hash).or_default(); e.push(DupFile::new(path)); seen += 1; }); file_generator.join().unwrap(); let mut dups = Vec::new(); let mut duplicate_count = 0; let mut duplicate_len_sum = 0; for (_hash, files) in map.drain() { if files.len() < 2 { continue; } if let Ok(md) = fs::metadata(&files[0].path) { duplicate_count += files.len() - 1; let file_len = md.len(); if file_len > 0 { duplicate_len_sum += (files.len() - 1) as u64 * file_len; dups.push(DupSet { files, file_len, }); } } } dups.sort_by_key(|dup| Reverse(dup.files.len())); Ok(Self{ dups, seen, duplicate_count, duplicate_len_sum, }) } pub fn print_summary( &self, skin: &MadSkin, ) { static MD: &str = r#" I've hashed *${seen}* files and found *${set_count}* sets of duplicates.\ *${removable_count}* files can be removed to gain **${gain}**.\ "#; let mut expander = OwningTemplateExpander::new(); expander .set("seen", self.seen) .set("set_count", self.dups.len()) .set("removable_count", self.duplicate_count) .set("gain", file_size::fit_4(self.duplicate_len_sum)); skin.print_owning_expander(&expander, &TextTemplate::from(MD)); } pub fn is_empty(&self) -> bool { self.dups.is_empty() } } backdown-1.1.2/src/ext.rs000064400000000000000000000003411046102023000133500ustar 00000000000000use { phf::{phf_set, Set}, }; static IMAGE_EXTENSIONS: Set<&'static str> = phf_set! 
{ "jpg", "JPG", "jpeg", "JPEG", "png", "PNG", }; pub fn is_image(ext: &str) -> bool { IMAGE_EXTENSIONS.contains(ext) } backdown-1.1.2/src/file_pair.rs000064400000000000000000000010141046102023000145000ustar 00000000000000use { crate::*, }; #[derive(Debug, Clone, Copy)] pub struct FilePair { pub dup_set_idx: usize, pub left_file_idx: usize, pub right_file_idx: usize, } impl FilePair { pub fn left_ref(self) -> DupFileRef { DupFileRef { dup_set_idx: self.dup_set_idx, dup_file_idx: self.left_file_idx, } } pub fn right_ref(self) -> DupFileRef { DupFileRef { dup_set_idx: self.dup_set_idx, dup_file_idx: self.right_file_idx, } } } backdown-1.1.2/src/hash.rs000064400000000000000000000007461046102023000135040ustar 00000000000000 use { anyhow::Result, std::{ fs::File, io, path::Path, }, }; #[derive(Debug, PartialEq, Eq, Hash)] pub struct FileHash { hash: blake3::Hash, } impl FileHash { pub fn new>(path: P) -> Result { let mut file = File::open(path)?; let mut hasher = blake3::Hasher::new(); io::copy(&mut file, &mut hasher)?; let hash = hasher.finalize(); Ok(Self { hash, }) } } backdown-1.1.2/src/json.rs000064400000000000000000000020651046102023000135260ustar 00000000000000use { serde_json::Value, std::{ fs, io::Write, path::PathBuf, }, }; fn available_path(name: &str) -> PathBuf { let mut count = 1; let ext = "json"; loop { let cmp = if count > 1 { format!("-{}", count) } else { "".to_string() }; let file_name = format!( "{}-{}{}.{}", chrono::Local::now().format("%F-%Hh%M"), name, cmp, ext, ); let path = PathBuf::from(file_name); if !path.exists() { return path; } count += 1; } } /// write a JSON value in a file whose name will be based on the provided /// name, with a date and if necessary with an additional number to avoid /// collision. 
pub fn write_in_file( name: &str, value: &Value, ) -> anyhow::Result { let path = available_path(name); let mut file = fs::File::create(&path)?; let json = serde_json::to_string_pretty(value)?; writeln!(&mut file, "{}", json)?; Ok(path) } backdown-1.1.2/src/lib.rs000064400000000000000000000005401046102023000133170ustar 00000000000000#[macro_use] extern crate cli_log; pub mod args; pub mod ask; pub mod dirs; pub mod dup; pub mod dup_report; pub mod ext; pub mod file_pair; pub mod hash; mod json; pub mod removal_report; pub use { args::*, ask::*, dirs::*, dup::*, dup_report::*, file_pair::*, ext::*, hash::*, json::*, removal_report::*, }; backdown-1.1.2/src/main.rs000064400000000000000000000065301046102023000135020ustar 00000000000000#[macro_use] extern crate cli_log; use { backdown::*, anyhow::Result, crossterm::style::{Attribute::*, Color::*}, termimad::*, }; fn run_app() -> Result<()> { let args: Args = argh::from_env(); if args.version { println!("backdown {}", env!("CARGO_PKG_VERSION")); return Ok(()); } let root = args.path .unwrap_or_else(|| std::env::current_dir().unwrap()); let skin = make_skin(); info!("root: {:?}", &root); skin.print_text("\n# Phase 1) Analysis"); mad_print_inline!(skin, "Analyzing directory *$0*...\n", root.to_string_lossy()); let dup_report = time!( Info, "computing dup sets", DupReport::build(root, args.only_images)?, ); dup_report.print_summary(&skin); if dup_report.is_empty() { println!("There's nothing to remove"); return Ok(()); } let dirs_report = time!( Info, "computing dirs report", DirsReport::compute(&dup_report.dups)?, ); skin.print_text("\n# Phase 2) Staging: choose files to remove"); let rr = ask_on_dirs(&dirs_report, &dup_report.dups, &skin)?; if rr.is_empty() || rr.quit { return Ok(()); } skin.print_text("\n# Phase 3) Review and confirm removals"); let mut exported = false; loop { let mut question = Question::new("What do you want to do now?"); question.add_answer('s', "Review touched **s**ets of identical files"); if 
!exported { question.add_answer( 'j', "Export touched sets of identical files in a **J**SON file", ); } question.add_answer('f', "Review all **f**iles staged for removal"); question.add_answer('r', "Do the **r**emovals now"); #[cfg(unix)] question.add_answer('l', "Replace removed files with **l**inks"); question.add_answer('q', "**Q**uit *backdown*, removing nothing"); match question.ask(&skin)?.as_ref() { "s" => { rr.list_dup_sets(&dup_report.dups, &skin); } "j" => { let value = rr.dup_sets_as_json(&dup_report.dups); let path = write_in_file("backdown-report", &value)?; mad_print_inline!(skin, "Wrote *$0*\n", path.to_string_lossy()); exported = true; } "f" => { rr.list_staged_removals(&dup_report.dups, &skin); } "r" => { rr.do_the_removal(&dup_report.dups, &skin)?; break; } "l" => { #[cfg(unix)] rr.replace_staged_with_links(&dup_report.dups, &skin)?; break; } "q" => { break; } _ => {} // should not happen } } Ok(()) } fn main() { init_cli_log!(); if let Err(e) = run_app() { eprintln!("{}", e); } info!("bye"); } fn make_skin() -> MadSkin { let mut skin = MadSkin::default(); skin.table.align = Alignment::Left; skin.headers[0].align = Alignment::Left; skin.set_headers_fg(AnsiValue(178)); skin.bold.set_fg(Yellow); skin.italic.set_fg(AnsiValue(204)); skin.italic.remove_attr(Italic); skin.scrollbar.thumb.set_fg(AnsiValue(178)); skin.code_block.align = Alignment::Center; skin } backdown-1.1.2/src/removal_report.rs000064400000000000000000000221071046102023000156140ustar 00000000000000use { crate::*, minimad::*, serde_json::{json, Value}, std::{ collections::{HashMap, HashSet}, fs, path::Path, }, termimad::*, }; #[derive(Debug, Clone, Default)] pub struct RemovalReport<'d> { pub dup_sets_with_staged: HashSet, pub staged_removals: HashSet, pub staged_dir_removals: Vec<&'d Path>, pub len_to_remove: u64, pub broken: bool, pub quit: bool, } impl<'d> RemovalReport<'d> { pub fn stage_file(&mut self, dup_file_ref: DupFileRef, dups: &[DupSet]) { self.len_to_remove += 
dups[dup_file_ref.dup_set_idx].file_len; self.dup_sets_with_staged.insert(dup_file_ref.dup_set_idx); self.staged_removals.insert(dup_file_ref); // println!("staged {:?}", &dups[dup_file_ref.dup_set_idx].files[dup_file_ref.dup_file_idx].path); } pub fn is_empty(&self) -> bool { self.staged_removals.is_empty() } pub fn list_staged_removals( &self, dups: &[DupSet], skin: &MadSkin, ) { mad_print_inline!( skin, "**$0** files planned for removal for a total size of **$1**:\n", self.staged_removals.len(), file_size::fit_4(self.len_to_remove), ); for (idx, file_ref) in self.staged_removals.iter().enumerate() { let path = file_ref.path(dups); let size = dups[file_ref.dup_set_idx].file_len; mad_print_inline!( skin, "#$0 : *$1* (**$2**)\n", idx + 1, path.to_string_lossy(), file_size::fit_4(size), ); } } /// write the report as a JSON file pub fn dup_sets_as_json( &self, dups: &[DupSet], ) -> Value { json!({ "len_to_remove": self.len_to_remove, "dup_sets": dups.iter().enumerate() .filter_map(|(dup_set_idx, dup_set)| { if !self.dup_sets_with_staged.contains(&dup_set_idx) { return None; } Some(json!({ "file_len": dup_set.file_len, "files": dup_set.files.iter() .enumerate() .map(|(dup_file_idx, file)| { let file = file.path.to_string_lossy().to_string(); let file_ref = DupFileRef { dup_set_idx, dup_file_idx }; let action = if self.staged_removals.contains(&file_ref) { "remove" } else { "keep" }; (file, action) }) .collect::>() })) }) .collect::>(), }) } pub fn list_dup_sets( &self, dups: &[DupSet], skin: &MadSkin, ) { static MD: &str = r#" |:-|:-| |Set #*${set_num}* : each file is **${file_len}**|action| |:-|:-:| ${files |${path}|**${action}**| } |- "#; let template = TextTemplate::from(MD); for (dup_set_idx, dup_set) in dups.iter().enumerate() { if !self.dup_sets_with_staged.contains(&dup_set_idx) { continue; } let mut expander = OwningTemplateExpander::new(); expander .set("set_num", dup_set_idx + 1) .set("file_len", file_size::fit_4(dup_set.file_len)); for (dup_file_idx, file) 
in dup_set.files.iter().enumerate() { let file_ref = DupFileRef { dup_set_idx, dup_file_idx }; expander.sub("files") .set("path", file.path.to_string_lossy()) .set_md( "action", if self.staged_removals.contains(&file_ref) { "*remove*" } else { "keep" } ); } skin.print_owning_expander(&expander, &template); } } /// "Normally" the algorithms of backdown never remove all files /// in a set of identical files. But if I change those algorithms /// and make them more complex, I may make an error. So this /// function will check there's at least one kept file in each /// touched set, and will raise an error if a set is totally /// emptied. /// This *must* be called just before starting the real removals. pub fn check_no_emptied_set( &self, dups: &[DupSet], ) -> anyhow::Result<()> { for (dup_set_idx, dup_set) in dups.iter().enumerate() { let mut staged_count = 0; for dup_file_idx in 0..dup_set.files.len() { if self.staged_removals.contains(&DupFileRef{ dup_set_idx, dup_file_idx }) { staged_count += 1; } } if staged_count >= dup_set.files.len() { anyhow::bail!("We staged all files in set for removal! 
Abort!"); } } Ok(()) } #[cfg(unix)] pub fn replace_staged_with_links( &self, dups: &[DupSet], skin: &MadSkin, ) -> anyhow::Result<()> { use std::os::unix::fs::symlink; self.check_no_emptied_set(dups)?; skin.print_text("\n# Phase 4) Replace staged duplicates with links"); println!("Replacing..."); let mut removed_len = 0; let mut removed_count = 0; // file removals for dup_file_ref in &self.staged_removals { let dup_set = &dups[dup_file_ref.dup_set_idx]; let path = dup_file_ref.path(dups); let link_destination = match reference_file(dup_file_ref.dup_set_idx, dup_set, &self.staged_removals) { Some(p) => p, None => { anyhow::bail!("unexpected lack of kept file in dup set"); } }; let link_destination = link_destination.canonicalize()?; match fs::remove_file(path) { Ok(()) => { removed_count += 1; removed_len += dups[dup_file_ref.dup_set_idx].file_len; match symlink(&link_destination, path) { Ok(()) => { // println!("link {:?} -> {:?}", path, link_destination); } Err(e) => { mad_print_inline!( skin, " Failed to remove create link *$1* -> *$2* : $3\n", path.to_string_lossy(), link_destination.to_string_lossy(), e, ); } } } Err(e) => { mad_print_inline!( skin, " Failed to remove *$1* : $2\n", path.to_string_lossy(), e, ); } } } mad_print_inline!( skin, "Removed *$0* files with a total size of **$1**\n", removed_count, file_size::fit_4(removed_len), ); Ok(()) } pub fn do_the_removal( &self, dups: &[DupSet], skin: &MadSkin, ) -> anyhow::Result<()> { self.check_no_emptied_set(dups)?; skin.print_text("\n# Phase 4) Removal"); println!("Removing..."); let mut removed_len = 0; let mut removed_count = 0; // file removals for dup_file_ref in &self.staged_removals { let path = dup_file_ref.path(dups); match fs::remove_file(path) { Ok(()) => { removed_count += 1; removed_len += dups[dup_file_ref.dup_set_idx].file_len; } Err(e) => { mad_print_inline!( skin, " Failed to remove *$1* : $2\n", path.to_string_lossy(), e, ); } } } // directory removals for path in &self.staged_dir_removals 
{ debug!("removing {:?}", path); if let Err(e) = fs::remove_dir(path) { mad_print_inline!( skin, " Failed to remove directory *$1* : $2\n", path.to_string_lossy(), e, ); } } mad_print_inline!( skin, "Removed *$0* files with a total size of **$1**\n", removed_count, file_size::fit_4(removed_len), ); Ok(()) } }