piz-0.5.1/.cargo_vcs_info.json0000644000000001360000000000100116410ustar { "git": { "sha1": "0fcb6f2d610d2f34a4d9838becfe929875f3e855" }, "path_in_vcs": "" }piz-0.5.1/.github/workflows/ci.yml000064400000000000000000000011451046102023000151450ustar 00000000000000name: CI on: push: branches: [ master ] pull_request: branches: [ master ] env: CARGO_TERM_COLOR: always jobs: format_and_docs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Cargo fmt run: cargo fmt -- --check - name: Cargo doc run: cargo doc build_and_test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Build lib, tests, and examples run: cargo build --all-targets --release - name: Build test inputs run: ./tests/create-inputs.sh - name: Run tests run: cargo test --release piz-0.5.1/.gitignore000064400000000000000000000000231046102023000124140ustar 00000000000000target/ Cargo.lock piz-0.5.1/Cargo.lock0000644000000451520000000000100076230ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "adler" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" version = "0.7.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" dependencies = [ "memchr", ] [[package]] name = "android_system_properties" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" dependencies = [ "libc", ] [[package]] name = "ansi_term" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" dependencies = [ "winapi", ] [[package]] name = "anyhow" version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9a8f622bcf6ff3df478e9deba3e03e4e04b300f8e6a139e192c05fa3490afc7" [[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi", "libc", "winapi", ] [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bstr" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" dependencies = [ "lazy_static", "memchr", "regex-automata", "serde", ] [[package]] name = "bumpalo" version = "3.11.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d" [[package]] name = "camino" version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88ad0e1e3e88dd237a156ab9f571021b8a158caa0ae44b1968a241efb5144c1e" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1" dependencies = [ "iana-time-zone", "js-sys", "num-integer", "num-traits", "time", "wasm-bindgen", "winapi", ] [[package]] name = "clap" version = "2.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", "bitflags", "strsim", "textwrap", "unicode-width", "vec_map", ] [[package]] name = "codepage-437" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e40c1169585d8d08e5675a39f2fc056cd19a258fc4cba5e3bbf4a9c1026de535" dependencies = [ "csv", ] [[package]] name = "core-foundation-sys" version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "crc32fast" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" dependencies = [ "cfg-if", ] [[package]] name = "crossbeam-channel" version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" dependencies = [ "cfg-if", 
"crossbeam-utils", ] [[package]] name = "crossbeam-deque" version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" dependencies = [ "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset", "once_cell", "scopeguard", ] [[package]] name = "crossbeam-utils" version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" dependencies = [ "cfg-if", "once_cell", ] [[package]] name = "csv" version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" dependencies = [ "bstr", "csv-core", "itoa", "ryu", "serde", ] [[package]] name = "csv-core" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" dependencies = [ "memchr", ] [[package]] name = "either" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" [[package]] name = "env_logger" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" dependencies = [ "atty", "humantime", "log", "regex", "termcolor", ] [[package]] name = "flate2" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" dependencies = [ "crc32fast", "miniz_oxide", ] [[package]] name 
= "heck" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" dependencies = [ "unicode-segmentation", ] [[package]] name = "hermit-abi" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "iana-time-zone" version = "0.1.47" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c495f162af0bf17656d0014a0eded5f3cd2f365fdd204548c2869db89359dc7" dependencies = [ "android_system_properties", "core-foundation-sys", "js-sys", "once_cell", "wasm-bindgen", "winapi", ] [[package]] name = "itoa" version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "js-sys" version = "0.3.59" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2" dependencies = [ "wasm-bindgen", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.132" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" [[package]] name = "log" version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] [[package]] name = "memchr" version = 
"2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memmap" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" dependencies = [ "libc", "winapi", ] [[package]] name = "memoffset" version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" dependencies = [ "autocfg", ] [[package]] name = "miniz_oxide" version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34" dependencies = [ "adler", ] [[package]] name = "num-integer" version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ "autocfg", "num-traits", ] [[package]] name = "num-traits" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ "hermit-abi", "libc", ] [[package]] name = "once_cell" version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f7254b99e31cad77da24b08ebf628882739a608578bb1bcdfc1f9c21260d7c0" [[package]] name = "piz" version = "0.5.1" dependencies = [ "anyhow", "camino", "chrono", "codepage-437", "crc32fast", "env_logger", "flate2", "log", "memchr", "memmap", "rayon", "stderrlog", "structopt", "thiserror", ] [[package]] name = "proc-macro-error" version = "1.0.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", "syn", "version_check", ] [[package]] name = "proc-macro-error-attr" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2", "quote", "version_check", ] [[package]] name = "proc-macro2" version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" dependencies = [ "autocfg", "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", "num_cpus", ] [[package]] name = "regex" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" version = "0.6.27" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" [[package]] name = "ryu" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "serde" version = "1.0.144" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860" [[package]] name = "stderrlog" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af95cb8a5f79db5b2af2a46f44da7594b5adbcbb65cbf87b8da0959bfdd82460" dependencies = [ "atty", "chrono", "log", "termcolor", "thread_local", ] [[package]] name = "strsim" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "structopt" version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" dependencies = [ "clap", "lazy_static", "structopt-derive", ] [[package]] name = "structopt-derive" version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" dependencies = [ "heck", "proc-macro-error", "proc-macro2", "quote", "syn", ] [[package]] name = "syn" version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "termcolor" version = "1.1.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ "winapi-util", ] [[package]] name = "textwrap" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" dependencies = [ "unicode-width", ] [[package]] name = "thiserror" version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c1b05ca9d106ba7d2e31a9dab4a64e7be2cce415321966ea3132c49a656e252" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8f2591983642de85c921015f3f070c665a197ed69e417af436115e3a1407487" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "thread_local" version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" dependencies = [ "once_cell", ] [[package]] name = "time" version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" dependencies = [ "libc", "wasi", "winapi", ] [[package]] name = "unicode-ident" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" [[package]] name = "unicode-segmentation" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" [[package]] name = "unicode-width" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" [[package]] name = "vec_map" version = "0.8.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] name = "wasm-bindgen" version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d" dependencies = [ "cfg-if", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da" dependencies = [ "proc-macro2", "quote", "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a" [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" dependencies = [ "winapi", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" piz-0.5.1/Cargo.toml0000644000000032420000000000100076400ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "piz" version = "0.5.1" authors = ["Matt Kline "] description = """ piz (a Parallel Implementation of Zip) is a ZIP archive reader designed to concurrently decompress files using a simple API. 
""" documentation = "https://docs.rs/piz" readme = "README.md" keywords = [ "zip", "archive", "parallel", "thread", "concurrency", ] categories = [ "compression", "concurrency", ] license = "Zlib" repository = "https://github.com/mrkline/piz-rs" resolver = "2" [profile.release] debug = true [dependencies.camino] version = "1.0" [dependencies.chrono] version = "0.4" [dependencies.codepage-437] version = "0.1" [dependencies.crc32fast] version = "1.0" [dependencies.flate2] version = "1.0" [dependencies.log] version = "0.4" [dependencies.memchr] version = "2.0" [dependencies.thiserror] version = "1.0" [dev-dependencies.anyhow] version = "1.0" [dev-dependencies.env_logger] version = "0.8" [dev-dependencies.memmap] version = "0.7" [dev-dependencies.rayon] version = "1.0" [dev-dependencies.stderrlog] version = "0.5.1" [dev-dependencies.structopt] version = "0.3" [features] check-local-metadata = [] default = ["check-local-metadata"] piz-0.5.1/Cargo.toml.orig000064400000000000000000000015461046102023000133260ustar 00000000000000[package] name = "piz" version = "0.5.1" edition = "2021" authors = ["Matt Kline "] license = "Zlib" repository = "https://github.com/mrkline/piz-rs" readme = "README.md" documentation = "https://docs.rs/piz" description = """ piz (a Parallel Implementation of Zip) is a ZIP archive reader designed to concurrently decompress files using a simple API. """ keywords = ["zip", "archive", "parallel", "thread", "concurrency"] categories = ["compression", "concurrency"] #Profiling! 
[profile.release] debug = true [dependencies] camino = "1.0" codepage-437 = "0.1" crc32fast = "1.0" flate2 = "1.0" log = "0.4" thiserror = "1.0" memchr = "2.0" chrono = "0.4" [dev-dependencies] anyhow = "1.0" env_logger = "0.8" rayon = "1.0" memmap = "0.7" stderrlog = "0.5.1" structopt = "0.3" [features] default = ["check-local-metadata"] check-local-metadata = [] piz-0.5.1/LICENSE.md000064400000000000000000000015241046102023000120370ustar 00000000000000 Copyright © 2022 Matt Kline This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. piz-0.5.1/README.md000064400000000000000000000070541046102023000117160ustar 00000000000000# piz: A Parallel Implementation of Zip (in Rust) ![CI status](https://github.com/mrkline/piz-rs/workflows/CI/badge.svg) piz is a Zip archive reader designed to decompress any number of files concurrently using a simple API: ```rust // For smaller files, // // let bytes = fs::read("foo.zip") // let archive = ZipArchive::new(&bytes)?; // // works just fine. Memory map larger files! let zip_file = File::open("foo.zip")?; let mapping = unsafe { Mmap::map(&zip_file)? }; let archive = ZipArchive::new(&mapping)?; // We can iterate through the entries in the archive directly... 
// // for entry in archive.entries() { // let mut reader = archive.read(entry)?; // // Read away! // } // // ...but ZIP doesn't guarantee that entries are in any particular order, // that there aren't duplicates, that an entry has a valid file path, etc. // Let's do some validation and organize them into a tree of files and folders. let tree = as_tree(archive.entries())?; // With that done, we can get a file (or directory)'s metadata from its path. let metadata = tree.lookup("some/specific/file")?; // And read the file out, if we'd like: let mut reader = archive.read(metadata)?; let mut save_to = File::create(&metadata.file_name)?; io::copy(&mut reader, &mut save_to)?; // Readers are `Send`, so we can read out as many as we'd like in parallel. // Here we'll use Rayon to read out the whole archive with all cores: tree.files() .par_bridge() .try_for_each(|entry| { if let Some(parent) = entry.file_name.parent() { // Create parent directories as needed. fs::create_dir_all(parent)?; } let mut reader = archive.read(entry)?; let mut save_to = File::create(&entry.file_name)?; io::copy(&mut reader, &mut save_to)?; Ok(()) })?; ``` Zip is an interesting archive format: unlike compressed tarballs often seen in Linux land (`*.tar.gz`, `*.tar.zst`, ...), each file in a Zip archive is compressed independently, with a central directory telling us where to find it. This allows us to extract multiple files simultaneously so long as we can read from multiple places at once. Users can either read the entire archive into memory, or, for larger archives, [memory-map](https://docs.rs/memmap/0.7.0/memmap/struct.Mmap.html) the file. (On 64-bit systems, this allows us to treat archives as a contiguous byte range even if the file is _much_ larger than physical RAM. 32-bit systems are limited by address space to archives under 4 GB, but piz _should_ be well-behaved if the archive is small enough.) 
## Examples See `unzip/` for a simple CLI example that unzips a provided file into the current directory. ## Tests `test_harness/` contains some smoke tests against a few inputs, e.g.: - A basic, "Hello, Zip!" archive of a few text files - The same, but with some junk prepended to it - A Zip64 archive with files > 2^32 bytes If it doesn't find these files, it creates them with a shell script (which assumes a Unix-y environment). ## Future plans Piz currently provides limited metadata for each file (path, size, CRC32, last-modified time, etc.). Additional info - like file permissions - should be added later. Support for compression algorithms besides DEFLATE (like Bzip2) could also be added. ## Thanks Many thanks to - Hans Wennborg for their fantastic article, [Zip Files: History, Explanation and Implementation](https://www.hanshq.net/zip.html) - Mathijs van de Nes's [zip-rs](https://github.com/mvdnes/zip-rs), the main inspiration of this project and a great example of a Zip decoder in Rust piz-0.5.1/examples/unzip.rs000064400000000000000000000044341046102023000137670ustar 00000000000000use std::fs::{self, File}; use std::io; use std::path::PathBuf; use anyhow::*; use log::*; use memmap::Mmap; use rayon::prelude::*; use structopt::*; use piz::read::*; #[derive(Debug, StructOpt)] #[structopt(name = "unzip", about = "Dumps a .zip file into the current directory")] struct Opt { /// Pass multiple times for additional verbosity (info, debug, trace) #[structopt(short, long, parse(from_occurrences))] verbosity: usize, /// Change to the given directory before perfoming any operations. #[structopt(short = "C", long)] directory: Option, /// Prints the tree of files in the ZIP archive instead of extracting them. 
#[structopt(short = "n", long)] dry_run: bool, #[structopt(name("ZIP file"))] zip_path: PathBuf, } fn main() -> Result<()> { let args = Opt::from_args(); let mut errlog = stderrlog::new(); errlog.verbosity(args.verbosity + 1); errlog.init()?; if let Some(chto) = args.directory { std::env::set_current_dir(&chto) .with_context(|| format!("Couldn't set working directory to {}", chto.display()))?; } info!("Memory mapping {:#?}", &args.zip_path); let zip_file = File::open(&args.zip_path).context("Couldn't open zip file")?; let mapping = unsafe { Mmap::map(&zip_file).context("Couldn't mmap zip file")? }; let archive = ZipArchive::with_prepended_data(&mapping) .context("Couldn't load archive")? .0; let tree = as_tree(archive.entries())?; if args.dry_run { print_tree(&tree) } else { read_zip(&tree, &archive) } } fn print_tree(tree: &DirectoryContents) -> Result<()> { for entry in tree.traverse() { println!("{}", entry.metadata().path); } Ok(()) } fn read_zip(tree: &DirectoryContents, archive: &ZipArchive) -> Result<()> { tree.files().par_bridge().try_for_each(|entry| { if let Some(parent) = entry.path.parent() { fs::create_dir_all(parent) .with_context(|| format!("Couldn't create directory {}", parent))?; } let mut reader = archive.read(entry)?; let mut sink = File::create(&*entry.path) .with_context(|| format!("Couldn't create file {}", entry.path))?; io::copy(&mut reader, &mut sink)?; Ok(()) }) } piz-0.5.1/piz/.gitignore000064400000000000000000000000231046102023000132160ustar 00000000000000/target Cargo.lock piz-0.5.1/src/arch.rs000064400000000000000000000010261046102023000125020ustar 00000000000000//! (Hopefully) handles 64-bit offsets gracefully on 32-bit platforms use crate::result::*; /// A checked cast from u64 to usize /// /// We could use the `cast` crate /// (), /// but this is the only one we really need. 
pub fn usize>(i: I) -> ZipResult { let i: u64 = i.into(); if cfg!(target_pointer_width = "64") { Ok(i as usize) } else if i > usize::MAX as u64 { Err(ZipError::InsufficientAddressSpace) } else { Ok(i as usize) } } piz-0.5.1/src/crc_reader.rs000064400000000000000000000050171046102023000136620ustar 00000000000000//! Helper module to compute a CRC32 checksum //! //! Borrowed from zip-rs: //! use std::io; use std::io::prelude::*; use crc32fast::Hasher; /// Reader that validates the CRC32 when it reaches the EOF. pub struct Crc32Reader { inner: R, hasher: Hasher, provided_checksum: u32, } impl Crc32Reader { pub fn new(inner: R, provided_checksum: u32) -> Crc32Reader { Crc32Reader { inner, hasher: Hasher::new(), provided_checksum, } } /// Returns true if the final checksum matches the one provided by `new()` fn check_matches(&self) -> bool { self.provided_checksum == self.hasher.clone().finalize() } } impl Read for Crc32Reader { fn read(&mut self, buf: &mut [u8]) -> io::Result { let count = match self.inner.read(buf) { Ok(0) if !buf.is_empty() && !self.check_matches() => { return Err(io::Error::new(io::ErrorKind::Other, "Invalid checksum")) } Ok(n) => n, Err(e) => return Err(e), }; self.hasher.update(&buf[0..count]); Ok(count) } } #[cfg(test)] mod test { use super::*; use std::io::Read; #[test] fn test_empty_reader() { let data: &[u8] = b""; let mut buf = [0; 1]; let mut reader = Crc32Reader::new(data, 0); assert_eq!(reader.read(&mut buf).unwrap(), 0); let mut reader = Crc32Reader::new(data, 1); assert!(reader .read(&mut buf) .unwrap_err() .to_string() .contains("Invalid checksum")); } #[test] fn test_byte_by_byte() { let data: &[u8] = b"1234"; let mut buf = [0; 1]; let mut reader = Crc32Reader::new(data, 0x9be3e0a3); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 0); // Can keep reading 0 
bytes after the end assert_eq!(reader.read(&mut buf).unwrap(), 0); } #[test] fn test_zero_read() { let data: &[u8] = b"1234"; let mut buf = [0; 5]; let mut reader = Crc32Reader::new(data, 0x9be3e0a3); assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0); assert_eq!(reader.read(&mut buf).unwrap(), 4); } } piz-0.5.1/src/lib.rs000064400000000000000000000062671046102023000123470ustar 00000000000000//! piz is a Zip archive reader designed to decompress any number of files //! concurrently using a simple API: //! //! ```no_run //! # use std::fs; //! # use piz::*; //! // For smaller files, //! let bytes = fs::read("foo.zip")?; //! let archive = ZipArchive::new(&bytes)?; //! # Ok::<(), Box>(()) //! ``` //! works just fine. Memory map larger files! //! ```no_run //! # use std::fs::{self, File}; //! # use std::io; //! # extern crate anyhow; //! # extern crate rayon; //! # use rayon::prelude::*; //! # extern crate memmap; //! # use memmap::Mmap; //! # use piz::*; //! # use piz::read::*; //! # //! let zip_file = File::open("foo.zip")?; //! let mapping = unsafe { Mmap::map(&zip_file)? }; //! let archive = ZipArchive::new(&mapping)?; //! //! // We can iterate through the entries in the archive directly... //! // //! // for entry in archive.entries() { //! // let mut reader = archive.read(entry)?; //! // // Read away! //! // } //! // //! // ...but ZIP doesn't guarantee that entries are in any particular order, //! // that there aren't duplicates, that an entry has a valid file path, etc. //! // Let's do some validation and organize them into a tree of files and folders. //! let tree = as_tree(archive.entries())?; //! //! // With that done, we can get a file (or directory)'s metadata from its path. //! let metadata = tree.lookup("some/specific/file")?; //! // And read the file out, if we'd like: //! let mut reader = archive.read(metadata)?; //! let mut save_to = File::create(&*metadata.path)?; //! io::copy(&mut reader, &mut save_to)?; //! //! 
// Readers are `Send`, so we can read out as many as we'd like in parallel. //! // Here we'll use Rayon to read out the whole archive with all cores: //! tree.files() //! .par_bridge() //! .try_for_each(|entry| { //! if let Some(parent) = entry.path.parent() { //! // Create parent directories as needed. //! fs::create_dir_all(parent)?; //! } //! let mut reader = archive.read(entry)?; //! let mut save_to = File::create(&*entry.path)?; //! io::copy(&mut reader, &mut save_to)?; //! # return Ok::<(), anyhow::Error>(()); //! Ok(()) //! })?; //! # Ok::<(), Box>(()) //! ``` //! //! Zip is an interesting archive format: unlike compressed tarballs often seen //! in Linux land (`*.tar.gz`, `*.tar.zst`, ...), //! each file in a Zip archive is compressed independently, //! with a central directory telling us where to find each file. //! This allows us to extract multiple files simultaneously so long as we can //! read from multiple places at once. //! //! Users can either read the entire archive into memory, or, for larger archives, //! [memory-map](https://docs.rs/memmap/0.7.0/memmap/struct.Mmap.html) the file. //! (On 64-bit systems, this allows us to treat archives as a contiguous byte range //! even if the file is _much_ larger than physical RAM. 32-bit systems are limited //! by address space to archives under 4 GB, but piz _should_ be well-behaved //! if the archive is small enough.) pub mod read; pub mod result; pub use read::CompressionMethod; pub use read::ZipArchive; mod arch; mod crc_reader; mod spec; piz-0.5.1/src/read.rs000064400000000000000000000556241046102023000125150ustar 00000000000000//! Tools for reading a ZIP archive. //! //! To start reading an archive, first create a [`ZipArchive`] from the file. //! //! Current versions of this library don't do any writing, //! but it was arranged to resemble the structure of the [Zip crate] //! and make room for potential future writers. //! //! [Zip crate]: https://crates.io/crates/zip //! 
[`ZipArchive`]: struct.ZipArchive.html use std::borrow::Cow; use std::collections::{btree_map, BTreeMap}; use std::io; use camino::{Utf8Component, Utf8Path}; use chrono::NaiveDateTime; use flate2::read::DeflateDecoder; use log::*; use crate::arch::usize; use crate::crc_reader::Crc32Reader; use crate::result::*; use crate::spec; // Move types into some submodule if we have a handful? /// The compression method used to store a file #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum CompressionMethod { /// The file is uncompressed None, /// The file is [DEFLATE](https://en.wikipedia.org/wiki/DEFLATE)d. /// This is the most common format used by ZIP archives. Deflate, /// The file is compressed with a yet-unsupported format. /// (The u16 indicates the internal format code.) Unsupported(u16), } /// Metadata for a file or directory in the archive, /// retrieved from its central directory #[derive(Debug, PartialEq, Eq)] pub struct FileMetadata<'a> { /// Uncompressed size of the file in bytes pub size: usize, /// Compressed size of the file in bytes pub compressed_size: usize, /// Compression algorithm used to store the file pub compression_method: CompressionMethod, /// The CRC-32 of the decompressed file pub crc32: u32, /// True if the file is encrypted (decryption is unsupported) pub encrypted: bool, /// The provided path of the file. pub path: Cow<'a, Utf8Path>, /// The ISO 8601 combined date and time the file was last modified pub last_modified: NaiveDateTime, /// Unix mode bits, if the file was archived in a Unix OS. /// /// This library does _not_ try to convert DOS permission bits into /// roughly-equivalent Unix mode bits, or do other cross-OS handwaving. /// Future versions might provide an enum here of different OS's metadata. pub unix_mode: Option, // TODO: Add other fields the user might want to know about: // time, etc. 
/// The offset to the local file header in the archive pub(crate) header_offset: usize, } impl<'a> FileMetadata<'a> { /// Returns true if the given entry is a directory pub fn is_dir(&self) -> bool { // Path::ends_with() doesn't consider separators, // so we need a different approach. self.size == 0 && self.path.as_str().ends_with('/') } /// Returns true if the given entry is a file pub fn is_file(&self) -> bool { !self.is_dir() } pub fn into_owned(self) -> FileMetadata<'static> { FileMetadata { path: Cow::Owned(self.path.into_owned()), ..self } } } /// A ZIP archive to be read pub struct ZipArchive<'a> { /// The contents of the ZIP archive, as a byte slice. mapping: &'a [u8], /// A list of entries from the ZIP's central directory entries: Vec>, } impl<'a> ZipArchive<'a> { /// Reads a ZIP archive from a byte slice. /// Smaller files can be read into a buffer. /// /// ```no_run /// # use std::fs; /// # use piz::*; /// let bytes = fs::read("foo.zip")?; /// let archive = ZipArchive::new(&bytes)?; /// # Ok::<(), Box>(()) /// ``` /// /// For larger ones, memory map! /// ```no_run /// # use std::fs::{self, File}; /// # extern crate memmap; /// # use memmap::Mmap; /// # use piz::*; /// let zip_file = File::open("foo.zip")?; /// let mapping = unsafe { Mmap::map(&zip_file)? }; /// let archive = ZipArchive::new(&mapping)?; /// # Ok::<(), Box>(()) /// ``` pub fn new(mapping: &'a [u8]) -> ZipResult { let (new_archive, archive_offset) = Self::with_prepended_data(mapping)?; if archive_offset != 0 { return Err(ZipError::PrependedWithUnknownBytes(archive_offset)); } Ok(new_archive) } /// Like `ZipArchive::new()`, but allows arbitrary data to prepend the archive. /// Returns the ZipArchive and the number of bytes prepended to the archive. /// /// Since a ZIP archive's metadata sits at the back of the file, /// many formats consist of ZIP archives prepended with some other data. /// For example, a self-extracting archive is one with an executable in the front. 
pub fn with_prepended_data(mut mapping: &'a [u8]) -> ZipResult<(Self, usize)> { let eocdr_posit = spec::find_eocdr(mapping)?; let eocdr = spec::EndOfCentralDirectory::parse(&mapping[eocdr_posit..])?; trace!("{:?}", eocdr); if eocdr.disk_number != eocdr.disk_with_central_directory { return Err(ZipError::UnsupportedArchive(format!( "No support for multi-disk archives: disk ({}) != disk with central directory ({})", eocdr.disk_number, eocdr.disk_with_central_directory ))); } if eocdr.entries != eocdr.entries_on_this_disk { return Err(ZipError::UnsupportedArchive(format!( "No support for multi-disk archives: entries ({}) != entries this disk ({})", eocdr.entries, eocdr.entries_on_this_disk ))); } let nominal_central_directory_offset: usize; let entry_count: u64; // Zip files can be prepended by arbitrary junk, // so all the given positions might be off. // Calculate the offset. let archive_offset; let zip64_eocdr_locator_posit = eocdr_posit .checked_sub(spec::Zip64EndOfCentralDirectoryLocator::size_in_file()) .ok_or(ZipError::InvalidArchive( "Too small for anything but End Of Central Directory Record", ))?; if let Some(zip64_eocdr_locator) = spec::Zip64EndOfCentralDirectoryLocator::parse(&mapping[zip64_eocdr_locator_posit..]) { trace!("{:?}", zip64_eocdr_locator); if eocdr.disk_number as u32 != zip64_eocdr_locator.disk_with_central_directory { return Err(ZipError::UnsupportedArchive(format!( "No support for multi-disk archives: disk ({}) != disk with zip64 central directory ({})", eocdr.disk_number, zip64_eocdr_locator.disk_with_central_directory ))); } if zip64_eocdr_locator.disks != 1 { return Err(ZipError::UnsupportedArchive(format!( "No support for multi-disk archives: Zip64 EOCDR locator reports {} disks", zip64_eocdr_locator.disks ))); } // Search for the zip64 EOCDR, from its nominal starting position // to the end of where it could be. 
let zip64_eocdr_search_start = usize(zip64_eocdr_locator.zip64_eocdr_offset)?; let zip64_eocdr_search_end = eocdr_posit .checked_sub(spec::Zip64EndOfCentralDirectoryLocator::size_in_file()) .ok_or(ZipError::InvalidArchive( "Too small for Zip64 End Of Central Directory Record", ))?; let zip64_eocdr_search_space = &mapping[zip64_eocdr_search_start..zip64_eocdr_search_end]; let zip64_eocdr_posit = spec::find_zip64_eocdr(zip64_eocdr_search_space)?; // Since we're searching starting at the provided offset, // the returned position is the archive offset. archive_offset = zip64_eocdr_posit; let zip64_eocdr = spec::Zip64EndOfCentralDirectory::parse( &zip64_eocdr_search_space[zip64_eocdr_posit..], )?; trace!("{:?}", zip64_eocdr); nominal_central_directory_offset = usize(zip64_eocdr.central_directory_offset)?; entry_count = zip64_eocdr.entries; } else { // The offset is the actual position versus the stored one. let actual_cdr_posit = eocdr_posit.checked_sub(usize(eocdr.central_directory_size)?); let nominal_offset = usize(eocdr.central_directory_offset)?; archive_offset = actual_cdr_posit .and_then(|off| off.checked_sub(nominal_offset)) .ok_or(ZipError::InvalidArchive( "Invalid central directory size or offset", ))?; nominal_central_directory_offset = usize(eocdr.central_directory_offset)?; entry_count = eocdr.entries as u64; } mapping = &mapping[archive_offset..]; trace!( "{} entries at nominal offset {}", entry_count, nominal_central_directory_offset ); let mut central_directory = &mapping[nominal_central_directory_offset..]; let mut entries = Vec::new(); entries.reserve(usize(entry_count)?); for _ in 0..entry_count { let dir_entry = spec::CentralDirectoryEntry::parse_and_consume(&mut central_directory)?; trace!("{:?}", dir_entry); let file_metadata = FileMetadata::from_cde(&dir_entry)?; debug!("{:?}", file_metadata); entries.push(file_metadata); } Ok((ZipArchive { mapping, entries }, archive_offset)) } /// Returns the entries found in the ZIP archive's central directory. 
/// /// No effort is made to deduplicate or otherwise validate these entries. /// To do that, create a [`FileTree`]. /// /// [`FileTree`]: struct.FileTree.html pub fn entries(&self) -> &[FileMetadata] { &self.entries } /// Reads the given file from the ZIP archive. /// /// Since each file in a ZIP archive is compressed independently, /// multiple files can be read in parallel. pub fn read(&self, metadata: &FileMetadata) -> ZipResult> { let mut file_slice = &self.mapping[metadata.header_offset..]; let local_header = spec::LocalFileHeader::parse_and_consume(&mut file_slice)?; trace!("{:?}", local_header); let local_metadata = FileMetadata::from_local_header(&local_header, metadata)?; debug!("Reading {:?}", local_metadata); if cfg!(feature = "check-local-metadata") && *metadata != local_metadata { return Err(ZipError::InvalidArchive( "Central directory entry doesn't match local file header", )); } if metadata.encrypted { return Err(ZipError::UnsupportedArchive(format!( "Can't read encrypted file {}", metadata.path ))); } make_reader( metadata.compression_method, metadata.crc32, io::Cursor::new(&file_slice[0..metadata.compressed_size]), ) } } /// Returns a boxed read trait for a compressed file, /// given its compression method and expected CRC. fn make_reader<'a, R: io::Read + Send + 'a>( compression_method: CompressionMethod, crc32: u32, reader: R, ) -> ZipResult> { match compression_method { CompressionMethod::None => Ok(Box::new(Crc32Reader::new(reader, crc32))), CompressionMethod::Deflate => { let deflate_reader = DeflateDecoder::new(reader); Ok(Box::new(Crc32Reader::new(deflate_reader, crc32))) } _ => Err(ZipError::UnsupportedArchive(String::from( "Compression method not supported", ))), } } /// Maps a directory's child paths to the respective entries. pub type DirectoryContents<'a> = BTreeMap<&'a str, DirectoryEntry<'a>>; /// A directory in a ZipArchive, including its metadata and its contents. 
#[derive(Debug)] pub struct Directory<'a> { pub metadata: &'a FileMetadata<'a>, pub children: DirectoryContents<'a>, } impl<'a> Directory<'a> { fn new(metadata: &'a FileMetadata<'a>) -> Self { Self { metadata, children: DirectoryContents::new(), } } } /// A file or directory in a [`FileTree`] /// /// [`FileTree`]: struct.FileTree.html #[derive(Debug)] pub enum DirectoryEntry<'a> { File(&'a FileMetadata<'a>), Directory(Directory<'a>), } impl<'a> DirectoryEntry<'a> { /// Returns the metadata of the entry. pub fn metadata(&self) -> &'a FileMetadata<'a> { match &self { DirectoryEntry::File(metadata) => metadata, DirectoryEntry::Directory(dir) => dir.metadata, } } fn name(&self) -> &'a str { let path = &self.metadata().path; path.file_name().expect("Path ended in ..") } } /// Given metadata from [`ZipArchive::entries()`], /// organize them into a tree of nested directories and files. /// /// This does two things: /// /// 1. It makes files easier to look up by path /// /// 2. It validates the archive, making sure each `FileMetadata` has a valid path, /// no duplicates, etc. (The ZIP file format makes no promises here.) /// /// [`ZipArchive::entries()`]: struct.ZipArchive.html#method.entries pub fn as_tree<'a>(entries: &'a [FileMetadata<'a>]) -> ZipResult> { let mut contents = DirectoryContents::new(); for entry in entries { entree_entry(entry, &mut contents)?; } Ok(contents) } pub trait FileTree<'a> { /// Looks up a file or directory by its path. fn lookup>(&self, path: P) -> ZipResult<&'a FileMetadata<'a>>; /// Returns an iterator over the entries in the tree, sorted by path. fn traverse<'b>(&'b self) -> TreeIterator<'a, 'b>; /// Returns an iterator over the files in the tree, sorted by path. fn files<'b>(&'b self) -> FileTreeIterator<'a, 'b>; /// Returns an iterator over the directories in the tree, sorted by path. 
fn directories<'b>(&'b self) -> DirectoryTreeIterator<'a, 'b>; } impl<'a> FileTree<'a> for DirectoryContents<'a> { fn lookup>(&self, path: P) -> ZipResult<&'a FileMetadata<'a>> { let path = path.as_ref(); let parent_dir = if let Some(parent) = path.parent() { match walk_parent_directories(parent, self) { Err(ZipError::NoSuchFile(_)) => Err(ZipError::NoSuchFile(path.to_owned())), other_result => other_result, }? } else { self }; let base = path .file_name() .ok_or_else(|| ZipError::InvalidPath(format!("Path {} ended in ..", path)))?; parent_dir .get(base) .ok_or_else(|| ZipError::NoSuchFile(path.to_owned())) .map(|dir_entry| dir_entry.metadata()) } fn traverse<'b>(&'b self) -> TreeIterator<'a, 'b> { TreeIterator::new(self) } fn files<'b>(&'b self) -> FileTreeIterator<'a, 'b> { FileTreeIterator::new(self) } fn directories<'b>(&'b self) -> DirectoryTreeIterator<'a, 'b> { DirectoryTreeIterator::new(self) } } /// Places the given entry in the given directory tree. fn entree_entry<'a>( entry: &'a FileMetadata<'a>, tree: &mut DirectoryContents<'a>, ) -> ZipResult<()> { let path = &entry.path; let parent_dir = if let Some(parent) = path.parent() { walk_parent_directories_mut(parent, tree)? } else { tree }; // Check: Path doesn't end in something weird. let _base = path .file_name() .ok_or_else(|| ZipError::Hierarchy(format!("Path {path} ended in ..")))?; let to_insert: DirectoryEntry = if entry.is_dir() { DirectoryEntry::Directory(Directory::new(entry)) } else { DirectoryEntry::File(entry) }; if parent_dir.insert(to_insert.name(), to_insert).is_some() { return Err(ZipError::Hierarchy(format!("Duplicate entry for {path}",))); } Ok(()) } /// Used by `entree_entry()` to reach the directory where we'll insert a new entry. 
fn walk_parent_directories_mut<'a, 'b>( path: &Utf8Path, tree: &'b mut DirectoryContents<'a>, ) -> ZipResult<&'b mut DirectoryContents<'a>> { let mut current = tree; for component in path.components() { match component { Utf8Component::Prefix(prefix) => { let prefix = prefix.as_os_str(); return Err(ZipError::Hierarchy(format!( "Prefix {} found in path {path}", prefix.to_string_lossy(), ))); } Utf8Component::RootDir => { warn!("Root directory found in path {path}"); // Huh. Keep going. } Utf8Component::CurDir => { warn!("Current dir (.) found in path {path}"); // Huh. Keep going. } Utf8Component::ParentDir => { // We could canonicalize it somewhere down the road. // Path::canonicalize() doesn't work because it tries // to actually resolve the path // (and failing if something doesn't exist there). // Maybe try https://crates.io/crates/path-clean some time? return Err(ZipError::Hierarchy(format!( "Parent dir (..) found in path {path}", ))); } Utf8Component::Normal(component) => { if let Some(child) = current.get_mut(component) { match child { DirectoryEntry::Directory(dir) => { current = &mut dir.children; } _ => { return Err(ZipError::Hierarchy(format!( "{path} is a file, expected a directory", ))); } } } else { return Err(ZipError::Hierarchy(format!( "{path} found before parent directories", ))); } } } } Ok(current) } /// Used by `FileTree::get()` to walk the tree to the parent directory /// where the desired file lives. /// /// Consequently, this assumes that `path` is provided by the user, /// and emits errors accordingly. fn walk_parent_directories<'a, 'b>( path: &Utf8Path, tree: &'b DirectoryContents<'a>, ) -> ZipResult<&'b DirectoryContents<'a>> { let mut current = tree; for component in path.components() { // The path is coming from the user, not the ZIP archive. // So, unlike walk_parent_directories_mut(), revolt over weird stuff. 
match component { Utf8Component::Prefix(prefix) => { return Err(ZipError::InvalidPath(format!( "Prefix {prefix} found in path {path}", ))); } Utf8Component::RootDir => { return Err(ZipError::InvalidPath(format!( "Root directory found in path {path}", ))); } Utf8Component::CurDir => { return Err(ZipError::InvalidPath(format!( "Current dir (.) found in path {path}", ))); } Utf8Component::ParentDir => { return Err(ZipError::InvalidPath(format!( "Parent dir (..) found in path {path}", ))); } Utf8Component::Normal(component) => { if let Some(child) = current.get(component) { match child { DirectoryEntry::Directory(dir) => { current = &dir.children; } _ => { return Err(ZipError::InvalidPath(format!( "{path} is a file, expected a directory", ))); } } } else { return Err(ZipError::NoSuchFile(path.to_owned())); } } } } Ok(current) } /// Iterates over all files and directories in a [`FileTree`] /// /// [`FileTree`]: struct.FileTree.html pub struct TreeIterator<'a, 'b> { stack: Vec>>, } impl<'a, 'b> TreeIterator<'a, 'b> { fn new(tree: &'b DirectoryContents<'a>) -> Self { let stack = vec![tree.values()]; Self { stack } } } impl<'a, 'b> Iterator for TreeIterator<'a, 'b> { type Item = &'b DirectoryEntry<'a>; fn next(&mut self) -> Option { if self.stack.is_empty() { return None; } let next = self.stack.last_mut().unwrap().next(); match next { Some(entry) => { if let DirectoryEntry::Directory(d) = entry { self.stack.push(d.children.values()); } return Some(entry); } None => { self.stack.pop(); } }; self.next() } } /// Iterates over all files in a [`FileTree`] /// /// [`FileTree`]: struct.FileTree.html pub struct FileTreeIterator<'a, 'b> { inner: TreeIterator<'a, 'b>, } impl<'a, 'b> FileTreeIterator<'a, 'b> { fn new(tree: &'b DirectoryContents<'a>) -> Self { Self { inner: TreeIterator::new(tree), } } } impl<'a> Iterator for FileTreeIterator<'a, '_> { type Item = &'a FileMetadata<'a>; fn next(&mut self) -> Option { if self.inner.stack.is_empty() { return None; } let next = 
self.inner.stack.last_mut().unwrap().next(); match next { Some(DirectoryEntry::File(f)) => { return Some(f); } Some(DirectoryEntry::Directory(d)) => { self.inner.stack.push(d.children.values()); } None => { self.inner.stack.pop(); } }; self.next() } } /// Iterates over all directories in a [`FileTree`] /// /// [`FileTree`]: struct.FileTree.html pub struct DirectoryTreeIterator<'a, 'b> { inner: TreeIterator<'a, 'b>, } impl<'a, 'b> DirectoryTreeIterator<'a, 'b> { fn new(tree: &'b DirectoryContents<'a>) -> Self { Self { inner: TreeIterator::new(tree), } } } impl<'a, 'b> Iterator for DirectoryTreeIterator<'a, 'b> { type Item = &'b Directory<'a>; fn next(&mut self) -> Option { if self.inner.stack.is_empty() { return None; } let next = self.inner.stack.last_mut().unwrap().next(); match next { Some(DirectoryEntry::Directory(d)) => { self.inner.stack.push(d.children.values()); return Some(d); } Some(DirectoryEntry::File(_f)) => {} None => { self.inner.stack.pop(); } }; self.next() } } piz-0.5.1/src/result.rs000064400000000000000000000034531046102023000131110ustar 00000000000000//! Error types and the related `Result` use camino::Utf8PathBuf; use thiserror::Error; pub type ZipResult = Result; #[derive(Debug, Error)] pub enum ZipError { /// An error from underlying I/O #[error("I/O Error")] Io(#[from] std::io::Error), /// The ZIP archive contained invalid data per the spec. #[error("Invalid Zip archive: {0}")] InvalidArchive(&'static str), /// Decoding a UTF-8 name or comment failed #[error("Invalid UTF-8")] Encoding(#[from] std::str::Utf8Error), /// The ZIP archive uses an unsupported feature #[error("Unsupported Zip archive: {0}")] UnsupportedArchive(String), /// The ZIP archive is prepended some unknown bytes. /// (Use [`ZipArchive::with_prepended_data()`] if this is okay.) 
/// /// [`ZipArchive::with_prepended_data()`]: ../read/struct.ZipArchive.html#method.with_prepended_data #[error("Archive prepended with {0} unknown bytes")] PrependedWithUnknownBytes(usize), /// The ZIP archive contained a nonsensical file hierarchy /// (duplicate entries, bad paths, etc.) #[error("Archive contained strange a strange file hierarchy: {0}")] Hierarchy(String), /// A file wasn't found at the provied path #[error("No file in the archive with the path {0}")] NoSuchFile(Utf8PathBuf), /// A user-provided path (not one from a ZIP archive) was invalid. #[error("Invalid path")] InvalidPath(String), /// A cast from a 64-bit int to a usize failed while mapping the file, /// probably on a 32-bit system. /// /// Future work could include a version of the reader that uses multiple /// file streams instead of a memory map to work with large files in 32 bits. #[error("Zip archive too large for address space")] InsufficientAddressSpace, } piz-0.5.1/src/spec.rs000064400000000000000000000621731046102023000125310ustar 00000000000000//! Code specific to the ZIP file format specification. //! //! We try to keep the nitty gritty here, //! and higher-level stuff in the [`read`] module. //! (This pattern, like several others, was inspired by the Zip crate.) //! //! Most comments quote the ZIP spec, [`APPNOTE.TXT`]. //! //! [_Zip Files: History, Explanation and Implementation_] //! is also a fantastic resource and a great read. //! //! [`read`]: ../read/index.html //! [`APPNOTE.TXT`]: https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.6.TXT //! 
[_Zip Files: History, Explanation and Implementation_]: https://www.hanshq.net/zip.html use std::borrow::Cow; use std::convert::TryInto; use camino::Utf8Path; use chrono::{NaiveDate, NaiveDateTime}; use codepage_437::*; use memchr::memmem; use crate::arch::usize; use crate::read::{CompressionMethod, FileMetadata}; use crate::result::*; // Magic numbers denoting various sections of a ZIP archive /// End of central directory magic number const EOCDR_MAGIC: [u8; 4] = [b'P', b'K', 5, 6]; /// Zip64 end of central directory magic number const ZIP64_EOCDR_MAGIC: [u8; 4] = [b'P', b'K', 6, 6]; /// Zip64 end of central directory locator magic number const ZIP64_EOCDR_LOCATOR_MAGIC: [u8; 4] = [b'P', b'K', 6, 7]; /// Central directory magic number const CENTRAL_DIRECTORY_MAGIC: [u8; 4] = [b'P', b'K', 1, 2]; /// Local file header magic number const LOCAL_FILE_HEADER_MAGIC: [u8; 4] = [b'P', b'K', 3, 4]; impl CompressionMethod { fn from_u16(u: u16) -> Self { match u { 0 => CompressionMethod::None, 8 => CompressionMethod::Deflate, // 12 => CompressionMethod::Bzip2, v => CompressionMethod::Unsupported(v), } } } /// The OS a file in the archive was compressed with. /// Used to decode additional metadata like permissions #[derive(Clone, Copy, Debug, PartialEq, Eq)] enum System { Dos, Unix, Other(u8), } #[allow(dead_code)] impl System { fn from_source_version(source_version: u16) -> Self { // 4.4.2.1 The upper byte indicates the compatibility of the file // attribute information. If the external file attributes // are compatible with MS-DOS and can be read by PKZIP for // DOS version 2.04g then this value will be zero. If these // attributes are not compatible, then this value will // identify the host system on which the attributes are // compatible. Software can use this information to determine // the line record format for text files etc. 
// // 4.4.2.2 The current mappings are: // // 0 - MS-DOS and OS/2 (FAT / VFAT / FAT32 file systems) // 1 - Amiga 2 - OpenVMS // 3 - UNIX 4 - VM/CMS // 5 - Atari ST 6 - OS/2 H.P.F.S. // 7 - Macintosh 8 - Z-System // 9 - CP/M 10 - Windows NTFS // 11 - MVS (OS/390 - Z/OS) 12 - VSE // 13 - Acorn Risc 14 - VFAT // 15 - alternate MVS 16 - BeOS // 17 - Tandem 18 - OS/400 // 19 - OS X (Darwin) 20 thru 255 - unused match source_version >> 8 { 0 => System::Dos, 3 => System::Unix, o => System::Other(o as u8), } } } // Straight from the Rust docs: /// Reads a little-endian u64 from the front of the provided slice, shrinking it. fn read_u64(input: &mut &[u8]) -> u64 { let (int_bytes, rest) = input.split_at(std::mem::size_of::()); *input = rest; u64::from_le_bytes(int_bytes.try_into().expect("less than eight bytes for u64")) } /// Reads a little-endian u32 from the front of the provided slice, shrinking it. fn read_u32(input: &mut &[u8]) -> u32 { let (int_bytes, rest) = input.split_at(std::mem::size_of::()); *input = rest; u32::from_le_bytes(int_bytes.try_into().expect("less than four bytes for u32")) } /// Reads a little-endian u16 from the front of the provided slice, shrinking it. fn read_u16(input: &mut &[u8]) -> u16 { let (int_bytes, rest) = input.split_at(std::mem::size_of::()); *input = rest; u16::from_le_bytes(int_bytes.try_into().expect("less than two bytes for u16")) } /// Data from the End of central directory record /// /// Found at the back of the ZIP archive and provides offsets for finding /// its central directory, along with lots of stuff that stopped being relevant /// when we stopped breaking ZIP archives onto multiple floppies. 
#[derive(Debug)] pub struct EndOfCentralDirectory<'a> { pub disk_number: u16, pub disk_with_central_directory: u16, pub entries_on_this_disk: u16, pub entries: u16, pub central_directory_size: u32, pub central_directory_offset: u32, pub file_comment: &'a [u8], } impl<'a> EndOfCentralDirectory<'a> { pub fn parse(mut eocdr: &'a [u8]) -> ZipResult { // 4.3.16 End of central directory record: // // end of central dir signature 4 bytes (0x06054b50) // number of this disk 2 bytes // number of the disk with the // start of the central directory 2 bytes // total number of entries in // the central dir on this disk 2 bytes // total number of entries in // the central dir 2 bytes // size of the central directory 4 bytes // offset of start of central // directory with respect to // the starting disk number 4 bytes // zipfile comment length 2 bytes // Assert the magic instead of checking for it // because the search should have found it. assert_eq!(eocdr[..4], EOCDR_MAGIC); eocdr = &eocdr[4..]; let disk_number = read_u16(&mut eocdr); let disk_with_central_directory = read_u16(&mut eocdr); let entries_on_this_disk = read_u16(&mut eocdr); let entries = read_u16(&mut eocdr); let central_directory_size = read_u32(&mut eocdr); let central_directory_offset = read_u32(&mut eocdr); let comment_length = read_u16(&mut eocdr); let file_comment = &eocdr[..usize(comment_length)?]; Ok(Self { disk_number, disk_with_central_directory, entries_on_this_disk, entries, central_directory_size, central_directory_offset, file_comment, }) } } /// Searches backward through `mapping` to find the /// End of central directory record. /// /// It should be right at the end of the file, /// but its variable size means we can't jump to a known offset. 
pub fn find_eocdr(mapping: &[u8]) -> ZipResult { memmem::rfind(mapping, &EOCDR_MAGIC).ok_or(ZipError::InvalidArchive( "Couldn't find End Of Central Directory Record", )) } /// Data from the Zip64 end of central directory locator /// /// This should immediately precede the End of central directory record /// on Zip64 files and tell us where to find the Zip64 end of central directory record. #[derive(Debug)] pub struct Zip64EndOfCentralDirectoryLocator { pub disk_with_central_directory: u32, pub zip64_eocdr_offset: u64, pub disks: u32, } impl Zip64EndOfCentralDirectoryLocator { pub fn parse(mut mapping: &[u8]) -> Option { // 4.3.15 Zip64 end of central directory locator // // zip64 end of central dir locator // signature 4 bytes (0x07064b50) // number of the disk with the // start of the zip64 end of // central directory 4 bytes // relative offset of the zip64 // end of central directory record 8 bytes // total number of disks 4 bytes if mapping[..4] != ZIP64_EOCDR_LOCATOR_MAGIC { return None; } mapping = &mapping[4..]; let disk_with_central_directory = read_u32(&mut mapping); let zip64_eocdr_offset = read_u64(&mut mapping); let disks = read_u32(&mut mapping); Some(Self { disk_with_central_directory, zip64_eocdr_offset, disks, }) } pub fn size_in_file() -> usize { 20 } } /// Data from the Zip64 end of central directory record /// /// This should immediately precede the "End of central directory" record /// on Zip64 files and tell us where to find the Zip64 end of central directory record. 
#[derive(Debug)] pub struct Zip64EndOfCentralDirectory<'a> { pub source_version: u16, pub minimum_extract_version: u16, pub disk_number: u32, pub disk_with_central_directory: u32, pub entries_on_this_disk: u64, pub entries: u64, pub central_directory_size: u64, pub central_directory_offset: u64, pub extensible_data: &'a [u8], } impl<'a> Zip64EndOfCentralDirectory<'a> { pub fn parse(mut eocdr: &'a [u8]) -> ZipResult { // 4.3.14 Zip64 end of central directory record // // zip64 end of central dir // signature 4 bytes (0x06064b50) // size of zip64 end of central // directory record 8 bytes // version made by 2 bytes // version needed to extract 2 bytes // number of this disk 4 bytes // number of the disk with the // start of the central directory 4 bytes // total number of entries in the // central directory on this disk 8 bytes // total number of entries in the // central directory 8 bytes // size of the central directory 8 bytes // offset of start of central // directory with respect to // the starting disk number 8 bytes // zip64 extensible data sector (variable size) // Assert the magic instead of checking for it // because the search should have found it. assert_eq!(eocdr[..4], ZIP64_EOCDR_MAGIC); eocdr = &eocdr[4..]; let eocdr_size = read_u64(&mut eocdr); let source_version = read_u16(&mut eocdr); let minimum_extract_version = read_u16(&mut eocdr); let disk_number = read_u32(&mut eocdr); let disk_with_central_directory = read_u32(&mut eocdr); let entries_on_this_disk = read_u64(&mut eocdr); let entries = read_u64(&mut eocdr); let central_directory_size = read_u64(&mut eocdr); let central_directory_offset = read_u64(&mut eocdr); // 4.3.14.1 The value stored into the "size of zip64 end of central // directory record" SHOULD be the size of the remaining // record and SHOULD NOT include the leading 12 bytes. // // Size = SizeOfFixedFields + SizeOfVariableData - 12. 
// (SizeOfVariableData = Size - SizeOfFixedFields + 12) // Check for underflow: let eocdr_size = usize(eocdr_size)?; if (eocdr_size + 12) < Self::fixed_size_in_file() { return Err(ZipError::InvalidArchive( "Invalid extensible data length in Zip64 End Of Central Directory Record", )); } // We should be left with just the extensible data: let extensible_data_length = eocdr_size + 12 - Self::fixed_size_in_file(); if eocdr.len() != extensible_data_length { return Err(ZipError::InvalidArchive( "Invalid extensible data length in Zip64 End Of Central Directory Record", )); } let extensible_data = eocdr; Ok(Self { source_version, minimum_extract_version, disk_number, disk_with_central_directory, entries, entries_on_this_disk, central_directory_size, central_directory_offset, extensible_data, }) } fn fixed_size_in_file() -> usize { 56 } } /// Finds the Zip64 end of central directory record in the given slice. /// /// The slice should start at the Zip64 EOCDR's nominal location, /// but we might have to do some searching since ZIP archives can have /// arbitrary junk up front. pub fn find_zip64_eocdr(mapping: &[u8]) -> ZipResult { memmem::find(mapping, &ZIP64_EOCDR_MAGIC).ok_or(ZipError::InvalidArchive( "Couldn't find zip64 End Of Central Directory Record", )) } /// Data from a central directory entry /// /// Each of these records contians information about a file or folder /// stored in the ZIP archive. 
#[derive(Debug)]
pub struct CentralDirectoryEntry<'a> {
    pub source_version: u16,
    pub minimum_extract_version: u16,
    pub flags: u16,
    pub compression_method: u16,
    pub last_modified_time: u16,
    pub last_modified_date: u16,
    pub crc32: u32,
    pub compressed_size: u32,
    pub uncompressed_size: u32,
    pub disk_number: u16,
    pub internal_file_attributes: u16,
    pub external_file_attributes: u32,
    pub header_offset: u32,
    pub path: &'a [u8],
    pub extra_field: &'a [u8],
    pub file_comment: &'a [u8],
}

impl<'a> CentralDirectoryEntry<'a> {
    /// Parses one central directory entry from the front of `entry`,
    /// advancing the slice past it so the next entry can be parsed.
    pub fn parse_and_consume(entry: &mut &'a [u8]) -> ZipResult<Self> {
        // 4.3.12 Central directory structure:
        //
        // [central directory header 1]
        // .
        // .
        // .
        // [central directory header n]
        // [digital signature]
        //
        // File header:
        //
        // central file header signature   4 bytes  (0x02014b50)
        // version made by                 2 bytes
        // version needed to extract       2 bytes
        // general purpose bit flag        2 bytes
        // compression method              2 bytes
        // last mod file time              2 bytes
        // last mod file date              2 bytes
        // crc-32                          4 bytes
        // compressed size                 4 bytes
        // uncompressed size               4 bytes
        // file name length                2 bytes
        // extra field length              2 bytes
        // file comment length             2 bytes
        // disk number start               2 bytes
        // internal file attributes        2 bytes
        // external file attributes        4 bytes
        // relative offset of local header 4 bytes
        //
        // file name       (variable size)
        // extra field     (variable size)
        // file comment    (variable size)

        // Magic plus fixed fields take 46 bytes; check the length up front
        // so a truncated archive produces an error instead of a panic.
        const FIXED_SIZE: usize = 46;
        if entry.len() < FIXED_SIZE || entry[..4] != CENTRAL_DIRECTORY_MAGIC {
            return Err(ZipError::InvalidArchive("Invalid central directory entry"));
        }
        *entry = &entry[4..];

        let source_version = read_u16(entry);
        let minimum_extract_version = read_u16(entry);
        let flags = read_u16(entry);
        let compression_method = read_u16(entry);
        let last_modified_time = read_u16(entry);
        let last_modified_date = read_u16(entry);
        let crc32 = read_u32(entry);
        let compressed_size = read_u32(entry);
        let uncompressed_size = read_u32(entry);
        let path_length = usize(read_u16(entry))?;
        let extra_field_length = usize(read_u16(entry))?;
        let file_comment_length = usize(read_u16(entry))?;
        let disk_number = read_u16(entry);
        let internal_file_attributes = read_u16(entry);
        let external_file_attributes = read_u32(entry);
        let header_offset = read_u32(entry);

        // Make sure the three variable-length fields actually fit in what's
        // left; split_at() would panic otherwise.
        if entry.len() < path_length + extra_field_length + file_comment_length {
            return Err(ZipError::InvalidArchive("Invalid central directory entry"));
        }
        let (path, remaining) = entry.split_at(path_length);
        let (extra_field, remaining) = remaining.split_at(extra_field_length);
        let (file_comment, remaining) = remaining.split_at(file_comment_length);
        *entry = remaining;

        Ok(Self {
            source_version,
            minimum_extract_version,
            flags,
            compression_method,
            last_modified_time,
            last_modified_date,
            crc32,
            compressed_size,
            uncompressed_size,
            disk_number,
            internal_file_attributes,
            external_file_attributes,
            header_offset,
            path,
            extra_field,
            file_comment,
        })
    }
}

/// Extracts the "is this text UTF-8?" bit from the 16-bit flags field.
///
/// If false, text is assumed to be CP437.
fn is_utf8(flags: u16) -> bool {
    // Bit 11: Language encoding flag (EFS). If this bit is set,
    //         the filename and comment fields for this file
    //         MUST be encoded using UTF-8. (see APPENDIX D)
    flags & (1 << 11) != 0
}

/// Extracts the "is this file encrypted?" bit from the 16-bit flags field.
fn is_encrypted(flags: u16) -> bool {
    // Bit 0: If set, indicates that the file is encrypted
    flags & 1 != 0
}

impl<'a> FileMetadata<'a> {
    /// Extracts `FileMetadata` from a central directory entry
    ///
    /// Decodes the entry's path (UTF-8 or CP437, per the flags),
    /// rejects multi-disk archives, and folds in any Zip64 extra-field data.
    //
    // NOTE(review): several type ascriptions in this file appear to have
    // lost their parameters in transit (`ZipResult`, bare `Cow`) —
    // presumably `ZipResult<Self>` and `Cow<'a, Utf8Path>` / `Cow<'_, str>`;
    // confirm against the original source before compiling.
    pub(crate) fn from_cde(cde: &CentralDirectoryEntry<'a>) -> ZipResult {
        let is_utf8 = is_utf8(cde.flags);
        // Bit 11 of the flags decides the path's encoding:
        // UTF-8 can be borrowed directly; CP437 may need an owned conversion.
        let path: Cow = if is_utf8 {
            let utf8 = std::str::from_utf8(cde.path).map_err(ZipError::Encoding)?;
            Cow::Borrowed(Utf8Path::new(utf8))
        } else {
            let str_cow: Cow = Cow::borrow_from_cp437(cde.path, &CP437_CONTROL);
            // Annoying: doesn't seem to be any Cow -> Cow conversion,
            // so map the borrowed/owned cases by hand.
            match str_cow {
                Cow::Borrowed(s) => Cow::Borrowed(Utf8Path::new(s)),
                Cow::Owned(s) => Cow::Owned(s.into()),
            }
        };

        // Multi-disk archives aren't supported; disk 0 is the only disk.
        if cde.disk_number != 0 {
            return Err(ZipError::UnsupportedArchive(format!(
                "No support for multi-disk archives: file {path} claims to be on disk {}",
                cde.disk_number,
            )));
        }

        let encrypted = is_encrypted(cde.flags);
        /* When we try to read; don't bomb if the archive has _any_ encrypted file
        if encrypted {
            return Err(ZipError::UnsupportedArchive(format!(
                "No support for encrypted files, as {} claims to be",
                path
            )));
        }
        */

        let compression_method = CompressionMethod::from_u16(cde.compression_method);

        // https://unix.stackexchange.com/questions/14705/the-zip-formats-external-file-attribute
        // There's a _lot_ to unpack here - see unzip's zipinfo.c.
        // For Unix-made archives, the high 16 bits of the external
        // attributes carry the st_mode permission bits.
        let unix_mode = match System::from_source_version(cde.source_version) {
            // I know this!
            System::Unix => Some((cde.external_file_attributes >> 16) as u16),
            _ => None,
        };

        let mut metadata = Self {
            size: usize(cde.uncompressed_size)?,
            compressed_size: usize(cde.compressed_size)?,
            compression_method,
            crc32: cde.crc32,
            encrypted,
            path,
            last_modified: parse_msdos(cde.last_modified_time, cde.last_modified_date),
            unix_mode,
            header_offset: usize(cde.header_offset)?,
        };
        // Zip64 extra fields can override the 32-bit size/offset fields
        // when they were saturated at u32::MAX.
        parse_extra_field(&mut metadata, cde.extra_field)?;
        Ok(metadata)
    }

    /// Extract metadata from a local file header.
    ///
    /// Since the local header doesn't contain the offset
    /// (we're at it already if we're reading the thing),
    /// take the CDE-provided offset.
    /// Ditto for other things the local header lacks (file perms, etc.)
    pub(crate) fn from_local_header(
        local: &LocalFileHeader<'a>,
        cde_header: &Self,
    ) -> ZipResult {
        let is_utf8 = is_utf8(local.flags);
        // Same path-decoding dance as from_cde():
        // borrow UTF-8 directly, convert CP437 as needed.
        let path: Cow = if is_utf8 {
            let utf8 = std::str::from_utf8(local.path).map_err(ZipError::Encoding)?;
            Cow::Borrowed(Utf8Path::new(utf8))
        } else {
            let str_cow: Cow = Cow::borrow_from_cp437(local.path, &CP437_CONTROL);
            // Annoying: doesn't seem to be any Cow -> Cow conversion,
            // so map the borrowed/owned cases by hand.
            match str_cow {
                Cow::Borrowed(s) => Cow::Borrowed(Utf8Path::new(s)),
                Cow::Owned(s) => Cow::Owned(s.into()),
            }
        };
        let encrypted = is_encrypted(local.flags);
        let compression_method = CompressionMethod::from_u16(local.compression_method);

        // Fields the local header lacks (unix_mode, header_offset, ...)
        // are copied from the central directory entry's metadata.
        let mut metadata = Self {
            size: usize(local.uncompressed_size)?,
            compressed_size: usize(local.compressed_size)?,
            compression_method,
            crc32: local.crc32,
            encrypted,
            path,
            last_modified: parse_msdos(local.last_modified_time, local.last_modified_date),
            ..*cde_header
        };
        parse_extra_field(&mut metadata, local.extra_field)?;
        Ok(metadata)
    }
}

/// Converts an MSDOS time/date pair (as stored in ZIP headers)
/// into a `NaiveDateTime`.
//
// NOTE(review): NaiveDate::from_ymd() and and_hms() panic on out-of-range
// values — a zeroed DOS date field yields month 0 / day 0, which would
// panic here. Confirm inputs are validated upstream, or switch to the
// *_opt variants with a fallback.
fn parse_msdos(time: u16, date: u16) -> NaiveDateTime {
    let seconds = (0b0000_0000_0001_1111 & time) as u32 * 2; // MSDOS uses 2-second precision
    let minutes = (0b0000_0111_1110_0000 & time) as u32 >> 5;
    let hours = (0b1111_1000_0000_0000 & time) as u32 >> 11;

    let days = (0b0000_0000_0001_1111 & date) as u32;
    let months = (0b0000_0001_1110_0000 & date) as u32 >> 5;
    // MSDOS uses years since 1980; Always interpreted as a positive value
    let years = ((0b1111_1110_0000_0000 & date) >> 9) as i32 + 1980;

    NaiveDate::from_ymd(years, months, days).and_hms(hours, minutes, seconds)
}

/// Parses the "extra fields" found in central directory entries
/// and local file headers.
///
/// Currently we just look for Zip64 info (64-bit values for files > 2^32 in size)
fn parse_extra_field(metadata: &mut FileMetadata, mut extra_field: &[u8]) -> ZipResult<()> {
    // 4.5.1 In order to allow different programs and different types
    //       of information to be stored in the 'extra' field in .ZIP
    //       files, the following structure MUST be used for all
    //       programs storing data in this field:
    //       header1+data1 + header2+data2 . . .
    //       Each header MUST consist of:
    //       Header ID - 2 bytes
    //       Data Size - 2 bytes
    while !extra_field.is_empty() {
        // Need at least the 4-byte header; error out instead of panicking
        // on a truncated field.
        if extra_field.len() < 4 {
            return Err(ZipError::InvalidArchive("Extra data field is truncated"));
        }
        let kind = read_u16(&mut extra_field);
        let field_len = read_u16(&mut extra_field);
        // Track the unread remainder of this field in a type wide enough
        // for the whole 0..=65535 range that can also go negative on
        // over-read. (An i16 here would overflow for field_len > 32767,
        // producing a bogus negative remainder and a wild slice index.)
        let mut amount_left = i64::from(field_len);

        // Zip64 extended information extra field
        if kind == 0x0001 {
            // Each 64-bit value is only present when the corresponding
            // 32-bit field in the fixed header was saturated at u32::MAX.
            if metadata.size == u32::MAX as usize {
                metadata.size = usize(read_u64(&mut extra_field))?;
                amount_left -= 8;
            }
            if metadata.compressed_size == u32::MAX as usize {
                metadata.compressed_size = usize(read_u64(&mut extra_field))?;
                amount_left -= 8;
            }
            if metadata.header_offset == u32::MAX as usize {
                metadata.header_offset = usize(read_u64(&mut extra_field))?;
                amount_left -= 8;
            }
            // We already checked many times that this isn't a multi-disk archive.
            if amount_left != 0 {
                return Err(ZipError::InvalidArchive(
                    "Extra data field contains disk number",
                ));
            }
        }
        // Skip over any remaining (unrecognized) field data, erroring out
        // if the field claims more data than is actually present.
        let skip = amount_left as usize;
        if skip > extra_field.len() {
            return Err(ZipError::InvalidArchive("Extra data field is truncated"));
        }
        extra_field = &extra_field[skip..];
    }
    Ok(())
}

/// Data from a local file header
///
/// Each file's actual contents is preceded by this header.
/// These headers allow for "streaming" decompression without
/// the use of the central directory,
/// but we don't make use of this feature.
#[derive(Debug)] pub struct LocalFileHeader<'a> { pub minimum_extract_version: u16, pub flags: u16, pub compression_method: u16, pub last_modified_time: u16, pub last_modified_date: u16, pub crc32: u32, pub compressed_size: u32, pub uncompressed_size: u32, pub path: &'a [u8], pub extra_field: &'a [u8], } impl<'a> LocalFileHeader<'a> { pub fn parse_and_consume(header: &mut &'a [u8]) -> ZipResult { // 4.3.7 Local file header: // // local file header signature 4 bytes (0x04034b50) // version needed to extract 2 bytes // general purpose bit flag 2 bytes // compression method 2 bytes // last mod file time 2 bytes // last mod file date 2 bytes // crc-32 4 bytes // compressed size 4 bytes // uncompressed size 4 bytes // file name length 2 bytes // extra field length 2 bytes // // file name (variable size) // extra field (variable size) assert_eq!(header[..4], LOCAL_FILE_HEADER_MAGIC); *header = &header[4..]; let minimum_extract_version = read_u16(header); let flags = read_u16(header); let compression_method = read_u16(header); let last_modified_time = read_u16(header); let last_modified_date = read_u16(header); let crc32 = read_u32(header); let compressed_size = read_u32(header); let uncompressed_size = read_u32(header); let path_length = usize(read_u16(header))?; let extra_field_length = usize(read_u16(header))?; let (path, remaining) = header.split_at(path_length); let (extra_field, remaining) = remaining.split_at(extra_field_length); *header = remaining; Ok(Self { minimum_extract_version, flags, compression_method, last_modified_time, last_modified_date, crc32, compressed_size, uncompressed_size, path, extra_field, }) } } piz-0.5.1/tests/create-inputs.sh000075500000000000000000000007571046102023000147260ustar 00000000000000#!/bin/bash set -euo pipefail cd tests/inputs echo "Setting up test environment..." 
rm -f *.zip # Hello Zip archive (small text files) zip -r9 hello.zip hello/ # An archive with some junk in the front echo "Some junk up front" | cat - hello.zip > hello-prefixed.zip # Create a Zip64 archive (one with files too large for original 32-bit fields) rm -rf zip64 mkdir zip64 truncate -s 100M zip64/zero100 truncate -s 4400M zip64/zero4400 truncate -s 5G zip64/zero5000 zip -r9 zip64.zip zip64/ piz-0.5.1/tests/inputs/.gitignore000064400000000000000000000000151046102023000151010ustar 00000000000000zip64/ *.zip piz-0.5.1/tests/inputs/hello/hi.txt000064400000000000000000000000141046102023000153540ustar 00000000000000Hello, ZIP! piz-0.5.1/tests/inputs/hello/rip.txt000064400000000000000000000000331046102023000155470ustar 00000000000000Phil Katz was a cool dude. piz-0.5.1/tests/inputs/hello/sr71.txt000064400000000000000000000154261046102023000155650ustar 00000000000000There were a lot of things we couldn't do in an SR-71, but we were the fastest guys on the block and loved reminding our fellow aviators of this fact. People often asked us if, because of this fact, it was fun to fly the jet. Fun would not be the first word I would use to describe flying this plane. Intense, maybe. Even cerebral. But there was one day in our Sled experience when we would have to say that it was pure fun to be the fastest guys out there, at least for a moment. It occurred when Walt and I were flying our final training sortie. We needed 100 hours in the jet to complete our training and attain Mission Ready status. Somewhere over Colorado we had passed the century mark. We had made the turn in Arizona and the jet was performing flawlessly. My gauges were wired in the front seat and we were starting to feel pretty good about ourselves, not only because we would soon be flying real missions but because we had gained a great deal of confidence in the plane in the past ten months. 
Ripping across the barren deserts 80,000 feet below us, I could already see the coast of California from the Arizona border. I was, finally, after many humbling months of simulators and study, ahead of the jet. I was beginning to feel a bit sorry for Walter in the back seat. There he was, with no really good view of the incredible sights before us, tasked with monitoring four different radios. This was good practice for him for when we began flying real missions, when a priority transmission from headquarters could be vital. It had been difficult, too, for me to relinquish control of the radios, as during my entire flying career I had controlled my own transmissions. But it was part of the division of duties in this plane and I had adjusted to it. I still insisted on talking on the radio while we were on the ground, however. Walt was so good at many things, but he couldn't match my expertise at sounding smooth on the radios, a skill that had been honed sharply with years in fighter squadrons where the slightest radio miscue was grounds for beheading. He understood that and allowed me that luxury. Just to get a sense of what Walt had to contend with, I pulled the radio toggle switches and monitored the frequencies along with him. The predominant radio chatter was from Los Angeles Center, far below us, controlling daily traffic in their sector. While they had us on their scope (albeit briefly), we were in uncontrolled airspace and normally would not talk to them unless we needed to descend into their airspace. We listened as the shaky voice of a lone Cessna pilot asked Center for a readout of his ground speed. Center replied: "November Charlie 175, I'm showing you at ninety knots on the ground." Now the thing to understand about Center controllers, was that whether they were talking to a rookie pilot in a Cessna, or to Air Force One, they always spoke in the exact same, calm, deep, professional, tone that made one feel important. 
I referred to it as the " Houston Center voice." I have always felt that after years of seeing documentaries on this country's space program and listening to the calm and distinct voice of the Houston controllers, that all other controllers since then wanted to sound like that, and that they basically did. And it didn't matter what sector of the country we would be flying in, it always seemed like the same guy was talking. Over the years that tone of voice had become somewhat of a comforting sound to pilots everywhere. Conversely, over the years, pilots always wanted to ensure that, when transmitting, they sounded like Chuck Yeager, or at least like John Wayne. Better to die than sound bad on the radios. Just moments after the Cessna's inquiry, a Twin Beech piped up on frequency, in a rather superior tone, asking for his ground speed. "I have you at one hundred and twenty-five knots of ground speed." Boy, I thought, the Beechcraft really must think he is dazzling his Cessna brethren. Then out of the blue, a navy F-18 pilot out of NAS Lemoore came up on frequency. You knew right away it was a Navy jock because he sounded very cool on the radios. "Center, Dusty 52 ground speed check". Before Center could reply, I'm thinking to myself, hey, Dusty 52 has a ground speed indicator in that million-dollar cockpit, so why is he asking Center for a readout? Then I got it, ol' Dusty here is making sure that every bug smasher from Mount Whitney to the Mojave knows what true speed is. He's the fastest dude in the valley today, and he just wants everyone to know how much fun he is having in his new Hornet. And the reply, always with that same, calm, voice, with more distinct alliteration than emotion: "Dusty 52, Center, we have you at 620 on the ground." And I thought to myself, is this a ripe situation, or what? As my hand instinctively reached for the mic button, I had to remind myself that Walt was in control of the radios. 
Still, I thought, it must be done - in mere seconds we'll be out of the sector and the opportunity will be lost. That Hornet must die, and die now. I thought about all of our Sim training and how important it was that we developed well as a crew and knew that to jump in on the radios now would destroy the integrity of all that we had worked toward becoming. I was torn. Somewhere, 13 miles above Arizona, there was a pilot screaming inside his space helmet. Then, I heard it. The click of the mic button from the back seat. That was the very moment that I knew Walter and I had become a crew. Very professionally, and with no emotion, Walter spoke: "Los Angeles Center, Aspen 20, can you give us a ground speed check?" There was no hesitation, and the replay came as if was an everyday request. "Aspen 20, I show you at one thousand eight hundred and forty-two knots, across the ground." I think it was the forty-two knots that I liked the best, so accurate and proud was Center to deliver that information without hesitation, and you just knew he was smiling. But the precise point at which I knew that Walt and I were going to be really good friends for a long time was when he keyed the mic once again to say, in his most fighter-pilot-like voice: "Ah, Center, much thanks, we're showing closer to nineteen hundred on the money." For a moment Walter was a god. And we finally heard a little crack in the armor of the Houston Center voice, when L.A.came back with, "Roger that Aspen, Your equipment is probably more accurate than ours. You boys have a good one." It all had lasted for just moments, but in that short, memorable sprint across the southwest, the Navy had been flamed, all mortal airplanes on freq were forced to bow before the King of Speed, and more importantly, Walter and I had crossed the threshold of being a crew. A fine day's work. We never heard another transmission on that frequency all the way to the coast. 
For just one day, it truly was fun being the fastest guys out there. piz-0.5.1/tests/smoke.rs000064400000000000000000000063051046102023000132630ustar 00000000000000use std::fs::File; use std::io; use std::process::Command; use anyhow::{Context, Result}; use camino::Utf8Path; use log::*; use memmap::Mmap; use rayon::prelude::*; use piz::read::*; use piz::result::ZipError; #[test] fn smoke() -> Result<()> { let _ = env_logger::builder().is_test(true).try_init(); let inputs = [ "tests/inputs/hello.zip", "tests/inputs/hello-prefixed.zip", "tests/inputs/zip64.zip", ]; if inputs.iter().any(|i| !Utf8Path::new(i).exists()) { Command::new("tests/create-inputs.sh") .status() .expect("Couldn't set up input files"); } for input in &inputs { read_zip(input)?; } Ok(()) } fn read_zip(zip_path: &str) -> Result<()> { info!("Memory mapping {:#?}", zip_path); let zip_file = File::open(zip_path).context("Couldn't open zip file")?; let mapping = unsafe { Mmap::map(&zip_file).context("Couldn't mmap zip file")? }; let archive = ZipArchive::with_prepended_data(&mapping) .context("Couldn't load archive")? 
.0; // Make sure we can treeify the entries (i.e., they form a valid directory) let tree = as_tree(archive.entries())?; match zip_path { "tests/inputs/hello.zip" | "tests/inputs/hello-prefixed.zip" => { tree.lookup("hello/hi.txt")?; tree.lookup("hello/rip.txt")?; tree.lookup("hello/sr71.txt")?; let no_such_file = Utf8Path::new("no/such/file"); match tree.lookup(no_such_file) { Err(ZipError::NoSuchFile(p)) => { assert_eq!(no_such_file, p); } Err(other) => panic!("Got incorrect error from path with no file: {:?}", other), Ok(_) => panic!("Got a file back from a path with no file"), }; let no_such_file = Utf8Path::new("top-level-no-such-file"); match tree.lookup(no_such_file) { Err(ZipError::NoSuchFile(p)) => { assert_eq!(no_such_file, p); } Err(other) => panic!("Got incorrect error from path with no file: {:?}", other), Ok(_) => panic!("Got a file back from a path with no file"), }; let invalid_path = Utf8Path::new("../nope"); match tree.lookup(invalid_path) { Err(ZipError::InvalidPath(_)) => { /* Cool. */ } Err(other) => panic!("Got incorrect error from invalid path: {:?}", other), Ok(_) => panic!("Got a file back from invalid path"), }; } "tests/inputs/zip64.zip" => { tree.lookup("zip64/zero100")?; tree.lookup("zip64/zero4400")?; tree.lookup("zip64/zero5000")?; } wut => unreachable!("{}", wut), }; // Try reading out each file in the archive. // (When the reader gets dropped, the file's CRC32 will be checked // against the one stored in the archive.) tree.files() .map(|e| archive.read(e)) .par_bridge() .try_for_each::<_, Result<()>>(|reader| { let mut sink = io::sink(); io::copy(&mut reader?, &mut sink)?; Ok(()) })?; Ok(()) }